# This notebook exports your Rasgo datasets and transforms as YAML (and SQL) files and commits them to your Version Control system

In [None]:
# imports
import os

import pyrasgo
import yaml

In [None]:
# Parameters
# Your PyRasgo API Key.
# Taken from the PYRASGO_API_KEY environment variable when it is set, so the secret does not
# have to be saved in (and committed along with) this notebook. If the variable is not set,
# the literal below is used instead.
PYRASGO_API_KEY = os.environ.get('PYRASGO_API_KEY', '...')
# The path to the repo we'll write files to
WORKING_DIR = '/path/to/repo/dir'
# The branch to commit to. This branch must already exist!
GIT_BRANCH = 'master'

In [None]:
# pyrasgo connection
# Connect to Rasgo with the API key from the parameters cell. The resulting `rasgo` client
# is what the cells below use to fetch datasets and transforms.
rasgo = pyrasgo.connect(PYRASGO_API_KEY)

## Get latest from your branch

In [None]:
# cd into your git dir.
# The git commands and the relative "rasgo/..." paths in the cells below are resolved
# against this directory, so this cell must run before them.
%cd $WORKING_DIR

In [None]:
# Check out the GIT_BRANCH branch and make sure it's up to date:
# fetch the remote's refs, switch to the target branch, then pull its latest commits
!git fetch
!git checkout $GIT_BRANCH 
!git pull 
# NOTE: This should not fail! If this fails, ensure that your local repo does not have any changes that differ from origin

## Generate YML Files

In [None]:
import os

# Get all Rasgo Datasets
datasets = rasgo.get.datasets(include_community=False)

# Rasgo dataset repo directory name
# At the root of your repo, this subdirectory holds the YAML representations of your
# Rasgo datasets (and transforms)
RASGO_DIR_NAME = "rasgo"
datasets_dir = f"{RASGO_DIR_NAME}/datasets"
# makedirs (rather than mkdir) also creates the parent RASGO_DIR_NAME directory on a first
# run, and exist_ok=True keeps the cell safe to re-run
os.makedirs(datasets_dir, exist_ok=True)

# generate and write out yaml file representation for each dataset
for ds in datasets:
    file_path = f"{datasets_dir}/{ds.resource_key}.yaml"
    try:
        ds.generate_yaml(file_path=file_path)
    except Exception as err:
        # If bad stuff happens, let's still write a file. Git holds on to old versions of a file!
        err_msg = f"Failed to generate offline yaml representation of Dataset {ds.resource_key}"
        with open(file_path, "w", encoding="utf-8") as yaml_file:
            yaml_file.write(err_msg)
        # Include the underlying exception so the failure can actually be diagnosed
        print(f"{err_msg}: {err!r}")

In [None]:
# Get all of your Transforms
transforms = rasgo.get.transforms(include_community=False)

transforms_dir = f"{RASGO_DIR_NAME}/transforms"
# makedirs creates any missing parent directories and does not fail when re-run
os.makedirs(transforms_dir, exist_ok=True)

# generate and write out yaml file representation for each transform
for tr in transforms:

    # We'll write Transforms as 2 files: the SQL source and the Transform model itself
    # Let's make a folder to hold them
    transform_folder = f"{transforms_dir}/{tr.name}"
    os.makedirs(transform_folder, exist_ok=True)

    yaml_path = f"{transform_folder}/{tr.name}.yaml"
    sql_path = f"{transform_folder}/{tr.name}.sql"

    try:
        # convert the transform to its YML model
        tr_yaml = tr.to_yaml()

        # Write the model file. An explicit utf-8 encoding keeps the output identical across
        # platforms and avoids encode errors on non-ASCII content.
        with open(yaml_path, "w", encoding="utf-8") as yaml_file:
            yaml_file.write(tr_yaml)

        # Write the source code
        with open(sql_path, "w", encoding="utf-8") as sql_file:
            sql_file.write(tr.source_code)

        print(f"Transform \"{tr.name}\" files written to {transform_folder}")

    except Exception as err:
        # If bad stuff happens, let's still write a file. Git holds on to old versions of a file!
        err_msg = f"Failed to generate offline yaml representation of Transform({tr.name})"
        with open(yaml_path, "w", encoding="utf-8") as yaml_file:
            yaml_file.write(err_msg)
        # Include the underlying exception so the failure can actually be diagnosed
        print(f"{err_msg}: {err!r}")


## Commit Rasgo YAML dataset and transform representations to your repository

In [None]:
from datetime import datetime 

!git add -A 
!git status -s # show which files are being updated
!git commit -m f'Rasgo Datasets Sync | {datetime.now()}'
!git push
