## Base Scriptlets Notebook

In [None]:
# Project identity: the domain and project name are reused when the
# contract and data paths are built further down the notebook.
domain, project_name = 'healthcare', 'factory'
# Version label for this notebook run
VERSION = 'v05'

In [None]:
# Set these three values when a GitHub repository is used as the reference
# Domain Contract; they are substituted into the raw-content URL below.
_git_hub = ''
_git_repo = ''
_git_project_path = ''

# Placeholders are filled in order: account, repository, path inside the repository.
DOMAIN_CONTRACTS_URL = "https://raw.githubusercontent.com/{}/{}/master/{}/contracts/".format(
    _git_hub,
    _git_repo,
    _git_project_path,
)

In [None]:
# Display the value of every bare expression in a cell (not only the last one),
# which saves you having to use print for each exposed variable.
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

# core libraries
import numpy as np
import pandas as pd
import os
from pathlib import Path
import seaborn as sns
import matplotlib.pyplot as plt

# matplotlib config: draw figures inline in the notebook output
%matplotlib inline

# Style setup
# NOTE(review): sns.set() runs after the ggplot style is selected and presumably
# overrides parts of it - confirm which of the two looks is intended.
plt.style.use('ggplot')
sns.set()

# Pandas display configuration, applied as (option, value) pairs
for _option_name, _option_value in (
    ('max_colwidth', 200),
    ('display.max_rows', 500),
    ('display.max_columns', 99),
    ('expand_frame_repr', True),
    # Limit float output to 3 decimal places
    ('display.float_format', '{:.3f}'.format),
):
    pd.set_option(_option_name, _option_value)

# Silence all warning messages for the rest of the session
import warnings
warnings.filterwarnings('ignore')

In [None]:
%reload_ext autoreload
%autoreload 2

# Connector Contract
from aistac.handlers.abstract_handlers import ConnectorContract
from aistac.components.aistac_commons import DataAnalytics

# Controller and Events
from ds_engines import Controller
from ds_engines import EventBookPortfolio

# Discovery Components
from ds_discovery import SyntheticBuilder
from ds_discovery import Transition
from ds_discovery import Wrangle
from ds_discovery import FeatureCatalog
from ds_discovery import DataDrift
from ds_discovery.components.commons import Commons
from ds_discovery.components.discovery import DataDiscovery

import aistac
import ds_discovery
import ds_engines

# Report the installed version of each package so a run can be reproduced
print(f'Foundation: {aistac.__version__}')
print(f'Transition: {ds_discovery.__version__}')
print(f'Engines   : {ds_engines.__version__}')

In [None]:
# Clean out any old environments: remove every variable whose name starts with
# 'HADRON' so values left over from a previous run cannot leak into this one.
# The loop walks a snapshot of the names because entries are deleted from
# os.environ inside it; mutating a mapping while iterating it directly is unsafe.
for key in list(os.environ):
    if key.startswith('HADRON'):
        del os.environ[key]

In [None]:
# set environment variables

# NOTE: when running on Windows, PWD doesn't exist so it has to be set manually:
# uncomment the line below and add your jupyter project root path
# os.environ['PWD'] = Path('<<Your_Project_path>>').as_posix()

# Shared root of the contract and data locations. '${PWD}' is stored literally
# in the resulting values; it is not expanded by this cell.
_project_root = Path('${PWD}', domain, project_name)

os.environ['HADRON_PM_PATH'] = (_project_root / 'contracts').as_posix()
# os.environ['HADRON_PM_MODULE'] = 'ds_discovery.handlers.pandas_handlers'
# os.environ['HADRON_PM_PERSIST_HANDLER'] = 'PandasPersistHandler'
os.environ['HADRON_PM_TYPE'] = 'json'


# Pandas
os.environ['HADRON_DEFAULT_PATH'] = (_project_root / 'data' / 'persist').as_posix()
os.environ['HADRON_DEFAULT_MODULE'] = 'ds_discovery.handlers.pandas_handlers'
os.environ['HADRON_DEFAULT_SOURCE_HANDLER'] = 'PandasSourceHandler'
os.environ['HADRON_DEFAULT_PERSIST_HANDLER'] = 'PandasPersistHandler'


# EventBook
# os.environ['HADRON_DEFAULT_PATH'] = 'eb://'
# os.environ['HADRON_DEFAULT_MODULE'] = 'ds_engines.handlers.event_handlers'
# os.environ['HADRON_DEFAULT_SOURCE_HANDLER'] = 'EventSourceHandler'
# os.environ['HADRON_DEFAULT_PERSIST_HANDLER'] = 'EventPersistHandler'

# os.environ['HADRON_TRANSITION_PERSIST_PATH'] = Path('${PWD}', domain, project_name, 'data', 'persist').as_posix()
# os.environ['HADRON_TRANSITION_PERSIST_MODULE'] = 'ds_discovery.handlers.pandas_handlers'
# os.environ['HADRON_TRANSITION_PERSIST_HANDLER'] = 'PandasPersistHandler'


# S3
# os.environ['HADRON_PM_PATH'] = f's3://aistac-discovery-persist/{domain}/{project_name}/contracts'
# os.environ['HADRON_PM_TYPE'] = 'json'

# os.environ['HADRON_DEFAULT_PATH'] = f's3://aistac-discovery-persist/{domain}/{project_name}/data/persist/'
# os.environ['HADRON_DEFAULT_MODULE'] = 'ds_connectors.handlers.aws_s3_handlers'
# os.environ['HADRON_DEFAULT_SOURCE_HANDLER'] = 'AwsS3SourceHandler'
# os.environ['HADRON_DEFAULT_PERSIST_HANDLER'] = 'AwsS3PersistHandler'

# os.environ['HADRON_TRANSITION_SOURCE_PATH'] = f's3://aistac-discovery-persist/{domain}/{project_name}/data/raw/'


# Managed Content
# os.environ['AISTAC_PM_PATH'] = "c12emc:///hadron/hello-world/contracts?api_endpoint=${API_ENDPOINT}&token=${TOKEN}"
# os.environ['AISTAC_PM_TYPE'] = 'json'
# os.environ['AISTAC_PM_MODULE'] = 'ds_connectors.handlers.managed_content_handlers'
# os.environ['AISTAC_PM_HANDLER']= 'ManagedContentPersistHandler'

# os.environ['AISTAC_DEFAULT_PATH'] = "c12emc:///hadron/hello-world/data?api_endpoint=${API_ENDPOINT}&token=${TOKEN}"
# os.environ['AISTAC_DEFAULT_MODULE'] = 'ds_connectors.handlers.managed_content_handlers'
# os.environ['AISTAC_DEFAULT_SOURCE_HANDLER'] = 'ManagedContentSourceHandler'
# os.environ['AISTAC_DEFAULT_PERSIST_HANDLER']= 'ManagedContentPersistHandler'

# run time environment variables
# os.environ['TOKEN'] = '<<Your_API_Token>>'  # never commit a real token: load it from a secret store or the shell environment
# os.environ['API_ENDPOINT'] = 'https://api.prod.accelerators-dci.insights.ai/'
