In [1]:
import os
import pickle
import numpy as np
import pandas as pd

from orchestralib import OrchestraClient
from sklearn.ensemble import RandomForestRegressor

In [2]:
# Working directory of the notebook; the model-saving cell builds its output path from it.
cwd = os.getcwd()

# Read the API key from the environment instead of embedding it in the notebook,
# so the secret is not committed or shared together with this file.
# Set it before launching Jupyter, e.g. `export ORCHESTRA_API_KEY=<your key>`.
# NOTE(review): a key was previously hardcoded in this cell; treat it as exposed and rotate it.
orchestra_api_key = os.environ.get('ORCHESTRA_API_KEY')
if not orchestra_api_key:
    raise RuntimeError(
        'Set the ORCHESTRA_API_KEY environment variable before running this notebook.')

# Client used by the cells below to log code blocks, features and models.
orchestra = OrchestraClient(
    api_key=orchestra_api_key,
    organization='my organization',
    environment='development')

In [3]:
# Code Block 1: Generate training data set and save to csv
orchestra.log_code_block('generate_training_data').start()

# Seed NumPy's global RNG so the random numbers match those used in this notebook.
np.random.seed(5757)

# Create a 50-row dataframe with 4 columns of random numbers.
df = pd.DataFrame(data=np.random.random(size=(50, 4)), columns=['A', 'B', 'C', 'D'])
# Add an output column that is the row-wise sum of the other four columns.
df['Y'] = df.sum(axis=1)

# Save to csv in the current working directory. os.path.join picks the correct
# separator for the platform instead of a hard-coded '/'.
# The dataframe index is written as the first column (to_csv default); the
# training cell reads it back with index_col=0.
cwd = os.getcwd()
filename1 = os.path.join(cwd, 'training_data.csv')
df.to_csv(filename1)

# Register the dataframe and its csv location as the output of this code block.
orchestra.log_features(df, filename1, output_for='generate_training_data')
orchestra.log_code_block('generate_training_data').end()

In [5]:
# Code Block 2: Load training data, train ML model, save trained model
orchestra.log_code_block('train_simple_rf').start()

# Load the csv written by the data-generation cell; column 0 holds the saved index.
df = pd.read_csv(filename1, index_col=0)

# Define input and outcome columns: every column except the outcome is an input.
outcome_col = 'Y'
input_cols = df.columns[df.columns != outcome_col]
X = df[input_cols]
Y = df[outcome_col]

# Train ML model (default hyperparameters).
rf = RandomForestRegressor()
rf.fit(X, Y)

# Save pickled ML model to the local file system. The `with` block guarantees the
# file handle is flushed and closed even if pickling raises.
rf_filename = os.path.join(cwd, 'simple_example_rf.pkl')
with open(rf_filename, 'wb') as model_file:
    pickle.dump(rf, model_file)

# Register the trained model, its inputs/outputs, the code block that produced it
# and the path of the pickled file.
orchestra.log_model('simple_rf', rf, input_features=X, output_features=Y, training_code = 'train_simple_rf', path = rf_filename)
orchestra.log_code_block('train_simple_rf').end()

In [6]:
# Fetch the YAML text from the client and print it
# (the captured output below is an MLDependencyMap document).
dependency_map_yaml = orchestra.get_yaml()
print(dependency_map_yaml)

apiVersion: orchestraml.com/models/v1alpha1
kind: MLDependencyMap
metadata:
  environment: development
features:
- name: A
  datatype: float64
  dataSource: /Users/jenngamble/Documents/Novus/Code/orchestralib-examples/Example
    Notebooks/training_data.csv
  createdUsing:
    codeblock: generate_training_data
- name: B
  datatype: float64
  dataSource: /Users/jenngamble/Documents/Novus/Code/orchestralib-examples/Example
    Notebooks/training_data.csv
  createdUsing:
    codeblock: generate_training_data
- name: C
  datatype: float64
  dataSource: /Users/jenngamble/Documents/Novus/Code/orchestralib-examples/Example
    Notebooks/training_data.csv
  createdUsing:
    codeblock: generate_training_data
- name: D
  datatype: float64
  dataSource: /Users/jenngamble/Documents/Novus/Code/orchestralib-examples/Example
    Notebooks/training_data.csv
  createdUsing:
    codeblock: generate_training_data
- name: Y
  datatype: float64
  dataSource: /Users/jenngamble/Documents/Novus/Code/orchestr