In [None]:
# Import necessary libraries
%matplotlib inline
import matplotlib.pyplot as plt
import pandas as pd
from IPython.display import Image
from time import sleep
import os
import numpy as np

from amb_sdk.sdk import DarwinSdk

In [None]:
# Set local path to files
# The directory can be overridden with the AMB_SDK_DATA_DIR environment
# variable so the notebook runs on machines other than the author's; when the
# variable is unset the original location is used unchanged.
path = os.environ.get('AMB_SDK_DATA_DIR', '/Users/kmoore/amb-sdk/sets')

In [None]:
# List directory contents as a quick check of which files are available.
# NOTE(review): this is a bare `ls`, so it presumably relies on IPython
# automagic and lists the kernel's working directory rather than `path` --
# confirm that is the intended directory.
ls

In [None]:
# View data snippet
# Load cancer_train.csv from `path` into a DataFrame, then end the cell with
# df.head() so the notebook renders the first rows as the cell output.
df = pd.read_csv(os.path.join(path,'cancer_train.csv'))
df.head()

In [None]:
# Login
# Credentials are read from the DARWIN_USERNAME / DARWIN_PASSWORD environment
# variables so real secrets never have to be typed into (and saved with) the
# notebook. The original placeholder strings remain the fallback values.
ds = DarwinSdk()
ds.set_url('https://darwin-api.sparkcognition.com/v1/')
status, msg = ds.auth_login_user(os.environ.get('DARWIN_USERNAME', 'username'),
                                 os.environ.get('DARWIN_PASSWORD', 'password'))
if not status:
    # On failure the second return value is shown as the error message.
    print(msg)

In [None]:
# Upload dataset
# Send cancer_train.csv (located under `path`) to the service. The call
# returns a (status, payload) pair; when status is falsy the payload is
# printed so the failure is visible in the cell output.
status, dataset = ds.upload_dataset(os.path.join(path,'cancer_train.csv'))
if not status:
    print(dataset)

In [None]:
# Build model
# `target` names the column to predict; change it to any desired column.
# The model name is derived from the target so later cells can refer to it.
target = "Diagnosis"
model = "{}_model0".format(target)

# Start training on the uploaded dataset. An optional feature_eng = 'auc'
# argument was left disabled by the original author.
status, job_id = ds.create_model(
    dataset_names='cancer_train.csv',
    target=target,
    model_name=model,
    max_train_time='00:02',
)

if not status:
    # The second return value is printed when the request is rejected.
    print(job_id)
else:
    # Block until the training job named in the response has finished.
    ds.wait_for_job(job_id['job_name'])

In [None]:
# Retrieve graph of built model
# analyze_model starts a job; its response is indexed for the job name to
# wait on and the artifact name to download afterwards.
status, artifact = ds.analyze_model(model)
if not status:
    # Stop with the service's response instead of continuing to
    # artifact['artifact_name'], which would fail with an unrelated
    # indexing error and hide the real cause.
    raise RuntimeError('analyze_model failed: {}'.format(artifact))
sleep(1)
ds.wait_for_job(artifact['job_name'])
status, model_graph = ds.download_artifact(artifact['artifact_name'])
if not status:
    # Same reporting style as the upload cells: show the returned payload.
    print(model_graph)

In [None]:
# Show model graph
# Render the downloaded artifact as an image: model_graph['filename'] is
# passed to IPython's Image, and as the cell's last expression it is displayed.
Image(model_graph['filename'])

In [None]:
# Test model
# Run the trained model against the training dataset. The response is indexed
# for the job name to wait on (and, in later cells, the artifact name).
status, artifact = ds.run_model('cancer_train.csv', model)
if not status:
    # Fail with the service's response rather than with an indexing error on
    # artifact['job_name'] below.
    raise RuntimeError('run_model failed: {}'.format(artifact))
sleep(1)
# Kept as the last expression so the cell still displays the wait result.
ds.wait_for_job(artifact['job_name'])

In [None]:
# Get predictions
# Download the artifact produced by the preceding run_model job.
status, prediction = ds.download_artifact(artifact['artifact_name'])
if not status:
    # Report a failed download the same way the upload cells do.
    print(prediction)

In [None]:
# Plot predictions vs actual
# - plot regression (alternative kept for reference; not executed)
#prediction.plot(title=target, legend=None)
#df[target].plot(legend=None)
#plt.legend(['Predicted','Actual'])

# Plot categorical
status, prediction = ds.download_artifact(artifact['artifact_name'])
if not status:
    # Without predictions nothing below can work; surface the response.
    raise RuntimeError('download_artifact failed: {}'.format(prediction))
df = pd.read_csv(os.path.join(path, 'cancer_train.csv'))

# Class labels in reverse order of first appearance in the predictions; a
# label's position in `unq` is the y level it is drawn at.
unq = list(prediction[target].unique()[::-1])
# Labels that occur only in the actual data are appended after the predicted
# ones so they get their own y level instead of silently staying at 0 (the
# level of unq[0]). When there are none, the plot is unchanged.
unq += list(df.loc[~df[target].isin(unq), target].dropna().unique())

# Encode every row as the index of its label. Each array is sized from the
# frame it encodes.
p = np.zeros(len(prediction))
a = np.zeros(len(df))
for i, q in enumerate(unq):
    p += i * (prediction[target] == q).values
    a += i * (df[target] == q).values

plt.plot(a)
plt.plot(p)
#plt.axis([0,1000,0,2])
plt.legend(['Actual','Predicted'])
# Label each y level with its class; the trailing ';' suppresses the repr.
plt.yticks(range(len(unq)), unq);

In [None]:
# Upload another dataset
# `test_data` holds the file name so the next cell can refer to the same
# dataset; the file is read from `path`. As with the first upload, the
# returned payload is printed when status is falsy.
test_data = 'cancer_test.csv'
status, dataset = ds.upload_dataset(os.path.join(path, test_data))
if not status:
    print(dataset)

In [None]:
# Test model with another dataset
# Run the trained model against the dataset named by `test_data`.
status, artifact = ds.run_model(test_data, model)
if not status:
    # Fail with the service's response rather than with an indexing error on
    # artifact['job_name'] below.
    raise RuntimeError('run_model failed: {}'.format(artifact))
sleep(1)
# Kept as the last expression so the cell still displays the wait result.
ds.wait_for_job(artifact['job_name'])

In [None]:
# Delete all models and datasets
# NOTE(review): the method names suggest these calls remove every dataset and
# model available to the logged-in user, not only the ones created in this
# notebook -- confirm before running against a shared account.
# The return values are not checked; the result of the last call is what the
# notebook displays as the cell output.
ds.delete_all_datasets()
ds.delete_all_models()