In [None]:
# Import necessary libraries
%matplotlib inline
import pandas as pd
from amb_sdk.sdk import DarwinSdk
from time import sleep

In [None]:
# Create the Darwin SDK client; every later cell talks to the service through `ds`
ds = DarwinSdk()

In [None]:
# Register user [only needed if you are not yet registered].
# The statements below are wrapped in a string literal so they do not execute;
# remove the surrounding triple quotes and fill in your own values to run them.
"""
api_key = ''
status, msg = ds.auth_login('password', api_key)
if not status:
    print(msg)
status, msg = ds.auth_register_user('username', 'password','email@emailaddress.com')
if not status:
    print(msg)
"""

In [None]:
# Log in as an existing user (the literal arguments here look like placeholders --
# replace them with your own credentials). On failure, print the message that
# comes back alongside the status flag.
status, msg = ds.auth_login_user('username','login')
if not status:
    print(msg)

In [None]:
# Local directory holding the example data -- update this to match your machine.
# Later cells build file names by plain string concatenation (path + 'sets/...'),
# so the trailing slash must be kept.
path = '/Users/kmoore/amb-sdk/sets/risk_example/'

In [None]:
# Upload the failure-date data as dataset 'unittest-failures-data'
(code, response) = ds.upload_dataset(path+'sets/failures.csv', 'unittest-failures-data')
if not code:
    # Surface the failure instead of ignoring it; when the status flag is false,
    # `response` is presumably the SDK's error message (as in the login cell).
    print(response)

In [None]:
# Upload the sensor time-series data as dataset 'unittest-timeseries-data'
(code, response) = ds.upload_dataset(path+'sets/sensor_ts.csv', 'unittest-timeseries-data')
if not code:
    # Surface the failure instead of ignoring it; when the status flag is false,
    # `response` is presumably the SDK's error message (as in the login cell).
    print(response)

In [None]:
# lead_time is expressed in seconds; it is the half-width of the risk index
lead_time = 3600 * 24 * 3.5  # 3.5 days, i.e. half a week
(code, response) = ds.create_risk_info('unittest-failures-data', 'unittest-timeseries-data',
                                        return_column="Date Returned to Service",
                                        shutdown_column="Shutdown Date",
                                        lead_time=lead_time, functional_form="sigmoid")

# Only wait on the job when the request was accepted; otherwise `response` is
# not the job description and indexing it with 'job_name' would fail.
if code:
    ds.wait_for_job(response['job_name'])
else:
    print(response)

In [None]:
# Download the artifact (risk index) produced by the risk-info job.
# NOTE: this rebinds `response`. Re-running this cell on its own would look up
# 'artifact_name' on the download result instead of on the create_risk_info
# response, so re-run the create_risk_info cell first.
status, response = ds.download_artifact(response['artifact_name'])
if not status:
    # Report the failure here rather than letting the next cell fail obscurely
    print(response)

In [None]:
# Load the downloaded risk index; the download result gives its location under 'filename'
df_risk = pd.read_csv(response['filename'])

In [None]:
# Load the sensor time series and parse its timestamp column
# (errors='coerce' turns unparseable values into NaT instead of raising)
df_ts = pd.read_csv(path+'sets/sensor_ts.csv').assign(
    datetime=lambda frame: pd.to_datetime(frame['datetime'], errors='coerce')
)

# Put the risk index next to the sensor readings column-wise,
# then use the timestamp as the index
df = pd.concat([df_ts, df_risk], axis=1).set_index('datetime')

In [None]:
# Plot the 'risk' column over the whole datetime index
df['risk'].plot()

In [None]:
# Load the failure records and show them as the cell output
df_failure = pd.read_csv(path+'sets/failures.csv')
df_failure

In [None]:
# Zoom in on the window around the 2012-02-10 failure date
zoom_start, zoom_end = '2012-01-25', '2012-02-12'
df.loc[zoom_start:zoom_end, 'risk'].plot()

In [None]:
# Save the combined frame (sensor readings plus risk) as a CSV that a model can
# be built on. The bare relative name means the file is written to the current
# working directory. Then preview the first rows.
df.to_csv("assetrisk.csv")
df.head()

In [None]:
# Upload the combined CSV to Darwin for model building and predictions.
# 'assetrisk.csv' is written with a bare relative name (i.e. into the current
# working directory), so it is uploaded from there; prefixing `path` only worked
# when the notebook happened to run inside that directory.
(code, response) = ds.upload_dataset('assetrisk.csv')
if not code:
    # Surface the failure instead of ignoring it; when the status flag is false,
    # `response` is presumably the SDK's error message (as in the login cell).
    print(response)

In [None]:
# Train a model on the uploaded dataset with 'risk' as the target column
target = "risk"
model = "{}_model0".format(target)

status, job_id = ds.create_model(
    dataset_names='assetrisk.csv',
    target=target,
    model_name=model,
    max_train_time='00:10',
)

# On rejection the second return value is printed as-is; otherwise it is the
# job description, and we block until that job has finished.
if not status:
    print(job_id)
else:
    ds.wait_for_job(job_id['job_name'])

In [None]:
# Run predictions on the uploaded dataset with the trained model
status, artifact = ds.run_model('assetrisk.csv', model)
if status:
    sleep(1)  # brief pause before polling the job, as in the original flow
    ds.wait_for_job(artifact['job_name'])
else:
    # The request was rejected: `artifact` is not a job description here, so
    # print it rather than indexing it with 'job_name'.
    print(artifact)

In [None]:
# Download the predictions produced by the run above
status, prediction = ds.download_artifact(artifact['artifact_name'])
if not status:
    # Report the download failure itself. The previous version waited on and
    # printed `job_id`, which belongs to the earlier model-training job and says
    # nothing about this download.
    print(prediction)

In [None]:
# Give the predicted column its own name so it can sit beside the true target
pred_column = target + '_pred'
prediction = prediction.rename(columns={target: pred_column})

# Turn 'datetime' back into a regular column, append the predictions
# column-wise, then index by timestamp again
df = pd.concat([df.reset_index(), prediction], axis=1).set_index('datetime')

In [None]:
# Plot actual vs. predicted risk on the same axes, with a legend so the two
# lines can be told apart (labels follow the order the series are drawn in)
ax = df[target].plot(label='Actual', legend=True)
df[target+'_pred'].plot(ax=ax, label='Predicted', legend=True)

In [None]:
# Optional cleanup: uncomment the lines below to delete all datasets and models
#ds.delete_all_datasets()
#ds.delete_all_models()