In [1]:
# Put the parent directory on the import path so packages that live one level
# above this notebook can be imported by the cells below.
import sys
sys.path.append("../")
# Load IPython's autoreload extension; mode 2 reloads modules before each cell
# executes, so edits to local modules are picked up without a kernel restart.
%load_ext autoreload
%autoreload 2

## [Optional] Enable debug logging
# Uncomment the two lines below to set the root logger to DEBUG.
#import logging
#logging.getLogger().setLevel(logging.DEBUG)


In [64]:
import os
from getpass import getpass

from ailens.client.truera_workspace import BasicAuthentication
from ailens.client.truera_workspace import TrueraWorkspace

# Endpoint of the artifact service this notebook talks to.
# NOTE(review): the endpoint is plain http, so basic-auth credentials are
# presumably sent unencrypted -- switch to https if the server supports it.
connection_string = "http://api-truera.westus2.cloudapp.azure.com:8000/public/artifacts"

# Credentials must not be committed with the notebook. The user name and
# password are read from the environment; if no password is set there, it is
# requested interactively (getpass does not echo or store the input in the cell).
username = os.environ.get("AILENS_USERNAME", "ailens")
password = os.environ.get("AILENS_PASSWORD") or getpass("Password for %s: " % username)
auth = BasicAuthentication(username, password)

# setting the current project is optional
current_project = "fico"

# `tru` is the workspace handle used by every cell below.
tru = TrueraWorkspace(connection_string, auth, current_project)

# Get various metadata from the system
## Get all projects

In [34]:
# List the projects available through the workspace connection; as the last
# expression in the cell, the returned value is shown as the cell output.
tru.get_projects()

['fico',
 'lendingclub_large',
 'sklearn_california_housing',
 'homecredit',
 'lendingclub',
 'PIMA Diabetes',
 'LendingClub with splits',
 'Personal loan GBM',
 'dot',
 'LendingClubMonitoring',
 'FicoNB',
 'Adult Census',
 'simple_testing',
 'pima_diabetes',
 'HBS',
 'LendingClub with versions',
 'Testing DataRobot']

## Get all models in the project

In [35]:
# List the models of the workspace's current project (set to "fico" when the
# workspace was created).
tru.get_models()

['logistic', 'xgboost', 'h2o', 'logistic-pmml']

### NOTE: The current project can be changed at any time

In [55]:
# Make "FicoNB" the current project, then list its models.
# NOTE(review): the second positional argument (True) is presumably a
# "create the project if it does not exist" flag -- confirm against the
# TrueraWorkspace API.
tru.set_current_project("FicoNB", True)
tru.get_models()

['XGBoost_2', 'XGBoost_3', 'XGBoost_1', 'RandomForest_3']

# Train a new model for the project

In [37]:
import pandas as pd
import numpy as np

# read the data
# Columns that are forced to an integer dtype when the CSV is parsed.
int_column_names = ['ExternalRiskEstimate', 'MSinceOldestTradeOpen',
       'MSinceMostRecentTradeOpen', 'AverageMInFile', 'NumSatisfactoryTrades',
       'NumTrades60Ever2DerogPubRec', 'NumTrades90Ever2DerogPubRec',
       'PercentTradesNeverDelq', 'MSinceMostRecentDelq',
       'MaxDelq2PublicRecLast12M', 'MaxDelqEver', 'NumTotalTrades',
       'NumTradesOpeninLast12M', 'PercentInstallTrades',
       'MSinceMostRecentInqexcl7days', 'NumInqLast6M', 'NumInqLast6Mexcl7days',
       'NetFractionRevolvingBurden', 'NetFractionInstallBurden',
       'NumRevolvingTradesWBalance', 'NumInstallTradesWBalance',
       'NumBank2NatlTradesWHighUtilization', 'PercentTradesWBalance']
# Use the builtin `int`: the `np.int` alias was deprecated in NumPy 1.20 and
# removed in 1.24, where it raises AttributeError.
df = pd.read_csv("../fico/original/heloc_dataset_v1.csv",
                 dtype={column: int for column in int_column_names})

# separate label and input
# Encode the label as 1 for 'Good' and 0 for anything else (vectorised
# equivalent of the per-row lambda).
df['RiskPerformance'] = df['RiskPerformance'].eq('Good').astype(int)
Y = df['RiskPerformance']
# Build the feature frame as a new object instead of aliasing `df` and dropping
# in place; `df` therefore keeps the (encoded) label column.
X = df.drop(columns='RiskPerformance')

In [38]:
from sklearn import metrics, preprocessing
from sklearn.model_selection import cross_val_score, train_test_split
from xgboost import XGBClassifier

# Scale the whole feature frame with sklearn's preprocessing.scale.
# NOTE(review): scaling is computed on the full dataset before the split, so
# the held-out rows influence the scaling of the training rows.
X_scaled = preprocessing.scale(X)

# Hold out 40% of the rows for evaluation; the fixed seed keeps the split
# identical between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X_scaled,
    Y.values,
    test_size=0.4,
    random_state=0,
)

# Hyper-parameters passed to the XGBoost classifier.
params_xgb = dict(
    n_estimators=100,
    max_depth=3,
    min_child_weight=5,
    subsample=0.7,
    colsample_bytree=1,
)

# Fit on the training portion.
xgb = XGBClassifier(**params_xgb)
xgb.fit(X_train, y_train)

# Report the classifier's score on the held-out portion.
score = xgb.score(X_test, y_test)
print("Score: %s" % score)

Score: 0.7165391969407265


In [44]:
from sklearn.ensemble import RandomForestClassifier

# Random forests are stochastic; pin the seed (same value as the train/test
# split) so the fitted model and the reported score are reproducible.
clf = RandomForestClassifier(random_state=0)
clf.fit(X_train, y_train)

# Report the classifier's score on the held-out portion.
score = clf.score(X_test, y_test)
print("Score: %s" % score)

Score: 0.7160611854684512


# Save the new data split and models

In [57]:
# Make "FicoNB" the current data collection, then list the data collections.
# NOTE(review): the second positional argument (True) is presumably a
# "create if missing" flag -- confirm against the TrueraWorkspace API.
tru.set_current_data_collection("FicoNB", True)
tru.get_data_collections()

['FicoNB']

In [42]:

# Re-wrap the scaled feature array in a DataFrame so the original column names
# are restored.
df_x_scaled = pd.DataFrame(data=X_scaled, columns=X.columns)
# Upload the full scaled dataset (not only the training rows) together with its
# labels as a split named "train".
# NOTE(review): presumably stored in the current data collection -- confirm.
tru.save_data_split("train", df_x_scaled, label_data=Y, split_type="all")

/tmp/tmpkv_edojv 4752723
/tmp/tmp75i2xv29 20918


In [65]:
# Upload the fitted XGBoost classifier under the name "XGBoost_7".
# Note: `model_name` is a notebook-global that the RandomForest cell reassigns.
model_name = "XGBoost_7"
tru.save_python_model(model_name, xgb)

Done
Verification Done
Put resource done.
Model uploaded to: http://api-truera.westus2.cloudapp.azure.com:8000/p/FicoNB/m/FicoNB*XGBoost_7/


In [46]:
# Upload the fitted random-forest classifier under the name "RandomForest_3".
# Note: this reassigns the notebook-global `model_name`.
model_name = "RandomForest_3"
tru.save_python_model(model_name, clf)

Done
Verification Done
Put resource done.


## Delete models, data splits, or data collections

In [9]:
# DESTRUCTIVE: deletes every model returned for the current project, one
# delete call per model name. Run only when a full clean-up is intended.
for m in tru.get_models():
    tru.delete_model(m)

Delete resource succeeded. Project_id: FicoNB intra_artifact_path: 
Delete resource succeeded. Project_id: FicoNB intra_artifact_path: 
Delete resource succeeded. Project_id: FicoNB intra_artifact_path: 
Delete resource succeeded. Project_id: FicoNB intra_artifact_path: 
Delete resource succeeded. Project_id: FicoNB intra_artifact_path: 
Delete resource succeeded. Project_id: FicoNB intra_artifact_path: 


In [142]:
# DESTRUCTIVE: remove the "train" split, then the data collection selected
# earlier in this notebook.
tru.delete_data_split("train")
# "train" is the name of the split; the data collection used by this notebook
# is "FicoNB", so that is the name to delete here.
# NOTE(review): confirm that delete_data_collection expects the collection name.
tru.delete_data_collection("FicoNB")

Delete resource succeeded. Project_id: FicoNB intra_artifact_path: 
