In [None]:
import datarobot as dr
import pandas as pd
import os
from datetime import datetime
import numpy as np
import json

In [None]:
# Connect to DataRobot. No arguments are passed here, so connection settings
# must come from outside this cell (presumably a config file or environment
# variables -- confirm for your setup).
client = dr.Client()
# Deployment whose predictions and custom metric this notebook works with.
deployment_id = '632b31a6c6927eec99dcba24'
deployment = dr.Deployment.get(deployment_id=deployment_id)
# Custom metric that receives the profit values.
custom_metric_id = '642e0120fd42fe807517bd24'
# Relative API route template; the two placeholders are filled with
# deployment_id and custom_metric_id when the metric data is posted.
custom_metric_api_url = 'deployments/{}/customMetrics/{}/fromJSON/'

In [None]:
def prepare_prediction_data(df, sample_size, random_state=None):
    """Draw a random sample of rows to send for scoring.

    Args:
        df: DataFrame holding the candidate rows.
        sample_size: Number of rows requested. If it exceeds the number of
            rows in ``df``, every row is returned (in random order) instead
            of raising.
        random_state: Optional seed forwarded to ``DataFrame.sample`` so the
            draw can be reproduced. The default ``None`` keeps the sampling
            non-deterministic.

    Returns:
        A new DataFrame with at most ``sample_size`` rows, sampled without
        replacement. ``df`` itself is not modified.
    """
    n_rows = sample_size
    if n_rows is not None:
        # DataFrame.sample raises ValueError when n is larger than the
        # population (without replacement), so cap the request.
        n_rows = min(n_rows, len(df))
    return df.sample(n=n_rows, random_state=random_state)

def make_predictions(deployment_id, df):
    """Score a DataFrame against a DataRobot deployment via batch prediction.

    Args:
        deployment_id: ID of the deployment to score against.
        df: DataFrame containing the rows to score.

    Returns:
        The predictions object returned by
        ``dr.BatchPredictionJob.score_pandas`` (the second element of its
        result tuple).
    """
    # Score the frame that was passed in; the job handle is not needed here.
    _job, predictions = dr.BatchPredictionJob.score_pandas(
        deployment_id, df, read_timeout=1000
    )
    return predictions

def prepare_actuals(df):
    """Build the actuals frame from the scored rows.

    Selects the ``ASSOCIATION_ID`` and ``is_bad`` columns of ``df``, renames
    them to ``association_id`` and ``actual_value``, and casts
    ``association_id`` to ``str``. Returns a new DataFrame.
    """
    column_map = {'ASSOCIATION_ID': 'association_id', 'is_bad': 'actual_value'}
    return (
        df[['ASSOCIATION_ID', 'is_bad']]
        .rename(columns=column_map)
        .astype({'association_id': str})
    )

def calculate_profit(row):
    """Compute the profit for one row of joined prediction/actual data.

    Reads ``prediction``, ``actual_value``, ``loan_amt`` and ``int_rate``
    from ``row`` (``int_rate`` is a string that may carry a ``%`` sign).

    Returns:
        * ``0`` when ``prediction`` is 1.
        * ``-loan_amt`` when ``prediction`` is 0 and ``actual_value`` is 1.
        * ``loan_amt * int_rate / 100`` when both are 0.
        * ``None`` for any other combination.
    """
    if row['prediction'] == 1:
        return 0
    if row['prediction'] == 0:
        outcome = row['actual_value']
        if outcome == 1:
            return -row['loan_amt']
        if outcome == 0:
            principal = row['loan_amt']
            rate_pct = float(row['int_rate'].strip('%'))
            return (principal * rate_pct) / 100
    # No branch matched (e.g. unexpected prediction or actual value).
    return None

def prepare_custom_metric_data(predictions, actuals):
    """Join predictions with actuals and derive the per-row profit.

    Args:
        predictions: DataFrame with at least ``ASSOCIATION_ID_x``,
            ``is_bad_PREDICTION``, ``loan_amt`` and ``int_rate`` columns.
        actuals: DataFrame with ``association_id`` and ``actual_value``
            columns.

    Returns:
        DataFrame with columns ``association_id``, ``prediction``,
        ``actual_value``, ``loan_amt``, ``int_rate``, ``profit`` and
        ``prediction_date`` (today's date formatted as ``YYYYMMDD``).
    """
    # Cast the join key to str before merging on it.
    keyed_predictions = predictions.astype({'ASSOCIATION_ID_x': str})
    joined = keyed_predictions.merge(
        actuals,
        how='inner',
        left_on='ASSOCIATION_ID_x',
        right_on='association_id',
    )
    wanted_columns = [
        'association_id',
        'is_bad_PREDICTION',
        'actual_value',
        'loan_amt',
        'int_rate',
    ]
    metric_frame = joined[wanted_columns].rename(
        columns={'is_bad_PREDICTION': 'prediction'}
    )
    row_profit = metric_frame.apply(calculate_profit, axis=1)
    metric_frame = metric_frame.assign(profit=row_profit)
    # Every row is stamped with the date on which this function runs.
    metric_frame['prediction_date'] = datetime.now().strftime('%Y%m%d')
    return metric_frame

def submit_custom_metric_data(df):
    """Post the profit values to the deployment's custom metric endpoint.

    Builds one ``{'timestamp', 'value'}`` bucket per row from the
    ``prediction_date`` and ``profit`` columns of ``df`` and POSTs them
    through the module-level ``client`` to the route built from
    ``custom_metric_api_url``, ``deployment_id`` and ``custom_metric_id``.

    Calls ``raise_for_status()`` on the response, so a failed request
    surfaces as an exception rather than passing silently.
    """
    buckets = []
    for stamp, value in zip(df['prediction_date'], df['profit']):
        buckets.append({'timestamp': stamp, 'value': value})

    endpoint = custom_metric_api_url.format(deployment_id, custom_metric_id)
    payload = {'modelPackageId': '6388db7cfd0045e2d50cdbb5', 'buckets': buckets}
    reply = client.post(endpoint, json=payload)
    reply.raise_for_status()

def write_file(filename, data):
    """Write ``data`` to the log file at ``filename``.

    If the file already exists, ``data`` is appended after a newline;
    otherwise the file is created and ``data`` is written as-is.
    """
    if os.path.isfile(filename):
        mode, payload = 'a', '\n' + data
    else:
        mode, payload = 'w', data
    with open(filename, mode) as handle:
        handle.write(payload)

In [None]:
# Record the invocation time in the log file.
now = datetime.now()
write_file('storage/log.txt', 'Script called at {time}\n'.format(time = now))
# Load the scoring data and draw a 100-row random sample from it.
to_score_df = pd.read_csv('storage/lending_club_scoring_data.csv')
to_score_df_sample = prepare_prediction_data(to_score_df, 100)
# Score the sample, then build the actuals frame from the same rows
# (prepare_actuals reads the ASSOCIATION_ID and is_bad columns).
predictions = make_predictions(deployment_id, to_score_df_sample)
actuals = prepare_actuals(to_score_df_sample)
# Join predictions with actuals and compute the per-row profit.
custom_metric_data = prepare_custom_metric_data(predictions, actuals)
# Keep a local CSV copy of the data that is about to be submitted.
custom_metric_data.to_csv("custom_metric_data.csv")
# Upload the actuals to the deployment, then post the profit values to the
# custom metric.
deployment.submit_actuals(actuals)
submit_custom_metric_data(custom_metric_data)

Streaming DataFrame as CSV data to DataRobot
Created Batch Prediction job ID 654a5fdb333fdd50c1d09313
Waiting for DataRobot to start processing


Job has started processing at DataRobot. Streaming results.
