## Walkthrough of how to output helpful insights for ML models

In [752]:
import datarobotx as drx
import datarobot as dr
import pandas as pd
import numpy as np
import datetime

from feature_impact_helpers import *
from lift_chart_helpers import *
from insights_over_time_helpers import *
from data_table_helpers import *

# NOTE(review): `reload` is not called by any cell visible here.
from importlib import reload
# Enable IPython's autoreload extension in mode 2 so edits to imported modules
# (e.g. the local *_helpers modules) are picked up without restarting the kernel.
%reload_ext autoreload
%autoreload 2

import warnings
# NOTE(review): this silences every warning for the rest of the session, including
# deprecation warnings from the libraries used below; consider a narrower filter.
warnings.filterwarnings('ignore')

### Authenticate credentials

In [173]:
# Create the DataRobot API client. No endpoint or token is passed here, so the
# credentials must come from outside the notebook
# (presumably environment variables or a drconfig.yaml file; TODO confirm).
dr.Client()

<datarobot.rest.RESTClientObject at 0x134915750>

## 1. Retrieve and prep data

In [877]:
# IDs of the DataRobot project and model this walkthrough targets.
# The IDs in the trailing comments are commented-out alternatives kept for reference.
project_id = '642dbc74ac83191fc267ef3d' # alternative: '64289f6808f30af7340d660c'
model_id = '642dbe6dd3b89b35a56d3ba8' # alternative: '6428a0d8763f72552338e7d6'
# NOTE(review): model_id is not referenced by any other cell visible here.
# deployment_id: leave as None to create a new deployment further down, or set it to
# an existing deployment's ID to reuse that deployment instead.
deployment_id = None

# grab DataRobot project
project = dr.Project.get(project_id)

In [871]:
# Download the project's feature discovery dataset to a local CSV
# (the bare file name is resolved against the current working directory),
# then load that CSV into a pandas DataFrame.
filename = 'feature_discover_data.csv'
project.download_feature_discovery_dataset(file_name=filename)
df = pd.read_csv(filename)

In [872]:
# Keep only rows whose Prediction_Point is on or after 2021-01-01.
# NOTE(review): this comment previously read "after June 2021", which does not match
# the 2021-01-01 cutoff in the code; confirm which cutoff is intended.
df2 = df.loc[pd.to_datetime(df['Prediction_Point'])>=pd.to_datetime('2021-01-01'), :]

### Retrieve DRX model object and deploy model

In [875]:
# Wrap the existing DataRobot project as a DataRobotX AutoML model object.
# NOTE(review): only project_id is passed, so model_id plays no part here;
# presumably drx selects the project's recommended/top model. Confirm.
drx_model = drx.AutoMLModel.from_project_id(project_id)

VBox()

In [878]:
# Create a new deployment when no deployment ID was supplied; otherwise attach to the
# existing deployment through its application URL.
if not deployment_id:
    # The current timestamp is embedded in the new deployment's name.
    today = datetime.datetime.now()
    drx_deployment = drx_model.deploy(name=f'Customer Churn - {today}')
else:
    drx_deployment = drx.Deployment.from_url(f'https://app.datarobot.com/deployments/{deployment_id}/')

VBox()

### Get batch predictions and prediction explanations

In [865]:
# NOTE(review): this sets a private (underscore-prefixed) attribute on the drx Context.
# Judging by its name it turns off the real-time endpoint so the call below goes
# through batch predictions; confirm against the installed drx version.
drx.Context()._deployments_use_rt_endpoint=False
# Score df2 through the deployment, requesting prediction explanations
# (max_explanations=20).
preds = drx_deployment.predict_proba(
    X=df2, 
    max_explanations=20
)

VBox()

In [881]:
# Attach the original feature columns to the predictions, row by row.
# df2's index is reset to 0..n-1 and the join uses the index of both frames, so this
# assumes preds carries a matching 0..n-1 index in the same row order as df2
# (TODO confirm). how='left' keeps every row of preds.
preds2 = preds.merge(
    df2.reset_index(drop=True),
    how='left',
    left_index=True,
    right_index=True,
)

### Use DRX helper functions to melt and reshape our predictions

In [882]:
# Melt the explanation columns of preds2, keeping every original df2 column plus
# 'class_1' (presumably the positive-class probability) as identifier columns.
melted = drx.melt_explanations(preds2, id_vars=[*df2.columns, 'class_1'])
# NOTE(review): `reshaped` is not used by any later cell visible here.
reshaped = drx.featurize_explanations(preds2)

## 2. Now it's time to create our insights

### Plot feature impact

In [883]:
# aggregate feature impact, drawn from the melted prediction explanations
plot_feature_impact(melted, height=500)

In [887]:
# feature impact split by strength
# n=25 presumably caps the number of features shown; confirm in feature_impact_helpers
plot_signed_feature_impact(melted, height=500, n=25)

In [888]:
# Signed feature impact restricted to rows scored at or above the threshold,
# i.e. rows whose 'class_1' value is >= threshold.
threshold = 0.15
positive_preds = melted[melted['class_1'] >= threshold].copy()
plot_signed_feature_impact(positive_preds, height=500, n=25)

### Plot lift chart and prediction explanations

In [889]:
# plot lift chart from the merged predictions, grouped into 15 bins
plot_lift_chart(preds2, project_id, bins=15)

In [890]:
# plot lift chart with overlaid prediction explanations
# max_features=6 presumably limits how many explanation features are overlaid; confirm in lift_chart_helpers
plot_prediction_explanations_and_lift_chart(melted, project_id, showlegend=False, max_features=6, bins=12)

In [891]:
# Histogram of the 'State' feature, split by predicted class.
# project_id is passed (rather than project.id) so this cell matches every other
# plotting cell and does not depend on the `project` object.
# NOTE(review): cutoff=0.1 differs from the threshold of 0.15 used for the
# positive-class feature impact; confirm this is intentional.
plot_histogram(
    preds2,
    project_id,
    feature='State',
    bins=12,
    cutoff=0.1,
    split_by_predicted_class=True,
    showlegend=True,
    height=600,
)

In [894]:
# plot predictions and prediction explanations over time
# date_col is also reused by the plot_values_over_time cells further down.
date_col = 'Prediction_Point'
# freq='QS' is the pandas quarter-start frequency alias
# (presumably used to bucket date_col; confirm in insights_over_time_helpers).
prep_and_plot_pe_over_time(melted, project_id, date_col, freq='QS', showlegend=False, max_features=6, height=600)

### Output pandas df with highlighted prediction explanations

In [741]:
# Columns retained for the highlighted prediction-explanation table.
cols_to_keep = [
    # row identifier
    'row_id',
    # customer / contract attributes
    'Customer_Name', 'Customer_Since_Date', 'ARR', 'Contract_Duration', 'Renewal_Date',
    'Employee_Count', 'Annual_Revenue', 'Products_Purchased', 'Industry',
    # location
    'Zip_Code', 'City', 'State', 'Latitude', 'Longitude',
    # derived features (bracketed / windowed column names)
    'Customers[Products_Purchased] (latest)',
    'Product_Usage[UserID] (1 month unique count)',
    # explanation columns
    'feature_name', 'strength',
]

In [895]:
# Keep only the columns listed in cols_to_keep. isin() is a membership filter, so any
# listed name that is absent from melted is skipped silently instead of raising.
melted_sample = melted.loc[:, melted.columns.isin(cols_to_keep)]
# sample=10 presumably sets how many rows are displayed (the saved output shows 10);
# confirm in data_table_helpers.
plot_overlaid_prediction_explanations(melted_sample, sample=10)

Unnamed: 0_level_0,Customer_Name,Customer_Since_Date,ARR,Contract_Duration,Renewal_Date,Employee_Count,Annual_Revenue,Industry,State,City,Zip_Code,Latitude,Longitude,Products_Purchased,Product_Usage[UserID] (1 month unique count)
row_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
0,Advanced Analysis Data,2010-08-01,44916,36,2021-08-01,41368,3814084822,healthcare,OH,Toledo,43657,41.652914,-83.537817,enterprise,3.0
1,Advanced Application Consulting,2019-10-01,89315,36,2022-10-01,46506,3246251142,retail,CO,Golden,80419,12.66562,,starter_pack,2.0
2,Advanced Construction Net,2021-12-01,290890,12,2022-12-01,5371,580268357,telecom,TX,Portland,78374,27.881883,,business_essentials,1.0
3,Advanced Consulting Net,2021-07-01,22369,12,2022-07-01,224389,20591295545,manufacturing,PA,Springtown,18081,40.556489,,starter_pack,1.0
4,Advanced Future Pacific,2011-01-01,424657,12,2022-01-01,79230,4786179478,other,NE,Brunswick,68720,42.337469,-97.970608,starter_pack,2.0
5,Advanced Future Pacific,2011-01-01,181765,12,2023-01-01,79230,4786179478,other,NE,Brunswick,68720,42.337469,-97.970608,starter_pack,0.0
6,Advanced Omega,2017-11-01,99865,24,2021-11-01,159844,23454823203,financial services,MO,Maitland,64466,40.200826,-95.074419,business_essentials,3.0
7,Advanced Omega,2017-11-01,227381,12,2022-11-01,159844,23454823203,financial services,MO,Maitland,64466,40.200826,-95.074419,business_essentials,2.0
8,Advanced Software Net,2017-11-01,282127,36,2021-11-01,48665,3764544958,manufacturing,PA,Elkins Park,19027,40.077055,-75.126841,starter_pack,1.0
9,Advanced Speed West,2010-10-01,204048,12,2021-10-01,210252,9336351167,other,TN,Murfreesboro,37132,35.84604,,enterprise,0.0


## Let's look at how our features may have drifted over time

In [847]:
# Plot how one feature's values change over time, bucketed by calendar quarter.
# Depends on date_col, which is defined in the earlier "over time" plotting cell.
# NOTE(review): freq='Q' here vs freq='QS' in that earlier cell; confirm the
# difference is intended.
# NOTE(review): 'Product_Usage (1 month count)' differs from the
# 'Product_Usage[UserID] (1 month unique count)' column kept for the table; confirm
# this column exists in melted.
plot_values_over_time(
    melted, 
    project_id=project_id,
    freq='Q',
    date_col=date_col, 
    feature='Product_Usage (1 month count)',
    showlegend=True,
)

In [851]:
# Same plot for 'Products_Purchased', using freq='MS' (pandas month-start alias).
# Depends on date_col, which is defined in the earlier "over time" plotting cell.
plot_values_over_time(
    melted, 
    project_id=project_id,
    freq='MS',
    date_col=date_col, 
    feature='Products_Purchased',
    showlegend=True,
)