## Walkthrough of how to output helpful insights for ML models

In [120]:
import datarobotx as drx
import datarobot as dr
import pandas as pd
import numpy as np
import datetime

from feature_impact_helpers import *
from lift_chart_helpers import *
from insights_over_time_helpers import *
from data_table_helpers import *

from importlib import reload
# Enable autoreload (mode 2) so edits to the imported helper modules are picked up
# without restarting the kernel.
%load_ext autoreload
%autoreload 2
import warnings
# NOTE(review): this suppresses every warning for the rest of the session,
# including deprecation notices from the libraries imported above.
warnings.filterwarnings('ignore')

### Authenticate credentials

In [173]:
# No endpoint or token is passed here, so no credentials are embedded in the notebook.
# NOTE(review): presumably the client reads them from environment variables or a
# local DataRobot config file — confirm before running on a new machine.
dr.Client()

<datarobot.rest.RESTClientObject at 0x134915750>

## 1. Retrieve and prep data

In [6]:
# Load the customer churn dataset and keep only the rows whose Prediction_Point
# falls on or after 2022-01-01.
# NOTE(review): the path is absolute and user-specific; this cell only runs on a
# machine that has the CSV at exactly this location.
df = pd.read_csv('/Users/justin.swansburg/Downloads/Customer_Churn_with_AFE.csv')
prediction_points = pd.to_datetime(df['Prediction_Point'])
cutoff = pd.to_datetime('2022-01-01')
df2 = df.loc[prediction_points >= cutoff, :]

In [147]:
# Hard-coded identifiers for this walkthrough. `project_id` is used below to load
# the DRX model and is passed to the plotting helpers.
# NOTE(review): `model_id` is not referenced in the cells shown here — confirm it is still needed.
project_id = '64289f6808f30af7340d660c'
model_id = '6428a0d8763f72552338e7d6' 

### Retrieve DRX model object and deploy model

In [153]:
# Build the DRX AutoMLModel object from the project identified by `project_id`.
drx_model = drx.AutoMLModel.from_project_id(project_id)

VBox()

In [155]:
# Bare expression: shows the model object's rich representation as the cell output.
drx_model

VBox()

VBox()

In [154]:
# Deploy the model under a name that embeds the current timestamp.
# The commented-out line below is an alternative that attaches to an existing
# deployment by URL; it needs a `deployment_id`, which is not defined in the
# cells shown here.
#drx_deployment = drx.Deployment.from_url(f'https://staging.datarobot.com/deployments/{deployment_id}/')
today = datetime.datetime.now()
drx_deployment = drx_model.deploy(name=f'Customer Churn - {today}')

VBox()

### Get batch predictions and prediction explanations

In [156]:
# Score the filtered rows through the deployment, requesting at most 30
# explanations per row.
# NOTE(review): the flag set here is a private Context attribute; judging by its
# name it presumably routes scoring away from the real-time endpoint — confirm.
drx.Context()._deployments_use_rt_endpoint = False
preds = drx_deployment.predict_proba(X=df2, max_explanations=30)

VBox()

In [157]:
# Attach the original columns to the predictions by matching on index.
# df2 is a filtered slice, so its index is reset first to number its rows from 0.
# NOTE(review): assumes `preds` comes back in the same row order as df2 with a
# default 0..n-1 index — confirm, otherwise rows are paired incorrectly.
original_rows = df2.reset_index(drop=True)
preds2 = preds.merge(original_rows, how='left', left_index=True, right_index=True)

### Use DRX helper functions to melt and reshape our predictions

In [158]:
# Produce two reshaped views of the predictions. The melted view keeps every
# original column plus `class_1` as identifier columns; the featurized view is
# built from the same merged frame.
id_columns = [*df2.columns, 'class_1']
melted = drx.melt_explanations(preds2, id_vars=id_columns)
reshaped = drx.featurize_explanations(preds2)

## 2. Now it's time to create our insights

### Plot feature impact

In [159]:
# Aggregate feature impact, plotted from the melted prediction explanations.
plot_feature_impact(melted, height=500)

In [160]:
# Signed feature impact over all scored rows.
# NOTE(review): n=25 presumably caps the number of features shown — confirm in the helper.
plot_signed_feature_impact(melted, height=500, n=25)

In [161]:
# Signed feature impact restricted to rows treated as positive-class predictions:
# a row qualifies when its `class_1` value is at least `threshold`.
threshold = 0.33
is_positive = melted['class_1'] >= threshold
positive_preds = melted.loc[is_positive, :].copy()
plot_signed_feature_impact(positive_preds, height=500, n=25)

### Plot prediction explanations

In [162]:
# Plot the lift chart with prediction explanations overlaid, with the legend hidden.
# NOTE(review): max_features=6 presumably limits how many features are drawn — confirm in the helper.
plot_prediction_explanations_and_lift_chart(melted, project_id, showlegend=False, max_features=6)

In [163]:
# Plot predictions and prediction explanations over time, with `Prediction_Point`
# passed as the date column.
# NOTE(review): freq='MS' looks like the pandas month-start alias (monthly
# buckets) — confirm how the helper interprets it.
date_col = 'Prediction_Point'
prep_and_plot_pe_over_time(
    melted,
    project_id,
    date_col,
    freq='MS',
    showlegend=False,
    max_features=6,
    height=600,
)

### Output pandas df with highlighted prediction explanations

In [171]:
# Columns to retain for the highlighted explanations table: the row identifier,
# customer attributes, two feature columns, and the per-explanation fields
# (feature_name, strength, actual_value).
cols_to_keep = [
    'row_id',
    'Customer_Name', 
    'Customer_Since_Date',
    'ARR', 
    'Contract_Duration', 
    'Renewal_Date', 
    'Employee_Count', 
    'Annual_Revenue', 
    'Products_Purchased', 
    'Industry',
    'Zip_Code', 
    'City', 
    'State', 
    'Latitude', 
    'Longitude',
    'Customers[Products_Purchased] (latest)',
    'Product_Usage[UserID] (1 month unique count)',
    'feature_name',
    'strength',
    'actual_value',
]

In [172]:
# Keep only the columns listed in `cols_to_keep` (in melted's own column order),
# then render the table with overlaid prediction explanations.
keep_mask = melted.columns.isin(cols_to_keep)
melted_sample = melted.loc[:, keep_mask]
plot_overlaid_prediction_explanations(melted_sample, sample=25)

Unnamed: 0_level_0,Customer_Name,Customer_Since_Date,ARR,Contract_Duration,Renewal_Date,Employee_Count,Annual_Revenue,Products_Purchased,Industry,Zip_Code,City,State,Latitude,Longitude,Customers[Products_Purchased] (latest),Product_Usage[UserID] (1 month unique count)
row_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
0,Advanced Galaxy Hill,2015-09-01,231132,12,2022-09-01,34564,3847809710,premium_plus,manufacturing,6607,Bridgeport,CT,41.167041,-73.204835,premium_plus,5.0
1,Advanced General Vision,2011-10-01,221127,36,2022-10-01,38328,4402216586,premium_plus,telecom,30521,Carnesville,GA,34.369827,-83.235155,premium_plus,6.0
2,Advanced Power Application,2020-01-01,216789,24,2023-01-01,34312,4435330004,premium_plus,banking,67206,Wichita,KS,37.692236,-97.337545,premium_plus,7.0
3,Adventure Source,2017-07-01,23349,36,2022-07-01,30461,2090454636,premium_plus,financial services,75790,Van,TX,32.524907,-95.63773,premium_plus,8.0
4,Adventure Virtual,2017-12-01,10000,36,2022-12-01,23681,3024094300,business_critical,healthcare,57317,Bonesteel,SD,43.076386,-98.944535,business_critical,6.0
5,Analysis Advanced,2014-07-01,53310,24,2022-07-01,23064,2645322253,enterprise,retail,83120,Freedom,WY,42.982664,-111.043899,enterprise,3.0
6,Analysis Studio,2016-10-01,221611,12,2022-10-01,28233,1946592209,premium_plus,insurance,26690,Swiss,WV,38.233162,-81.127884,premium_plus,5.0
7,Analysis Vision,2019-12-01,50624,24,2022-12-01,94624,11031498441,enterprise,retail,61924,Chrisman,IL,39.803647,-87.673635,enterprise,12.0
8,Application Consulting Federated,2019-11-01,97826,36,2022-11-01,478,59689330,business_critical,retail,29112,North,SC,33.615708,-81.102041,,13.0
9,Application Research Power,2018-08-01,78795,12,2022-08-01,4820,487767662,premium_plus,healthcare,24005,Roanoke,VA,37.270973,-79.941431,premium_plus,6.0
