## Walkthrough of how to output helpful insights for ML models

In [636]:
import datarobotx as drx
import datarobot as dr
import pandas as pd
import numpy as np
import datetime

from feature_impact_helpers import *
from lift_chart_helpers import *
from insights_over_time_helpers import *
from data_table_helpers import *

from importlib import reload
%reload_ext autoreload
%autoreload 2

import warnings
warnings.filterwarnings('ignore')

### Authenticate credentials

In [173]:
# Instantiate the DataRobot API client with no explicit arguments.
# NOTE(review): endpoint and token are presumably read from environment
# variables or a local config file — confirm before running elsewhere.
dr.Client()

<datarobot.rest.RESTClientObject at 0x134915750>

## 1. Retrieve and prep data

In [223]:
# Load the customer-churn dataset from a local CSV export.
df = pd.read_csv('/Users/justin.swansburg/Downloads/Customer_Churn_with_AFE.csv')

# Keep only the rows whose prediction point falls on or after 1 June 2021.
df2 = df[pd.to_datetime(df['Prediction_Point']) >= pd.to_datetime('2021-06-01')]

In [147]:
# Identifiers of the DataRobot project and model this walkthrough is built on.
project_id = '64289f6808f30af7340d660c'
model_id = '6428a0d8763f72552338e7d6'

In [None]:
# Fetch the DataRobot project object for `project_id` through the API client
project = dr.Project.get(project_id)

### Retrieve DRX model object and deploy model

In [153]:
# Build a DRX AutoML model wrapper from the existing DataRobot project.
# NOTE(review): `model_id` defined earlier is not passed here, so which of the
# project's models gets wrapped is not visible in this notebook — confirm.
drx_model = drx.AutoMLModel.from_project_id(project_id)

VBox()

In [155]:
# Echo the model object so the notebook renders its representation
drx_model

VBox()

VBox()

In [154]:
# Deploy the model under a name that embeds the current local timestamp.
# Alternative kept for reference — attach to an existing deployment instead
# (requires a `deployment_id`, which this notebook does not define):
#   drx_deployment = drx.Deployment.from_url(f'https://staging.datarobot.com/deployments/{deployment_id}/')
today = datetime.datetime.now()
drx_deployment = drx_model.deploy(name=f'Customer Churn - {today}')

VBox()

### Get batch predictions and prediction explanations

In [243]:
# Turn off the real-time endpoint flag on the DRX context before predicting.
# NOTE(review): this sets a private attribute; presumably it routes the call
# through batch predictions (see the section heading) — confirm against the
# installed datarobotx version.
drx.Context()._deployments_use_rt_endpoint=False
# Score the filtered rows, requesting up to 30 explanations per row
preds = drx_deployment.predict_proba(
    X=df2, 
    max_explanations=30
)

VBox()

In [225]:
# Attach the original feature columns to the predictions, joining on index.
# `df2` is re-indexed from 0 first because it is a filtered slice of `df`.
# NOTE(review): assumes `preds` comes back with a 0..n-1 index in the same row
# order as `df2` — confirm.
preds2 = preds.merge(
    df2.reset_index(drop=True),
    left_index=True,
    right_index=True,
    how='left',
)

### Use DRX helper functions to melt and reshape our predictions

In [226]:
# Reshape the explanations two ways with the DRX helpers.
# `melted` keeps every original column plus the 'class_1' prediction as id
# variables (presumably one row per record/explanation pair — confirm).
melted = drx.melt_explanations(
    preds2,
    id_vars=[*df2.columns, 'class_1'],
)
reshaped = drx.featurize_explanations(preds2)

## 2. Now it's time to create our insights

### Plot feature impact

In [663]:
# Aggregate feature impact computed from the full set of melted explanations
plot_feature_impact(melted, height=500)

In [662]:
# Feature impact split by explanation strength.
# NOTE(review): `n=25` presumably caps the number of features shown — confirm
# against the helper's signature.
plot_signed_feature_impact(melted, height=500, n=25)

In [661]:
# Signed feature impact restricted to rows predicted as the positive class,
# i.e. rows whose 'class_1' score is at or above the threshold.
threshold = 0.33
positive_preds = melted[melted['class_1'] >= threshold].copy()
plot_signed_feature_impact(positive_preds, height=500, n=25)

### Plot lift chart and prediction explanations

In [660]:
# Lift chart built from the merged predictions for this project
plot_lift_chart(preds2, project_id)

In [659]:
# Lift chart with prediction explanations overlaid; legend hidden.
# NOTE(review): `max_features=6` presumably limits how many features are
# drawn — confirm against the helper.
plot_prediction_explanations_and_lift_chart(melted, project_id, showlegend=False, max_features=6)

In [658]:
# Histogram of the 'Industry' feature, split by predicted class.
# The project is identified by the `project_id` string, exactly as in every
# other plotting call in this notebook. The previous `project.id` required the
# separate `project = dr.Project.get(project_id)` cell to have been run first
# and raised NameError otherwise; the value passed is the same identifier.
plot_histogram(
    preds2,
    project_id,
    feature='Industry',
    bins=12,
    cutoff=0.1,
    split_by_predicted_class=True,
    showlegend=True,
    height=600,
)

In [657]:
# Predictions and prediction explanations over time, along `date_col`.
# NOTE(review): 'MS' looks like the pandas month-start frequency alias —
# confirm how the helper interprets `freq`.
date_col = 'Prediction_Point'
prep_and_plot_pe_over_time(
    melted,
    project_id,
    date_col,
    freq='MS',
    showlegend=False,
    max_features=5,
    height=600,
)

### Output pandas df with highlighted prediction explanations

In [171]:
# Columns retained for the highlighted prediction-explanation table.
# Order matters only for readability; the list is used as a membership filter.
cols_to_keep = (
    # record identifier and customer basics
    ['row_id', 'Customer_Name', 'Customer_Since_Date']
    # contract details
    + ['ARR', 'Contract_Duration', 'Renewal_Date']
    # company profile
    + ['Employee_Count', 'Annual_Revenue', 'Products_Purchased', 'Industry']
    # location
    + ['Zip_Code', 'City', 'State', 'Latitude', 'Longitude']
    # engineered features
    + [
        'Customers[Products_Purchased] (latest)',
        'Product_Usage[UserID] (1 month unique count)',
    ]
    # explanation columns
    + ['feature_name', 'strength', 'actual_value']
)

In [656]:
# Narrow the melted explanations to the columns listed in `cols_to_keep`
# (names absent from `melted` are simply ignored), then render the table.
is_kept_col = melted.columns.isin(cols_to_keep)
melted_sample = melted.loc[:, is_kept_col]
plot_overlaid_prediction_explanations(melted_sample, sample=25)

## 3. Let's look at how our features may have drifted over time

In [655]:
# Values of the 'Product_Usage (1 month count)' feature over time, using
# `date_col` as the time axis and freq='Q'.
# NOTE(review): 'Q' looks like the pandas quarterly alias — confirm how the
# helper interprets `freq`.
plot_values_over_time(
    melted, 
    project_id=project_id,
    freq='Q',
    date_col=date_col, 
    feature='Product_Usage (1 month count)',
    showlegend=True,
)

In [654]:
# Same over-time view for the 'Industry' feature, with the same `date_col`
# and freq='Q' settings.
plot_values_over_time(
    melted, 
    project_id=project_id,
    freq='Q',
    date_col=date_col, 
    feature='Industry',
    showlegend=True,
)