# Import Libraries

In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
import seaborn as sns
from  sklearn import  datasets 

# Load Data and Exploratory Data Analysis

In [None]:
# The training split is relatively large, so it serves as the whole data corpus here
DATA_URL = 'https://raw.githubusercontent.com/taspett/dataset/master/cs-training.csv'
df = pd.read_csv(DATA_URL)


# Transform Data

In [None]:
# Shorter display names for the raw columns
COLUMN_NAMES = {
    'age': 'Age',
    'SeriousDlqin2yrs': 'Approval',
    'RevolvingUtilizationOfUnsecuredLines': 'Utilization',
    'NumberOfTime30-59DaysPastDueNotWorse': '30-59DaysPastDue',
    'NumberOfOpenCreditLinesAndLoans': 'OpenCreditLinesAndLoans',
    'NumberOfTimes90DaysLate': '90DaysLate',
    'NumberRealEstateLoansOrLines': 'RealEstateLoans',
    'NumberOfTime60-89DaysPastDueNotWorse': '60-89DaysPastDue',
    'NumberOfDependents': 'Dependents',
}

df = (
    df.rename(columns=COLUMN_NAMES)
      # reverse the label: a raw value of 1 becomes Approval 0, everything else becomes 1
      .assign(Approval=lambda frame: np.where(frame.Approval == 1, 0, 1))
      # discard every row that contains a missing value
      .dropna(axis=0)
      # 'Unnamed: 0' is presumably the row index saved in the CSV -- verify
      .drop(columns=['Unnamed: 0'])
)

# Correlation Matrix

In [None]:
def halfCM(df):
  """Show the lower triangle of ``df``'s correlation matrix as an annotated heatmap.

  Parameters
  ----------
  df : pandas.DataFrame
      Frame whose pairwise column correlations (``df.corr()``) are plotted.

  Returns
  -------
  None
      The figure is displayed with ``plt.show()``; nothing is returned.
  """
  corrMatrix = df.corr()
  colormap='PiYG' # https://matplotlib.org/3.1.0/gallery/color/colormap_reference.html
  # Hide the diagonal and upper triangle. The builtin ``bool`` is used because
  # the ``np.bool`` alias was removed in NumPy 1.24 and raises AttributeError there.
  mask = np.triu(np.ones_like(corrMatrix, dtype=bool))
  plt.figure(figsize=(14, 12))
  chart = sns.heatmap(corrMatrix, mask=mask, cmap=colormap, vmax=1.0, vmin=-1.0, annot=True)
  chart.set_xticklabels(chart.get_xticklabels(), rotation=45)
  plt.show()
  return

# Heatmap of the pairwise correlations in the cleaned frame
halfCM(df)


In [None]:
# Remove Correlated Features: two of the past-due count columns are dropped.
# X still contains the 'Approval' column here; it is removed when X and y are made below.
X = df.drop(columns=['60-89DaysPastDue', '90DaysLate'])


In [None]:
# Run Correlation Matrix again, this time on the reduced set of columns
halfCM(X)

# Make X and y, train_test_split

In [None]:
# Make y and X
# The target is the Approval flag; the predictors are the remaining columns of X.
# errors='ignore' keeps this cell re-runnable after 'Approval' has already been
# removed from X by a previous execution.
y = df['Approval']
X = X.drop(columns='Approval', errors='ignore')

# train_test_split is already imported in the first cell, so it is not imported again here.
# Only a small fraction of rows is held out; X_test is what the ICE cells further
# down use as their data.
TEST_SIZE = 0.005
SPLIT_SEED = 42
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=TEST_SIZE, random_state=SPLIT_SEED
)

X_test.shape

# Models

In [None]:
# from sklearn import preprocessing
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import accuracy_score

# Fit the forest as a regressor on the 0/1 target; its predictions are rounded below
regr = RandomForestRegressor(n_jobs=-1, random_state=0)
model = regr.fit(X_train, y_train)

# Round the held-out predictions to the nearest integer and score them as labels
preds = model.predict(X_test)
rpreds = np.around(preds)
test_accuracy = accuracy_score(y_test, rpreds)
print("Accuracy = {}".format(test_accuracy))

# Feature Importance


In [None]:
# Feature importance dataframe: one row per training column, paired with the
# importance reported by the fitted model. X_train.columns is read directly
# rather than rebinding `df`, so the cleaned DataFrame built earlier in the
# notebook is not overwritten when this cell runs.
imp_df = pd.DataFrame({'feature': X_train.columns,
                       'importance': model.feature_importances_})

# Reorder by importance (ascending, so the largest bar ends up at the top of the plot)
ordered_df = imp_df.sort_values(by='importance')
imp_range=range(1,len(imp_df.index)+1)  # NOTE(review): not read anywhere in this cell

plt.figure(figsize=(8,6))

## Horizontal bar plot, one bar per feature (no confidence intervals are drawn)
height = ordered_df['importance']
bars = ordered_df['feature']
y_pos = np.arange(len(bars))

plt.rcParams.update({'font.size': 12})

plt.barh(y_pos, height)
plt.yticks(y_pos, bars)
plt.xlabel("Importance")

plt.tight_layout()
plt.show()


# My ICE Plots

Code adapted from PyCEbox: https://github.com/AustinRochford/PyCEbox

## Import Pycebox (need ice)

In [None]:
!pip install pycebox

# Just need ice
from pycebox.ice import ice, ice_plot

## Load my_ice_plot

In [None]:
# Original Copyright (c) 2015 Austin Rochford and contributors.
# This code is a derivative work of code distributed under the  MIT License (MIT)
# https://github.com/AustinRochford/PyCEbox
       
def my_ice_plot(ice_data, 
                INS_df, # Instance of interest
                INS_xy, # feature value and prediction for the instance
                frac_to_plot=1.,
                plot_points=False, point_kwargs=None,
                x_quantile=False, 
                centered=False, centered_quantile=0.,
                color_by=None, cmap=None, ax=None, 
                plot_pdp=False,
                plot_ins=False,
                pdp_kwargs=None, 
                INS_df_kwargs=None, 
                INS_xy_kwargs=None,
                **kwargs):
    """Plot ICE curves, optionally with the PDP and one highlighted instance.

    Parameters
    ----------
    ice_data : pandas.DataFrame
        One column per ICE curve, indexed by the grid of feature values.
        It is sorted by index first when the index is not already increasing.
    INS_df : array-like
        Curve of the instance of interest; drawn against the same x values as
        the ICE curves when ``plot_ins`` is true.
    INS_xy : sequence
        ``INS_xy[0]`` / ``INS_xy[1]`` are the x / y position of the marker
        drawn for the instance when ``plot_ins`` is true.
    frac_to_plot : float
        Fraction of the curves to draw. Below 1 a random subset of columns
        (chosen without replacement via ``np.random.choice``) is plotted.
    plot_points : bool
        Scatter one point per plotted curve; positions come from pycebox's
        ``_get_point_x_ilocs``.
    point_kwargs : dict, optional
        Keyword arguments for that ``ax.scatter`` call.
    x_quantile : bool
        Use the quantiles of the grid (pycebox's ``_get_quantiles``) as x
        instead of the raw index.
    centered : bool
        Subtract from every curve its value at the grid row whose quantile is
        closest to ``centered_quantile``.
    centered_quantile : float
        Quantile used as the centering reference.
    color_by : str or callable, optional
        A column-level name, or a function applied to a DataFrame built from
        the column index; the resulting values colour the curves via ``cmap``.
    cmap : optional
        Colormap passed to ``matplotlib.cm.ScalarMappable``.
    ax : optional
        Axes to draw on; a new one is created with ``plt.subplots()`` if None.
    plot_pdp : bool
        Also draw ``pdp(ice_data)`` using ``pdp_kwargs``.
    plot_ins : bool
        Also draw ``INS_df`` and the ``INS_xy`` marker using ``INS_df_kwargs``
        and ``INS_xy_kwargs``.
    pdp_kwargs, INS_df_kwargs, INS_xy_kwargs : dict, optional
        Keyword arguments for the respective ``ax.plot`` calls; ``None`` is
        treated as an empty dict.
    **kwargs
        Forwarded to ``ax.plot`` for the ICE curves.

    Returns
    -------
    The axes that were drawn on.

    Raises
    ------
    ValueError
        If ``color_by`` is neither a string nor callable.
    """
    if not ice_data.index.is_monotonic_increasing:
        ice_data = ice_data.sort_index()

    if centered:
        # pycebox's private helper; it is not imported at notebook level
        from pycebox.ice import _get_quantiles
        quantiles = _get_quantiles(ice_data.index)
        centered_quantile_iloc = np.abs(quantiles - centered_quantile).argmin()
        ice_data = ice_data - ice_data.iloc[centered_quantile_iloc]

    if frac_to_plot < 1.:
        n_cols = ice_data.shape[1]
        # np.random.choice needs an integer size; a float product raises TypeError
        n_keep = int(frac_to_plot * n_cols)
        icols = np.random.choice(n_cols, size=n_keep, replace=False)
        plot_ice_data = ice_data.iloc[:, icols]
    else:
        plot_ice_data = ice_data

    if x_quantile:
        from pycebox.ice import _get_quantiles
        x = _get_quantiles(ice_data.index)
    else:
        x = ice_data.index

    if plot_points:
        from pycebox.ice import _get_point_x_ilocs
        point_x_ilocs = _get_point_x_ilocs(plot_ice_data.index, plot_ice_data.columns)
        point_x = x[point_x_ilocs]
        point_y = plot_ice_data.values[point_x_ilocs, np.arange(point_x_ilocs.size)]

    if ax is None:
        _, ax = plt.subplots()

    if color_by is not None:
        # imported here because the notebook's import cell only brings in pyplot
        from matplotlib import cm, colors

        if isinstance(color_by, str):
            colors_raw = plot_ice_data.columns.get_level_values(color_by).values
        elif hasattr(color_by, '__call__'):
            col_df = pd.DataFrame(list(plot_ice_data.columns.values), columns=plot_ice_data.columns.names)
            colors_raw = color_by(col_df)
        else:
            raise ValueError('color_by must be a string or function')

        norm = colors.Normalize(colors_raw.min(), colors_raw.max())
        m = cm.ScalarMappable(norm=norm, cmap=cmap)

        # DataFrame.items() replaces iteritems(), which pandas 2.0 removed
        for color_raw, (_, ice_curve) in zip(colors_raw, plot_ice_data.items()):
            c = m.to_rgba(color_raw)
            ax.plot(x, ice_curve, c=c, zorder=0, **kwargs)
    else:
        ax.plot(x, plot_ice_data, zorder=0, **kwargs)

    if plot_points:
        ax.scatter(point_x, point_y, zorder=30, **(point_kwargs or {}))

    if plot_pdp:
        pdp_kwargs = pdp_kwargs or {}
        pdp_data = pdp(ice_data)
        ax.plot(x, pdp_data, **pdp_kwargs)

    # Plot Instance: its curve first, then the marker at (feature value, prediction)
    if plot_ins:
        ax.plot(x, INS_df, **(INS_df_kwargs or {}))
        ax.plot(INS_xy[0], INS_xy[1], zorder=10, **(INS_xy_kwargs or {}))
    return ax


def pdp(ice_data):
    """Return the row-wise mean of ``ice_data``, i.e. the average curve over all columns."""
    return ice_data.mean(axis=1)



## Function Definitions

In [None]:
# Function Definitions

def get_ins(feature_ice_df, odf, Feature, INS):
  """Extract the ICE curve of a single observation as a DataFrame.

  The row at position ``INS`` of ``odf`` is read with ``Feature`` moved to the
  front, and its values, as a tuple, are used as the column key into
  ``feature_ice_df``.

  Parameters
  ----------
  feature_ice_df : pandas.DataFrame
      ICE frame whose columns are looked up by that tuple
      (presumably the output of pycebox's ``ice`` -- verify against caller).
  odf : pandas.DataFrame
      Data the ICE frame was computed from.
  Feature : str
      Name of the column the ICE curves vary.
  INS : int
      Positional row number of the observation in ``odf``.

  Returns
  -------
  pandas.DataFrame
      The selected column(s) of ``feature_ice_df``.
  """
  reordered = move_col(odf, Feature)
  column_key = tuple(reordered.iloc[INS, :])
  selected = feature_ice_df.loc[:, column_key]
  return pd.DataFrame(selected)

def move_col(df, Col):
  """Return a new frame with column ``Col`` first and the other columns after it.

  The remaining columns keep their original relative order; ``df`` itself is
  left unchanged.
  """
  leading = df[[Col]]
  remaining = df.drop(columns=Col)
  return pd.concat([leading, remaining], axis=1)

def Ins_Feat_Pred(INS, Feat, odf, INS_df):
  """Return ``(feature value, prediction)`` for one observation.

  Parameters
  ----------
  INS : int
      Positional row number of the observation in ``odf``.
  Feat : str
      Column of ``odf`` whose value is looked up.
  odf : pandas.DataFrame
      Data the ICE curves were computed from.
  INS_df : pandas.DataFrame
      Curve of the observation; indexed by feature value, with the
      observation's row values (``Feat`` first) as column key.

  Returns
  -------
  tuple
      The observation's value of ``Feat`` and the entry of ``INS_df`` at that
      value.
  """
  feature_value = odf.iloc[INS, odf.columns.get_loc(Feat)]
  column_key = tuple(move_col(odf, Feat).iloc[INS, :])
  prediction = INS_df.loc[feature_value, column_key]
  return (feature_value, prediction)


## Single Plots

In [None]:
# Row position (in DATA) of the instance to highlight, and the feature to vary
INS = 202
DATA = X_test
Feature = 'Utilization'

sns.set(style="white", font_scale=1.2, rc={"figure.figsize": (12, 9)})

# ICE curves for the chosen feature, computed with pycebox's ice()
feature_ice_df = ice(data=DATA, column=Feature, predict=model.predict)

# Curve of the highlighted instance and its (feature value, prediction) marker
INS_df = get_ins(feature_ice_df, DATA, Feature, INS)
INS_xy = Ins_Feat_Pred(INS, Feature, DATA, INS_df)

# Color names: https://matplotlib.org/gallery/color/named_colors.html
pdp_style = {'c': 'k', 'linewidth': 3, 'alpha': 0.9, 'zorder': 5000}
instance_curve_style = {'c': 'cornflowerblue', 'linewidth': 3, 'marker': 'o',
                        'markersize': 8, 'markerfacecolor': 'darkblue'}
instance_marker_style = {'marker': 'D', 'markersize': 14,
                         'markerfacecolor': 'gold', 'markeredgecolor': 'navy'}

my_ice_plot(
    feature_ice_df,
    INS_df,
    INS_xy,
    linewidth=0.15,
    plot_pdp=True,
    plot_ins=True,
    pdp_kwargs=pdp_style,
    INS_df_kwargs=instance_curve_style,
    INS_xy_kwargs=instance_marker_style,
)

plt.ylabel('Prediction')
plt.xlabel(Feature)
plt.show()

## Multiple ICE plots

In [None]:

# Instance to highlight (row position in DATA), number of features to plot,
# and the shape of the subplot grid
INS = 202
NumF = 6 
Row = 3
Col = 2
DATA = X_test

# Feature names from the feature-importance cell, most important first
rdf = ordered_df['feature'].values[::-1]

feature_names = rdf[0:NumF]

# squeeze=False keeps `axes` two-dimensional even when Row or Col is 1
fig, axes = plt.subplots(Row, Col, figsize=(10,12), squeeze=False)

# Fill the grid row by row. zip stops when either the features or the panels run
# out, so NumF > Row*Col no longer raises IndexError (the extra features are skipped).
for feature_name, ax in zip(feature_names, axes.flat):
    feature_ice_df = ice(data=DATA, column=feature_name, predict=model.predict)
    # get Instance df and xy
    INS_df = get_ins(feature_ice_df, DATA, feature_name, INS)
    INS_xy = Ins_Feat_Pred(INS, feature_name, DATA, INS_df)

    # The target axes is passed explicitly, so no pyplot "current axes" call is needed
    my_ice_plot(feature_ice_df, 
                INS_df, 
                INS_xy,
                linewidth=0.15,
                plot_pdp=True,
                plot_ins=True,
                pdp_kwargs={'c': 'k', 'linewidth': 2},           
                INS_df_kwargs={'c': 'cornflowerblue', 'linewidth': 2, 'marker':'o', 'markersize':3, 'markerfacecolor':'navy'},
                INS_xy_kwargs={'marker':'D', 'markersize':8, 'markerfacecolor':'gold', 'markeredgecolor':'navy'},
                ax=ax
                )

    ax.set_xlabel(feature_name)

# One shared y-axis label for the whole figure
fig.text(0.0,0.5, 'Prediction', ha="center", va="center", rotation=90)

plt.suptitle('ICE curves', fontsize=18)
plt.tight_layout(rect=[0, 0, 1, 0.95])
