# Model Interpretation

- Visualize and interpret **partial dependence plots**
- Explain individual predictions with **Shapley value plots**


In [None]:
!pip install category_encoders
!pip install pdpbox
!pip install shap

In [None]:
import pandas as pd
from sklearn.linear_model import Ridge,LinearRegression
from sklearn.ensemble import RandomForestRegressor
from xgboost import XGBRegressor
from sklearn.metrics import mean_absolute_error, r2_score
import matplotlib.pyplot as plt
from pdpbox.pdp import PDPIsolate, PDPInteract
from pdpbox.info_plots import TargetPlot, InteractTargetPlot
import shap
import warnings

In [None]:
# Lowercase, function-style aliases for the pdpbox classes imported above.
# NOTE(review): `pdp_plot` and `pdp_interact_plot` point at the *target* plot
# classes from `pdpbox.info_plots`, not at the partial-dependence classes from
# `pdpbox.pdp` — confirm this is intended before relying on these names.
pdp_isolate, pdp_interact = PDPIsolate, PDPInteract
pdp_plot, pdp_interact_plot = TargetPlot, InteractTargetPlot

In [None]:
warnings.filterwarnings(action='ignore', category=FutureWarning, module='xgboost')

In [None]:
# Let pandas render up to 500 columns before truncating wide DataFrames
pd.options.display.max_columns = 500

# I. Wrangle Data

In [None]:
# Base URL of the course's raw data directory on GitHub; dataset-relative paths are appended to it
DATA_PATH = 'https://raw.githubusercontent.com/bloominstituteoftechnology/DS-Unit-2-Applied-Modeling/master/data/'


In [None]:
def wrangle(filepath):
  """Load the lending data and reduce it to the columns used for modelling.

  Steps: index by issue date (ascending), keep only 36-month loans, convert the
  interest rate from a percent string to float, one-hot encode the loan purpose
  from keywords found in `'title'`, then keep and rename the modelling columns.

  NOTE(review): assumes the raw CSV exposes the columns `issue_d`, `term`,
  `int_rate`, `title`, `annual_inc`, `funded_amnt` and `fico_range_high`, with
  `term` values like ' 36 months' and `int_rate` values like ' 13.56%' —
  verify against the actual file.

  Parameters
  ----------
  filepath : str, path object or file-like
      Anything accepted by `pd.read_csv`.

  Returns
  -------
  pd.DataFrame
      Indexed by `issue_d`, with the columns `purpose_business`,
      `purpose_consolidation`, `purpose_home_purchase`,
      `purpose_major_purchase`, `annual_income`, `loan_amount`,
      `interest_rate` and `credit_score`.
  """
  df = pd.read_csv(filepath)

  # Set issue date to index; a stable sort keeps same-date rows in file order
  # and makes later positional cutoffs chronological.
  df['issue_d'] = pd.to_datetime(df['issue_d'])
  df = df.set_index('issue_d').sort_index(kind='mergesort')

  # subset data to 36-month loans (copy so the column assignments below do not
  # write into a view of the unfiltered frame)
  df = df[df['term'].str.strip() == '36 months'].copy()

  # Turn `'int_rate'` col into float: drop surrounding whitespace and the '%' sign
  df['int_rate'] = (
      df['int_rate'].astype(str).str.strip().str.rstrip('%').astype(float)
  )

  # Consolidate and OHE loan purpose (`'title'`): each keyword is a regex and
  # becomes a 0/1 column flagging titles that match it. Missing titles match nothing.
  title = df['title'].fillna('').astype(str).str.lower()
  keywords = ['business', 'consolidation|credit', 'home', 'car|vacation|medical|moving']
  for pattern in keywords:
    df[pattern] = title.str.contains(pattern).astype(int)

  columns_to_keep = keywords + ['annual_inc', 'funded_amnt', 'int_rate', 'fico_range_high']
  df = df[columns_to_keep]

  # Rename columns to readable names
  return df.rename(columns={'annual_inc': 'annual_income',
                            'business': 'purpose_business',
                            'car|vacation|medical|moving': 'purpose_major_purchase',
                            'consolidation|credit': 'purpose_consolidation',
                            'fico_range_high': 'credit_score',
                            'funded_amnt': 'loan_amount',
                            'home': 'purpose_home_purchase',
                            'int_rate': 'interest_rate'})

# Build the full dataset URL from the base path, then load and wrangle it
lending_club_csv = DATA_PATH + 'lending-club/lending-club-subset.csv'
df = wrangle(lending_club_csv)

# II. Split Data

In [None]:
# Separate the target vector (y) from the feature matrix (X)
target = 'interest_rate'
y = df[target]
X = df.drop(columns=[target])


In [None]:
# Split data into training, validation, test sets.
# Because this is datetime-indexed data we use chronological cutoffs rather than
# a random split: fit on the oldest loans, evaluate on the more recent ones.

# sort_index is the key here! A stable sort keeps X and y row-aligned (they share
# the same index in the same order) and leaves the most recent rows at the tail.
X_sorted = X.sort_index(kind='mergesort')
y_sorted = y.sort_index(kind='mergesort')

# Cutoff positions: oldest 80% -> train, next 10% -> validation, last 10% (the tail) -> test.
# NOTE(review): the 80/10/10 proportions are a default choice — adjust if fixed row counts are preferred.
n_rows = len(X_sorted)
val_start = int(n_rows * 0.8)
test_start = int(n_rows * 0.9)

# Positional (numpy-style) slicing on the sorted data
X_train, y_train = X_sorted.iloc[:val_start], y_sorted.iloc[:val_start]
X_val, y_val = X_sorted.iloc[val_start:test_start], y_sorted.iloc[val_start:test_start]
X_test, y_test = X_sorted.iloc[test_start:], y_sorted.iloc[test_start:]

# Every row must land in exactly one split
assert len(X_train) + len(X_val) + len(X_test) == n_rows



# III. Establish Baseline

In [None]:
# Naive baseline: always predict the mean interest rate of the training set
baseline_rate = y_train.mean()
print('Mean interest rate:', baseline_rate)

y_pred = [baseline_rate] * len(y_train)
baseline_mae = mean_absolute_error(y_train, y_pred)
print('Baseline MAE:', baseline_mae)

# IV. Build Model

In [None]:
# Estimators are used directly (no pipelines).

# Linear models with default hyperparameters
model_lr, model_r = LinearRegression(), Ridge()

# Tree ensembles: fixed seed for reproducibility, all CPU cores for training
model_rf = RandomForestRegressor(n_jobs=-1, random_state=42)
model_xgb = XGBRegressor(n_jobs=-1, random_state=42)

In [None]:
# Fit every candidate on the same training split
for estimator in (model_lr, model_r, model_rf):
  estimator.fit(X_train, y_train)

# Kept as the cell's last expression so the notebook still displays the fitted XGBoost model
model_xgb.fit(X_train, y_train)

# V. Check Metrics

In [None]:
def check_metrics(model):
  """Print training MAE, validation MAE and validation R^2 for a fitted model.

  Reads the notebook-level `X_train`, `y_train`, `X_val` and `y_val` splits.

  Parameters
  ----------
  model : fitted estimator
      Must expose `predict(X)` and `score(X, y)`.
  """
  # Label the report so the outputs of several models can be told apart
  print(type(model).__name__)
  print('Training MAE:', mean_absolute_error(y_train, model.predict(X_train)))
  print('Validation MAE:', mean_absolute_error(y_val, model.predict(X_val)))
  print('Validation R^2:', model.score(X_val, y_val))
  # Two blank lines visually separate consecutive reports
  print()
  print()

# Report metrics for every fitted model, in the order they were trained
models = [model_lr, model_r, model_rf, model_xgb]

for fitted_model in models:
  check_metrics(fitted_model)

# VI. Communicate Results

In [None]:
# Coefficients learned by the fitted linear regression, one per feature column
coefficients = model_lr.coef_
coef = pd.DataFrame(data=coefficients, index=X_train.columns, columns=['coefficients'])

# Horizontal bars sorted from most negative to most positive coefficient
coef['coefficients'].sort_values().plot(kind='barh')
plt.title('Linear Regression Coefficients')
plt.show()

In [None]:
# Feature importances reported by the fitted XGBoost model, one per feature column
importances = model_xgb.feature_importances_
# NOTE(review): the column is labelled 'coefficients' although it holds importances;
# the label is kept so any existing reference to imp['coefficients'] keeps working.
imp = pd.DataFrame(data=importances, index=X_train.columns, columns=['coefficients'])

# Horizontal bars sorted from least to most important feature
imp['coefficients'].sort_values().plot(kind='barh')
plt.title('XGBoost Feature Importances')
plt.show()

In [None]:
from sklearn.tree import plot_tree

plt.figure(figsize=(60,15))

# plot_tree draws a single fitted scikit-learn decision tree, so show the first
# tree of the random forest; only the top levels are drawn to keep it readable.
plot_tree(
     model_rf.estimators_[0],
     max_depth=2,
     # a plain list is accepted by every scikit-learn version, unlike a pandas Index
     feature_names=list(X_train.columns),
     filled=True
);

## Partial Dependence Plots

One Feature

In [None]:
# select one feature

In [None]:
# create an instance of PDP Isolate.

Two Features

In [None]:
# select two features

In [None]:
# create an instance of PDP Interact.


## Shapley Plots

In [None]:
#select one sample/observation

In [None]:
# Shapley Force Plot
# This may not work well in local notebooks
# you may need to install ipywidgets

# Initialize the JavaScript that the force plot needs to render (e.g. `shap.initjs()`)

