In [92]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import statsmodels.formula.api as smf
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression, Lasso, Ridge, LassoCV, BayesianRidge, LogisticRegression, LogisticRegressionCV
from sklearn import preprocessing
from sklearn.metrics import accuracy_score
from sklearn.neighbors import NearestNeighbors, KNeighborsClassifier

from dmba import classificationSummary, gainsChart, liftChart, plotDecisionTree
from dmba import regressionSummary, exhaustive_search
from dmba import adjusted_r2_score, AIC_score, BIC_score

from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.model_selection import train_test_split, cross_val_score, GridSearchCV
import re
import math
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

### Data Preprocessing

In [93]:
# Read data file
filename = 'bank.csv'
df = pd.read_csv(filename, delimiter=';')

# Remove "duration" from dataframe
df = df.drop(["duration"], axis=1)

# Create dependent variable "y" as 0s and 1s
y = df["y"] = (df["y"] == "yes") * 1

# Change -1 values in pdays to 999
df.loc[df["pdays"] == -1, "pdays"] = 999

# Create independent variables with one-hot encoding for categorical features
X = pd.get_dummies(df.drop("y", axis=1), drop_first=True)

# Train, test, and split the data 
train_X, valid_X, train_y, valid_y = train_test_split(X, y, test_size=0.4, 
                                                     random_state=1)


In [94]:
X_norm = (X-X.min())/(X.max()-X.min())
# To avoid multicollinearity, we must remove the pdays variable since it has a 98% correlation with poutcome
X_norm = X_norm.drop(["pdays"], axis=1)

train_X_norm, valid_X_norm, train_y_norm, valid_y_norm = train_test_split(X_norm, y, test_size=0.4, 
                                                     random_state=1)


In [95]:
y

0       0
1       0
2       0
3       0
4       0
       ..
4516    0
4517    0
4518    0
4519    0
4520    0
Name: y, Length: 4521, dtype: int64

In [96]:
X.columns

Index(['age', 'balance', 'day', 'campaign', 'pdays', 'previous',
       'job_blue-collar', 'job_entrepreneur', 'job_housemaid',
       'job_management', 'job_retired', 'job_self-employed', 'job_services',
       'job_student', 'job_technician', 'job_unemployed', 'job_unknown',
       'marital_married', 'marital_single', 'education_secondary',
       'education_tertiary', 'education_unknown', 'default_yes', 'housing_yes',
       'loan_yes', 'contact_telephone', 'contact_unknown', 'month_aug',
       'month_dec', 'month_feb', 'month_jan', 'month_jul', 'month_jun',
       'month_mar', 'month_may', 'month_nov', 'month_oct', 'month_sep',
       'poutcome_other', 'poutcome_success', 'poutcome_unknown'],
      dtype='object')

## Correlation Analysis

In [97]:
df.corr()["y"].to_frame().sort_values("y", ascending=False)

Unnamed: 0,y
y,1.0
previous,0.116714
age,0.045092
balance,0.017905
day,-0.011244
campaign,-0.061147
pdays,-0.171931


In [98]:
pddf = pd.DataFrame(y).join(X).corr()#.style.background_gradient(cmap = "Greens")
pddf.style.background_gradient()

Unnamed: 0,y,age,balance,day,campaign,pdays,previous,job_blue-collar,job_entrepreneur,job_housemaid,job_management,job_retired,job_self-employed,job_services,job_student,job_technician,job_unemployed,job_unknown,marital_married,marital_single,education_secondary,education_tertiary,education_unknown,default_yes,housing_yes,loan_yes,contact_telephone,contact_unknown,month_aug,month_dec,month_feb,month_jan,month_jul,month_jun,month_mar,month_may,month_nov,month_oct,month_sep,poutcome_other,poutcome_success,poutcome_unknown
y,1.0,0.045092,0.017905,-0.011244,-0.061147,-0.171931,0.116714,-0.068147,-0.015968,0.004872,0.032634,0.086675,-0.003827,-0.024071,0.047809,-0.010154,-0.007312,0.019886,-0.064643,0.045815,-0.028744,0.056649,-0.00887,0.001303,-0.104683,-0.070517,0.025878,-0.139399,0.012084,0.069884,0.039805,-0.004109,-0.038851,-0.013323,0.102716,-0.102077,-0.014397,0.145964,0.07151,0.051908,0.283481,-0.162038
age,0.045092,1.0,0.08382,-0.017853,-0.005148,-0.025137,-0.003511,-0.049307,0.015638,0.092979,-0.031083,0.453171,0.005505,-0.078352,-0.186691,-0.072723,-0.004259,0.060379,0.275139,-0.410768,-0.106872,-0.094042,0.081111,-0.017885,-0.193888,-0.01125,0.18306,-0.035125,0.069761,0.006492,0.007476,0.006915,0.009329,0.034671,0.057909,-0.119784,0.027659,0.081766,-0.015074,-0.026179,0.048626,-0.017414
balance,0.017905,0.08382,1.0,-0.008677,-0.009976,-0.03999,0.026196,-0.057691,0.014523,0.035016,0.059753,0.068974,-0.002064,-0.033758,0.00554,-0.013779,-0.018902,0.002419,0.017158,0.007525,-0.076574,0.076487,0.01923,-0.070886,-0.050227,-0.071349,0.034025,-0.018892,0.004277,0.047503,-0.007825,-0.027283,-0.090516,0.056023,0.021805,-0.070809,0.120363,0.058694,0.005975,0.000129,0.029999,-0.033843
day,-0.011244,-0.017853,-0.008677,1.0,0.160706,0.064393,-0.059114,-0.027025,-0.015707,-0.011995,0.02151,-0.010072,0.006601,-0.015449,0.007968,0.014718,0.003694,-0.000817,-0.001438,0.006769,0.007745,0.007465,0.000787,-0.013261,-0.031291,-0.004879,0.053527,-0.048143,0.027643,0.000685,-0.258078,0.263857,0.125002,-0.217517,-0.02457,-0.028992,0.095832,0.040235,-0.043666,-0.021062,-0.02772,0.0751
campaign,-0.061147,-0.005148,-0.009976,0.160706,1.0,0.118659,-0.067833,0.008783,-0.01291,-0.015051,0.030158,-0.024452,0.03204,0.002964,-0.017734,-0.008999,-0.006255,-0.007136,0.022,-0.008093,-0.01951,0.022631,-0.020508,-0.012348,-0.003574,0.01712,0.026571,0.004882,0.147728,-0.020229,-0.039565,-0.058947,0.124571,0.044317,-0.004045,-0.076263,-0.083385,-0.058536,-0.040207,-0.030435,-0.058268,0.117375
pdays,-0.171931,-0.025137,-0.03999,0.064393,0.118659,1.0,-0.678335,0.036875,0.02349,0.003385,-0.01775,-0.028566,0.01087,0.030006,-0.034399,-0.005401,0.007514,0.005815,0.013877,-0.031566,0.005723,-0.03564,-0.010427,0.040958,-0.041253,0.042064,-0.048365,0.285404,0.1018,-0.09526,-0.104174,-0.115049,0.158754,0.117814,-0.040689,0.014583,-0.105033,-0.083419,-0.091422,-0.452472,-0.394908,0.986319
previous,0.116714,-0.003511,0.026196,-0.059114,-0.067833,-0.678335,1.0,-0.014861,-0.013226,-0.017453,0.001987,0.006662,0.005772,-0.018623,0.034265,0.009148,-0.005867,-0.002315,-0.01764,0.035558,-0.00841,0.026977,-0.004239,-0.026656,0.038621,-0.022115,0.031797,-0.194142,-0.071315,0.055403,0.065615,0.062173,-0.114808,-0.084432,0.019445,0.027549,0.0554,0.088764,0.059763,0.358382,0.250277,-0.682746
job_blue-collar,-0.068147,-0.049307,-0.057691,-0.027025,0.008783,0.036875,-0.014861,1.0,-0.101057,-0.081987,-0.268679,-0.119095,-0.105655,-0.163973,-0.070779,-0.232701,-0.087808,-0.04736,0.120618,-0.09401,0.04512,-0.321384,0.005109,-0.008048,0.17504,0.017244,-0.015232,0.14573,-0.127641,-0.03429,-0.023787,-0.036573,-0.011576,0.020082,-0.032839,0.165276,-0.060881,-0.044295,-0.02489,-0.011244,-0.042435,0.026502
job_entrepreneur,-0.015968,0.015638,0.014523,-0.015707,-0.01291,0.02349,-0.013226,-0.101057,1.0,-0.031311,-0.102609,-0.045483,-0.04035,-0.062622,-0.027031,-0.088869,-0.033534,-0.018087,0.067564,-0.064801,-0.064775,0.058345,0.02379,0.037983,-0.002577,0.049794,0.008513,0.002056,-0.035459,-0.013095,0.009473,-0.009855,0.028235,-0.020819,-0.020564,0.000128,0.073161,-0.017498,0.000742,-0.001836,-0.026645,0.022264
job_housemaid,0.004872,0.092979,0.035016,-0.011995,-0.015051,0.003385,-0.017453,-0.081987,-0.031311,1.0,-0.083246,-0.0369,-0.032736,-0.050805,-0.02193,-0.072099,-0.027206,-0.014674,0.043095,-0.047196,-0.082916,-0.035585,0.002626,0.001298,-0.070044,-0.016287,0.054477,-0.011882,0.034114,-0.010624,0.003295,-0.021324,0.041201,0.039097,-0.00294,-0.069688,-0.013381,0.021783,0.009499,-0.006137,0.006874,0.008196


In [99]:
pddf = pd.DataFrame(y).join(X_norm).corr()#.style.background_gradient(cmap = "Greens")
pddf.style.background_gradient()

Unnamed: 0,y,age,balance,day,campaign,previous,job_blue-collar,job_entrepreneur,job_housemaid,job_management,job_retired,job_self-employed,job_services,job_student,job_technician,job_unemployed,job_unknown,marital_married,marital_single,education_secondary,education_tertiary,education_unknown,default_yes,housing_yes,loan_yes,contact_telephone,contact_unknown,month_aug,month_dec,month_feb,month_jan,month_jul,month_jun,month_mar,month_may,month_nov,month_oct,month_sep,poutcome_other,poutcome_success,poutcome_unknown
y,1.0,0.045092,0.017905,-0.011244,-0.061147,0.116714,-0.068147,-0.015968,0.004872,0.032634,0.086675,-0.003827,-0.024071,0.047809,-0.010154,-0.007312,0.019886,-0.064643,0.045815,-0.028744,0.056649,-0.00887,0.001303,-0.104683,-0.070517,0.025878,-0.139399,0.012084,0.069884,0.039805,-0.004109,-0.038851,-0.013323,0.102716,-0.102077,-0.014397,0.145964,0.07151,0.051908,0.283481,-0.162038
age,0.045092,1.0,0.08382,-0.017853,-0.005148,-0.003511,-0.049307,0.015638,0.092979,-0.031083,0.453171,0.005505,-0.078352,-0.186691,-0.072723,-0.004259,0.060379,0.275139,-0.410768,-0.106872,-0.094042,0.081111,-0.017885,-0.193888,-0.01125,0.18306,-0.035125,0.069761,0.006492,0.007476,0.006915,0.009329,0.034671,0.057909,-0.119784,0.027659,0.081766,-0.015074,-0.026179,0.048626,-0.017414
balance,0.017905,0.08382,1.0,-0.008677,-0.009976,0.026196,-0.057691,0.014523,0.035016,0.059753,0.068974,-0.002064,-0.033758,0.00554,-0.013779,-0.018902,0.002419,0.017158,0.007525,-0.076574,0.076487,0.01923,-0.070886,-0.050227,-0.071349,0.034025,-0.018892,0.004277,0.047503,-0.007825,-0.027283,-0.090516,0.056023,0.021805,-0.070809,0.120363,0.058694,0.005975,0.000129,0.029999,-0.033843
day,-0.011244,-0.017853,-0.008677,1.0,0.160706,-0.059114,-0.027025,-0.015707,-0.011995,0.02151,-0.010072,0.006601,-0.015449,0.007968,0.014718,0.003694,-0.000817,-0.001438,0.006769,0.007745,0.007465,0.000787,-0.013261,-0.031291,-0.004879,0.053527,-0.048143,0.027643,0.000685,-0.258078,0.263857,0.125002,-0.217517,-0.02457,-0.028992,0.095832,0.040235,-0.043666,-0.021062,-0.02772,0.0751
campaign,-0.061147,-0.005148,-0.009976,0.160706,1.0,-0.067833,0.008783,-0.01291,-0.015051,0.030158,-0.024452,0.03204,0.002964,-0.017734,-0.008999,-0.006255,-0.007136,0.022,-0.008093,-0.01951,0.022631,-0.020508,-0.012348,-0.003574,0.01712,0.026571,0.004882,0.147728,-0.020229,-0.039565,-0.058947,0.124571,0.044317,-0.004045,-0.076263,-0.083385,-0.058536,-0.040207,-0.030435,-0.058268,0.117375
previous,0.116714,-0.003511,0.026196,-0.059114,-0.067833,1.0,-0.014861,-0.013226,-0.017453,0.001987,0.006662,0.005772,-0.018623,0.034265,0.009148,-0.005867,-0.002315,-0.01764,0.035558,-0.00841,0.026977,-0.004239,-0.026656,0.038621,-0.022115,0.031797,-0.194142,-0.071315,0.055403,0.065615,0.062173,-0.114808,-0.084432,0.019445,0.027549,0.0554,0.088764,0.059763,0.358382,0.250277,-0.682746
job_blue-collar,-0.068147,-0.049307,-0.057691,-0.027025,0.008783,-0.014861,1.0,-0.101057,-0.081987,-0.268679,-0.119095,-0.105655,-0.163973,-0.070779,-0.232701,-0.087808,-0.04736,0.120618,-0.09401,0.04512,-0.321384,0.005109,-0.008048,0.17504,0.017244,-0.015232,0.14573,-0.127641,-0.03429,-0.023787,-0.036573,-0.011576,0.020082,-0.032839,0.165276,-0.060881,-0.044295,-0.02489,-0.011244,-0.042435,0.026502
job_entrepreneur,-0.015968,0.015638,0.014523,-0.015707,-0.01291,-0.013226,-0.101057,1.0,-0.031311,-0.102609,-0.045483,-0.04035,-0.062622,-0.027031,-0.088869,-0.033534,-0.018087,0.067564,-0.064801,-0.064775,0.058345,0.02379,0.037983,-0.002577,0.049794,0.008513,0.002056,-0.035459,-0.013095,0.009473,-0.009855,0.028235,-0.020819,-0.020564,0.000128,0.073161,-0.017498,0.000742,-0.001836,-0.026645,0.022264
job_housemaid,0.004872,0.092979,0.035016,-0.011995,-0.015051,-0.017453,-0.081987,-0.031311,1.0,-0.083246,-0.0369,-0.032736,-0.050805,-0.02193,-0.072099,-0.027206,-0.014674,0.043095,-0.047196,-0.082916,-0.035585,0.002626,0.001298,-0.070044,-0.016287,0.054477,-0.011882,0.034114,-0.010624,0.003295,-0.021324,0.041201,0.039097,-0.00294,-0.069688,-0.013381,0.021783,0.009499,-0.006137,0.006874,0.008196
job_management,0.032634,-0.031083,0.059753,0.02151,0.030158,0.001987,-0.268679,-0.102609,-0.083246,1.0,-0.120923,-0.107277,-0.166491,-0.071865,-0.236274,-0.089156,-0.048088,-0.047153,0.044796,-0.407851,0.586132,-0.035407,-0.009598,-0.04946,-0.042099,-0.029221,-0.090939,0.10769,0.013916,-0.008935,0.000844,-0.031656,-0.016425,0.023414,-0.084715,0.060786,0.011666,0.014431,0.017892,-0.015051,-0.015563


# Logistic Regression

In [100]:
# logit_reg = LogisticRegression()
# logit_reg.fit(train_X, train_y)

# Logistic Regression with Normalized Data

In [101]:
logit_reg_norm = LogisticRegression()
logit_reg_norm.fit(train_X_norm, train_y_norm)

In [102]:
# # Numeric features were normalized to compute feature importance on the size of the coefficients
# pd.DataFrame({'abs_coeff': abs(logit_reg.coef_[0]), 'coeff': logit_reg.coef_[0]},
#                     index=X.columns).sort_values("abs_coeff", ascending=False).drop("abs_coeff", axis=1)

In [103]:
# Numeric features were normalized to compute feature importance on the size of the coefficients
pd.DataFrame({'abs_coeff': abs(logit_reg_norm.coef_[0]), 'coeff': logit_reg_norm.coef_[0]},
                    index=X_norm.columns).sort_values("abs_coeff", ascending=False).drop("abs_coeff", axis=1)

Unnamed: 0,coeff
poutcome_success,2.260562
month_oct,1.291936
contact_unknown,-1.006026
month_mar,0.930462
job_unknown,0.91749
job_retired,0.766513
month_nov,-0.652982
loan_yes,-0.594359
month_dec,0.534951
marital_married,-0.495794


In [104]:
logit_reg_norm = LogisticRegression()
logit_reg_norm.fit(train_X, train_y)

print('intercept ', logit_reg_norm.intercept_[0])

# print(pd.DataFrame({'coeff': logit_reg_norm.coef_[0], 
#                     'odds ratio': math.e**logit_reg_norm.coef_[0]}, 
#                     index=X.columns).transpose())
print(pd.DataFrame({'coeff': logit_reg_norm.coef_[0]},
                    index=X.columns).transpose())
                
print('AIC', AIC_score(valid_y, logit_reg_norm.predict(valid_X), df = len(train_X.columns) + 1))

print("training score : %.3f" % (logit_reg_norm.score(X, y)))

# validation set 
logit_reg_pred = logit_reg_norm.predict(valid_X)
logit_reg_proba = logit_reg_norm.predict_proba(valid_X)
logit_result = pd.DataFrame({'actual': valid_y, 
                             'p(0)': [p[0] for p in logit_reg_proba],
                             'p(1)': [p[1] for p in logit_reg_proba],
                             'predicted': logit_reg_pred })

# # display four different cases
# interestingCases = [2764, 932, 2721, 702]
# print(logit_result.loc[interestingCases])

intercept  -0.14034230209582843
            age       balance       day  campaign     pdays  previous  \
coeff  0.008033  2.753375e-07 -0.016277 -0.024539 -0.001605 -0.060909   

       job_blue-collar  job_entrepreneur  job_housemaid  job_management  ...  \
coeff        -0.287952         -0.049657      -0.005492        0.048567  ...   

       month_jul  month_jun  month_mar  month_may  month_nov  month_oct  \
coeff  -0.004813   0.003596    0.10967  -0.404845  -0.157129   0.197669   

       month_sep  poutcome_other  poutcome_success  poutcome_unknown  
coeff    0.03865       -0.069311          0.316774          0.046879  

[1 rows x 41 columns]
AIC 1332.7731375126218
training score : 0.886


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


In [105]:
# logit_reg = LogisticRegression()
# logit_reg.fit(train_X, train_y)

# print('intercept ', logit_reg.intercept_[0])

# # print(pd.DataFrame({'coeff': logit_reg.coef_[0], 
# #                     'odds ratio': math.e**logit_reg.coef_[0]}, 
# #                     index=X.columns).transpose())
# print(pd.DataFrame({'coeff': logit_reg.coef_[0]},
#                     index=X.columns).transpose())
                
# print('AIC', AIC_score(valid_y, logit_reg.predict(valid_X), df = len(train_X.columns) + 1))

# print("training score : %.3f" % (logit_reg.score(X, y)))

# # validation set 
# logit_reg_pred = logit_reg.predict(valid_X)
# logit_reg_proba = logit_reg.predict_proba(valid_X)
# logit_result = pd.DataFrame({'actual': valid_y, 
#                              'p(0)': [p[0] for p in logit_reg_proba],
#                              'p(1)': [p[1] for p in logit_reg_proba],
#                              'predicted': logit_reg_pred })

# # # display four different cases
# # interestingCases = [2764, 932, 2721, 702]
# # print(logit_result.loc[interestingCases])

In [106]:
# # validation set 
# logit_reg_norm_pred = logit_reg.predict(valid_X_norm)
# logit_reg_norm_proba = logit_reg.predict_proba(valid_X_norm)
# logit_norm_result = pd.DataFrame({'actual': valid_y_norm, 
#                              'p(0)': [p[0] for p in logit_reg_norm_proba],
#                              'p(1)': [p[1] for p in logit_reg_norm_proba],
#                              'predicted': logit_reg_norm_pred })

In [107]:
print('intercept ', logit_reg_norm.intercept_[0])

# coefficients in order of importance
pd.DataFrame({'coeff': logit_reg_norm.coef_[0], 'abs_coeff': abs(logit_reg_norm.coef_[0])},
                    index=X_norm.columns).sort_values("abs_coeff", ascending=False).drop("abs_coeff", axis=1)

intercept  -0.14034230209582843


ValueError: Length of values (41) does not match length of index (40)

In [None]:
df1 = logit_result.sort_values(by=['p(1)'], ascending=False)
fig, axes = plt.subplots(nrows=1, ncols=2, figsize=(10, 4))
gainsChart(df1.actual, ax=axes[0])
liftChart(df1['p(1)'], title=False, ax=axes[1])
plt.show()

In [None]:
df1 = logit_norm_result.sort_values(by=['p(1)'], ascending=False)
fig, axes = plt.subplots(nrows=1, ncols=2, figsize=(10, 4))
gainsChart(df1.actual, ax=axes[0])
liftChart(df1['p(1)'], title=False, ax=axes[1])
plt.show()

In [None]:
classificationSummary(valid_y, logit_reg.predict(valid_X))

## Random Forest

In [None]:
rf = RandomForestClassifier(n_estimators=500, random_state=1)
rf.fit(train_X, train_y)

# variable (feature) importance plot
importances = rf.feature_importances_
std = np.std([tree.feature_importances_ for tree in rf.estimators_], axis=0)

df = pd.DataFrame({'feature': train_X.columns, 'importance': importances, 'std': std})
df = df.sort_values('importance')
print(df)

ax = df.plot(kind='barh', xerr='std', x='feature')
ax.set_ylabel('')
plt.show()

# confusion matrix for validation set
classificationSummary(valid_y, rf.predict(valid_X))

In [None]:
df.sort_values("importance", ascending=False)

In [None]:
rf_pred = rf.predict(valid_X)
rf_proba = rf.predict_proba(valid_X)
rf_result = pd.DataFrame({'actual': valid_y, 
                             'p(0)': [p[0] for p in rf_proba],
                             'p(1)': [p[1] for p in rf_proba],
                             'predicted': rf_pred })
# display four different cases
interestingCases = [2764, 932, 2721, 702]
print(rf_result.loc[interestingCases])

In [None]:
df1 = rf_result.sort_values(by=['p(1)'], ascending=False)
fig, axes = plt.subplots(nrows=1, ncols=2, figsize=(10, 4))
gainsChart(df1.actual, ax=axes[0])
liftChart(df1['p(1)'], title=False, ax=axes[1])
plt.show()

In [None]:
classificationSummary(valid_y, rf.predict(valid_X))

## Decision Tree

In [None]:
classTree = DecisionTreeClassifier(random_state=0, max_depth=4)
classTree.fit(train_X, train_y)
plotDecisionTree(classTree, feature_names=train_X.columns, 
                 class_names=classTree.classes_)

In [None]:
# validation set 
classTree_pred = classTree.predict(valid_X)
classTree_proba = classTree.predict_proba(valid_X)
classTree_result = pd.DataFrame({'actual': valid_y, 
                             'p(0)': [p[0] for p in classTree_proba],
                             'p(1)': [p[1] for p in classTree_proba],
                             'predicted': classTree_pred })
# display four different cases
interestingCases = [2764, 932, 2721, 702]
print(classTree_result.loc[interestingCases])

In [None]:
df1 = classTree_result.sort_values(by=['p(1)'], ascending=False)
fig, axes = plt.subplots(nrows=1, ncols=2, figsize=(10, 4))
gainsChart(df1.actual, ax=axes[0])
liftChart(df1['p(1)'], title=False, ax=axes[1])
plt.show()

In [None]:
classificationSummary(valid_y, classTree.predict(valid_X))