# ML Foreclosure Binary Classification - EPOCH 2 data

    target = zeroBalCode (set to object dtype) 
    Binary classification sets zeroBalCode to either "0" or "1" for the 7 possible outcomes
    
    "0" = "Loan Success"
        * 01 = Prepaid or Matured

    "1" =  "Loan Failure" - Most likely loss of money (but not necessarily) one of below conditions:
        * 02 = Third Party Sale
        * 03 = Short Sale
        * 06 = Repurchased
        * 09 = Deed-in-Lieu,REO
        * 15 = Note Sale
        * 16 = Reperforming Loan Sale

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
#!pip install pycaret
from imblearn.over_sampling import RandomOverSampler
from pycaret.classification import *
#from pycaret.regression import *

Using TensorFlow backend.


# Importing the data - only use *.ML.csv files!!!
If you have *.MLReady.csv then [click here](#MLReady)

In [3]:
df=pd.read_csv("data/FM_Acq_Perf_FMAC_EPOCH2_ML.csv")
df.head()

Unnamed: 0.2,Unnamed: 0,Unnamed: 0.1,id,origChannel,origIntRate,origUPB,origDate,firstPmtDate,origLTV,numBorrowers,...,fmacRateMin,fmacRateAvg,fmacRateVolatility,fredRate,rateDiffAbove,rateDiffBelow,rateDiffAvg,rateDiffAbovePct,rateDiffBelowPct,rateDiffAvgPct
0,1133339,1133339,100209956357,2,5.125,348000,2009-02-01,2009-04-01,87.0,1.0,...,5.04,5.13,0.041667,2.87,-0.125,-0.085,-0.005,-0.02381,-0.016865,-0.000975
1,1133340,1133340,100392775700,3,4.625,195000,2009-02-01,2009-04-01,52.0,2.0,...,5.04,5.13,0.041667,2.87,-0.625,0.415,-0.505,-0.119048,0.082341,-0.098441
2,1133341,1133341,100621428854,2,4.875,342000,2009-02-01,2009-04-01,80.0,1.0,...,5.04,5.13,0.041667,2.87,-0.375,0.165,-0.255,-0.071429,0.032738,-0.049708
3,1133342,1133342,100720959726,1,5.375,93000,2009-02-01,2009-05-01,70.0,1.0,...,5.04,5.13,0.041667,2.87,0.125,-0.335,0.245,0.02381,-0.066468,0.047758
4,1133343,1133343,100783704799,1,4.875,182000,2009-02-01,2009-04-01,76.0,2.0,...,5.04,5.13,0.041667,2.87,-0.375,0.165,-0.255,-0.071429,0.032738,-0.049708


In [4]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 119696 entries, 0 to 119695
Data columns (total 60 columns):
Unnamed: 0                    119696 non-null int64
Unnamed: 0.1                  119696 non-null int64
id                            119696 non-null int64
origChannel                   119696 non-null int64
origIntRate                   119696 non-null float64
origUPB                       119696 non-null int64
origDate                      119696 non-null object
firstPmtDate                  119696 non-null object
origLTV                       119696 non-null float64
numBorrowers                  119696 non-null float64
origDebtIncRatio              119696 non-null float64
borrCreditScore               119696 non-null float64
loanPurp                      119696 non-null int64
zipCode                       119696 non-null int64
pMIperct                      119696 non-null float64
mortInsType                   119696 non-null float64
bestCreditScore               119696 non-

## Remove columns not involved in 'Foreclosure or Not' query

In [5]:
df.drop(['Unnamed: 0','Unnamed: 0.1','id','firstPmtDate','rptPeriod','currIntRate','currUPB','loanAge','monMatur',\
         'zeroBalDate','lastPdInstDate','forecloDate','dispDate','forecloCost','propRepCost','recovCosts',\
         'miscCost','holdTaxCost','saleProceed','credEnhProceed','repurchProceed','otherForecloProceed',\
         'nonIntUPB','prinForgivBal','repurchMakeWholeProceedFlg','forecloPrinWriteOffAmnt','servActivIndicator',\
         'deliqGood','deliqBad','deliqMax'],1,inplace=True)

In [6]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 119696 entries, 0 to 119695
Data columns (total 30 columns):
origChannel           119696 non-null int64
origIntRate           119696 non-null float64
origUPB               119696 non-null int64
origDate              119696 non-null object
origLTV               119696 non-null float64
numBorrowers          119696 non-null float64
origDebtIncRatio      119696 non-null float64
borrCreditScore       119696 non-null float64
loanPurp              119696 non-null int64
zipCode               119696 non-null int64
pMIperct              119696 non-null float64
mortInsType           119696 non-null float64
bestCreditScore       119696 non-null float64
worstCreditScore      119696 non-null float64
avgCreditScore        119696 non-null float64
bankNumber            119696 non-null int64
stateNumber           119696 non-null int64
mSA                   119696 non-null int64
zeroBalCode           119696 non-null float64
fmacRateMax           119696 n

## Engineer originDate into Date features

In [7]:
df['origDate'] = pd.to_datetime(df['origDate'])

In [8]:
df['origYear'] = df['origDate'].dt.year
df['origMonth'] = df['origDate'].dt.month

In [9]:
df.drop(['origDate'],1,inplace=True)

In [10]:
df[['origYear','origMonth']].head()

Unnamed: 0,origYear,origMonth
0,2009,2
1,2009,2
2,2009,2
3,2009,2
4,2009,2


# Turn zero Balance Code Into binary "0" GOOD and "1" BAD

In [11]:
df.zeroBalCode.unique()

array([ 9.,  1.,  3., 16.,  6.,  2., 15.])

In [12]:
df['zeroBalCode'] = df.zeroBalCode.replace(1,0)
df['zeroBalCode'] = df.zeroBalCode.replace([9,3,16,6,2,15],1)

In [13]:
df.zeroBalCode.unique()

array([1., 0.])

In [14]:
#Checking datatypes of individual feature
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 119696 entries, 0 to 119695
Data columns (total 31 columns):
origChannel           119696 non-null int64
origIntRate           119696 non-null float64
origUPB               119696 non-null int64
origLTV               119696 non-null float64
numBorrowers          119696 non-null float64
origDebtIncRatio      119696 non-null float64
borrCreditScore       119696 non-null float64
loanPurp              119696 non-null int64
zipCode               119696 non-null int64
pMIperct              119696 non-null float64
mortInsType           119696 non-null float64
bestCreditScore       119696 non-null float64
worstCreditScore      119696 non-null float64
avgCreditScore        119696 non-null float64
bankNumber            119696 non-null int64
stateNumber           119696 non-null int64
mSA                   119696 non-null int64
zeroBalCode           119696 non-null float64
fmacRateMax           119696 non-null float64
fmacRateMin           119696 

# Review values for dates and slice (if needed) the timeframe you want

In [15]:
df.origYear.unique()

array([2009, 2010, 2011, 2012, 2013], dtype=int64)

In [16]:
df['origMonth'].unique()

array([ 2,  3,  5,  4,  6,  7,  8,  9, 10, 11, 12,  1], dtype=int64)

# Coerce the column dtype (int in this case)

In [17]:
df = df.astype({'origLTV':'int','numBorrowers':'int','origDebtIncRatio':'int',\
               'borrCreditScore':'int','mortInsType':'int','bestCreditScore':'int',\
               'worstCreditScore':'int','avgCreditScore':'int','zeroBalCode':'int'})
#,'zeroBalCode':'int'

In [18]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 119696 entries, 0 to 119695
Data columns (total 31 columns):
origChannel           119696 non-null int64
origIntRate           119696 non-null float64
origUPB               119696 non-null int64
origLTV               119696 non-null int32
numBorrowers          119696 non-null int32
origDebtIncRatio      119696 non-null int32
borrCreditScore       119696 non-null int32
loanPurp              119696 non-null int64
zipCode               119696 non-null int64
pMIperct              119696 non-null float64
mortInsType           119696 non-null int32
bestCreditScore       119696 non-null int32
worstCreditScore      119696 non-null int32
avgCreditScore        119696 non-null int32
bankNumber            119696 non-null int64
stateNumber           119696 non-null int64
mSA                   119696 non-null int64
zeroBalCode           119696 non-null int32
fmacRateMax           119696 non-null float64
fmacRateMin           119696 non-null float64
f

<a id='MLReady'></a>

<a id='MLReady'></a>
## *MLReady.csv file loading point
uncomment one of the boxes below

In [19]:
#df.to_csv('D:\FM_FULL_EPOCH2_MLReady.csv')

In [20]:
#df = pd.read_csv('D:\FM_FULL_EPOCH2_MLReady.csv')

# Oversampling (Since we are starting with only 6.3% of our predicted class)

In [21]:
good = df.zeroBalCode.value_counts()[0]
bad = df.zeroBalCode.value_counts()[1]
perct_bad = round(good/bad,2)
print(f'We have {perct_bad}% Foreclosures in our dataset')

We have 6.34% Foreclosures in our dataset


In [22]:
from sklearn.model_selection import train_test_split

training_features, test_features, \
training_target, test_target, = train_test_split(
    df.drop(['zeroBalCode'], axis=1)
    , df['zeroBalCode']
    , test_size = .1
    , random_state=12
)

In [23]:
# Further split the training data into training/test
x_train, x_val, y_train, y_val = train_test_split(
    training_features
    , training_target
    , test_size = .1
    ,random_state=12
)

In [24]:
# For the training data, randomly sample 
ros = RandomOverSampler(sampling_strategy='minority')
x_train_res, y_train_res = ros.fit_sample(x_train, y_train)

In [25]:
print('#############################################')
print('Before oversampling: "Closed" crushes "Default" and causes issues:')
print(training_target.value_counts())
print('')
print('Before oversampling: "Closed" and "Default" are equal')
print(y_train_res.value_counts())

#############################################
Before oversampling: "Closed" crushes "Default" and causes issues:
0    93081
1    14645
Name: zeroBalCode, dtype: int64

Before oversampling: "Closed" and "Default" are equal
1    83757
0    83757
Name: zeroBalCode, dtype: int64


# Recombine X_train_res, y_train_res so PyCaret can deal with one df

In [26]:
y_train_res.shape

(167514,)

In [27]:
df = x_train_res.copy()

In [28]:
df['zeroBalCode'] = y_train_res

In [29]:
df.shape

(167514, 31)

# Review metrics last time before run (to have record of starting values)

In [30]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 167514 entries, 0 to 167513
Data columns (total 31 columns):
origChannel           167514 non-null int64
origIntRate           167514 non-null float64
origUPB               167514 non-null int64
origLTV               167514 non-null int32
numBorrowers          167514 non-null int32
origDebtIncRatio      167514 non-null int32
borrCreditScore       167514 non-null int32
loanPurp              167514 non-null int64
zipCode               167514 non-null int64
pMIperct              167514 non-null float64
mortInsType           167514 non-null int32
bestCreditScore       167514 non-null int32
worstCreditScore      167514 non-null int32
avgCreditScore        167514 non-null int32
bankNumber            167514 non-null int64
stateNumber           167514 non-null int64
mSA                   167514 non-null int64
fmacRateMax           167514 non-null float64
fmacRateMin           167514 non-null float64
fmacRateAvg           167514 non-null float64

In [31]:
df.head()

Unnamed: 0,origChannel,origIntRate,origUPB,origLTV,numBorrowers,origDebtIncRatio,borrCreditScore,loanPurp,zipCode,pMIperct,...,fredRate,rateDiffAbove,rateDiffBelow,rateDiffAvg,rateDiffAbovePct,rateDiffBelowPct,rateDiffAvgPct,origYear,origMonth,zeroBalCode
0,3,4.625,139000,77,2,36,786,2,972,0.0,...,2.68,-0.245,0.155,-0.185,-0.050308,0.032427,-0.038462,2009,4,0
1,2,4.375,398000,70,2,42,712,2,917,0.0,...,2.58,0.005,-0.055,0.029,0.001144,-0.012731,0.006673,2010,9,0
2,1,4.5,730000,59,2,43,672,2,928,0.0,...,3.3,-0.3,0.21,-0.255,-0.0625,0.044586,-0.053628,2011,1,0
3,1,4.5,96000,60,1,42,632,1,973,0.0,...,3.3,-0.3,0.21,-0.255,-0.0625,0.044586,-0.053628,2011,1,0
4,1,3.875,111000,85,1,40,669,2,843,12.0,...,2.01,-0.125,0.105,-0.1175,-0.03125,0.026382,-0.02943,2011,11,1


# Pycaret - Setup with categorical definition

### Use complex setup 

In [32]:
model_setup = setup(
    df
    , target = 'zeroBalCode' # PyCaret will list this as "Label"
    , pca = False 
    , ignore_low_variance = True # Variance is calculated using the ratio of unique values to the number of samples, and the ratio of the most common value to the frequency of the second most common value.
    , normalize = True
    , ignore_features = None
    , handle_unknown_categorical = True
    , remove_outliers = True # outliers from the training data are removed using PCA linear dimensionality reduction using the Singular Value Decomposition technique.
    , bin_numeric_features = [
            'origIntRate'
            , 'origUPB'
            , 'origLTV'
            , 'origDebtIncRatio'
            , 'bestCreditScore'
            , 'avgCreditScore'
            , 'worstCreditScore'
        ] # Set to True to bin numerics using K Means
    , feature_selection = True
    , silent = True
    , profile = False
    , categorical_features = [
            'origChannel'
            , 'numBorrowers'
            , 'loanPurp'
            , 'zipCode'
            , 'bankNumber'
            , 'stateNumber'
            , 'origYear'
            , 'origMonth'
            , 'mSA'
        ]
        , numeric_features = [
            'origIntRate'
            , 'origUPB'
            , 'origLTV'
            , 'pMIperct'
            , 'origDebtIncRatio'
            , 'worstCreditScore'
            , 'bestCreditScore'
            , 'avgCreditScore'
            , 'rateDiffAbove'
            , 'rateDiffBelow'
            , 'rateDiffAvg'
            , 'rateDiffAbovePct'
            , 'rateDiffBelowPct'
            , 'rateDiffAvgPct'
        ]
)


 
Setup Succesfully Completed!


Unnamed: 0,Description,Value
0,session_id,3214
1,Target Type,Binary
2,Label Encoded,
3,Original Data,"(167514, 31)"
4,Missing Values,False
5,Numeric Features,21
6,Categorical Features,9
7,Ordinal Features,False
8,High Cardinality Features,False
9,High Cardinality Method,


## Compare Models with no blacklist exclusions

In [32]:
model_results=compare_models() #blacklist = ['tr','ransac'],'lar','par','huber','llar','lasso','en','ridge','omp','br','svm'])
model_results

Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa
0,Extra Trees Classifier,0.9647,0.9917,0.9696,0.9601,0.9649,0.9294
1,Random Forest Classifier,0.9345,0.9842,0.9664,0.9084,0.9365,0.8691
2,Decision Tree Classifier,0.8847,0.8848,0.9792,0.8235,0.8946,0.7694
3,CatBoost Classifier,0.7748,0.8507,0.8349,0.7452,0.7875,0.5496
4,Light Gradient Boosting Machine,0.746,0.8226,0.8071,0.7191,0.7606,0.4921
5,K Neighbors Classifier,0.7409,0.8173,0.8739,0.6901,0.7712,0.4818
6,Linear Discriminant Analysis,0.7345,0.8079,0.7736,0.7173,0.7444,0.4691
7,Ridge Classifier,0.7344,0.0,0.7738,0.7171,0.7444,0.4689
8,Logistic Regression,0.7333,0.808,0.7685,0.7179,0.7423,0.4667
9,SVM - Linear Kernel,0.7297,0.0,0.7808,0.7091,0.7425,0.4593


## Create models 

### Extra Trees Classifier

In [33]:
et = create_model('et', fold=10)

Unnamed: 0,Accuracy,AUC,Recall,Prec.,F1,Kappa
0,0.9663,0.9913,0.9679,0.9649,0.9664,0.9327
1,0.9649,0.9927,0.9731,0.9574,0.9651,0.9298
2,0.9677,0.9922,0.9715,0.9641,0.9678,0.9354
3,0.9647,0.9919,0.9695,0.9602,0.9648,0.9293
4,0.9642,0.9923,0.9706,0.9583,0.9644,0.9284
5,0.9614,0.9911,0.967,0.9563,0.9616,0.9228
6,0.9679,0.993,0.9735,0.9627,0.9681,0.9358
7,0.9652,0.9911,0.9683,0.9623,0.9653,0.9304
8,0.963,0.9907,0.9688,0.9576,0.9632,0.9259
9,0.9617,0.9906,0.9663,0.9575,0.9619,0.9235


#### CatBoost

In [33]:
catboost =create_model('catboost', fold = 10)

Unnamed: 0,Accuracy,AUC,Recall,Prec.,F1,Kappa
0,0.7712,0.8511,0.8309,0.7421,0.784,0.5425
1,0.7651,0.8408,0.8219,0.7378,0.7776,0.5302
2,0.7708,0.8491,0.8278,0.7429,0.7831,0.5417
3,0.7646,0.8436,0.835,0.7317,0.7799,0.5292
4,0.7699,0.8449,0.8208,0.7448,0.781,0.5399
5,0.7689,0.8497,0.8273,0.7406,0.7815,0.5379
6,0.7819,0.8565,0.8344,0.7548,0.7926,0.5638
7,0.7662,0.844,0.8177,0.741,0.7775,0.5324
8,0.7648,0.8422,0.8136,0.7409,0.7756,0.5296
9,0.7694,0.8502,0.8293,0.7403,0.7823,0.5388


#### XGBoost

In [35]:
#xgboost = create_model('xgboost', fold = 10)

## Tune Models

#### CatBoost

In [34]:
tuned_catboost = tune_model('catboost') # from fold=50

Unnamed: 0,Accuracy,AUC,Recall,Prec.,F1,Kappa
0,0.8615,0.9303,0.9289,0.8184,0.8702,0.723
1,0.8542,0.9232,0.9163,0.8149,0.8626,0.7084
2,0.857,0.9312,0.9253,0.814,0.8661,0.7141
3,0.8556,0.929,0.934,0.8072,0.866,0.7112
4,0.8521,0.9229,0.9163,0.8119,0.861,0.7043
5,0.8548,0.9274,0.9238,0.8117,0.8641,0.7097
6,0.8579,0.9301,0.9222,0.817,0.8664,0.7159
7,0.8521,0.9224,0.923,0.8082,0.8618,0.7043
8,0.859,0.928,0.9214,0.819,0.8672,0.7181
9,0.8623,0.9355,0.9286,0.8196,0.8707,0.7245


### CatBoost had marked improvements on all metrics!!
#### Improved:  Accuracy AUC   Recall    Precision F1   Kappa
#### Declined:     
                    Accuracy	 AUC	 Recall	 Prec.	  F1	    Kappa
    Untuned:  Mean	0.8661	0.7938	0.0730	0.5686	0.1293	0.1014
    Tuned:    Mean	0.8653	0.7916	0.0385	0.5872	0.0723	0.0564

# Interpret Models

## Extra Tree Classifier (et)

In [None]:
#interpret_model(et plot = 'summary')

In [None]:
#interpret_model(et, plot='correlation', feature='avgCreditScore')

# Predict models

In [35]:
pred_holdout = predict_model(catboost)

Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa
0,CatBoost Classifier,0.777,0.8541,0.8304,0.75,0.7882,0.554


In [36]:
pred_holdout_tuned = predict_model(tuned_catboost)

Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa
0,CatBoost Classifier,0.8645,0.9336,0.9286,0.8229,0.8726,0.729


# The golden moment... put 10% sample test data through the model...

In [37]:
predictions = predict_model(tuned_catboost, data=df)

In [39]:
pd.set_option('max_columns',None)
predictions.head()

Unnamed: 0,origChannel,origIntRate,origUPB,origLTV,numBorrowers,origDebtIncRatio,borrCreditScore,loanPurp,zipCode,pMIperct,mortInsType,bestCreditScore,worstCreditScore,avgCreditScore,bankNumber,stateNumber,mSA,fmacRateMax,fmacRateMin,fmacRateAvg,fmacRateVolatility,fredRate,rateDiffAbove,rateDiffBelow,rateDiffAvg,rateDiffAbovePct,rateDiffBelowPct,rateDiffAvgPct,origYear,origMonth,zeroBalCode,Label,Score
0,3,4.625,139000,77,2,36,786,2,972,0.0,0,795,786,790,4,38,38900,4.87,4.78,4.81,0.018828,2.68,-0.245,0.155,-0.185,-0.050308,0.032427,-0.038462,2009,4,0,0,0.3259
1,2,4.375,398000,70,2,42,712,2,917,0.0,0,755,712,733,52,4,40140,4.37,4.32,4.346,0.011574,2.58,0.005,-0.055,0.029,0.001144,-0.012731,0.006673,2010,9,0,0,0.1467
2,1,4.5,730000,59,2,43,672,2,928,0.0,0,748,672,710,4,4,31080,4.8,4.71,4.755,0.019108,3.3,-0.3,0.21,-0.255,-0.0625,0.044586,-0.053628,2011,1,0,0,0.3069
3,1,4.5,96000,60,1,42,632,1,973,0.0,0,632,632,632,54,38,41420,4.8,4.71,4.755,0.019108,3.3,-0.3,0.21,-0.255,-0.0625,0.044586,-0.053628,2011,1,0,1,0.5469
4,1,3.875,111000,85,1,40,669,2,843,12.0,1,669,669,669,54,46,36260,4.0,3.98,3.9925,0.005025,2.01,-0.125,0.105,-0.1175,-0.03125,0.026382,-0.02943,2011,11,1,1,0.8683


In [40]:
results = predictions[['zeroBalCode','Label','Score']]

In [41]:
predictions.Label.value_counts()

0    94550
1    72964
Name: Label, dtype: int64

In [42]:
def calc_confusion(row):
    if ((row['zeroBalCode'] == 0) & (row['Label'] == 0)):
        value = 'TrueNegative'
    elif ((row['zeroBalCode'] == 0) & (row['Label'] == 1)):
        value = 'FalseNegative'
    elif ((row['zeroBalCode'] == 1) & (row['Label'] == 1)):
        value = 'TruePositive'
    elif ((row['zeroBalCode'] == 1) & (row['Label'] == 0)):
        value = 'FalsePositive'
    else:
        value = 'Undefined'
    return value

In [43]:
results['Confusion'] = results.apply(calc_confusion, axis=1)

In [44]:
confusionMatrix = results.Confusion.value_counts().to_dict()

In [46]:
print(f'           ##############################')
print(f'           #             #              #')
print(f'         0 #    {confusionMatrix["TrueNegative"]}    #     {confusionMatrix["FalsePositive"]}    #')
print(f'  True     #             #              #')
print(f'  Class    ##############################')
print(f'           #             #              #')
print(f'         1 #    {confusionMatrix["FalseNegative"]}    #     {confusionMatrix["TruePositive"]}    #')
print(f'           #             #              #')
print(f'           ##############################')
print(f'                  0              1        ')
print(f'                  Predicted Class           ')

           ##############################
           #             #              #
         0 #    66986    #     27564    #
  True     #             #              #
  Class    ##############################
           #             #              #
         1 #    16771    #     56193    #
           #             #              #
           ##############################
                  0              1        
                  Predicted Class           


In [47]:
# 0 or 2 = Predict equals truth, 1 = Predict doesn't equal truth
totalNegative = confusionMatrix['TrueNegative'] + confusionMatrix['FalseNegative']
totalPositive = confusionMatrix['TruePositive'] + confusionMatrix['FalsePositive']
trueNegativePct = confusionMatrix['TrueNegative'] / totalNegative * 100
falseNegativePct = confusionMatrix['FalseNegative'] / totalNegative * 100
truePositivePct = confusionMatrix['TruePositive'] / totalPositive * 100
falsePositivePct = confusionMatrix['FalsePositive'] / totalPositive * 100

print(f'TrueNegative:  {round(trueNegativePct,2)}%')
print(f'FalseNegative:  {round(falseNegativePct,2)}%')
print(f'TruePosiive:   {round(truePositivePct,2)}%')
print(f'FalsePositive: {round(falsePositivePct,2)}%')

TrueNegative:  79.98%
FalseNegative:  20.02%
TruePosiive:   67.09%
FalsePositive: 32.91%


In [49]:
save_model(tuned_catboost, 'tunedCatboost_EPOCH2_Over50_50_052520')

Transformation Pipeline and Model Succesfully Saved
