# **Fitting Logistic Regression,SVM,Decision Tree Classifier Models on UniversalBank dataset**

## **SetUp**

In [None]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn import preprocessing
from sklearn.impute import SimpleImputer
from sklearn.svm import SVC
from sklearn.metrics import confusion_matrix
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV


In [None]:
np.random.seed(1)

## **Load the Data**

In [None]:
UB=pd.read_csv("./Downloads/UniversalBank.csv")

In [None]:
UB.info()

In [145]:
UB.describe()

Unnamed: 0,ID,Age,Experience,Income,ZIP Code,Family,CCAvg,Education,Mortgage,Personal Loan,Securities Account,CD Account,Online,CreditCard
count,5000.0,5000.0,5000.0,5000.0,5000.0,5000.0,5000.0,5000.0,5000.0,5000.0,5000.0,5000.0,5000.0,5000.0
mean,2500.5,45.3384,20.1046,73.7742,93152.503,2.3964,1.937938,1.881,56.4988,0.096,0.1044,0.0604,0.5968,0.294
std,1443.520003,11.463166,11.467954,46.033729,2121.852197,1.147663,1.747659,0.839869,101.713802,0.294621,0.305809,0.23825,0.490589,0.455637
min,1.0,23.0,-3.0,8.0,9307.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,1250.75,35.0,10.0,39.0,91911.0,1.0,0.7,1.0,0.0,0.0,0.0,0.0,0.0,0.0
50%,2500.5,45.0,20.0,64.0,93437.0,2.0,1.5,2.0,0.0,0.0,0.0,0.0,1.0,0.0
75%,3750.25,55.0,30.0,98.0,94608.0,3.0,2.5,3.0,101.0,0.0,0.0,0.0,1.0,1.0
max,5000.0,67.0,43.0,224.0,96651.0,4.0,10.0,3.0,635.0,1.0,1.0,1.0,1.0,1.0


In [146]:
#checking if there are any na's
UB.isna().sum() 

ID                    0
Age                   0
Experience            0
Income                0
ZIP Code              0
Family                0
CCAvg                 0
Education             0
Mortgage              0
Personal Loan         0
Securities Account    0
CD Account            0
Online                0
CreditCard            0
dtype: int64

In [147]:
#to check if there are any categorical variables
category_var_list = list(UB.select_dtypes(include='object').columns)
category_var_list

[]

In [148]:
#checking counts of data in target variable
UB['CD Account'].value_counts()

0    4698
1     302
Name: CD Account, dtype: int64

By looking at the counts above, we can say that dataset is imbalanced.

## **To Address data imbalance**

In [149]:
cls0 = UB[UB['CD Account']==0]
cls1 = UB[UB['CD Account']==1]

In [150]:
#Oversampling the minority class to make the dataset balanced
from sklearn.utils import resample
UB_minority_resampled = resample(cls1, 
                                 replace=True,     
                                 n_samples=4698,    
                                 random_state=123)

In [151]:
print(cls0.shape,UB_minority_resampled.shape)

(4698, 14) (4698, 14)


In [152]:
#concat both to get the final dataset
UB_df=pd.concat([cls0,UB_minority_resampled])

In [153]:
UB_df.corr()

Unnamed: 0,ID,Age,Experience,Income,ZIP Code,Family,CCAvg,Education,Mortgage,Personal Loan,Securities Account,CD Account,Online,CreditCard
ID,1.0,-0.050048,-0.052607,-0.051682,0.022977,-0.035039,-0.032528,0.015015,-0.007804,-0.076171,-0.0059,-0.013241,-0.0125,0.065827
Age,-0.050048,1.0,0.99429,-0.02457,-0.05317,-0.010719,-0.030699,0.003678,0.008136,0.016674,0.009411,0.012238,0.013203,0.008568
Experience,-0.052607,0.99429,1.0,-0.014554,-0.054266,-0.010217,-0.030962,-0.024488,0.008253,0.018358,0.010187,0.016343,0.016381,0.011596
Income,-0.051682,-0.02457,-0.014554,1.0,-0.001442,-0.067693,0.654625,-0.016276,0.261282,0.684867,-0.015684,0.325348,0.113227,0.096495
ZIP Code,0.022977,-0.05317,-0.054266,-0.001442,1.0,0.001937,0.037238,-0.040839,0.014983,-0.007086,0.054157,0.048307,0.018528,0.041503
Family,-0.035039,-0.010719,-0.010217,-0.067693,0.001937,1.0,-0.073415,0.018735,0.037446,0.097435,-0.039309,0.030974,0.038809,0.031432
CCAvg,-0.032528,-0.030699,-0.030962,0.654625,0.037238,-0.073415,1.0,-0.018659,0.163664,0.482904,-0.003554,0.255327,0.068332,0.053998
Education,0.015015,0.003678,-0.024488,-0.016276,-0.040839,0.018735,-0.018659,1.0,0.062692,0.185192,-0.046437,0.037454,-0.001167,0.014995
Mortgage,-0.007804,0.008136,0.008253,0.261282,0.014983,0.037446,0.163664,0.062692,1.0,0.228812,-0.022352,0.153617,0.059405,0.065788
Personal Loan,-0.076171,0.016674,0.018358,0.684867,-0.007086,0.097435,0.482904,0.185192,0.228812,1.0,0.044699,0.449772,0.127025,0.086281


In [154]:
#dropping the columns that are least significant 
UB_df.drop(['ID', 'ZIP Code'], axis=1, inplace = True)

## **Split Data**

In [155]:
train_df, test_df = train_test_split(UB_df, test_size=0.3)
target='CD Account'
predictors=list(UB_df.columns)
predictors.remove(target)

In [156]:
#Standardizing the numerical columns as svm is scale sensitive
scaler = preprocessing.StandardScaler()
cols_to_stdize = [ 'Age', 'Experience', 
                   'Income', 'Family', 'CCAvg', 
                   'Education', 'Mortgage']                
               
# Transform the predictors of training and validation sets
train_df[cols_to_stdize] = scaler.fit_transform(train_df[cols_to_stdize]) # train_predictors is not a numpy array


test_df[cols_to_stdize] = scaler.transform(test_df[cols_to_stdize]) # validation_target is now a series object

In [157]:
train_X=train_df[predictors]
train_y = train_df[target] 
test_X = test_df[predictors]
test_y = test_df[target]

In [179]:
performance = pd.DataFrame({"model": [], "Accuracy": [], "Precision": [], "Recall": [], "F1": []})

## **Fitting Logistic Regression**

In [181]:
log_reg_model = LogisticRegression(penalty='none', max_iter=900)
_ = log_reg_model.fit(train_X, np.ravel(train_y))
model_preds = log_reg_model.predict(test_X)
c_matrix = confusion_matrix(test_y, model_preds)
TP = c_matrix[1][1]
TN = c_matrix[0][0]
FP = c_matrix[0][1]
FN = c_matrix[1][0]
performance = pd.concat([performance, pd.DataFrame({'model':"default logistic", 
                                                    'Accuracy': [(TP+TN)/(TP+TN+FP+FN)], 
                                                    'Precision': [TP/(TP+FP)], 
                                                    'Recall': [TP/(TP+FN)], 
                                                    'F1': [2*TP/(2*TP+FP+FN)]
                                                     }, index=[0])])

In [182]:
#Liblinear Solver
log_reg_liblin_model = LogisticRegression(solver='liblinear').fit(train_X, np.ravel(train_y))
model_preds = log_reg_liblin_model.predict(test_X)
c_matrix = confusion_matrix(test_y, model_preds)
TP = c_matrix[1][1]
TN = c_matrix[0][0]
FP = c_matrix[0][1]
FN = c_matrix[1][0]
performance = pd.concat([performance, pd.DataFrame({'model':"liblinear logistic", 
                                                    'Accuracy': [(TP+TN)/(TP+TN+FP+FN)], 
                                                    'Precision': [TP/(TP+FP)], 
                                                    'Recall': [TP/(TP+FN)], 
                                                    'F1': [2*TP/(2*TP+FP+FN)]
                                                     }, index=[0])])

In [183]:
#L2 Regularization
log_reg_L2_model = LogisticRegression(penalty='l2', max_iter=1000)
_ = log_reg_L2_model.fit(train_X, np.ravel(train_y))
model_preds = log_reg_L2_model.predict(test_X)
c_matrix = confusion_matrix(test_y, model_preds)
TP = c_matrix[1][1]
TN = c_matrix[0][0]
FP = c_matrix[0][1]
FN = c_matrix[1][0]
performance = pd.concat([performance, pd.DataFrame({'model':"L2 logistic", 
                                                    'Accuracy': [(TP+TN)/(TP+TN+FP+FN)], 
                                                    'Precision': [TP/(TP+FP)], 
                                                    'Recall': [TP/(TP+FN)], 
                                                    'F1': [2*TP/(2*TP+FP+FN)]
                                                     }, index=[0])])

In [184]:
#L1 Regularization
log_reg_L1_model = LogisticRegression(solver='liblinear', penalty='l1')
_ = log_reg_L1_model.fit(train_X, np.ravel(train_y))
model_preds = log_reg_L1_model.predict(test_X)
c_matrix = confusion_matrix(test_y, model_preds)
TP = c_matrix[1][1]
TN = c_matrix[0][0]
FP = c_matrix[0][1]
FN = c_matrix[1][0]
performance = pd.concat([performance, pd.DataFrame({'model':"L1 logistic", 
                                                    'Accuracy': [(TP+TN)/(TP+TN+FP+FN)], 
                                                    'Precision': [TP/(TP+FP)], 
                                                    'Recall': [TP/(TP+FN)], 
                                                    'F1': [2*TP/(2*TP+FP+FN)]
                                                     }, index=[0])])

In [185]:
#ElasticNet Regularization
log_reg_elastic_model = LogisticRegression(solver='saga', penalty='elasticnet', l1_ratio=0.5, max_iter=1000)
_ = log_reg_elastic_model.fit(train_X, np.ravel(train_y))
model_preds = log_reg_elastic_model.predict(test_X)
c_matrix = confusion_matrix(test_y, model_preds)
TP = c_matrix[1][1]
TN = c_matrix[0][0]
FP = c_matrix[0][1]
FN = c_matrix[1][0]
performance = pd.concat([performance, pd.DataFrame({'model':"Elastic logistic", 
                                                    'Accuracy': [(TP+TN)/(TP+TN+FP+FN)], 
                                                    'Precision': [TP/(TP+FP)], 
                                                    'Recall': [TP/(TP+FN)], 
                                                    'F1': [2*TP/(2*TP+FP+FN)]
                                                     }, index=[0])])

In [186]:
#Using Random search on Logistic Regression
score_measure="recall"
kfolds=5
param_grid = {'penalty' : ['l1', 'l2', 'elasticnet', 'none'],
    'C' : np.logspace(-4, 4, 20),
    'solver' : ['lbfgs','newton-cg','liblinear','sag','saga'],
    'max_iter' : np.arange(100,1000)
    }
                      
lr = LogisticRegression()
rand_search = RandomizedSearchCV(estimator = lr, param_distributions=param_grid, cv=kfolds, n_iter=500,
                           scoring=score_measure, verbose=1, n_jobs=-1,  # n_jobs=-1 will utilize all available CPUs 
                           return_train_score=True)

_ = rand_search.fit(train_X, train_y)

print(f"The best {score_measure} score is {rand_search.best_score_}")
print(f"... with parameters: {rand_search.best_params_}")

bestRecallTree = rand_search.best_estimator_
import warnings
warnings.filterwarnings("ignore")

Fitting 5 folds for each of 500 candidates, totalling 2500 fits
The best recall score is 1.0
... with parameters: {'solver': 'saga', 'penalty': 'l1', 'max_iter': 974, 'C': 0.0001}


In [187]:
c_matrix = confusion_matrix(test_y, rand_search.predict(test_X))
TP = c_matrix[1][1]
TN = c_matrix[0][0]
FP = c_matrix[0][1]
FN = c_matrix[1][0]
performance = pd.concat([performance, pd.DataFrame({'model':"Logistic Regression Random Search", 
                                                    'Accuracy': [(TP+TN)/(TP+TN+FP+FN)], 
                                                    'Precision': [TP/(TP+FP)], 
                                                    'Recall': [TP/(TP+FN)], 
                                                    'F1': [2*TP/(2*TP+FP+FN)]
                                                     }, index=[0])])

In [188]:
#Conducting Grid search around the values from Random search
score_measure = "recall"
kfolds = 5

param_grid = {
    'penalty' : ['l1'],
    'C' : np.arange(0.0001,0.1),
    'solver' : ['saga'],
    'max_iter' : np.arange(974,1000)
}

grid_search = GridSearchCV(estimator = lr, param_grid=param_grid, cv=kfolds, 
                           scoring=score_measure, verbose=1, n_jobs=-1,  # n_jobs=-1 will utilize all available CPUs 
                           return_train_score=True)

_ = grid_search.fit(train_X, train_y)

print(f"The best {score_measure} score is {grid_search.best_score_}")
print(f"... with parameters: {grid_search.best_params_}")

bestRecallTree = grid_search.best_estimator_

Fitting 5 folds for each of 26 candidates, totalling 130 fits
The best recall score is 1.0
... with parameters: {'C': 0.0001, 'max_iter': 976, 'penalty': 'l1', 'solver': 'saga'}


In [189]:
c_matrix = confusion_matrix(test_y, grid_search.predict(test_X))
TP = c_matrix[1][1]
TN = c_matrix[0][0]
FP = c_matrix[0][1]
FN = c_matrix[1][0]
performance = pd.concat([performance, pd.DataFrame({'model':"Logistic Regression Grid Search", 
                                                    'Accuracy': [(TP+TN)/(TP+TN+FP+FN)], 
                                                    'Precision': [TP/(TP+FP)], 
                                                    'Recall': [TP/(TP+FN)], 
                                                    'F1': [2*TP/(2*TP+FP+FN)]
                                                     }, index=[0])])

## **Fitting Support Vector Machine Model on the data**

In [190]:
#SVM using linear kernel
svm_lin_model = SVC(kernel="linear",probability=True)
_ = svm_lin_model.fit(train_X, np.ravel(train_y))
model_preds = svm_lin_model.predict(test_X)
c_matrix = confusion_matrix(test_y, model_preds)
TP = c_matrix[1][1]
TN = c_matrix[0][0]
FP = c_matrix[0][1]
FN = c_matrix[1][0]
performance = pd.concat([performance, pd.DataFrame({'model':"linear svm", 
                                                    'Accuracy': [(TP+TN)/(TP+TN+FP+FN)], 
                                                    'Precision': [TP/(TP+FP)], 
                                                    'Recall': [TP/(TP+FN)], 
                                                    'F1': [2*TP/(2*TP+FP+FN)]
                                                     }, index=[0])])

In [191]:
#SVM using RBF Kernel
svm_rbf_model = SVC(kernel="rbf", C=10, gamma='scale')
_ = svm_rbf_model.fit(train_X, np.ravel(train_y))
model_preds = svm_rbf_model.predict(test_X)
c_matrix = confusion_matrix(test_y, model_preds)
TP = c_matrix[1][1]
TN = c_matrix[0][0]
FP = c_matrix[0][1]
FN = c_matrix[1][0]
performance = pd.concat([performance, pd.DataFrame({'model':"rbf svm", 
                                                    'Accuracy': [(TP+TN)/(TP+TN+FP+FN)], 
                                                    'Precision': [TP/(TP+FP)], 
                                                    'Recall': [TP/(TP+FN)], 
                                                    'F1': [2*TP/(2*TP+FP+FN)]
                                                     }, index=[0])])

In [192]:
#SVM using poly kernel
svm_poly_model = SVC(kernel="poly", degree=3, coef0=1, C=1,probability=True)
_ = svm_poly_model.fit(train_X, np.ravel(train_y))
model_preds = svm_poly_model.predict(test_X)
c_matrix = confusion_matrix(test_y, model_preds)
TP = c_matrix[1][1]
TN = c_matrix[0][0]
FP = c_matrix[0][1]
FN = c_matrix[1][0]
performance = pd.concat([performance, pd.DataFrame({'model':"poly svm", 
                                                    'Accuracy': [(TP+TN)/(TP+TN+FP+FN)], 
                                                    'Precision': [TP/(TP+FP)], 
                                                    'Recall': [TP/(TP+FN)], 
                                                    'F1': [2*TP/(2*TP+FP+FN)]
                                                     }, index=[0])])

In [193]:
#Using Random search on SVM
score_measure="recall"
kfolds=5
param_grid = {'C': np.arange(1,50),
              'degree': np.arange(1,25),
              'gamma':np.arange(0.1,1),
              'kernel': ['linear','rbf','poly']
              }        
svm = SVC()
rand_search = RandomizedSearchCV(estimator = svm, param_distributions=param_grid, cv=kfolds, n_iter=500,
                           scoring=score_measure, verbose=1, n_jobs=-1,  # n_jobs=-1 will utilize all available CPUs 
                           return_train_score=True)

_ = rand_search.fit(train_X, train_y)

print(f"The best {score_measure} score is {rand_search.best_score_}")
print(f"... with parameters: {rand_search.best_params_}")

bestRecallTree = rand_search.best_estimator_
import warnings
warnings.filterwarnings("ignore")

Fitting 5 folds for each of 500 candidates, totalling 2500 fits
The best recall score is 1.0
... with parameters: {'kernel': 'linear', 'gamma': 0.1, 'degree': 11, 'C': 28}


In [194]:
c_matrix = confusion_matrix(test_y, rand_search.predict(test_X))
TP = c_matrix[1][1]
TN = c_matrix[0][0]
FP = c_matrix[0][1]
FN = c_matrix[1][0]
performance = pd.concat([performance, pd.DataFrame({'model':"SVM Random Search", 
                                                    'Accuracy': [(TP+TN)/(TP+TN+FP+FN)], 
                                                    'Precision': [TP/(TP+FP)], 
                                                    'Recall': [TP/(TP+FN)], 
                                                    'F1': [2*TP/(2*TP+FP+FN)]
                                                     }, index=[0])])

In [195]:
#Conducting Grid search around the values from Random search
score_measure = "recall"
kfolds = 5

param_grid = {
    'C': np.arange(25,40),
              'degree': np.arange(10,20),
              'gamma':np.arange(0.09,0.5),
              'kernel': ['linear']
}

grid_search = GridSearchCV(estimator = svm, param_grid=param_grid, cv=kfolds, 
                           scoring=score_measure, verbose=1, n_jobs=-1,  # n_jobs=-1 will utilize all available CPUs 
                           return_train_score=True)

_ = grid_search.fit(train_X, train_y)

print(f"The best {score_measure} score is {grid_search.best_score_}")
print(f"... with parameters: {grid_search.best_params_}")

bestRecallTree = grid_search.best_estimator_

Fitting 5 folds for each of 150 candidates, totalling 750 fits
The best recall score is 1.0
... with parameters: {'C': 25, 'degree': 10, 'gamma': 0.09, 'kernel': 'linear'}


In [196]:
c_matrix = confusion_matrix(test_y, rand_search.predict(test_X))
TP = c_matrix[1][1]
TN = c_matrix[0][0]
FP = c_matrix[0][1]
FN = c_matrix[1][0]
performance = pd.concat([performance, pd.DataFrame({'model':"SVM Grid Search", 
                                                    'Accuracy': [(TP+TN)/(TP+TN+FP+FN)], 
                                                    'Precision': [TP/(TP+FP)], 
                                                    'Recall': [TP/(TP+FN)], 
                                                    'F1': [2*TP/(2*TP+FP+FN)]
                                                     }, index=[0])])

In [197]:
#Decisiontree Classifier
Dt=DecisionTreeClassifier(max_depth=10)
Dt=Dt.fit(train_X,np.ravel(train_y))
model_preds=Dt.predict(test_X)
c_matrix = confusion_matrix(test_y, model_preds)
TP = c_matrix[1][1]
TN = c_matrix[0][0]
FP = c_matrix[0][1]
FN = c_matrix[1][0]
performance = pd.concat([performance, pd.DataFrame({'model':"Decision Tree Classifier", 
                                                    'Accuracy': [(TP+TN)/(TP+TN+FP+FN)], 
                                                    'Precision': [TP/(TP+FP)], 
                                                    'Recall': [TP/(TP+FN)], 
                                                    'F1': [2*TP/(2*TP+FP+FN)]
                                                     }, index=[0])])

In [198]:
#Using Random search on Decision Tree Classifier
score_measure = "recall"
kfolds = 5

param_grid = {
    'min_samples_split': np.arange(1,40),  
    'min_samples_leaf': np.arange(1,40),
    'min_impurity_decrease': np.arange(0.0001, 0.01, 0.0005),
    'max_leaf_nodes': np.arange(5, 200), 
    'max_depth': np.arange(1,50), 
    'criterion': ['entropy', 'gini'],
}


rand_search = RandomizedSearchCV(estimator = Dt, param_distributions=param_grid, cv=kfolds, n_iter=500,
                           scoring=score_measure, verbose=1, n_jobs=-1,  # n_jobs=-1 will utilize all available CPUs 
                           return_train_score=True)

_ = rand_search.fit(train_X, train_y)

print(f"The best {score_measure} score is {rand_search.best_score_}")
print(f"... with parameters: {rand_search.best_params_}")

bestRecallTree = rand_search.best_estimator_
import warnings
warnings.filterwarnings("ignore")

Fitting 5 folds for each of 500 candidates, totalling 2500 fits
The best recall score is 1.0
... with parameters: {'min_samples_split': 19, 'min_samples_leaf': 30, 'min_impurity_decrease': 0.0071, 'max_leaf_nodes': 150, 'max_depth': 12, 'criterion': 'entropy'}


In [199]:
c_matrix = confusion_matrix(test_y, rand_search.predict(test_X))
TP = c_matrix[1][1]
TN = c_matrix[0][0]
FP = c_matrix[0][1]
FN = c_matrix[1][0]
performance = pd.concat([performance, pd.DataFrame({'model':"Decision Tree Random Search", 
                                                    'Accuracy': [(TP+TN)/(TP+TN+FP+FN)], 
                                                    'Precision': [TP/(TP+FP)], 
                                                    'Recall': [TP/(TP+FN)], 
                                                    'F1': [2*TP/(2*TP+FP+FN)]
                                                     }, index=[0])])

In [201]:
#Conducting Grid search around the values from Random search
score_measure = "recall"
kfolds = 5

param_grid = {
    'min_samples_split': np.arange(15,25),  
    'min_samples_leaf': np.arange(25,35),
    'min_impurity_decrease': np.arange(0.0060, 0.0080, 0.001),
    'max_leaf_nodes': np.arange(140,180), 
    'max_depth': np.arange(10,20), 
    'criterion': ['entropy'],
}

grid_search = GridSearchCV(estimator = Dt, param_grid=param_grid, cv=kfolds, 
                           scoring=score_measure, verbose=1, n_jobs=-1,  # n_jobs=-1 will utilize all available CPUs 
                           return_train_score=True)

_ = grid_search.fit(train_X, train_y)

print(f"The best {score_measure} score is {grid_search.best_score_}")
print(f"... with parameters: {grid_search.best_params_}")

bestRecallTree = grid_search.best_estimator_

Fitting 5 folds for each of 80000 candidates, totalling 400000 fits
The best recall score is 1.0
... with parameters: {'criterion': 'entropy', 'max_depth': 10, 'max_leaf_nodes': 140, 'min_impurity_decrease': 0.007, 'min_samples_leaf': 25, 'min_samples_split': 15}


In [202]:
c_matrix = confusion_matrix(test_y, grid_search.predict(test_X))
TP = c_matrix[1][1]
TN = c_matrix[0][0]
FP = c_matrix[0][1]
FN = c_matrix[1][0]
performance = pd.concat([performance, pd.DataFrame({'model':"Decision Tree Grid Search", 
                                                    'Accuracy': [(TP+TN)/(TP+TN+FP+FN)], 
                                                    'Precision': [TP/(TP+FP)], 
                                                    'Recall': [TP/(TP+FN)], 
                                                    'F1': [2*TP/(2*TP+FP+FN)]
                                                     }, index=[0])])

In [214]:
performance.loc[:,["model","Accuracy","Recall","F1"]].sort_values(by=['Recall'],ascending=False)

Unnamed: 0,model,Accuracy,Recall,F1
0,linear svm,0.876907,1.0,0.885516
0,SVM Random Search,0.876907,1.0,0.885516
0,SVM Grid Search,0.876907,1.0,0.885516
0,Decision Tree Random Search,0.876907,1.0,0.885516
0,Decision Tree Grid Search,0.876907,1.0,0.885516
0,Decision Tree Classifier,0.938985,0.993294,0.939394
0,rbf svm,0.948209,0.992548,0.948043
0,poly svm,0.926215,0.980626,0.926761
0,L1 logistic,0.87868,0.962742,0.883117
0,default logistic,0.879745,0.958271,0.883545


Looking at the above results, the top 5 models have the same Recall value. In this case, the metric we have to consider is Recall because it's important to not have False Positives(Customers who doesn't have CD Account predicted as they have a CD Account) predicted by a model.So the best models on this dataset are Linear SVM, SVM using Random search and Grid Search, Decision Tree using Random Search and Grid Search.