# Import Libraries

In [24]:
# import numpy and pandas libraries
import pandas as pd
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import confusion_matrix
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier 
from matplotlib import pyplot as plt
from sklearn.svm import SVC
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV
np.random.seed(1)
from sklearn.impute import SimpleImputer
# set random seed to ensure that results are repeatable
np.random.seed(1)

# Loading the Data

In [2]:
UBank = pd.read_csv("UniversalBank.csv")

UBank.head(3)

Unnamed: 0,ID,Age,Experience,Income,ZIP Code,Family,CCAvg,Education,Mortgage,Personal Loan,Securities Account,CD Account,Online,CreditCard
0,1,25,1,49,91107,4,1.6,1,0,0,1,0,0,0
1,2,45,19,34,90089,3,1.5,1,0,0,1,0,0,0
2,3,39,15,11,94720,1,1.0,1,0,0,0,0,0,0


# Droping Unnecessary Columns

In [4]:
UBank=UBank.drop(["ID","ZIP Code"],axis=1)

In [5]:
UBank.head()

Unnamed: 0,Age,Experience,Income,Family,CCAvg,Education,Mortgage,Personal Loan,Securities Account,CD Account,Online,CreditCard
0,25,1,49,4,1.6,1,0,0,1,0,0,0
1,45,19,34,3,1.5,1,0,0,1,0,0,0
2,39,15,11,1,1.0,1,0,0,0,0,0,0
3,35,9,100,1,2.7,2,0,0,0,0,0,0
4,35,8,45,4,1.0,2,0,0,0,0,0,1


# Creating Dummies

In [6]:
UBank=pd.get_dummies(UBank, prefix=['Education'], columns=['Education'])
UBank.head()

Unnamed: 0,Age,Experience,Income,Family,CCAvg,Mortgage,Personal Loan,Securities Account,CD Account,Online,CreditCard,Education_1,Education_2,Education_3
0,25,1,49,4,1.6,0,0,1,0,0,0,1,0,0
1,45,19,34,3,1.5,0,0,1,0,0,0,1,0,0
2,39,15,11,1,1.0,0,0,0,0,0,0,1,0,0
3,35,9,100,1,2.7,0,0,0,0,0,0,0,1,0
4,35,8,45,4,1.0,0,0,0,0,0,1,0,1,0


# Splitting of Dependent variable and Independent variable

In [10]:
X = UBank[['Age', 'Experience', 'Income', 'Family', 'CCAvg', 'Mortgage',
       'Personal Loan', 'Securities Account', 'Online',
       'CreditCard', 'Education_1', 'Education_2', 'Education_3']]
y = UBank[['CD Account']]

# Splitting data into test set and train set

In [11]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.30)

In [13]:
from sklearn.preprocessing import StandardScaler

In [14]:
scaler = StandardScaler()

In [15]:
list_21 =['Age', 'Experience', 'Income', 'ZIP_Code', 'CCAvg', 'Mortgage']

In [17]:
X_train[['Age', 'Experience', 'Income', 'CCAvg', 'Mortgage']] = scaler.fit_transform(X_train[['Age', 'Experience', 'Income', 'CCAvg', 'Mortgage']])
X_test[['Age', 'Experience', 'Income', 'CCAvg', 'Mortgage']] = scaler.transform(X_test[['Age', 'Experience', 'Income', 'CCAvg', 'Mortgage']])

# Model the data

In [18]:
performance = pd.DataFrame({"model": [], "Accuracy": [], "Precision": [], "Recall": [], "F1": []})

# Logistic Regression Model

In [20]:
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import confusion_matrix

In [21]:
log_reg_model = LogisticRegression()
_ = log_reg_model.fit(X_train, np.ravel(y_train))

In [22]:
model_preds = log_reg_model.predict(X_test)
c_matrix = confusion_matrix(y_test, model_preds)
TP = c_matrix[1][1]
TN = c_matrix[0][0]
FP = c_matrix[0][1]
FN = c_matrix[1][0]
performance = pd.concat([performance, pd.DataFrame({'model':"default logistic", 
                                                    'Accuracy': [(TP+TN)/(TP+TN+FP+FN)], 
                                                    'Precision': [TP/(TP+FP)], 
                                                    'Recall': [TP/(TP+FN)], 
                                                    'F1': [2*TP/(2*TP+FP+FN)]
                                                     }, index=[0])])
performance

Unnamed: 0,model,Accuracy,Precision,Recall,F1
0,default logistic,0.978,1.0,0.60241,0.75188


# RandomizedSearchCV Logistic Regression

In [25]:
score_measure = "recall"
LR=LogisticRegression()
kfolds = 5
param_grid = {'C': [0.1, 1, 10,0.001], 
              "solver" : [ 'lbfgs', 'liblinear','saga'],
              "penalty" : ['l1','l2','lasso','elastic']} 
  
grid = RandomizedSearchCV(LR, param_grid, refit = True, verbose = 3)
  
# fitting the model for grid search
grid.fit(X_train, y_train)
print(f"The best {score_measure} score is {grid.best_score_}")
print(f"... with parameters: {grid.best_params_}")

bestRecallTree = grid.best_estimator_

Fitting 5 folds for each of 10 candidates, totalling 50 fits
[CV 1/5] END .C=10, penalty=elastic, solver=lbfgs;, score=nan total time=   0.0s
[CV 2/5] END .C=10, penalty=elastic, solver=lbfgs;, score=nan total time=   0.0s
[CV 3/5] END .C=10, penalty=elastic, solver=lbfgs;, score=nan total time=   0.0s
[CV 4/5] END .C=10, penalty=elastic, solver=lbfgs;, score=nan total time=   0.0s
[CV 5/5] END .C=10, penalty=elastic, solver=lbfgs;, score=nan total time=   0.0s
[CV 1/5] END ...C=1, penalty=elastic, solver=saga;, score=nan total time=   0.0s
[CV 2/5] END ...C=1, penalty=elastic, solver=saga;, score=nan total time=   0.0s
[CV 3/5] END ...C=1, penalty=elastic, solver=saga;, score=nan total time=   0.0s
[CV 4/5] END ...C=1, penalty=elastic, solver=saga;, score=nan total time=   0.0s
[CV 5/5] END ...C=1, penalty=elastic, solver=saga;, score=nan total time=   0.0s
[CV 1/5] END ..C=0.001, penalty=l1, solver=saga;, score=0.939 total time=   0.0s
[CV 2/5] END ..C=0.001, penalty=l1, solver=saga;

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


[CV 3/5] END ....C=0.1, penalty=l1, solver=saga;, score=0.980 total time=   0.0s
[CV 4/5] END ....C=0.1, penalty=l1, solver=saga;, score=0.977 total time=   0.0s
[CV 5/5] END ....C=0.1, penalty=l1, solver=saga;, score=0.983 total time=   0.0s
[CV 1/5] END ...C=0.1, penalty=l2, solver=lbfgs;, score=0.961 total time=   0.0s
[CV 2/5] END ...C=0.1, penalty=l2, solver=lbfgs;, score=0.964 total time=   0.0s
[CV 3/5] END ...C=0.1, penalty=l2, solver=lbfgs;, score=0.969 total time=   0.0s
[CV 4/5] END ...C=0.1, penalty=l2, solver=lbfgs;, score=0.970 total time=   0.0s
[CV 5/5] END ...C=0.1, penalty=l2, solver=lbfgs;, score=0.970 total time=   0.0s
[CV 1/5] END C=10, penalty=lasso, solver=liblinear;, score=nan total time=   0.0s
[CV 2/5] END C=10, penalty=lasso, solver=liblinear;, score=nan total time=   0.0s
[CV 3/5] END C=10, penalty=lasso, solver=liblinear;, score=nan total time=   0.0s
[CV 4/5] END C=10, penalty=lasso, solver=liblinear;, score=nan total time=   0.0s
[CV 5/5] END C=10, penal

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


[CV 1/5] END .....C=10, penalty=l1, solver=saga;, score=0.974 total time=   0.0s
[CV 2/5] END .....C=10, penalty=l1, solver=saga;, score=0.981 total time=   0.0s
[CV 3/5] END .....C=10, penalty=l1, solver=saga;, score=0.980 total time=   0.0s
[CV 4/5] END .....C=10, penalty=l1, solver=saga;, score=0.977 total time=   0.0s


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
  y = column_or_1d(y, warn=True)
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
  y = column_or_1d(y, warn=True)


[CV 5/5] END .....C=10, penalty=l1, solver=saga;, score=0.983 total time=   0.0s
[CV 1/5] END .C=0.001, penalty=lasso, solver=saga;, score=nan total time=   0.0s
[CV 2/5] END .C=0.001, penalty=lasso, solver=saga;, score=nan total time=   0.0s
[CV 3/5] END .C=0.001, penalty=lasso, solver=saga;, score=nan total time=   0.0s
[CV 4/5] END .C=0.001, penalty=lasso, solver=saga;, score=nan total time=   0.0s
[CV 5/5] END .C=0.001, penalty=lasso, solver=saga;, score=nan total time=   0.0s
[CV 1/5] END ....C=10, penalty=l2, solver=lbfgs;, score=0.974 total time=   0.0s
[CV 2/5] END ....C=10, penalty=l2, solver=lbfgs;, score=0.981 total time=   0.0s
[CV 3/5] END ....C=10, penalty=l2, solver=lbfgs;, score=0.980 total time=   0.0s
[CV 4/5] END ....C=10, penalty=l2, solver=lbfgs;, score=0.977 total time=   0.0s
[CV 5/5] END ....C=10, penalty=l2, solver=lbfgs;, score=0.983 total time=   0.0s
The best recall score is 0.9791428571428572
... with parameters: {'solver': 'saga', 'penalty': 'l1', 'C': 0.1

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
25 fits failed out of a total of 50.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
10 fits failed with the following error:
Traceback (most recent call last):
  File "C:\Users\dkrre\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 680, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\dkrre\anaconda3\lib\site

In [26]:
model_preds = grid.predict(X_test)
c_matrix = confusion_matrix(y_test, model_preds)
TP = c_matrix[1][1]
TN = c_matrix[0][0]
FP = c_matrix[0][1]
FN = c_matrix[1][0]
performance = pd.concat([performance, pd.DataFrame({'model':"Logistic Regression Randomised", 
                                                    'Accuracy': [(TP+TN)/(TP+TN+FP+FN)], 
                                                    'Precision': [TP/(TP+FP)], 
                                                    'Recall': [TP/(TP+FN)], 
                                                    'F1': [2*TP/(2*TP+FP+FN)]
                                                     }, index=[0])])

# GridSearchCV Logistic Regression

In [27]:
score_measure = "recall"
kfolds = 5
param_grid = {'C': [0.1, 1, 10], 
              'solver' : [ 'lbfgs', 'liblinear'],
              'penalty' : ['l1','l2','lasso','elastic']} 
  
grid = GridSearchCV(LogisticRegression(), param_grid, refit = True, verbose = 3)
  
# fitting the model for grid search
grid.fit(X_train, y_train)
print(f"The best {score_measure} score is {grid.best_score_}")
print(f"... with parameters: {grid.best_params_}")

bestRecallTree = grid.best_estimator_

Fitting 5 folds for each of 24 candidates, totalling 120 fits
[CV 1/5] END .....C=0.1, penalty=l1, solver=lbfgs;, score=nan total time=   0.0s
[CV 2/5] END .....C=0.1, penalty=l1, solver=lbfgs;, score=nan total time=   0.0s
[CV 3/5] END .....C=0.1, penalty=l1, solver=lbfgs;, score=nan total time=   0.0s
[CV 4/5] END .....C=0.1, penalty=l1, solver=lbfgs;, score=nan total time=   0.0s
[CV 5/5] END .....C=0.1, penalty=l1, solver=lbfgs;, score=nan total time=   0.0s
[CV 1/5] END C=0.1, penalty=l1, solver=liblinear;, score=0.974 total time=   0.0s
[CV 2/5] END C=0.1, penalty=l1, solver=liblinear;, score=0.981 total time=   0.0s
[CV 3/5] END C=0.1, penalty=l1, solver=liblinear;, score=0.980 total time=   0.0s
[CV 4/5] END C=0.1, penalty=l1, solver=liblinear;, score=0.977 total time=   0.0s
[CV 5/5] END C=0.1, penalty=l1, solver=liblinear;, score=0.983 total time=   0.0s
[CV 1/5] END ...C=0.1, penalty=l2, solver=lbfgs;, score=0.961 total time=   0.0s
[CV 2/5] END ...C=0.1, penalty=l2, solver=

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


[CV 5/5] END ...C=0.1, penalty=l2, solver=lbfgs;, score=0.970 total time=   0.0s
[CV 1/5] END C=0.1, penalty=l2, solver=liblinear;, score=0.954 total time=   0.0s
[CV 2/5] END C=0.1, penalty=l2, solver=liblinear;, score=0.956 total time=   0.0s
[CV 3/5] END C=0.1, penalty=l2, solver=liblinear;, score=0.957 total time=   0.0s
[CV 4/5] END C=0.1, penalty=l2, solver=liblinear;, score=0.961 total time=   0.0s
[CV 5/5] END C=0.1, penalty=l2, solver=liblinear;, score=0.959 total time=   0.0s
[CV 1/5] END ..C=0.1, penalty=lasso, solver=lbfgs;, score=nan total time=   0.0s
[CV 2/5] END ..C=0.1, penalty=lasso, solver=lbfgs;, score=nan total time=   0.0s
[CV 3/5] END ..C=0.1, penalty=lasso, solver=lbfgs;, score=nan total time=   0.0s
[CV 4/5] END ..C=0.1, penalty=lasso, solver=lbfgs;, score=nan total time=   0.0s
[CV 5/5] END ..C=0.1, penalty=lasso, solver=lbfgs;, score=nan total time=   0.0s
[CV 1/5] END C=0.1, penalty=lasso, solver=liblinear;, score=nan total time=   0.0s
[CV 2/5] END C=0.1, p

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


[CV 4/5] END .C=1, penalty=l1, solver=liblinear;, score=0.977 total time=   0.0s
[CV 5/5] END .C=1, penalty=l1, solver=liblinear;, score=0.983 total time=   0.0s
[CV 1/5] END .....C=1, penalty=l2, solver=lbfgs;, score=0.974 total time=   0.0s
[CV 2/5] END .....C=1, penalty=l2, solver=lbfgs;, score=0.981 total time=   0.0s
[CV 3/5] END .....C=1, penalty=l2, solver=lbfgs;, score=0.980 total time=   0.0s
[CV 4/5] END .....C=1, penalty=l2, solver=lbfgs;, score=0.977 total time=   0.0s
[CV 5/5] END .....C=1, penalty=l2, solver=lbfgs;, score=0.983 total time=   0.0s
[CV 1/5] END .C=1, penalty=l2, solver=liblinear;, score=0.974 total time=   0.0s
[CV 2/5] END .C=1, penalty=l2, solver=liblinear;, score=0.981 total time=   0.0s
[CV 3/5] END .C=1, penalty=l2, solver=liblinear;, score=0.980 total time=   0.0s
[CV 4/5] END .C=1, penalty=l2, solver=liblinear;, score=0.977 total time=   0.0s


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


[CV 5/5] END .C=1, penalty=l2, solver=liblinear;, score=0.983 total time=   0.0s
[CV 1/5] END ....C=1, penalty=lasso, solver=lbfgs;, score=nan total time=   0.0s
[CV 2/5] END ....C=1, penalty=lasso, solver=lbfgs;, score=nan total time=   0.0s
[CV 3/5] END ....C=1, penalty=lasso, solver=lbfgs;, score=nan total time=   0.0s
[CV 4/5] END ....C=1, penalty=lasso, solver=lbfgs;, score=nan total time=   0.0s
[CV 5/5] END ....C=1, penalty=lasso, solver=lbfgs;, score=nan total time=   0.0s
[CV 1/5] END C=1, penalty=lasso, solver=liblinear;, score=nan total time=   0.0s
[CV 2/5] END C=1, penalty=lasso, solver=liblinear;, score=nan total time=   0.0s
[CV 3/5] END C=1, penalty=lasso, solver=liblinear;, score=nan total time=   0.0s
[CV 4/5] END C=1, penalty=lasso, solver=liblinear;, score=nan total time=   0.0s
[CV 5/5] END C=1, penalty=lasso, solver=liblinear;, score=nan total time=   0.0s
[CV 1/5] END ..C=1, penalty=elastic, solver=lbfgs;, score=nan total time=   0.0s
[CV 2/5] END ..C=1, penalty=

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


[CV 3/5] END C=10, penalty=l1, solver=liblinear;, score=0.980 total time=   0.1s
[CV 4/5] END C=10, penalty=l1, solver=liblinear;, score=0.977 total time=   0.1s


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


[CV 5/5] END C=10, penalty=l1, solver=liblinear;, score=0.983 total time=   0.1s
[CV 1/5] END ....C=10, penalty=l2, solver=lbfgs;, score=0.974 total time=   0.0s
[CV 2/5] END ....C=10, penalty=l2, solver=lbfgs;, score=0.981 total time=   0.0s
[CV 3/5] END ....C=10, penalty=l2, solver=lbfgs;, score=0.980 total time=   0.0s
[CV 4/5] END ....C=10, penalty=l2, solver=lbfgs;, score=0.977 total time=   0.0s


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
  y = column_or_1d(y, warn=True)
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
  y = column_or_1d(y, warn=True)


[CV 5/5] END ....C=10, penalty=l2, solver=lbfgs;, score=0.983 total time=   0.0s
[CV 1/5] END C=10, penalty=l2, solver=liblinear;, score=0.974 total time=   0.0s
[CV 2/5] END C=10, penalty=l2, solver=liblinear;, score=0.981 total time=   0.0s
[CV 3/5] END C=10, penalty=l2, solver=liblinear;, score=0.980 total time=   0.0s
[CV 4/5] END C=10, penalty=l2, solver=liblinear;, score=0.977 total time=   0.0s
[CV 5/5] END C=10, penalty=l2, solver=liblinear;, score=0.983 total time=   0.0s
[CV 1/5] END ...C=10, penalty=lasso, solver=lbfgs;, score=nan total time=   0.0s
[CV 2/5] END ...C=10, penalty=lasso, solver=lbfgs;, score=nan total time=   0.0s
[CV 3/5] END ...C=10, penalty=lasso, solver=lbfgs;, score=nan total time=   0.0s
[CV 4/5] END ...C=10, penalty=lasso, solver=lbfgs;, score=nan total time=   0.0s
[CV 5/5] END ...C=10, penalty=lasso, solver=lbfgs;, score=nan total time=   0.0s
[CV 1/5] END C=10, penalty=lasso, solver=liblinear;, score=nan total time=   0.0s
[CV 2/5] END C=10, penalty=

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
75 fits failed out of a total of 120.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
15 fits failed with the following error:
Traceback (most recent call last):
  File "C:\Users\dkrre\anaconda3\lib\site-pac

In [28]:
model_preds = grid.predict(X_test)
c_matrix = confusion_matrix(y_test, model_preds)
TP = c_matrix[1][1]
TN = c_matrix[0][0]
FP = c_matrix[0][1]
FN = c_matrix[1][0]
performance = pd.concat([performance, pd.DataFrame({'model':"Logistic Regression Grid", 
                                                    'Accuracy': [(TP+TN)/(TP+TN+FP+FN)], 
                                                    'Precision': [TP/(TP+FP)], 
                                                    'Recall': [TP/(TP+FN)], 
                                                    'F1': [2*TP/(2*TP+FP+FN)]
                                                     }, index=[0])])

# SVM Classification model with Linear Kernel

In [29]:
svm_lin_model = SVC(kernel="linear", probability=True)
_ = svm_lin_model.fit(X_train, np.ravel(y_train))

In [30]:
model_preds = svm_lin_model.predict(X_test)
c_matrix = confusion_matrix(y_test, model_preds)
TP = c_matrix[1][1]
TN = c_matrix[0][0]
FP = c_matrix[0][1]
FN = c_matrix[1][0]
performance = pd.concat([performance, pd.DataFrame({'model':"svm with linear kernel", 
                                                    'Accuracy': [(TP+TN)/(TP+TN+FP+FN)], 
                                                    'Precision': [TP/(TP+FP)], 
                                                    'Recall': [TP/(TP+FN)], 
                                                    'F1': [2*TP/(2*TP+FP+FN)]
                                                     }, index=[0])])
performance

Unnamed: 0,model,Accuracy,Precision,Recall,F1
0,default logistic,0.978,1.0,0.60241,0.75188
0,Logistic Regression Randomised,0.978,1.0,0.60241,0.75188
0,Logistic Regression Grid,0.978,1.0,0.60241,0.75188
0,svm with linear kernel,0.978,1.0,0.60241,0.75188


# SVM Classification model with rbf Kernel

In [31]:
svm_rbf_model = SVC(kernel="rbf", C=10, gamma='scale', probability=True)
_ = svm_rbf_model.fit(X_train, np.ravel(y_train))

In [32]:
model_preds = svm_rbf_model.predict(X_test)
c_matrix = confusion_matrix(y_test, model_preds)
TP = c_matrix[1][1]
TN = c_matrix[0][0]
FP = c_matrix[0][1]
FN = c_matrix[1][0]
performance = pd.concat([performance, pd.DataFrame({'model':"svm with rbf kernel", 
                                                    'Accuracy': [(TP+TN)/(TP+TN+FP+FN)], 
                                                    'Precision': [TP/(TP+FP)], 
                                                    'Recall': [TP/(TP+FN)], 
                                                    'F1': [2*TP/(2*TP+FP+FN)]
                                                     }, index=[0])])
performance

Unnamed: 0,model,Accuracy,Precision,Recall,F1
0,default logistic,0.978,1.0,0.60241,0.75188
0,Logistic Regression Randomised,0.978,1.0,0.60241,0.75188
0,Logistic Regression Grid,0.978,1.0,0.60241,0.75188
0,svm with linear kernel,0.978,1.0,0.60241,0.75188
0,svm with rbf kernel,0.975333,0.925926,0.60241,0.729927


## SVM Classification model with Polynomial Kernel

In [33]:
svm_poly_model = SVC(kernel="poly", degree=3, coef0=1, C=10, probability=True)
_ = svm_poly_model.fit(X_train, np.ravel(y_train))

In [34]:
model_preds = svm_poly_model.predict(X_test)
c_matrix = confusion_matrix(y_test, model_preds)
TP = c_matrix[1][1]
TN = c_matrix[0][0]
FP = c_matrix[0][1]
FN = c_matrix[1][0]
performance = pd.concat([performance, pd.DataFrame({'model':"svm with polynomial kernel", 
                                                    'Accuracy': [(TP+TN)/(TP+TN+FP+FN)], 
                                                    'Precision': [TP/(TP+FP)], 
                                                    'Recall': [TP/(TP+FN)], 
                                                    'F1': [2*TP/(2*TP+FP+FN)]
                                                     }, index=[0])])
performance

Unnamed: 0,model,Accuracy,Precision,Recall,F1
0,default logistic,0.978,1.0,0.60241,0.75188
0,Logistic Regression Randomised,0.978,1.0,0.60241,0.75188
0,Logistic Regression Grid,0.978,1.0,0.60241,0.75188
0,svm with linear kernel,0.978,1.0,0.60241,0.75188
0,svm with rbf kernel,0.975333,0.925926,0.60241,0.729927
0,svm with polynomial kernel,0.975333,0.925926,0.60241,0.729927


# RandomizedSearchCV SVM

In [35]:
score_measure = "recall"
kfolds = 5
param_grid = {'C': [0.1, 1, 10], 
              'gamma': [1, 0.1, 0.01, 0.001],
              'kernel': ['linear','poly','rbf']} 
  
rand_search = RandomizedSearchCV(SVC(), param_grid, refit = True, verbose = 3)
  
# fitting the model for grid search
grid.fit(X_train, y_train)
print(f"The best {score_measure} score is {rand_search.best_score_}")
print(f"... with parameters: {rand_search.best_params_}")

bestRecallTree = rand_search.best_estimator_

Fitting 5 folds for each of 24 candidates, totalling 120 fits
[CV 1/5] END .....C=0.1, penalty=l1, solver=lbfgs;, score=nan total time=   0.0s
[CV 2/5] END .....C=0.1, penalty=l1, solver=lbfgs;, score=nan total time=   0.0s
[CV 3/5] END .....C=0.1, penalty=l1, solver=lbfgs;, score=nan total time=   0.0s
[CV 4/5] END .....C=0.1, penalty=l1, solver=lbfgs;, score=nan total time=   0.0s
[CV 5/5] END .....C=0.1, penalty=l1, solver=lbfgs;, score=nan total time=   0.0s
[CV 1/5] END C=0.1, penalty=l1, solver=liblinear;, score=0.974 total time=   0.0s
[CV 2/5] END C=0.1, penalty=l1, solver=liblinear;, score=0.981 total time=   0.0s
[CV 3/5] END C=0.1, penalty=l1, solver=liblinear;, score=0.980 total time=   0.0s
[CV 4/5] END C=0.1, penalty=l1, solver=liblinear;, score=0.977 total time=   0.0s
[CV 5/5] END C=0.1, penalty=l1, solver=liblinear;, score=0.983 total time=   0.0s
[CV 1/5] END ...C=0.1, penalty=l2, solver=lbfgs;, score=0.961 total time=   0.0s
[CV 2/5] END ...C=0.1, penalty=l2, solver=

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


[CV 2/5] END C=0.1, penalty=l2, solver=liblinear;, score=0.956 total time=   0.0s
[CV 3/5] END C=0.1, penalty=l2, solver=liblinear;, score=0.957 total time=   0.0s
[CV 4/5] END C=0.1, penalty=l2, solver=liblinear;, score=0.961 total time=   0.0s
[CV 5/5] END C=0.1, penalty=l2, solver=liblinear;, score=0.959 total time=   0.0s
[CV 1/5] END ..C=0.1, penalty=lasso, solver=lbfgs;, score=nan total time=   0.0s
[CV 2/5] END ..C=0.1, penalty=lasso, solver=lbfgs;, score=nan total time=   0.0s
[CV 3/5] END ..C=0.1, penalty=lasso, solver=lbfgs;, score=nan total time=   0.0s
[CV 4/5] END ..C=0.1, penalty=lasso, solver=lbfgs;, score=nan total time=   0.0s
[CV 5/5] END ..C=0.1, penalty=lasso, solver=lbfgs;, score=nan total time=   0.0s
[CV 1/5] END C=0.1, penalty=lasso, solver=liblinear;, score=nan total time=   0.0s
[CV 2/5] END C=0.1, penalty=lasso, solver=liblinear;, score=nan total time=   0.0s
[CV 3/5] END C=0.1, penalty=lasso, solver=liblinear;, score=nan total time=   0.0s
[CV 4/5] END C=0.1

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


[CV 5/5] END .....C=1, penalty=l2, solver=lbfgs;, score=0.983 total time=   0.0s
[CV 1/5] END .C=1, penalty=l2, solver=liblinear;, score=0.974 total time=   0.0s
[CV 2/5] END .C=1, penalty=l2, solver=liblinear;, score=0.981 total time=   0.0s
[CV 3/5] END .C=1, penalty=l2, solver=liblinear;, score=0.980 total time=   0.0s
[CV 4/5] END .C=1, penalty=l2, solver=liblinear;, score=0.977 total time=   0.0s
[CV 5/5] END .C=1, penalty=l2, solver=liblinear;, score=0.983 total time=   0.0s
[CV 1/5] END ....C=1, penalty=lasso, solver=lbfgs;, score=nan total time=   0.0s
[CV 2/5] END ....C=1, penalty=lasso, solver=lbfgs;, score=nan total time=   0.0s
[CV 3/5] END ....C=1, penalty=lasso, solver=lbfgs;, score=nan total time=   0.0s
[CV 4/5] END ....C=1, penalty=lasso, solver=lbfgs;, score=nan total time=   0.0s
[CV 5/5] END ....C=1, penalty=lasso, solver=lbfgs;, score=nan total time=   0.0s
[CV 1/5] END C=1, penalty=lasso, solver=liblinear;, score=nan total time=   0.0s
[CV 2/5] END C=1, penalty=la

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


[CV 1/5] END C=10, penalty=l1, solver=liblinear;, score=0.974 total time=   0.1s
[CV 2/5] END C=10, penalty=l1, solver=liblinear;, score=0.981 total time=   0.1s


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


[CV 3/5] END C=10, penalty=l1, solver=liblinear;, score=0.980 total time=   0.1s
[CV 4/5] END C=10, penalty=l1, solver=liblinear;, score=0.977 total time=   0.1s


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
  y = column_or_1d(y, warn=True)
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
  y = column_or_1d(y, warn=True)


[CV 5/5] END C=10, penalty=l1, solver=liblinear;, score=0.983 total time=   0.0s
[CV 1/5] END ....C=10, penalty=l2, solver=lbfgs;, score=0.974 total time=   0.0s
[CV 2/5] END ....C=10, penalty=l2, solver=lbfgs;, score=0.981 total time=   0.0s
[CV 3/5] END ....C=10, penalty=l2, solver=lbfgs;, score=0.980 total time=   0.0s
[CV 4/5] END ....C=10, penalty=l2, solver=lbfgs;, score=0.977 total time=   0.0s
[CV 5/5] END ....C=10, penalty=l2, solver=lbfgs;, score=0.983 total time=   0.0s
[CV 1/5] END C=10, penalty=l2, solver=liblinear;, score=0.974 total time=   0.0s
[CV 2/5] END C=10, penalty=l2, solver=liblinear;, score=0.981 total time=   0.0s
[CV 3/5] END C=10, penalty=l2, solver=liblinear;, score=0.980 total time=   0.0s
[CV 4/5] END C=10, penalty=l2, solver=liblinear;, score=0.977 total time=   0.0s
[CV 5/5] END C=10, penalty=l2, solver=liblinear;, score=0.983 total time=   0.0s
[CV 1/5] END ...C=10, penalty=lasso, solver=lbfgs;, score=nan total time=   0.0s
[CV 2/5] END ...C=10, penalt

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
75 fits failed out of a total of 120.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
15 fits failed with the following error:
Traceback (most recent call last):
  File "C:\Users\dkrre\anaconda3\lib\site-pac

AttributeError: 'RandomizedSearchCV' object has no attribute 'best_score_'

In [None]:
c_matrix = confusion_matrix(y_test, rand_search.predict(X_test))
TP = c_matrix[1][1]
TN = c_matrix[0][0]
FP = c_matrix[0][1]
FN = c_matrix[1][0]
performance = pd.concat([performance, pd.DataFrame({'model':"Random search SVM Linear", 
                                                    'Accuracy': [(TP+TN)/(TP+TN+FP+FN)], 
                                                    'Precision': [TP/(TP+FP)], 
                                                    'Recall': [TP/(TP+FN)], 
                                                    'F1': [2*TP/(2*TP+FP+FN)]
                                                     }, index=[0])])

# GridSearchCV SVM

In [None]:
score_measure = "recall"
kfolds = 5
param_grid = {'C': [0.1, 1, 10, 100, 1000], 
              'gamma': [1, 0.1, 0.01, 0.001, 0.0001],
              'kernel': ['linear','poly','rbf']} 
  
grid = GridSearchCV(SVC(), param_grid, refit = True, verbose = 3)
  
# fitting the model for grid search
grid.fit(X_train, y_train)
print(f"The best {score_measure} score is {grid.best_score_}")
print(f"... with parameters: {grid.best_params_}")

bestRecallTree = grid.best_estimator_

Fitting 5 folds for each of 75 candidates, totalling 375 fits
[CV 1/5] END .....C=0.1, gamma=1, kernel=linear;, score=0.974 total time=   0.0s
[CV 2/5] END .....C=0.1, gamma=1, kernel=linear;, score=0.981 total time=   0.0s
[CV 3/5] END .....C=0.1, gamma=1, kernel=linear;, score=0.980 total time=   0.0s
[CV 4/5] END .....C=0.1, gamma=1, kernel=linear;, score=0.977 total time=   0.0s
[CV 5/5] END .....C=0.1, gamma=1, kernel=linear;, score=0.983 total time=   0.0s


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


[CV 1/5] END .......C=0.1, gamma=1, kernel=poly;, score=0.970 total time=   0.1s


  y = column_or_1d(y, warn=True)


[CV 2/5] END .......C=0.1, gamma=1, kernel=poly;, score=0.967 total time=   0.2s
[CV 3/5] END .......C=0.1, gamma=1, kernel=poly;, score=0.961 total time=   0.1s


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


[CV 4/5] END .......C=0.1, gamma=1, kernel=poly;, score=0.963 total time=   0.1s


  y = column_or_1d(y, warn=True)


[CV 5/5] END .......C=0.1, gamma=1, kernel=poly;, score=0.969 total time=   0.1s


  y = column_or_1d(y, warn=True)


[CV 1/5] END ........C=0.1, gamma=1, kernel=rbf;, score=0.939 total time=   0.1s


  y = column_or_1d(y, warn=True)


[CV 2/5] END ........C=0.1, gamma=1, kernel=rbf;, score=0.937 total time=   0.1s


  y = column_or_1d(y, warn=True)


[CV 3/5] END ........C=0.1, gamma=1, kernel=rbf;, score=0.937 total time=   0.1s
[CV 4/5] END ........C=0.1, gamma=1, kernel=rbf;, score=0.937 total time=   0.1s


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


[CV 5/5] END ........C=0.1, gamma=1, kernel=rbf;, score=0.937 total time=   0.2s
[CV 1/5] END ...C=0.1, gamma=0.1, kernel=linear;, score=0.974 total time=   0.0s
[CV 2/5] END ...C=0.1, gamma=0.1, kernel=linear;, score=0.981 total time=   0.0s
[CV 3/5] END ...C=0.1, gamma=0.1, kernel=linear;, score=0.980 total time=   0.0s
[CV 4/5] END ...C=0.1, gamma=0.1, kernel=linear;, score=0.977 total time=   0.0s
[CV 5/5] END ...C=0.1, gamma=0.1, kernel=linear;, score=0.983 total time=   0.0s
[CV 1/5] END .....C=0.1, gamma=0.1, kernel=poly;, score=0.961 total time=   0.0s


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


[CV 2/5] END .....C=0.1, gamma=0.1, kernel=poly;, score=0.956 total time=   0.0s
[CV 3/5] END .....C=0.1, gamma=0.1, kernel=poly;, score=0.964 total time=   0.0s
[CV 4/5] END .....C=0.1, gamma=0.1, kernel=poly;, score=0.964 total time=   0.0s
[CV 5/5] END .....C=0.1, gamma=0.1, kernel=poly;, score=0.961 total time=   0.0s
[CV 1/5] END ......C=0.1, gamma=0.1, kernel=rbf;, score=0.939 total time=   0.0s
[CV 2/5] END ......C=0.1, gamma=0.1, kernel=rbf;, score=0.937 total time=   0.0s
[CV 3/5] END ......C=0.1, gamma=0.1, kernel=rbf;, score=0.937 total time=   0.0s


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


[CV 4/5] END ......C=0.1, gamma=0.1, kernel=rbf;, score=0.937 total time=   0.0s
[CV 5/5] END ......C=0.1, gamma=0.1, kernel=rbf;, score=0.937 total time=   0.0s
[CV 1/5] END ..C=0.1, gamma=0.01, kernel=linear;, score=0.974 total time=   0.0s
[CV 2/5] END ..C=0.1, gamma=0.01, kernel=linear;, score=0.981 total time=   0.0s
[CV 3/5] END ..C=0.1, gamma=0.01, kernel=linear;, score=0.980 total time=   0.0s
[CV 4/5] END ..C=0.1, gamma=0.01, kernel=linear;, score=0.977 total time=   0.0s


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


[CV 5/5] END ..C=0.1, gamma=0.01, kernel=linear;, score=0.983 total time=   0.0s
[CV 1/5] END ....C=0.1, gamma=0.01, kernel=poly;, score=0.939 total time=   0.0s
[CV 2/5] END ....C=0.1, gamma=0.01, kernel=poly;, score=0.937 total time=   0.0s
[CV 3/5] END ....C=0.1, gamma=0.01, kernel=poly;, score=0.937 total time=   0.0s
[CV 4/5] END ....C=0.1, gamma=0.01, kernel=poly;, score=0.937 total time=   0.0s
[CV 5/5] END ....C=0.1, gamma=0.01, kernel=poly;, score=0.937 total time=   0.0s


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


[CV 1/5] END .....C=0.1, gamma=0.01, kernel=rbf;, score=0.939 total time=   0.0s
[CV 2/5] END .....C=0.1, gamma=0.01, kernel=rbf;, score=0.937 total time=   0.0s
[CV 3/5] END .....C=0.1, gamma=0.01, kernel=rbf;, score=0.937 total time=   0.0s
[CV 4/5] END .....C=0.1, gamma=0.01, kernel=rbf;, score=0.937 total time=   0.0s


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


[CV 5/5] END .....C=0.1, gamma=0.01, kernel=rbf;, score=0.937 total time=   0.0s
[CV 1/5] END .C=0.1, gamma=0.001, kernel=linear;, score=0.974 total time=   0.0s
[CV 2/5] END .C=0.1, gamma=0.001, kernel=linear;, score=0.981 total time=   0.0s
[CV 3/5] END .C=0.1, gamma=0.001, kernel=linear;, score=0.980 total time=   0.0s
[CV 4/5] END .C=0.1, gamma=0.001, kernel=linear;, score=0.977 total time=   0.0s
[CV 5/5] END .C=0.1, gamma=0.001, kernel=linear;, score=0.983 total time=   0.0s
[CV 1/5] END ...C=0.1, gamma=0.001, kernel=poly;, score=0.939 total time=   0.0s


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


[CV 2/5] END ...C=0.1, gamma=0.001, kernel=poly;, score=0.937 total time=   0.0s
[CV 3/5] END ...C=0.1, gamma=0.001, kernel=poly;, score=0.937 total time=   0.0s
[CV 4/5] END ...C=0.1, gamma=0.001, kernel=poly;, score=0.937 total time=   0.0s
[CV 5/5] END ...C=0.1, gamma=0.001, kernel=poly;, score=0.937 total time=   0.0s
[CV 1/5] END ....C=0.1, gamma=0.001, kernel=rbf;, score=0.939 total time=   0.0s


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


[CV 2/5] END ....C=0.1, gamma=0.001, kernel=rbf;, score=0.937 total time=   0.0s
[CV 3/5] END ....C=0.1, gamma=0.001, kernel=rbf;, score=0.937 total time=   0.0s
[CV 4/5] END ....C=0.1, gamma=0.001, kernel=rbf;, score=0.937 total time=   0.0s
[CV 5/5] END ....C=0.1, gamma=0.001, kernel=rbf;, score=0.937 total time=   0.0s
[CV 1/5] END C=0.1, gamma=0.0001, kernel=linear;, score=0.974 total time=   0.0s


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


[CV 2/5] END C=0.1, gamma=0.0001, kernel=linear;, score=0.981 total time=   0.0s
[CV 3/5] END C=0.1, gamma=0.0001, kernel=linear;, score=0.980 total time=   0.0s
[CV 4/5] END C=0.1, gamma=0.0001, kernel=linear;, score=0.977 total time=   0.0s
[CV 5/5] END C=0.1, gamma=0.0001, kernel=linear;, score=0.983 total time=   0.0s
[CV 1/5] END ..C=0.1, gamma=0.0001, kernel=poly;, score=0.939 total time=   0.0s
[CV 2/5] END ..C=0.1, gamma=0.0001, kernel=poly;, score=0.937 total time=   0.0s
[CV 3/5] END ..C=0.1, gamma=0.0001, kernel=poly;, score=0.937 total time=   0.0s


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


[CV 4/5] END ..C=0.1, gamma=0.0001, kernel=poly;, score=0.937 total time=   0.0s
[CV 5/5] END ..C=0.1, gamma=0.0001, kernel=poly;, score=0.937 total time=   0.0s
[CV 1/5] END ...C=0.1, gamma=0.0001, kernel=rbf;, score=0.939 total time=   0.0s
[CV 2/5] END ...C=0.1, gamma=0.0001, kernel=rbf;, score=0.937 total time=   0.0s
[CV 3/5] END ...C=0.1, gamma=0.0001, kernel=rbf;, score=0.937 total time=   0.0s


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


[CV 4/5] END ...C=0.1, gamma=0.0001, kernel=rbf;, score=0.937 total time=   0.0s
[CV 5/5] END ...C=0.1, gamma=0.0001, kernel=rbf;, score=0.937 total time=   0.0s
[CV 1/5] END .......C=1, gamma=1, kernel=linear;, score=0.974 total time=   0.0s
[CV 2/5] END .......C=1, gamma=1, kernel=linear;, score=0.981 total time=   0.0s
[CV 3/5] END .......C=1, gamma=1, kernel=linear;, score=0.980 total time=   0.0s
[CV 4/5] END .......C=1, gamma=1, kernel=linear;, score=0.977 total time=   0.0s
[CV 5/5] END .......C=1, gamma=1, kernel=linear;, score=0.983 total time=   0.0s


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


[CV 1/5] END .........C=1, gamma=1, kernel=poly;, score=0.957 total time=   0.8s


  y = column_or_1d(y, warn=True)


[CV 2/5] END .........C=1, gamma=1, kernel=poly;, score=0.954 total time=   1.0s


  y = column_or_1d(y, warn=True)


[CV 3/5] END .........C=1, gamma=1, kernel=poly;, score=0.956 total time=   0.5s


  y = column_or_1d(y, warn=True)


[CV 4/5] END .........C=1, gamma=1, kernel=poly;, score=0.954 total time=   0.6s


  y = column_or_1d(y, warn=True)


[CV 5/5] END .........C=1, gamma=1, kernel=poly;, score=0.960 total time=   0.6s


  y = column_or_1d(y, warn=True)


[CV 1/5] END ..........C=1, gamma=1, kernel=rbf;, score=0.941 total time=   0.2s


  y = column_or_1d(y, warn=True)


[CV 2/5] END ..........C=1, gamma=1, kernel=rbf;, score=0.939 total time=   0.3s


  y = column_or_1d(y, warn=True)


[CV 3/5] END ..........C=1, gamma=1, kernel=rbf;, score=0.940 total time=   0.3s


  y = column_or_1d(y, warn=True)


[CV 4/5] END ..........C=1, gamma=1, kernel=rbf;, score=0.937 total time=   0.3s


  y = column_or_1d(y, warn=True)


[CV 5/5] END ..........C=1, gamma=1, kernel=rbf;, score=0.940 total time=   0.3s
[CV 1/5] END .....C=1, gamma=0.1, kernel=linear;, score=0.974 total time=   0.0s
[CV 2/5] END .....C=1, gamma=0.1, kernel=linear;, score=0.981 total time=   0.0s
[CV 3/5] END .....C=1, gamma=0.1, kernel=linear;, score=0.980 total time=   0.0s
[CV 4/5] END .....C=1, gamma=0.1, kernel=linear;, score=0.977 total time=   0.0s
[CV 5/5] END .....C=1, gamma=0.1, kernel=linear;, score=0.983 total time=   0.0s


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


[CV 1/5] END .......C=1, gamma=0.1, kernel=poly;, score=0.970 total time=   0.0s
[CV 2/5] END .......C=1, gamma=0.1, kernel=poly;, score=0.980 total time=   0.0s
[CV 3/5] END .......C=1, gamma=0.1, kernel=poly;, score=0.977 total time=   0.0s
[CV 4/5] END .......C=1, gamma=0.1, kernel=poly;, score=0.976 total time=   0.0s


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


[CV 5/5] END .......C=1, gamma=0.1, kernel=poly;, score=0.979 total time=   0.0s
[CV 1/5] END ........C=1, gamma=0.1, kernel=rbf;, score=0.969 total time=   0.1s


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


[CV 2/5] END ........C=1, gamma=0.1, kernel=rbf;, score=0.979 total time=   0.0s
[CV 3/5] END ........C=1, gamma=0.1, kernel=rbf;, score=0.980 total time=   0.0s


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


[CV 4/5] END ........C=1, gamma=0.1, kernel=rbf;, score=0.973 total time=   0.0s
[CV 5/5] END ........C=1, gamma=0.1, kernel=rbf;, score=0.974 total time=   0.1s


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


[CV 1/5] END ....C=1, gamma=0.01, kernel=linear;, score=0.974 total time=   0.0s
[CV 2/5] END ....C=1, gamma=0.01, kernel=linear;, score=0.981 total time=   0.0s
[CV 3/5] END ....C=1, gamma=0.01, kernel=linear;, score=0.980 total time=   0.0s
[CV 4/5] END ....C=1, gamma=0.01, kernel=linear;, score=0.977 total time=   0.0s
[CV 5/5] END ....C=1, gamma=0.01, kernel=linear;, score=0.983 total time=   0.0s
[CV 1/5] END ......C=1, gamma=0.01, kernel=poly;, score=0.939 total time=   0.0s
[CV 2/5] END ......C=1, gamma=0.01, kernel=poly;, score=0.937 total time=   0.0s


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


[CV 3/5] END ......C=1, gamma=0.01, kernel=poly;, score=0.937 total time=   0.0s
[CV 4/5] END ......C=1, gamma=0.01, kernel=poly;, score=0.937 total time=   0.0s
[CV 5/5] END ......C=1, gamma=0.01, kernel=poly;, score=0.937 total time=   0.0s
[CV 1/5] END .......C=1, gamma=0.01, kernel=rbf;, score=0.941 total time=   0.0s
[CV 2/5] END .......C=1, gamma=0.01, kernel=rbf;, score=0.937 total time=   0.0s


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


[CV 3/5] END .......C=1, gamma=0.01, kernel=rbf;, score=0.939 total time=   0.0s
[CV 4/5] END .......C=1, gamma=0.01, kernel=rbf;, score=0.940 total time=   0.0s
[CV 5/5] END .......C=1, gamma=0.01, kernel=rbf;, score=0.937 total time=   0.0s
[CV 1/5] END ...C=1, gamma=0.001, kernel=linear;, score=0.974 total time=   0.0s


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


[CV 2/5] END ...C=1, gamma=0.001, kernel=linear;, score=0.981 total time=   0.0s
[CV 3/5] END ...C=1, gamma=0.001, kernel=linear;, score=0.980 total time=   0.0s
[CV 4/5] END ...C=1, gamma=0.001, kernel=linear;, score=0.977 total time=   0.0s
[CV 5/5] END ...C=1, gamma=0.001, kernel=linear;, score=0.983 total time=   0.0s
[CV 1/5] END .....C=1, gamma=0.001, kernel=poly;, score=0.939 total time=   0.0s
[CV 2/5] END .....C=1, gamma=0.001, kernel=poly;, score=0.937 total time=   0.0s
[CV 3/5] END .....C=1, gamma=0.001, kernel=poly;, score=0.937 total time=   0.0s
[CV 4/5] END .....C=1, gamma=0.001, kernel=poly;, score=0.937 total time=   0.0s
[CV 5/5] END .....C=1, gamma=0.001, kernel=poly;, score=0.937 total time=   0.0s
[CV 1/5] END ......C=1, gamma=0.001, kernel=rbf;, score=0.939 total time=   0.0s
[CV 2/5] END ......C=1, gamma=0.001, kernel=rbf;, score=0.937 total time=   0.0s


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


[CV 3/5] END ......C=1, gamma=0.001, kernel=rbf;, score=0.937 total time=   0.0s
[CV 4/5] END ......C=1, gamma=0.001, kernel=rbf;, score=0.937 total time=   0.0s
[CV 5/5] END ......C=1, gamma=0.001, kernel=rbf;, score=0.937 total time=   0.0s
[CV 1/5] END ..C=1, gamma=0.0001, kernel=linear;, score=0.974 total time=   0.0s
[CV 2/5] END ..C=1, gamma=0.0001, kernel=linear;, score=0.981 total time=   0.0s
[CV 3/5] END ..C=1, gamma=0.0001, kernel=linear;, score=0.980 total time=   0.0s


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


[CV 4/5] END ..C=1, gamma=0.0001, kernel=linear;, score=0.977 total time=   0.0s
[CV 5/5] END ..C=1, gamma=0.0001, kernel=linear;, score=0.983 total time=   0.0s
[CV 1/5] END ....C=1, gamma=0.0001, kernel=poly;, score=0.939 total time=   0.0s
[CV 2/5] END ....C=1, gamma=0.0001, kernel=poly;, score=0.937 total time=   0.0s
[CV 3/5] END ....C=1, gamma=0.0001, kernel=poly;, score=0.937 total time=   0.0s
[CV 4/5] END ....C=1, gamma=0.0001, kernel=poly;, score=0.937 total time=   0.0s
[CV 5/5] END ....C=1, gamma=0.0001, kernel=poly;, score=0.937 total time=   0.0s


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


[CV 1/5] END .....C=1, gamma=0.0001, kernel=rbf;, score=0.939 total time=   0.0s
[CV 2/5] END .....C=1, gamma=0.0001, kernel=rbf;, score=0.937 total time=   0.0s
[CV 3/5] END .....C=1, gamma=0.0001, kernel=rbf;, score=0.937 total time=   0.0s
[CV 4/5] END .....C=1, gamma=0.0001, kernel=rbf;, score=0.937 total time=   0.0s


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


[CV 5/5] END .....C=1, gamma=0.0001, kernel=rbf;, score=0.937 total time=   0.0s
[CV 1/5] END ......C=10, gamma=1, kernel=linear;, score=0.974 total time=   0.0s
[CV 2/5] END ......C=10, gamma=1, kernel=linear;, score=0.981 total time=   0.0s
[CV 3/5] END ......C=10, gamma=1, kernel=linear;, score=0.980 total time=   0.0s
[CV 4/5] END ......C=10, gamma=1, kernel=linear;, score=0.977 total time=   0.0s
[CV 5/5] END ......C=10, gamma=1, kernel=linear;, score=0.983 total time=   0.0s


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


[CV 1/5] END ........C=10, gamma=1, kernel=poly;, score=0.947 total time=   2.0s


  y = column_or_1d(y, warn=True)


[CV 2/5] END ........C=10, gamma=1, kernel=poly;, score=0.941 total time=   2.7s


  y = column_or_1d(y, warn=True)


[CV 3/5] END ........C=10, gamma=1, kernel=poly;, score=0.949 total time=   1.9s


  y = column_or_1d(y, warn=True)


[CV 4/5] END ........C=10, gamma=1, kernel=poly;, score=0.947 total time=   2.5s


  y = column_or_1d(y, warn=True)


[CV 5/5] END ........C=10, gamma=1, kernel=poly;, score=0.956 total time=   3.4s


  y = column_or_1d(y, warn=True)


[CV 1/5] END .........C=10, gamma=1, kernel=rbf;, score=0.934 total time=   0.2s


  y = column_or_1d(y, warn=True)


[CV 2/5] END .........C=10, gamma=1, kernel=rbf;, score=0.937 total time=   0.2s


  y = column_or_1d(y, warn=True)


[CV 3/5] END .........C=10, gamma=1, kernel=rbf;, score=0.940 total time=   0.2s


  y = column_or_1d(y, warn=True)


[CV 4/5] END .........C=10, gamma=1, kernel=rbf;, score=0.939 total time=   0.1s


  y = column_or_1d(y, warn=True)


[CV 5/5] END .........C=10, gamma=1, kernel=rbf;, score=0.939 total time=   0.2s
[CV 1/5] END ....C=10, gamma=0.1, kernel=linear;, score=0.974 total time=   0.0s
[CV 2/5] END ....C=10, gamma=0.1, kernel=linear;, score=0.981 total time=   0.0s
[CV 3/5] END ....C=10, gamma=0.1, kernel=linear;, score=0.980 total time=   0.0s
[CV 4/5] END ....C=10, gamma=0.1, kernel=linear;, score=0.977 total time=   0.0s
[CV 5/5] END ....C=10, gamma=0.1, kernel=linear;, score=0.983 total time=   0.0s


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


[CV 1/5] END ......C=10, gamma=0.1, kernel=poly;, score=0.971 total time=   0.0s
[CV 2/5] END ......C=10, gamma=0.1, kernel=poly;, score=0.976 total time=   0.0s
[CV 3/5] END ......C=10, gamma=0.1, kernel=poly;, score=0.976 total time=   0.0s


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


[CV 4/5] END ......C=10, gamma=0.1, kernel=poly;, score=0.971 total time=   0.0s
[CV 5/5] END ......C=10, gamma=0.1, kernel=poly;, score=0.979 total time=   0.0s
[CV 1/5] END .......C=10, gamma=0.1, kernel=rbf;, score=0.971 total time=   0.0s


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


[CV 2/5] END .......C=10, gamma=0.1, kernel=rbf;, score=0.980 total time=   0.0s
[CV 3/5] END .......C=10, gamma=0.1, kernel=rbf;, score=0.979 total time=   0.0s
[CV 4/5] END .......C=10, gamma=0.1, kernel=rbf;, score=0.971 total time=   0.0s
[CV 5/5] END .......C=10, gamma=0.1, kernel=rbf;, score=0.977 total time=   0.0s


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


[CV 1/5] END ...C=10, gamma=0.01, kernel=linear;, score=0.974 total time=   0.0s
[CV 2/5] END ...C=10, gamma=0.01, kernel=linear;, score=0.981 total time=   0.0s
[CV 3/5] END ...C=10, gamma=0.01, kernel=linear;, score=0.980 total time=   0.0s
[CV 4/5] END ...C=10, gamma=0.01, kernel=linear;, score=0.977 total time=   0.0s
[CV 5/5] END ...C=10, gamma=0.01, kernel=linear;, score=0.983 total time=   0.0s
[CV 1/5] END .....C=10, gamma=0.01, kernel=poly;, score=0.939 total time=   0.0s


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


[CV 2/5] END .....C=10, gamma=0.01, kernel=poly;, score=0.937 total time=   0.0s
[CV 3/5] END .....C=10, gamma=0.01, kernel=poly;, score=0.937 total time=   0.0s
[CV 4/5] END .....C=10, gamma=0.01, kernel=poly;, score=0.939 total time=   0.0s
[CV 5/5] END .....C=10, gamma=0.01, kernel=poly;, score=0.937 total time=   0.0s
[CV 1/5] END ......C=10, gamma=0.01, kernel=rbf;, score=0.974 total time=   0.0s


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


[CV 2/5] END ......C=10, gamma=0.01, kernel=rbf;, score=0.981 total time=   0.0s
[CV 3/5] END ......C=10, gamma=0.01, kernel=rbf;, score=0.980 total time=   0.0s
[CV 4/5] END ......C=10, gamma=0.01, kernel=rbf;, score=0.977 total time=   0.0s


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


[CV 5/5] END ......C=10, gamma=0.01, kernel=rbf;, score=0.983 total time=   0.0s
[CV 1/5] END ..C=10, gamma=0.001, kernel=linear;, score=0.974 total time=   0.0s
[CV 2/5] END ..C=10, gamma=0.001, kernel=linear;, score=0.981 total time=   0.0s
[CV 3/5] END ..C=10, gamma=0.001, kernel=linear;, score=0.980 total time=   0.0s
[CV 4/5] END ..C=10, gamma=0.001, kernel=linear;, score=0.977 total time=   0.0s
[CV 5/5] END ..C=10, gamma=0.001, kernel=linear;, score=0.983 total time=   0.0s


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


[CV 1/5] END ....C=10, gamma=0.001, kernel=poly;, score=0.939 total time=   0.0s
[CV 2/5] END ....C=10, gamma=0.001, kernel=poly;, score=0.937 total time=   0.0s
[CV 3/5] END ....C=10, gamma=0.001, kernel=poly;, score=0.937 total time=   0.0s
[CV 4/5] END ....C=10, gamma=0.001, kernel=poly;, score=0.937 total time=   0.0s
[CV 5/5] END ....C=10, gamma=0.001, kernel=poly;, score=0.937 total time=   0.0s
[CV 1/5] END .....C=10, gamma=0.001, kernel=rbf;, score=0.941 total time=   0.0s


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


[CV 2/5] END .....C=10, gamma=0.001, kernel=rbf;, score=0.939 total time=   0.0s
[CV 3/5] END .....C=10, gamma=0.001, kernel=rbf;, score=0.939 total time=   0.0s
[CV 4/5] END .....C=10, gamma=0.001, kernel=rbf;, score=0.940 total time=   0.0s


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


[CV 5/5] END .....C=10, gamma=0.001, kernel=rbf;, score=0.937 total time=   0.0s
[CV 1/5] END .C=10, gamma=0.0001, kernel=linear;, score=0.974 total time=   0.0s
[CV 2/5] END .C=10, gamma=0.0001, kernel=linear;, score=0.981 total time=   0.0s
[CV 3/5] END .C=10, gamma=0.0001, kernel=linear;, score=0.980 total time=   0.0s
[CV 4/5] END .C=10, gamma=0.0001, kernel=linear;, score=0.977 total time=   0.0s
[CV 5/5] END .C=10, gamma=0.0001, kernel=linear;, score=0.983 total time=   0.0s


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


[CV 1/5] END ...C=10, gamma=0.0001, kernel=poly;, score=0.939 total time=   0.0s
[CV 2/5] END ...C=10, gamma=0.0001, kernel=poly;, score=0.937 total time=   0.0s
[CV 3/5] END ...C=10, gamma=0.0001, kernel=poly;, score=0.937 total time=   0.0s
[CV 4/5] END ...C=10, gamma=0.0001, kernel=poly;, score=0.937 total time=   0.0s
[CV 5/5] END ...C=10, gamma=0.0001, kernel=poly;, score=0.937 total time=   0.0s
[CV 1/5] END ....C=10, gamma=0.0001, kernel=rbf;, score=0.939 total time=   0.0s


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


[CV 2/5] END ....C=10, gamma=0.0001, kernel=rbf;, score=0.937 total time=   0.0s
[CV 3/5] END ....C=10, gamma=0.0001, kernel=rbf;, score=0.937 total time=   0.0s
[CV 4/5] END ....C=10, gamma=0.0001, kernel=rbf;, score=0.937 total time=   0.0s
[CV 5/5] END ....C=10, gamma=0.0001, kernel=rbf;, score=0.937 total time=   0.0s


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


[CV 1/5] END .....C=100, gamma=1, kernel=linear;, score=0.974 total time=  12.3s


  y = column_or_1d(y, warn=True)


[CV 2/5] END .....C=100, gamma=1, kernel=linear;, score=0.981 total time=  10.8s


  y = column_or_1d(y, warn=True)


[CV 3/5] END .....C=100, gamma=1, kernel=linear;, score=0.980 total time=   9.8s


  y = column_or_1d(y, warn=True)


[CV 4/5] END .....C=100, gamma=1, kernel=linear;, score=0.977 total time=  18.9s


  y = column_or_1d(y, warn=True)


[CV 5/5] END .....C=100, gamma=1, kernel=linear;, score=0.983 total time=   9.4s


  y = column_or_1d(y, warn=True)


[CV 1/5] END .......C=100, gamma=1, kernel=poly;, score=0.950 total time=   4.0s


  y = column_or_1d(y, warn=True)


[CV 2/5] END .......C=100, gamma=1, kernel=poly;, score=0.943 total time=   7.7s


  y = column_or_1d(y, warn=True)


[CV 3/5] END .......C=100, gamma=1, kernel=poly;, score=0.946 total time=  11.1s


  y = column_or_1d(y, warn=True)


[CV 4/5] END .......C=100, gamma=1, kernel=poly;, score=0.937 total time=   7.1s


  y = column_or_1d(y, warn=True)


[CV 5/5] END .......C=100, gamma=1, kernel=poly;, score=0.953 total time=  15.6s


  y = column_or_1d(y, warn=True)


[CV 1/5] END ........C=100, gamma=1, kernel=rbf;, score=0.933 total time=   0.1s


  y = column_or_1d(y, warn=True)


[CV 2/5] END ........C=100, gamma=1, kernel=rbf;, score=0.933 total time=   0.2s


  y = column_or_1d(y, warn=True)


[CV 3/5] END ........C=100, gamma=1, kernel=rbf;, score=0.941 total time=   0.1s


  y = column_or_1d(y, warn=True)


[CV 4/5] END ........C=100, gamma=1, kernel=rbf;, score=0.939 total time=   0.2s


  y = column_or_1d(y, warn=True)


[CV 5/5] END ........C=100, gamma=1, kernel=rbf;, score=0.937 total time=   0.1s


  y = column_or_1d(y, warn=True)


[CV 1/5] END ...C=100, gamma=0.1, kernel=linear;, score=0.974 total time=  13.5s


  y = column_or_1d(y, warn=True)


[CV 2/5] END ...C=100, gamma=0.1, kernel=linear;, score=0.981 total time=   9.5s


  y = column_or_1d(y, warn=True)


[CV 3/5] END ...C=100, gamma=0.1, kernel=linear;, score=0.980 total time=  10.3s


  y = column_or_1d(y, warn=True)


[CV 4/5] END ...C=100, gamma=0.1, kernel=linear;, score=0.977 total time=  19.7s


  y = column_or_1d(y, warn=True)


[CV 5/5] END ...C=100, gamma=0.1, kernel=linear;, score=0.983 total time=  11.4s


  y = column_or_1d(y, warn=True)


[CV 1/5] END .....C=100, gamma=0.1, kernel=poly;, score=0.970 total time=   0.3s


  y = column_or_1d(y, warn=True)


[CV 2/5] END .....C=100, gamma=0.1, kernel=poly;, score=0.967 total time=   0.2s


  y = column_or_1d(y, warn=True)


[CV 3/5] END .....C=100, gamma=0.1, kernel=poly;, score=0.961 total time=   0.2s


  y = column_or_1d(y, warn=True)


[CV 4/5] END .....C=100, gamma=0.1, kernel=poly;, score=0.963 total time=   0.2s


  y = column_or_1d(y, warn=True)


[CV 5/5] END .....C=100, gamma=0.1, kernel=poly;, score=0.969 total time=   0.2s
[CV 1/5] END ......C=100, gamma=0.1, kernel=rbf;, score=0.960 total time=   0.0s
[CV 2/5] END ......C=100, gamma=0.1, kernel=rbf;, score=0.969 total time=   0.0s


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


[CV 3/5] END ......C=100, gamma=0.1, kernel=rbf;, score=0.969 total time=   0.0s
[CV 4/5] END ......C=100, gamma=0.1, kernel=rbf;, score=0.963 total time=   0.0s
[CV 5/5] END ......C=100, gamma=0.1, kernel=rbf;, score=0.953 total time=   0.0s


  y = column_or_1d(y, warn=True)


[CV 1/5] END ..C=100, gamma=0.01, kernel=linear;, score=0.974 total time=  14.3s


  y = column_or_1d(y, warn=True)


[CV 2/5] END ..C=100, gamma=0.01, kernel=linear;, score=0.981 total time=   9.4s


  y = column_or_1d(y, warn=True)


[CV 3/5] END ..C=100, gamma=0.01, kernel=linear;, score=0.980 total time=   8.2s


  y = column_or_1d(y, warn=True)


[CV 4/5] END ..C=100, gamma=0.01, kernel=linear;, score=0.977 total time=  16.8s


  y = column_or_1d(y, warn=True)


[CV 5/5] END ..C=100, gamma=0.01, kernel=linear;, score=0.983 total time=   8.8s
[CV 1/5] END ....C=100, gamma=0.01, kernel=poly;, score=0.961 total time=   0.0s
[CV 2/5] END ....C=100, gamma=0.01, kernel=poly;, score=0.956 total time=   0.0s
[CV 3/5] END ....C=100, gamma=0.01, kernel=poly;, score=0.964 total time=   0.0s
[CV 4/5] END ....C=100, gamma=0.01, kernel=poly;, score=0.964 total time=   0.0s
[CV 5/5] END ....C=100, gamma=0.01, kernel=poly;, score=0.961 total time=   0.0s


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


[CV 1/5] END .....C=100, gamma=0.01, kernel=rbf;, score=0.974 total time=   0.0s
[CV 2/5] END .....C=100, gamma=0.01, kernel=rbf;, score=0.981 total time=   0.0s


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


[CV 3/5] END .....C=100, gamma=0.01, kernel=rbf;, score=0.980 total time=   0.0s
[CV 4/5] END .....C=100, gamma=0.01, kernel=rbf;, score=0.977 total time=   0.0s
[CV 5/5] END .....C=100, gamma=0.01, kernel=rbf;, score=0.983 total time=   0.0s


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


[CV 1/5] END .C=100, gamma=0.001, kernel=linear;, score=0.974 total time=  11.9s


  y = column_or_1d(y, warn=True)


[CV 2/5] END .C=100, gamma=0.001, kernel=linear;, score=0.981 total time=   8.2s


  y = column_or_1d(y, warn=True)


[CV 3/5] END .C=100, gamma=0.001, kernel=linear;, score=0.980 total time=   7.8s


  y = column_or_1d(y, warn=True)


[CV 4/5] END .C=100, gamma=0.001, kernel=linear;, score=0.977 total time=  16.2s


  y = column_or_1d(y, warn=True)


[CV 5/5] END .C=100, gamma=0.001, kernel=linear;, score=0.983 total time=   8.5s
[CV 1/5] END ...C=100, gamma=0.001, kernel=poly;, score=0.939 total time=   0.0s
[CV 2/5] END ...C=100, gamma=0.001, kernel=poly;, score=0.937 total time=   0.0s
[CV 3/5] END ...C=100, gamma=0.001, kernel=poly;, score=0.937 total time=   0.0s


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


[CV 4/5] END ...C=100, gamma=0.001, kernel=poly;, score=0.937 total time=   0.0s
[CV 5/5] END ...C=100, gamma=0.001, kernel=poly;, score=0.937 total time=   0.0s
[CV 1/5] END ....C=100, gamma=0.001, kernel=rbf;, score=0.974 total time=   0.0s


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


[CV 2/5] END ....C=100, gamma=0.001, kernel=rbf;, score=0.981 total time=   0.0s
[CV 3/5] END ....C=100, gamma=0.001, kernel=rbf;, score=0.980 total time=   0.0s


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


[CV 4/5] END ....C=100, gamma=0.001, kernel=rbf;, score=0.977 total time=   0.0s
[CV 5/5] END ....C=100, gamma=0.001, kernel=rbf;, score=0.983 total time=   0.0s


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


[CV 1/5] END C=100, gamma=0.0001, kernel=linear;, score=0.974 total time=  12.1s


  y = column_or_1d(y, warn=True)


[CV 2/5] END C=100, gamma=0.0001, kernel=linear;, score=0.981 total time=   8.9s


  y = column_or_1d(y, warn=True)


[CV 3/5] END C=100, gamma=0.0001, kernel=linear;, score=0.980 total time=   8.7s


  y = column_or_1d(y, warn=True)


[CV 4/5] END C=100, gamma=0.0001, kernel=linear;, score=0.977 total time=  17.7s


  y = column_or_1d(y, warn=True)


[CV 5/5] END C=100, gamma=0.0001, kernel=linear;, score=0.983 total time=  10.2s
[CV 1/5] END ..C=100, gamma=0.0001, kernel=poly;, score=0.939 total time=   0.0s
[CV 2/5] END ..C=100, gamma=0.0001, kernel=poly;, score=0.937 total time=   0.0s
[CV 3/5] END ..C=100, gamma=0.0001, kernel=poly;, score=0.937 total time=   0.0s
[CV 4/5] END ..C=100, gamma=0.0001, kernel=poly;, score=0.937 total time=   0.0s
[CV 5/5] END ..C=100, gamma=0.0001, kernel=poly;, score=0.937 total time=   0.0s


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


[CV 1/5] END ...C=100, gamma=0.0001, kernel=rbf;, score=0.941 total time=   0.0s
[CV 2/5] END ...C=100, gamma=0.0001, kernel=rbf;, score=0.939 total time=   0.0s
[CV 3/5] END ...C=100, gamma=0.0001, kernel=rbf;, score=0.939 total time=   0.0s


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


[CV 4/5] END ...C=100, gamma=0.0001, kernel=rbf;, score=0.940 total time=   0.0s
[CV 5/5] END ...C=100, gamma=0.0001, kernel=rbf;, score=0.937 total time=   0.0s


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


[CV 1/5] END ....C=1000, gamma=1, kernel=linear;, score=0.974 total time= 1.2min


  y = column_or_1d(y, warn=True)


[CV 2/5] END ....C=1000, gamma=1, kernel=linear;, score=0.981 total time= 1.3min


  y = column_or_1d(y, warn=True)


[CV 3/5] END ....C=1000, gamma=1, kernel=linear;, score=0.980 total time= 1.4min


  y = column_or_1d(y, warn=True)


[CV 4/5] END ....C=1000, gamma=1, kernel=linear;, score=0.977 total time= 1.2min


  y = column_or_1d(y, warn=True)


[CV 5/5] END ....C=1000, gamma=1, kernel=linear;, score=0.983 total time= 1.0min


  y = column_or_1d(y, warn=True)


[CV 1/5] END ......C=1000, gamma=1, kernel=poly;, score=0.947 total time=   5.2s


  y = column_or_1d(y, warn=True)


[CV 2/5] END ......C=1000, gamma=1, kernel=poly;, score=0.931 total time=   6.6s


  y = column_or_1d(y, warn=True)


[CV 3/5] END ......C=1000, gamma=1, kernel=poly;, score=0.940 total time=  12.1s


  y = column_or_1d(y, warn=True)


[CV 4/5] END ......C=1000, gamma=1, kernel=poly;, score=0.943 total time=   9.7s


  y = column_or_1d(y, warn=True)


[CV 5/5] END ......C=1000, gamma=1, kernel=poly;, score=0.934 total time=  18.2s


  y = column_or_1d(y, warn=True)


[CV 1/5] END .......C=1000, gamma=1, kernel=rbf;, score=0.933 total time=   0.2s


  y = column_or_1d(y, warn=True)


[CV 2/5] END .......C=1000, gamma=1, kernel=rbf;, score=0.933 total time=   0.2s


  y = column_or_1d(y, warn=True)


[CV 3/5] END .......C=1000, gamma=1, kernel=rbf;, score=0.941 total time=   0.2s


  y = column_or_1d(y, warn=True)


[CV 4/5] END .......C=1000, gamma=1, kernel=rbf;, score=0.939 total time=   0.2s


  y = column_or_1d(y, warn=True)


[CV 5/5] END .......C=1000, gamma=1, kernel=rbf;, score=0.937 total time=   0.2s


  y = column_or_1d(y, warn=True)


[CV 1/5] END ..C=1000, gamma=0.1, kernel=linear;, score=0.974 total time= 1.3min


  y = column_or_1d(y, warn=True)


In [None]:
c_matrix = confusion_matrix(y_test, grid.predict(X_test))
TP = c_matrix[1][1]
TN = c_matrix[0][0]
FP = c_matrix[0][1]
FN = c_matrix[1][0]
performance = pd.concat([performance, pd.DataFrame({'model':"Grid search SVM Linear", 
                                                    'Accuracy': [(TP+TN)/(TP+TN+FP+FN)], 
                                                    'Precision': [TP/(TP+FP)], 
                                                    'Recall': [TP/(TP+FN)], 
                                                    'F1': [2*TP/(2*TP+FP+FN)]
                                                     }, index=[0])])

# Decision tree model

In [None]:
classifier = DecisionTreeClassifier()
classifier = classifier.fit(X_train,y_train)

In [None]:
c_matrix = confusion_matrix(y_test, classifier.predict(X_test))
TP = c_matrix[1][1]
TN = c_matrix[0][0]
FP = c_matrix[0][1]
FN = c_matrix[1][0]
performance = pd.concat([performance, pd.DataFrame({'model':"DT", 
                                                    'Accuracy': [(TP+TN)/(TP+TN+FP+FN)], 
                                                    'Precision': [TP/(TP+FP)], 
                                                    'Recall': [TP/(TP+FN)], 
                                                    'F1': [2*TP/(2*TP+FP+FN)]
                                                     }, index=[0])])

# Decision tree model using the randomsearch

In [None]:
score_measure = "recall"
kfolds = 5

param_grid = {
    'min_samples_split': np.arange(1,60),  
    'min_samples_leaf': np.arange(1,50),
    'min_impurity_decrease': np.arange(0.0001, 0.01, 0.0005),
    'max_leaf_nodes': np.arange(5, 200), 
    'max_depth': np.arange(1,50), 
    'criterion': ['entropy', 'gini'],
}

dtree = DecisionTreeClassifier()
rand_search = RandomizedSearchCV(estimator = dtree, param_distributions=param_grid, cv=kfolds, n_iter=500,
                           scoring=score_measure, verbose=1, n_jobs=-1, return_train_score=True)

_ = rand_search.fit(X_train, y_train)

print(f"The best {score_measure} score is {rand_search.best_score_}")
print(f"... with parameters: {rand_search.best_params_}")

bestRecallTree = rand_search.best_estimator_

In [None]:
c_matrix = confusion_matrix(y_test, rand_search.predict(X_test))
TP = c_matrix[1][1]
TN = c_matrix[0][0]
FP = c_matrix[0][1]
FN = c_matrix[1][0]
performance = pd.concat([performance, pd.DataFrame({'model':"Decision tree random search", 
                                                    'Accuracy': [(TP+TN)/(TP+TN+FP+FN)], 
                                                    'Precision': [TP/(TP+FP)], 
                                                    'Recall': [TP/(TP+FN)], 
                                                    'F1': [2*TP/(2*TP+FP+FN)]
                                                     }, index=[0])])

# Decision tree model using the Gridsearch

In [None]:
score_measure = "recall"
kfolds = 5

param_grid = {
    'min_samples_split': np.arange(7,10),  
    'min_samples_leaf': np.arange(1,5),
    'min_impurity_decrease': np.arange(0.0048, 0.0054, 0.0001),
    'max_leaf_nodes': np.arange(162,168), 
    'max_depth': np.arange(15,21), 
    'criterion': ['entropy'],
}

dtree = DecisionTreeClassifier()
grid_search = GridSearchCV(estimator = dtree, param_grid=param_grid, cv=kfolds, 
                           scoring=score_measure, verbose=1, n_jobs=-1,  # n_jobs=-1 will utilize all available CPUs 
                           return_train_score=True)

_ = grid_search.fit(X_train, y_train)

print(f"The best {score_measure} score is {grid_search.best_score_}")
print(f"... with parameters: {grid_search.best_params_}")

bestRecallTree = grid_search.best_estimator_

In [None]:
c_matrix = confusion_matrix(y_test, grid_search.predict(X_test))
TP = c_matrix[1][1]
TN = c_matrix[0][0]
FP = c_matrix[0][1]
FN = c_matrix[1][0]
performance = pd.concat([performance, pd.DataFrame({'model':"Grid search", 
                                                    'Accuracy': [(TP+TN)/(TP+TN+FP+FN)], 
                                                    'Precision': [TP/(TP+FP)], 
                                                    'Recall': [TP/(TP+FN)], 
                                                    'F1': [2*TP/(2*TP+FP+FN)]
                                                     }, index=[0])])

In [None]:
performance.sort_values(by=['Recall'])

In [None]:
### Decision tree is the best performing model with a recall score 0f 65.