# Hyperparameter tuning

## Importing the libraries

In [1]:
import numpy as np
import pandas as pd
from sklearn import preprocessing
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import RandomizedSearchCV
from sklearn import model_selection
from xgboost import XGBClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn import metrics
from sklearn.linear_model import LogisticRegression
from random import randint

## Import the dataset

In [2]:
data = pd.read_csv(r'..\Datasets\Sri Lankan Voice Recordings.csv', header = 0)
data.head()

Unnamed: 0,Subject Id,Jitter(local),"Jitter(local, absolute)",Jitter (rap),Jitter (ppq5),Jitter (ddp),Shimmer (local),"Shimmer (local, db)",Shimmer (apq3),Shimmer (apq5),...,Maximum pitch,Number of pulses,Number of periods,Mean period,Standard deviation of period,Fraction of locally unvoiced frames,Number of voice breaks,Degree of voice breaks,UPDRS,status
0,1,0.01488,9e-05,0.009,0.00794,0.02699,0.08334,0.779,0.04517,0.04609,...,187.576,160,159,0.006065,0.000416,0.0,0,0.0,23,1
1,1,0.00728,3.8e-05,0.00353,0.00376,0.01059,0.05864,0.642,0.02058,0.0318,...,234.505,170,169,0.005181,0.000403,0.02247,0,0.0,23,1
2,1,0.0122,7.4e-05,0.00732,0.0067,0.02196,0.08719,0.875,0.04347,0.05166,...,211.442,1431,1427,0.006071,0.000474,0.10656,1,0.00178,23,1
3,2,0.00427,3.4e-05,0.00243,0.00249,0.00728,0.07428,0.694,0.04205,0.04311,...,129.205,345,344,0.008073,0.000143,0.0,0,0.0,8,1
4,2,0.00844,6.9e-05,0.00509,0.00489,0.01527,0.14053,1.239,0.06131,0.07015,...,126.788,585,582,0.008174,0.000123,0.00209,2,0.00768,8,1


In [3]:
data.shape

(120, 29)

In [4]:
X = data.iloc[:, 1:24].values
X

array([[1.488000e-02, 9.021300e-05, 9.000000e-03, ..., 1.590000e+02,
        6.064725e-03, 4.162760e-04],
       [7.280000e-03, 3.769800e-05, 3.530000e-03, ..., 1.690000e+02,
        5.181253e-03, 4.034940e-04],
       [1.220000e-02, 7.404100e-05, 7.320000e-03, ..., 1.427000e+03,
        6.070749e-03, 4.742890e-04],
       ...,
       [4.710000e-03, 3.383700e-05, 1.860000e-03, ..., 1.750000e+02,
        7.178780e-03, 1.298030e-04],
       [4.319000e-02, 3.171220e-04, 2.774000e-02, ..., 2.560000e+02,
        7.342534e-03, 3.119150e-04],
       [1.543000e-02, 1.033570e-04, 1.004000e-02, ..., 4.250000e+02,
        6.700038e-03, 1.082910e-04]])

In [5]:
y = data.iloc[:, -1].values
y

array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0], dtype=int64)

In [6]:
standard_X = StandardScaler()
X_scaled_standard = standard_X.fit_transform(X)
X_scaled_standard

array([[ 0.87868587,  0.77094821,  1.07622998, ..., -0.64799993,
        -0.18879174, -0.06122861],
       [-0.20384012, -0.39022832, -0.21557662, ..., -0.61325462,
        -0.75744717, -0.07935857],
       [ 0.49695302,  0.41336377,  0.67947768, ...,  3.75770466,
        -0.18491433,  0.02105691],
       ...,
       [-0.56990483, -0.47560017, -0.60996731, ..., -0.59240744,
         0.52828072, -0.46756133],
       [ 4.9110952 ,  5.78820816,  5.50190746, ..., -0.31097047,
         0.63368259, -0.20925404],
       [ 0.95702657,  1.06157954,  1.32183855, ...,  0.27622517,
         0.22013372, -0.49807391]])

In [7]:
nums = np.linspace(5, 1e-03, 200)
print(nums)
print(len(nums))

[5.00000000e+00 4.97487940e+00 4.94975879e+00 4.92463819e+00
 4.89951759e+00 4.87439698e+00 4.84927638e+00 4.82415578e+00
 4.79903518e+00 4.77391457e+00 4.74879397e+00 4.72367337e+00
 4.69855276e+00 4.67343216e+00 4.64831156e+00 4.62319095e+00
 4.59807035e+00 4.57294975e+00 4.54782915e+00 4.52270854e+00
 4.49758794e+00 4.47246734e+00 4.44734673e+00 4.42222613e+00
 4.39710553e+00 4.37198492e+00 4.34686432e+00 4.32174372e+00
 4.29662312e+00 4.27150251e+00 4.24638191e+00 4.22126131e+00
 4.19614070e+00 4.17102010e+00 4.14589950e+00 4.12077889e+00
 4.09565829e+00 4.07053769e+00 4.04541709e+00 4.02029648e+00
 3.99517588e+00 3.97005528e+00 3.94493467e+00 3.91981407e+00
 3.89469347e+00 3.86957286e+00 3.84445226e+00 3.81933166e+00
 3.79421106e+00 3.76909045e+00 3.74396985e+00 3.71884925e+00
 3.69372864e+00 3.66860804e+00 3.64348744e+00 3.61836683e+00
 3.59324623e+00 3.56812563e+00 3.54300503e+00 3.51788442e+00
 3.49276382e+00 3.46764322e+00 3.44252261e+00 3.41740201e+00
 3.39228141e+00 3.367160

## Performing Grid Search on Various Models

### Logistic Regression

In [7]:
leave_one_out = model_selection.LeaveOneOut()

In [9]:
log_param_grid = {'penalty' : ['l1', 'l2', 'elasticnet', 'none'], 'C' : np.linspace(0.000001, 0.001, 100),
                  'solver' : ['newton-cg', 'lbfgs', 'liblinear', 'sag', 'saga'], 
}

In [10]:
model = LogisticRegression(random_state=20)
model_logreg = GridSearchCV(model, log_param_grid, cv = 5, n_jobs = 4, verbose = 1)
model_logreg.fit(X_scaled_standard, y)

Fitting 5 folds for each of 2000 candidates, totalling 10000 fits


[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  88 tasks      | elapsed:    1.7s
[Parallel(n_jobs=4)]: Done 8712 tasks      | elapsed:   22.3s
[Parallel(n_jobs=4)]: Done 10000 out of 10000 | elapsed:   25.3s finished


GridSearchCV(cv=5, error_score=nan,
             estimator=LogisticRegression(C=1.0, class_weight=None, dual=False,
                                          fit_intercept=True,
                                          intercept_scaling=1, l1_ratio=None,
                                          max_iter=100, multi_class='auto',
                                          n_jobs=None, penalty='l2',
                                          random_state=20, solver='lbfgs',
                                          tol=0.0001, verbose=0,
                                          warm_start=False),
             iid='deprecated', n_jobs=4,
             param_grid={'C': array([1.00000000e-06, 1.109090...
       8.89000000e-04, 8.99090909e-04, 9.09181818e-04, 9.19272727e-04,
       9.29363636e-04, 9.39454545e-04, 9.49545455e-04, 9.59636364e-04,
       9.69727273e-04, 9.79818182e-04, 9.89909091e-04, 1.00000000e-03]),
                         'penalty': ['l1', 'l2', 'elasticnet', 'none'],
     

In [11]:
model_logreg.best_params_

{'C': 8.172727272727273e-05, 'penalty': 'l2', 'solver': 'sag'}

In [12]:
model_logreg.best_score_

0.5333333333333334

In [13]:
model = LogisticRegression()
model_logreg = GridSearchCV(model, log_param_grid, cv = 10, n_jobs = 4, verbose = 1)
model_logreg.fit(X_scaled_standard, y)

Fitting 10 folds for each of 2000 candidates, totalling 20000 fits


[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done 328 tasks      | elapsed:    0.6s
[Parallel(n_jobs=4)]: Done 9720 tasks      | elapsed:   21.9s
[Parallel(n_jobs=4)]: Done 20000 out of 20000 | elapsed:   43.5s finished
  "Setting penalty='none' will ignore the C and l1_ratio "


GridSearchCV(cv=10, error_score=nan,
             estimator=LogisticRegression(C=1.0, class_weight=None, dual=False,
                                          fit_intercept=True,
                                          intercept_scaling=1, l1_ratio=None,
                                          max_iter=100, multi_class='auto',
                                          n_jobs=None, penalty='l2',
                                          random_state=None, solver='lbfgs',
                                          tol=0.0001, verbose=0,
                                          warm_start=False),
             iid='deprecated', n_jobs=4,
             param_grid={'C': array([1.00000000e-06, 1.109...
       8.89000000e-04, 8.99090909e-04, 9.09181818e-04, 9.19272727e-04,
       9.29363636e-04, 9.39454545e-04, 9.49545455e-04, 9.59636364e-04,
       9.69727273e-04, 9.79818182e-04, 9.89909091e-04, 1.00000000e-03]),
                         'penalty': ['l1', 'l2', 'elasticnet', 'none'],
     

In [14]:
model_logreg.best_params_

{'C': 1e-06, 'penalty': 'none', 'solver': 'sag'}

In [15]:
model_logreg.best_score_

0.575

In [16]:
model = LogisticRegression()
model_logreg = GridSearchCV(model, log_param_grid, cv = leave_one_out, n_jobs = 4, verbose = 1)
model_logreg.fit(X_scaled_standard, y)

Fitting 120 folds for each of 2000 candidates, totalling 240000 fits


[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done 312 tasks      | elapsed:    0.4s
[Parallel(n_jobs=4)]: Done 8736 tasks      | elapsed:   16.0s
[Parallel(n_jobs=4)]: Done 19520 tasks      | elapsed:   41.0s
[Parallel(n_jobs=4)]: Done 41952 tasks      | elapsed:  1.4min
[Parallel(n_jobs=4)]: Done 77424 tasks      | elapsed:  2.6min
[Parallel(n_jobs=4)]: Done 104012 tasks      | elapsed:  3.5min
[Parallel(n_jobs=4)]: Done 136208 tasks      | elapsed:  4.6min
[Parallel(n_jobs=4)]: Done 169760 tasks      | elapsed:  5.7min
[Parallel(n_jobs=4)]: Done 236016 tasks      | elapsed:  7.9min
[Parallel(n_jobs=4)]: Done 240000 out of 240000 | elapsed:  8.0min finished
  "Setting penalty='none' will ignore the C and l1_ratio "


GridSearchCV(cv=LeaveOneOut(), error_score=nan,
             estimator=LogisticRegression(C=1.0, class_weight=None, dual=False,
                                          fit_intercept=True,
                                          intercept_scaling=1, l1_ratio=None,
                                          max_iter=100, multi_class='auto',
                                          n_jobs=None, penalty='l2',
                                          random_state=None, solver='lbfgs',
                                          tol=0.0001, verbose=0,
                                          warm_start=False),
             iid='deprecated', n_jobs=4,
             param_grid={'C': array([1.0000000...
       8.89000000e-04, 8.99090909e-04, 9.09181818e-04, 9.19272727e-04,
       9.29363636e-04, 9.39454545e-04, 9.49545455e-04, 9.59636364e-04,
       9.69727273e-04, 9.79818182e-04, 9.89909091e-04, 1.00000000e-03]),
                         'penalty': ['l1', 'l2', 'elasticnet', 'none'],
      

In [17]:
model_logreg.best_params_

{'C': 0.00018263636363636363, 'penalty': 'none', 'solver': 'sag'}

In [18]:
model_logreg.best_score_

0.625

### Decision Trees

In [8]:
dec_trees_param_grid = {
    'max_depth' : range(1,20), 'criterion' : ['gini', 'entropy'], 'splitter' : ['best', 'random'], 
    'min_samples_split' : range(0, 10), 'max_leaf_nodes': range(1,50,5), 'min_impurity_decrease': np.linspace(0.00001, 0.001, 150) } 

In [20]:
model = DecisionTreeClassifier(random_state=20)
model_dectree = GridSearchCV(model, dec_trees_param_grid, cv=5, n_jobs = 4, verbose = 1)
model_dectree.fit(X_scaled_standard, y)

Fitting 5 folds for each of 1140000 candidates, totalling 5700000 fits


[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done 312 tasks      | elapsed:    0.5s
[Parallel(n_jobs=4)]: Done 9720 tasks      | elapsed:    6.2s
[Parallel(n_jobs=4)]: Done 25720 tasks      | elapsed:   16.4s
[Parallel(n_jobs=4)]: Done 48120 tasks      | elapsed:   31.2s
[Parallel(n_jobs=4)]: Done 76920 tasks      | elapsed:   50.8s
[Parallel(n_jobs=4)]: Done 112120 tasks      | elapsed:  1.2min
[Parallel(n_jobs=4)]: Done 153720 tasks      | elapsed:  1.7min
[Parallel(n_jobs=4)]: Done 201720 tasks      | elapsed:  2.2min
[Parallel(n_jobs=4)]: Done 256120 tasks      | elapsed:  2.8min
[Parallel(n_jobs=4)]: Done 316920 tasks      | elapsed:  3.5min
[Parallel(n_jobs=4)]: Done 384120 tasks      | elapsed:  4.2min
[Parallel(n_jobs=4)]: Done 457720 tasks      | elapsed:  5.1min
[Parallel(n_jobs=4)]: Done 537720 tasks      | elapsed:  6.0min
[Parallel(n_jobs=4)]: Done 624120 tasks      | elapsed:  7.0min
[Parallel(n_jobs=4)]: Done 716920 tas

GridSearchCV(cv=5, error_score=nan,
             estimator=DecisionTreeClassifier(ccp_alpha=0.0, class_weight=None,
                                              criterion='gini', max_depth=None,
                                              max_features=None,
                                              max_leaf_nodes=None,
                                              min_impurity_decrease=0.0,
                                              min_impurity_split=None,
                                              min_samples_leaf=1,
                                              min_samples_split=2,
                                              min_weight_fraction_leaf=0.0,
                                              presort='deprecated',
                                              random_state=20,
                                              splitter='best'),
             iid='depr...
       9.13624161e-04, 9.20268456e-04, 9.26912752e-04, 9.33557047e-04,
       9.40201342e-04, 9.46

In [21]:
model_dectree.best_params_

{'criterion': 'entropy',
 'max_depth': 5,
 'max_leaf_nodes': 6,
 'min_impurity_decrease': 1e-05,
 'min_samples_split': 2,
 'splitter': 'best'}

In [22]:
model_dectree.best_score_

0.625

In [12]:
model = DecisionTreeClassifier(random_state=20)
model_dectree = RandomizedSearchCV(model, dec_trees_param_grid, cv=10, n_iter = 1500, n_jobs = 4, verbose = 1)
model_dectree.fit(X_scaled_standard, y)

Fitting 10 folds for each of 1500 candidates, totalling 15000 fits


[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done 312 tasks      | elapsed:    0.6s
[Parallel(n_jobs=4)]: Done 9720 tasks      | elapsed:    9.0s
[Parallel(n_jobs=4)]: Done 15000 out of 15000 | elapsed:   13.5s finished


RandomizedSearchCV(cv=10, error_score=nan,
                   estimator=DecisionTreeClassifier(ccp_alpha=0.0,
                                                    class_weight=None,
                                                    criterion='gini',
                                                    max_depth=None,
                                                    max_features=None,
                                                    max_leaf_nodes=None,
                                                    min_impurity_decrease=0.0,
                                                    min_impurity_split=None,
                                                    min_samples_leaf=1,
                                                    min_samples_split=2,
                                                    min_weight_fraction_leaf=0.0,
                                                    presort='deprecated',
                                                    random_state=20,
           

In [13]:
model_dectree.best_params_

{'splitter': 'random',
 'min_samples_split': 7,
 'min_impurity_decrease': 7.644295302013423e-05,
 'max_leaf_nodes': 36,
 'max_depth': 12,
 'criterion': 'gini'}

In [11]:
model_dectree.best_score_

0.5916666666666668

In [14]:
model = DecisionTreeClassifier(random_state=20)
model_dectree = RandomizedSearchCV(model, dec_trees_param_grid, cv=leave_one_out, n_iter = 1500, n_jobs = 4, verbose = 1)
model_dectree.fit(X_scaled_standard, y)

Fitting 120 folds for each of 1500 candidates, totalling 180000 fits


[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done 312 tasks      | elapsed:    0.5s
[Parallel(n_jobs=4)]: Done 9720 tasks      | elapsed:    9.5s
[Parallel(n_jobs=4)]: Done 25720 tasks      | elapsed:   24.6s
[Parallel(n_jobs=4)]: Done 48120 tasks      | elapsed:   44.4s
[Parallel(n_jobs=4)]: Done 76920 tasks      | elapsed:  1.2min
[Parallel(n_jobs=4)]: Done 112120 tasks      | elapsed:  1.7min
[Parallel(n_jobs=4)]: Done 153720 tasks      | elapsed:  2.3min
[Parallel(n_jobs=4)]: Done 180000 out of 180000 | elapsed:  2.6min finished


RandomizedSearchCV(cv=LeaveOneOut(), error_score=nan,
                   estimator=DecisionTreeClassifier(ccp_alpha=0.0,
                                                    class_weight=None,
                                                    criterion='gini',
                                                    max_depth=None,
                                                    max_features=None,
                                                    max_leaf_nodes=None,
                                                    min_impurity_decrease=0.0,
                                                    min_impurity_split=None,
                                                    min_samples_leaf=1,
                                                    min_samples_split=2,
                                                    min_weight_fraction_leaf=0.0,
                                                    presort='deprecated',
                                                    random_state=20,


In [15]:
model_dectree.best_params_

{'splitter': 'random',
 'min_samples_split': 8,
 'min_impurity_decrease': 9.63758389261745e-05,
 'max_leaf_nodes': 31,
 'max_depth': 5,
 'criterion': 'entropy'}

In [16]:
model_dectree.best_score_

0.7083333333333334

### Gradient Boosting

In [17]:
grad_boost_param_grid = {
    'loss' : ['deviance', 'exponential'], 'max_features' : ['auto', 'sqrt', 'log2', None], 
    'learning_rate' : np.linspace(1.0, 0.0001, 25), 'n_estimators' : np.arange(1,125,5), 
    'max_depth' : np.arange(1,6)
    }

In [18]:
model = GradientBoostingClassifier(random_state=20)
model_gradboost = RandomizedSearchCV(model, grad_boost_param_grid, cv=5, n_iter = 1500, n_jobs = 4, verbose = 1)
model_gradboost.fit(X_scaled_standard, y)

Fitting 5 folds for each of 500 candidates, totalling 2500 fits


[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done 104 tasks      | elapsed:    2.0s
[Parallel(n_jobs=4)]: Done 704 tasks      | elapsed:   12.8s
[Parallel(n_jobs=4)]: Done 1704 tasks      | elapsed:   29.6s
[Parallel(n_jobs=4)]: Done 2500 out of 2500 | elapsed:   42.8s finished


RandomizedSearchCV(cv=5, error_score=nan,
                   estimator=GradientBoostingClassifier(ccp_alpha=0.0,
                                                        criterion='friedman_mse',
                                                        init=None,
                                                        learning_rate=0.1,
                                                        loss='deviance',
                                                        max_depth=3,
                                                        max_features=None,
                                                        max_leaf_nodes=None,
                                                        min_impurity_decrease=0.0,
                                                        min_impurity_split=None,
                                                        min_samples_leaf=1,
                                                        min_samples_split=2,
                                                    

In [19]:
model_gradboost.best_params_

{'n_estimators': 116,
 'max_features': 'auto',
 'max_depth': 1,
 'loss': 'exponential',
 'learning_rate': 1.0}

In [20]:
model_gradboost.best_score_

0.6416666666666667

In [21]:
model = GradientBoostingClassifier(random_state=20)
model_gradboost = RandomizedSearchCV(model, grad_boost_param_grid, cv=10, n_iter = 1500, n_jobs = 4, verbose = 1)
model_gradboost.fit(X_scaled_standard, y)

Fitting 10 folds for each of 1500 candidates, totalling 15000 fits


[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done 200 tasks      | elapsed:    3.7s
[Parallel(n_jobs=4)]: Done 1400 tasks      | elapsed:   29.2s
[Parallel(n_jobs=4)]: Done 3400 tasks      | elapsed:  1.1min
[Parallel(n_jobs=4)]: Done 6200 tasks      | elapsed:  2.0min
[Parallel(n_jobs=4)]: Done 8784 tasks      | elapsed:  2.9min
[Parallel(n_jobs=4)]: Done 10984 tasks      | elapsed:  3.6min
[Parallel(n_jobs=4)]: Done 13688 tasks      | elapsed:  4.8min
[Parallel(n_jobs=4)]: Done 15000 out of 15000 | elapsed:  5.3min finished


RandomizedSearchCV(cv=10, error_score=nan,
                   estimator=GradientBoostingClassifier(ccp_alpha=0.0,
                                                        criterion='friedman_mse',
                                                        init=None,
                                                        learning_rate=0.1,
                                                        loss='deviance',
                                                        max_depth=3,
                                                        max_features=None,
                                                        max_leaf_nodes=None,
                                                        min_impurity_decrease=0.0,
                                                        min_impurity_split=None,
                                                        min_samples_leaf=1,
                                                        min_samples_split=2,
                                                   

In [22]:
model_gradboost.best_params_

{'n_estimators': 51,
 'max_features': 'sqrt',
 'max_depth': 2,
 'loss': 'deviance',
 'learning_rate': 0.9583375}

In [23]:
model_gradboost.best_score_

0.6166666666666667

### SVC

In [24]:
svc_params = {
    'C' : np.linspace(5, 1e-03, 200), 'kernel' : ['linear', 'rbf', 'sigmoid'],
    'gamma' : ['scale', 'auto'], 'decision_function_shape' : ['ovr', 'ovo']
}

In [25]:
model = SVC(random_state=20)
model_svc = GridSearchCV(model, svc_params, cv=5, n_jobs = 4, verbose = 1)
model_svc.fit(X_scaled_standard, y)

Fitting 5 folds for each of 2400 candidates, totalling 12000 fits


[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done 312 tasks      | elapsed:    0.9s
[Parallel(n_jobs=4)]: Done 5112 tasks      | elapsed:    7.3s
[Parallel(n_jobs=4)]: Done 11972 tasks      | elapsed:   15.4s
[Parallel(n_jobs=4)]: Done 12000 out of 12000 | elapsed:   15.4s finished


GridSearchCV(cv=5, error_score=nan,
             estimator=SVC(C=1.0, break_ties=False, cache_size=200,
                           class_weight=None, coef0=0.0,
                           decision_function_shape='ovr', degree=3,
                           gamma='scale', kernel='rbf', max_iter=-1,
                           probability=False, random_state=20, shrinking=True,
                           tol=0.001, verbose=False),
             iid='deprecated', n_jobs=4,
             param_grid={'C': array([5.00000000e+00, 4.97487940e+00, 4.949...
       2.77326633e-01, 2.52206030e-01, 2.27085427e-01, 2.01964824e-01,
       1.76844221e-01, 1.51723618e-01, 1.26603015e-01, 1.01482412e-01,
       7.63618090e-02, 5.12412060e-02, 2.61206030e-02, 1.00000000e-03]),
                         'decision_function_shape': ['ovr', 'ovo'],
                         'gamma': ['scale', 'auto'],
                         'kernel': ['linear', 'rbf', 'sigmoid']},
             pre_dispatch='2*n_jobs', refit=True

In [26]:
model_svc.best_params_

{'C': 2.5633015075376884,
 'decision_function_shape': 'ovr',
 'gamma': 'scale',
 'kernel': 'linear'}

In [27]:
model_svc.best_score_

0.6083333333333333

In [28]:
model = SVC(random_state=20)
model_svc = GridSearchCV(model, svc_params, cv=10, n_jobs = 4, verbose = 1)
model_svc.fit(X_scaled_standard, y)

Fitting 10 folds for each of 2400 candidates, totalling 24000 fits


[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done 312 tasks      | elapsed:    0.9s
[Parallel(n_jobs=4)]: Done 5112 tasks      | elapsed:    9.4s
[Parallel(n_jobs=4)]: Done 13112 tasks      | elapsed:   20.3s
[Parallel(n_jobs=4)]: Done 23878 tasks      | elapsed:   32.3s
[Parallel(n_jobs=4)]: Done 24000 out of 24000 | elapsed:   32.4s finished


GridSearchCV(cv=10, error_score=nan,
             estimator=SVC(C=1.0, break_ties=False, cache_size=200,
                           class_weight=None, coef0=0.0,
                           decision_function_shape='ovr', degree=3,
                           gamma='scale', kernel='rbf', max_iter=-1,
                           probability=False, random_state=20, shrinking=True,
                           tol=0.001, verbose=False),
             iid='deprecated', n_jobs=4,
             param_grid={'C': array([5.00000000e+00, 4.97487940e+00, 4.94...
       2.77326633e-01, 2.52206030e-01, 2.27085427e-01, 2.01964824e-01,
       1.76844221e-01, 1.51723618e-01, 1.26603015e-01, 1.01482412e-01,
       7.63618090e-02, 5.12412060e-02, 2.61206030e-02, 1.00000000e-03]),
                         'decision_function_shape': ['ovr', 'ovo'],
                         'gamma': ['scale', 'auto'],
                         'kernel': ['linear', 'rbf', 'sigmoid']},
             pre_dispatch='2*n_jobs', refit=True

In [29]:
model_svc.best_params_

{'C': 3.618366834170854,
 'decision_function_shape': 'ovr',
 'gamma': 'scale',
 'kernel': 'linear'}

In [30]:
model_svc.best_score_

0.6333333333333334

In [31]:
model = SVC(random_state=20)
model_svc = GridSearchCV(model, svc_params, cv=leave_one_out, n_jobs = 4, verbose = 1)
model_svc.fit(X_scaled_standard, y)

Fitting 120 folds for each of 2400 candidates, totalling 288000 fits


[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done 312 tasks      | elapsed:    0.7s
[Parallel(n_jobs=4)]: Done 9720 tasks      | elapsed:   16.4s
[Parallel(n_jobs=4)]: Done 25720 tasks      | elapsed:   45.4s
[Parallel(n_jobs=4)]: Done 48120 tasks      | elapsed:  1.4min
[Parallel(n_jobs=4)]: Done 76920 tasks      | elapsed:  2.2min
[Parallel(n_jobs=4)]: Done 112120 tasks      | elapsed:  3.2min
[Parallel(n_jobs=4)]: Done 153720 tasks      | elapsed:  4.3min
[Parallel(n_jobs=4)]: Done 201720 tasks      | elapsed:  5.4min
[Parallel(n_jobs=4)]: Done 256120 tasks      | elapsed:  6.5min
[Parallel(n_jobs=4)]: Done 288000 out of 288000 | elapsed:  7.0min finished


GridSearchCV(cv=LeaveOneOut(), error_score=nan,
             estimator=SVC(C=1.0, break_ties=False, cache_size=200,
                           class_weight=None, coef0=0.0,
                           decision_function_shape='ovr', degree=3,
                           gamma='scale', kernel='rbf', max_iter=-1,
                           probability=False, random_state=20, shrinking=True,
                           tol=0.001, verbose=False),
             iid='deprecated', n_jobs=4,
             param_grid={'C': array([5.00000000e+00, 4.974879...
       2.77326633e-01, 2.52206030e-01, 2.27085427e-01, 2.01964824e-01,
       1.76844221e-01, 1.51723618e-01, 1.26603015e-01, 1.01482412e-01,
       7.63618090e-02, 5.12412060e-02, 2.61206030e-02, 1.00000000e-03]),
                         'decision_function_shape': ['ovr', 'ovo'],
                         'gamma': ['scale', 'auto'],
                         'kernel': ['linear', 'rbf', 'sigmoid']},
             pre_dispatch='2*n_jobs', refit=True,

In [32]:
model_svc.best_params_

{'C': 5.0,
 'decision_function_shape': 'ovr',
 'gamma': 'scale',
 'kernel': 'linear'}

In [33]:
model_svc.best_score_

0.6916666666666667

### kNN

In [34]:
knn_params = {
    'leaf_size':range(1,5), 'n_neighbors':range(1,50), 
    'algorithm' : ['auto'],
    'p':[1,2,3,4,5], 'weights' : ['uniform', 'distance' ]
}

In [35]:
knn_model = KNeighborsClassifier(algorithm = 'auto' )
model_knn = GridSearchCV(knn_model, knn_params, cv=5, n_jobs = 4, verbose = 1)
model_knn.fit(X_scaled_standard, y)

Fitting 5 folds for each of 1960 candidates, totalling 9800 fits


[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done 116 tasks      | elapsed:    0.5s
[Parallel(n_jobs=4)]: Done 4740 tasks      | elapsed:   11.2s
[Parallel(n_jobs=4)]: Done 9800 out of 9800 | elapsed:   20.4s finished


GridSearchCV(cv=5, error_score=nan,
             estimator=KNeighborsClassifier(algorithm='auto', leaf_size=30,
                                            metric='minkowski',
                                            metric_params=None, n_jobs=None,
                                            n_neighbors=5, p=2,
                                            weights='uniform'),
             iid='deprecated', n_jobs=4,
             param_grid={'algorithm': ['auto'], 'leaf_size': range(1, 5),
                         'n_neighbors': range(1, 50), 'p': [1, 2, 3, 4, 5],
                         'weights': ['uniform', 'distance']},
             pre_dispatch='2*n_jobs', refit=True, return_train_score=False,
             scoring=None, verbose=1)

In [36]:
model_knn.best_params_

{'algorithm': 'auto',
 'leaf_size': 1,
 'n_neighbors': 28,
 'p': 5,
 'weights': 'uniform'}

In [37]:
model_knn.best_score_

0.5499999999999999

In [38]:
knn_model = KNeighborsClassifier(algorithm = 'auto' )
model_knn = GridSearchCV(knn_model, knn_params, cv=10, n_jobs = 4, verbose = 1)
model_knn.fit(X_scaled_standard, y)

Fitting 10 folds for each of 1960 candidates, totalling 19600 fits


[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done 312 tasks      | elapsed:    0.6s
[Parallel(n_jobs=4)]: Done 5112 tasks      | elapsed:    9.0s
[Parallel(n_jobs=4)]: Done 13112 tasks      | elapsed:   21.7s
[Parallel(n_jobs=4)]: Done 19600 out of 19600 | elapsed:   31.2s finished


GridSearchCV(cv=10, error_score=nan,
             estimator=KNeighborsClassifier(algorithm='auto', leaf_size=30,
                                            metric='minkowski',
                                            metric_params=None, n_jobs=None,
                                            n_neighbors=5, p=2,
                                            weights='uniform'),
             iid='deprecated', n_jobs=4,
             param_grid={'algorithm': ['auto'], 'leaf_size': range(1, 5),
                         'n_neighbors': range(1, 50), 'p': [1, 2, 3, 4, 5],
                         'weights': ['uniform', 'distance']},
             pre_dispatch='2*n_jobs', refit=True, return_train_score=False,
             scoring=None, verbose=1)

In [39]:
model_knn.best_params_

{'algorithm': 'auto',
 'leaf_size': 1,
 'n_neighbors': 22,
 'p': 3,
 'weights': 'uniform'}

In [40]:
model_knn.best_score_

0.5583333333333333

In [41]:
knn_model = KNeighborsClassifier(algorithm = 'auto' )
model_knn = GridSearchCV(knn_model, knn_params, cv=leave_one_out, n_jobs = 4, verbose = 1)
model_knn.fit(X_scaled_standard, y)

Fitting 120 folds for each of 1960 candidates, totalling 235200 fits


[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done 312 tasks      | elapsed:    0.5s
[Parallel(n_jobs=4)]: Done 9720 tasks      | elapsed:   11.1s
[Parallel(n_jobs=4)]: Done 25720 tasks      | elapsed:   29.5s
[Parallel(n_jobs=4)]: Done 48120 tasks      | elapsed:   55.2s
[Parallel(n_jobs=4)]: Done 76920 tasks      | elapsed:  1.4min
[Parallel(n_jobs=4)]: Done 112120 tasks      | elapsed:  2.1min
[Parallel(n_jobs=4)]: Done 153720 tasks      | elapsed:  2.8min
[Parallel(n_jobs=4)]: Done 201720 tasks      | elapsed:  3.7min
[Parallel(n_jobs=4)]: Done 235200 out of 235200 | elapsed:  4.2min finished


GridSearchCV(cv=LeaveOneOut(), error_score=nan,
             estimator=KNeighborsClassifier(algorithm='auto', leaf_size=30,
                                            metric='minkowski',
                                            metric_params=None, n_jobs=None,
                                            n_neighbors=5, p=2,
                                            weights='uniform'),
             iid='deprecated', n_jobs=4,
             param_grid={'algorithm': ['auto'], 'leaf_size': range(1, 5),
                         'n_neighbors': range(1, 50), 'p': [1, 2, 3, 4, 5],
                         'weights': ['uniform', 'distance']},
             pre_dispatch='2*n_jobs', refit=True, return_train_score=False,
             scoring=None, verbose=1)

In [42]:
model_knn.best_params_

{'algorithm': 'auto',
 'leaf_size': 1,
 'n_neighbors': 13,
 'p': 1,
 'weights': 'uniform'}

In [43]:
model_knn.best_score_

0.625

### Random Forest

In [44]:
ran_for_params = {
    'n_estimators' : range(1,121),
    'criterion': ['gini', 'entropy'],
    'max_depth': range(1,20),
    'min_samples_leaf': range(1,10),
    'min_samples_split': range(1,10),
    'bootstrap' : [True, False]
}

In [45]:
random_forest_model = RandomForestClassifier()
model_rand_fors = RandomizedSearchCV(random_forest_model, ran_for_params, cv=5, n_iter = 500, n_jobs = 4, verbose = 1)
model_rand_fors.fit(X_scaled_standard, y)

Fitting 5 folds for each of 500 candidates, totalling 2500 fits


[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  42 tasks      | elapsed:    2.2s
[Parallel(n_jobs=4)]: Done 308 tasks      | elapsed:   13.1s
[Parallel(n_jobs=4)]: Done 808 tasks      | elapsed:   32.8s
[Parallel(n_jobs=4)]: Done 1508 tasks      | elapsed:  1.0min
[Parallel(n_jobs=4)]: Done 2500 out of 2500 | elapsed:  1.7min finished


RandomizedSearchCV(cv=5, error_score=nan,
                   estimator=RandomForestClassifier(bootstrap=True,
                                                    ccp_alpha=0.0,
                                                    class_weight=None,
                                                    criterion='gini',
                                                    max_depth=None,
                                                    max_features='auto',
                                                    max_leaf_nodes=None,
                                                    max_samples=None,
                                                    min_impurity_decrease=0.0,
                                                    min_impurity_split=None,
                                                    min_samples_leaf=1,
                                                    min_samples_split=2,
                                                    min_weight_fraction_leaf=0.0,
               

In [46]:
model_rand_fors.best_params_

{'n_estimators': 10,
 'min_samples_split': 9,
 'min_samples_leaf': 2,
 'max_depth': 7,
 'criterion': 'entropy',
 'bootstrap': False}

In [47]:
model_rand_fors.best_score_

0.6083333333333334

In [48]:
random_forest_model = RandomForestClassifier()
model_rand_fors = RandomizedSearchCV(random_forest_model, ran_for_params, cv=10, n_iter = 1000, n_jobs = 4, verbose = 1)
model_rand_fors.fit(X_scaled_standard, y)

Fitting 10 folds for each of 1000 candidates, totalling 10000 fits


[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  48 tasks      | elapsed:    2.0s
[Parallel(n_jobs=4)]: Done 348 tasks      | elapsed:   17.1s
[Parallel(n_jobs=4)]: Done 848 tasks      | elapsed:   39.4s
[Parallel(n_jobs=4)]: Done 2140 tasks      | elapsed:  1.5min
[Parallel(n_jobs=4)]: Done 3940 tasks      | elapsed:  2.6min
[Parallel(n_jobs=4)]: Done 5416 tasks      | elapsed:  3.7min
[Parallel(n_jobs=4)]: Done 6796 tasks      | elapsed:  4.7min
[Parallel(n_jobs=4)]: Done 9096 tasks      | elapsed:  6.3min
[Parallel(n_jobs=4)]: Done 10000 out of 10000 | elapsed:  6.9min finished


RandomizedSearchCV(cv=10, error_score=nan,
                   estimator=RandomForestClassifier(bootstrap=True,
                                                    ccp_alpha=0.0,
                                                    class_weight=None,
                                                    criterion='gini',
                                                    max_depth=None,
                                                    max_features='auto',
                                                    max_leaf_nodes=None,
                                                    max_samples=None,
                                                    min_impurity_decrease=0.0,
                                                    min_impurity_split=None,
                                                    min_samples_leaf=1,
                                                    min_samples_split=2,
                                                    min_weight_fraction_leaf=0.0,
              

In [49]:
model_rand_fors.best_params_

{'n_estimators': 4,
 'min_samples_split': 5,
 'min_samples_leaf': 5,
 'max_depth': 10,
 'criterion': 'gini',
 'bootstrap': True}

In [50]:
model_rand_fors.best_score_

0.5999999999999999

In [51]:
random_forest_model = RandomForestClassifier()
model_rand_fors = RandomizedSearchCV(random_forest_model, ran_for_params, cv=leave_one_out, n_iter = 1500, n_jobs = 4, verbose = 1)
model_rand_fors.fit(X_scaled_standard, y)

Fitting 120 folds for each of 1500 candidates, totalling 180000 fits


[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  76 tasks      | elapsed:    1.8s
[Parallel(n_jobs=4)]: Done 472 tasks      | elapsed:   15.7s
[Parallel(n_jobs=4)]: Done 936 tasks      | elapsed:   33.6s
[Parallel(n_jobs=4)]: Done 2016 tasks      | elapsed:  1.1min
[Parallel(n_jobs=4)]: Done 2862 tasks      | elapsed:  1.7min
[Parallel(n_jobs=4)]: Done 3728 tasks      | elapsed:  2.3min
[Parallel(n_jobs=4)]: Done 4690 tasks      | elapsed:  3.1min
[Parallel(n_jobs=4)]: Done 5716 tasks      | elapsed:  4.0min
[Parallel(n_jobs=4)]: Done 7124 tasks      | elapsed:  5.2min
[Parallel(n_jobs=4)]: Done 9080 tasks      | elapsed:  6.6min
[Parallel(n_jobs=4)]: Done 11942 tasks      | elapsed:  8.6min
[Parallel(n_jobs=4)]: Done 15076 tasks      | elapsed: 10.6min
[Parallel(n_jobs=4)]: Done 17036 tasks      | elapsed: 12.1min
[Parallel(n_jobs=4)]: Done 19276 tasks      | elapsed: 14.2min
[Parallel(n_jobs=4)]: Done 23030 tasks      | elapsed: 1

RandomizedSearchCV(cv=LeaveOneOut(), error_score=nan,
                   estimator=RandomForestClassifier(bootstrap=True,
                                                    ccp_alpha=0.0,
                                                    class_weight=None,
                                                    criterion='gini',
                                                    max_depth=None,
                                                    max_features='auto',
                                                    max_leaf_nodes=None,
                                                    max_samples=None,
                                                    min_impurity_decrease=0.0,
                                                    min_impurity_split=None,
                                                    min_samples_leaf=1,
                                                    min_samples_split=2,
                                                    min_weight_fraction_leaf=0.0,
   

In [52]:
model_rand_fors.best_params_

{'n_estimators': 20,
 'min_samples_split': 2,
 'min_samples_leaf': 1,
 'max_depth': 19,
 'criterion': 'gini',
 'bootstrap': True}

In [53]:
model_rand_fors.best_score_

0.6916666666666667

### Neural Network sklearn