In [2]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.naive_bayes import GaussianNB
from sklearn.ensemble import RandomForestClassifier
from sklearn.pipeline import Pipeline
from sklearn.utils.validation import column_or_1d
from sklearn import model_selection
from sklearn.svm import SVC
from sklearn import metrics
import numpy as np
import pandas as pd

# Remove warnings
import warnings
from sklearn.exceptions import DataConversionWarning

warnings.simplefilter(action='ignore', category=FutureWarning)
warnings.filterwarnings(action='ignore', category=DataConversionWarning)




<h3> Load the Data </h3>

In [3]:
sonar = pd.io.parsers.read_csv(
    'https://archive.ics.uci.edu/ml/machine-learning-databases/undocumented/connectionist-bench/sonar/sonar.all-data',
     header=None    
    )

In [5]:
print(sonar.shape)
sonar.head(10)

(208, 61)


Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,51,52,53,54,55,56,57,58,59,60
0,0.02,0.0371,0.0428,0.0207,0.0954,0.0986,0.1539,0.1601,0.3109,0.2111,...,0.0027,0.0065,0.0159,0.0072,0.0167,0.018,0.0084,0.009,0.0032,R
1,0.0453,0.0523,0.0843,0.0689,0.1183,0.2583,0.2156,0.3481,0.3337,0.2872,...,0.0084,0.0089,0.0048,0.0094,0.0191,0.014,0.0049,0.0052,0.0044,R
2,0.0262,0.0582,0.1099,0.1083,0.0974,0.228,0.2431,0.3771,0.5598,0.6194,...,0.0232,0.0166,0.0095,0.018,0.0244,0.0316,0.0164,0.0095,0.0078,R
3,0.01,0.0171,0.0623,0.0205,0.0205,0.0368,0.1098,0.1276,0.0598,0.1264,...,0.0121,0.0036,0.015,0.0085,0.0073,0.005,0.0044,0.004,0.0117,R
4,0.0762,0.0666,0.0481,0.0394,0.059,0.0649,0.1209,0.2467,0.3564,0.4459,...,0.0031,0.0054,0.0105,0.011,0.0015,0.0072,0.0048,0.0107,0.0094,R
5,0.0286,0.0453,0.0277,0.0174,0.0384,0.099,0.1201,0.1833,0.2105,0.3039,...,0.0045,0.0014,0.0038,0.0013,0.0089,0.0057,0.0027,0.0051,0.0062,R
6,0.0317,0.0956,0.1321,0.1408,0.1674,0.171,0.0731,0.1401,0.2083,0.3513,...,0.0201,0.0248,0.0131,0.007,0.0138,0.0092,0.0143,0.0036,0.0103,R
7,0.0519,0.0548,0.0842,0.0319,0.1158,0.0922,0.1027,0.0613,0.1465,0.2838,...,0.0081,0.012,0.0045,0.0121,0.0097,0.0085,0.0047,0.0048,0.0053,R
8,0.0223,0.0375,0.0484,0.0475,0.0647,0.0591,0.0753,0.0098,0.0684,0.1487,...,0.0145,0.0128,0.0145,0.0058,0.0049,0.0065,0.0093,0.0059,0.0022,R
9,0.0164,0.0173,0.0347,0.007,0.0187,0.0671,0.1056,0.0697,0.0962,0.0251,...,0.009,0.0223,0.0179,0.0084,0.0068,0.0032,0.0035,0.0056,0.004,R


In [7]:
seed = 1234
X_sonar = sonar.values[:,0:60].astype(float)
y_sonar = sonar.values[:,60:]
y_sonar = column_or_1d(y_sonar, warn=False)

In [11]:
sonar[60].value_counts()

M    111
R     97
Name: 60, dtype: int64

In [13]:
sonar.describe()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,50,51,52,53,54,55,56,57,58,59
count,208.0,208.0,208.0,208.0,208.0,208.0,208.0,208.0,208.0,208.0,...,208.0,208.0,208.0,208.0,208.0,208.0,208.0,208.0,208.0,208.0
mean,0.029164,0.038437,0.043832,0.053892,0.075202,0.10457,0.121747,0.134799,0.178003,0.208259,...,0.016069,0.01342,0.010709,0.010941,0.00929,0.008222,0.00782,0.007949,0.007941,0.006507
std,0.022991,0.03296,0.038428,0.046528,0.055552,0.059105,0.061788,0.085152,0.118387,0.134416,...,0.012008,0.009634,0.00706,0.007301,0.007088,0.005736,0.005785,0.00647,0.006181,0.005031
min,0.0015,0.0006,0.0015,0.0058,0.0067,0.0102,0.0033,0.0055,0.0075,0.0113,...,0.0,0.0008,0.0005,0.001,0.0006,0.0004,0.0003,0.0003,0.0001,0.0006
25%,0.01335,0.01645,0.01895,0.024375,0.03805,0.067025,0.0809,0.080425,0.097025,0.111275,...,0.008425,0.007275,0.005075,0.005375,0.00415,0.0044,0.0037,0.0036,0.003675,0.0031
50%,0.0228,0.0308,0.0343,0.04405,0.0625,0.09215,0.10695,0.1121,0.15225,0.1824,...,0.0139,0.0114,0.00955,0.0093,0.0075,0.00685,0.00595,0.0058,0.0064,0.0053
75%,0.03555,0.04795,0.05795,0.0645,0.100275,0.134125,0.154,0.1696,0.233425,0.2687,...,0.020825,0.016725,0.0149,0.0145,0.0121,0.010575,0.010425,0.01035,0.010325,0.008525
max,0.1371,0.2339,0.3059,0.4264,0.401,0.3823,0.3729,0.459,0.6828,0.7106,...,0.1004,0.0709,0.039,0.0352,0.0447,0.0394,0.0355,0.044,0.0364,0.0439


<h3> Split Data For Cross Validation </h3>

In [52]:
# Random split the data into four new datasets, training features, training outcome, test features, 
# and test outcome. Set the size of the test data to be 20% of the full dataset.
X_train, X_test, y_train, y_test = train_test_split(X_sonar, y_sonar, test_size=0.20, random_state=seed)

In [53]:
num_folds=10
scoring='accuracy'
models = []
models.append(('LR',  LogisticRegression()))
models.append(('LDA', LinearDiscriminantAnalysis()))
models.append(('KNN', KNeighborsClassifier()))
models.append(('CART',DecisionTreeClassifier(random_state=seed)))
models.append(('NB',  GaussianNB()))
models.append(('SVM', SVC(random_state=seed)))
models.append(('RF',  RandomForestClassifier(max_depth=3, random_state=seed)))
# evaluate each model in turn
results = []
names = []
for name, model in models:
    kfold = model_selection.KFold(n_splits=num_folds, random_state=seed)
    cv_results = model_selection.cross_val_score(model, X_train, y_train, cv=kfold, scoring=scoring)
    results.append(cv_results)
    names.append(name)
    msg = "%s: %f (%f)" % (name, cv_results.mean(), cv_results.std())
    print(msg)
    

LR: 0.753676 (0.113733)
LDA: 0.698529 (0.132261)
KNN: 0.753309 (0.080505)
CART: 0.734559 (0.066924)
NB: 0.656985 (0.130658)
SVM: 0.608456 (0.115809)
RF: 0.710294 (0.125731)


<h3>Standardization Data</h3>

In [54]:
# Standardize the dataset
pipelines = []
pipelines.append(('ScaledLR',Pipeline([('Scaler',StandardScaler()),('LR',LogisticRegression())])))
pipelines.append(('ScaledLDA',Pipeline([('Scaler',StandardScaler()),('LDA',LinearDiscriminantAnalysis())])))
pipelines.append(('ScaledKNN',Pipeline([('Scaler',StandardScaler()),('KNN',KNeighborsClassifier())])))
pipelines.append(('ScaledCART',Pipeline([('Scaler',StandardScaler()),('CART',DecisionTreeClassifier(random_state=seed))])))
pipelines.append(('ScaledNB',Pipeline([('Scaler',StandardScaler()),('NB',GaussianNB())])))
pipelines.append(('ScaledSVM' ,Pipeline([('Scaler',StandardScaler()),('SVM' , SVC(random_state=seed))])))
pipelines.append(('ScaledRF' ,Pipeline([('Scaler',StandardScaler()),('RF' , RandomForestClassifier(max_depth=3, random_state=seed))])))
results = []
names = []
for name, model in pipelines:
    kfold = model_selection.KFold(n_splits=num_folds, random_state=seed)
    cv_results = model_selection.cross_val_score(model, X_train, y_train, cv=kfold, scoring=scoring)
    results.append(cv_results)
    names.append(name)
    msg = "%s: %f (%f)" % (name, cv_results.mean(), cv_results.std())
    print(msg)

ScaledLR: 0.741544 (0.105022)
ScaledLDA: 0.698529 (0.132261)
ScaledKNN: 0.795588 (0.069953)
ScaledCART: 0.734559 (0.066924)
ScaledNB: 0.656985 (0.130658)
ScaledSVM: 0.849265 (0.077153)
ScaledRF: 0.710294 (0.125731)


<h3> Training a  SVM classifier</h3>

In [55]:
from __future__ import print_function

from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import Normalizer

import numpy as np

import matplotlib as mpl
from matplotlib import pyplot as plt
from matplotlib import cm

from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import minmax_scale
from sklearn.preprocessing import MaxAbsScaler
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import RobustScaler
from sklearn.preprocessing import Normalizer
from sklearn.preprocessing import QuantileTransformer
from sklearn.preprocessing import PowerTransformer

from sklearn.datasets import fetch_california_housing



# std_scale = StandardScaler().fit(X_train)
distributions = [
    ('Data after standard scaling',
        StandardScaler()),
    ('Data after min-max scaling',
        MinMaxScaler()),
    ('Data after max-abs scaling',
        MaxAbsScaler()),
    ('Data after robust scaling',
        RobustScaler(quantile_range=(25, 75))),
    ('Data after power transformation (Yeo-Johnson)',
     PowerTransformer(method='yeo-johnson')),
#     ('Data after power transformation (Box-Cox)',
#      PowerTransformer(method='box-cox')),
    ('Data after quantile transformation (gaussian pdf)',
        QuantileTransformer(output_distribution='normal')
        ),
    ('Data after quantile transformation (uniform pdf)',
        QuantileTransformer(output_distribution='uniform')
        ),
    ('Data after sample-wise L2 normalizing',
        Normalizer()),
]



print("No normalization or standartization")
svc_scaled = SVC(C=1.5, random_state=seed)
fit_std = svc_scaled.fit(X_train, y_train)
pred_train_std = svc_scaled.predict(X_train)

print('\nPrediction accuracy for the training dataset')
print('{:.2%}'.format(metrics.accuracy_score(y_train, pred_train_std)))

pred_test_std = svc_scaled.predict(X_test)

print('\nPrediction accuracy for the test dataset')
print('{:.2%}\n'.format(metrics.accuracy_score(y_test, pred_test_std)))


print("#"*50)

    
for name, dist in distributions:
    print(name)
    std_scale = dist.fit(X_train)
    X_train_std = std_scale.transform(X_train)
    X_test_std = std_scale.transform(X_test)

    # on standardized data
    svc_scaled = SVC(C=1.5, random_state=seed)
    fit_std = svc_scaled.fit(X_train_std, y_train)
    pred_train_std = svc_scaled.predict(X_train_std)

    print('\nPrediction accuracy for the training dataset')
    print('{:.2%}'.format(metrics.accuracy_score(y_train, pred_train_std)))

    pred_test_std = svc_scaled.predict(X_test_std)

    print('\nPrediction accuracy for the test dataset')
    print('{:.2%}\n'.format(metrics.accuracy_score(y_test, pred_test_std)))
    
    
    print("#"*50)


No normalization or standartization

Prediction accuracy for the training dataset
72.29%

Prediction accuracy for the test dataset
71.43%

##################################################
Data after standard scaling

Prediction accuracy for the training dataset
98.80%

Prediction accuracy for the test dataset
83.33%

##################################################
Data after min-max scaling

Prediction accuracy for the training dataset
81.93%

Prediction accuracy for the test dataset
71.43%

##################################################
Data after max-abs scaling

Prediction accuracy for the training dataset
81.33%

Prediction accuracy for the test dataset
69.05%

##################################################
Data after robust scaling

Prediction accuracy for the training dataset
96.39%

Prediction accuracy for the test dataset
80.95%

##################################################
Data after power transformation (Yeo-Johnson)

Prediction accuracy for the training da

In [74]:
def print_results(names, resutls, test_scores):
    print()
    print("#"*30 +"Results" + "#"*30)
    counter = 0
    class color:
       PURPLE = '\033[95m'
       CYAN = '\033[96m'
       DARKCYAN = '\033[36m'
       BLUE = '\033[94m'
       GREEN = '\033[92m'
       YELLOW = '\033[93m'
       RED = '\033[91m'
       BOLD = '\033[1m'
       UNDERLINE = '\033[4m'
       END = '\033[0m'


    # Get max row
    clf_names = set([name.split("_")[1] for name in names])
    max_mean = {name:0 for name in clf_names}
    max_mean_counter = {name:0 for name in clf_names}
    for name,result in zip(names,results):
        counter +=1
        clf_name = name.split("_")[1]
        if result.mean()>max_mean[clf_name]:
            max_mean_counter[clf_name] = counter
            max_mean[clf_name] = result.mean()

    # print max row in BOLD
    counter = 0
    prev_clf_name = names[0].split("_")[1]
    for name,result ,score in zip(names,results,test_scores): 
        counter +=1
        clf_name = name.split("_")[1]
        if prev_clf_name != clf_name:
            print()
            prev_clf_name = clf_name
        msg = "%s: %f (%f) [test_score:%.3f]" % (name, result.mean(), result.std(), score)
        if counter==max_mean_counter[clf_name]:
            print(color.BOLD + msg)
        else:
            print(color.END + msg)
            
def print_results2(names, results_mean,results_std, test_scores):
    print()
    print("#"*30 +"Results" + "#"*30)
    class color:
       PURPLE = '\033[95m'
       CYAN = '\033[96m'
       DARKCYAN = '\033[36m'
       BLUE = '\033[94m'
       GREEN = '\033[92m'
       YELLOW = '\033[93m'
       RED = '\033[91m'
       BOLD = '\033[1m'
       UNDERLINE = '\033[4m'
       END = '\033[0m'



    # print max row in BOLD
    prev_clf_name = names[0].split("_")[1]
    for name,mean,std, score in zip(names,results_mean,results_std, test_scores): 
        clf_name = name.split("_")[1]
        if prev_clf_name != clf_name:
            print()
            prev_clf_name = clf_name
        
        msg = "%s: %f (%f) [test_score:%.3f]" % (name, mean, std, score)
        if mean==max(results_mean):
            print(color.BOLD + msg)
        else:
            print(color.END + msg)

In [76]:
# Standardize the dataset
pipelines = []
pipelines.append(('_LR',Pipeline([('LR',LogisticRegression())])))
pipelines.append(('Scaled_LR',Pipeline([('Scaler',StandardScaler()),('LR',LogisticRegression())])))
pipelines.append(('MinMax_LR',Pipeline([('Scaler',MinMaxScaler()),('LR',LogisticRegression())])))
pipelines.append(('MaxAbsScaler_LR',Pipeline([('Scaler',MaxAbsScaler()),('LR',LogisticRegression())])))
pipelines.append(('RobustScaler_LR',Pipeline([('Scaler',RobustScaler()),('LR',LogisticRegression())])))
pipelines.append(('QuantileTransformer-Normal_LR',Pipeline([('Scaler',QuantileTransformer(output_distribution='normal')),('LR',LogisticRegression())])))
pipelines.append(('QuantileTransformer-Uniform_LR',Pipeline([('Scaler',QuantileTransformer(output_distribution='uniform')),('LR',LogisticRegression())])))
pipelines.append(('PowerTransformer-yeo-johnson_LR',Pipeline([('Scaler',PowerTransformer(method='yeo-johnson')),('LR',LogisticRegression())])))
pipelines.append(('Normalizer_LR',Pipeline([('Scaler',Normalizer()),('LR',LogisticRegression())])))

pipelines.append(('_LR-PCA',Pipeline([('LR',LogisticRegression())])))
pipelines.append(('Scaled_LR-PCA',Pipeline([('Scaler',StandardScaler()), ('PCA', PCA(n_components=4)), ('LR',LogisticRegression())])))
pipelines.append(('MinMax_LR-PCA',Pipeline([('Scaler',MinMaxScaler()), ('PCA', PCA(n_components=4)), ('LR',LogisticRegression())])))
pipelines.append(('MaxAbsScaler_LR-PCA',Pipeline([('Scaler',MaxAbsScaler()), ('PCA', PCA(n_components=4)), ('LR',LogisticRegression())])))
pipelines.append(('RobustScaler_LR-PCA',Pipeline([('Scaler',RobustScaler()), ('PCA', PCA(n_components=4)), ('LR',LogisticRegression())])))
pipelines.append(('QuantileTransformer-Normal_LR-PCA',Pipeline([('Scaler',QuantileTransformer(output_distribution='normal')), ('PCA', PCA(n_components=4)), ('LR',LogisticRegression())])))
pipelines.append(('QuantileTransformer-Uniform_LR-PCA',Pipeline([('Scaler',QuantileTransformer(output_distribution='uniform')), ('PCA', PCA(n_components=4)), ('LR',LogisticRegression())])))
pipelines.append(('PowerTransformer-yeo-johnson_LR-PCA',Pipeline([('Scaler',PowerTransformer(method='yeo-johnson')), ('PCA', PCA(n_components=4)), ('LR',LogisticRegression())])))
pipelines.append(('Normalizer_LR-PCA',Pipeline([('Scaler',Normalizer()), ('PCA', PCA(n_components=4)), ('LR',LogisticRegression())])))



pipelines.append(('_LDA',Pipeline([('LDA',LinearDiscriminantAnalysis())])))
pipelines.append(('Scaled_LDA',Pipeline([('Scaler',StandardScaler()),('LDA',LinearDiscriminantAnalysis())])))
pipelines.append(('MinMax_LDA',Pipeline([('Scaler',MinMaxScaler()),('LDA',LinearDiscriminantAnalysis())])))
pipelines.append(('MaxAbsScaler_LDA',Pipeline([('Scaler',MaxAbsScaler()),('LDA',LinearDiscriminantAnalysis())])))
pipelines.append(('RobustScaler_LDA',Pipeline([('Scaler',RobustScaler()),('LDA',LinearDiscriminantAnalysis())])))
pipelines.append(('QuantileTransformer-Normal_LDA',Pipeline([('Scaler',QuantileTransformer(output_distribution='normal')),('LDA',LinearDiscriminantAnalysis())])))
pipelines.append(('QuantileTransformer-Uniform_LDA',Pipeline([('Scaler',QuantileTransformer(output_distribution='uniform')),('LDA',LinearDiscriminantAnalysis())])))
pipelines.append(('PowerTransformer-yeo-johnson_LDA',Pipeline([('Scaler',PowerTransformer(method='yeo-johnson')),('LDA',LinearDiscriminantAnalysis())])))
pipelines.append(('Normalizer_LDA',Pipeline([('Scaler',Normalizer()),('LDA',LinearDiscriminantAnalysis())])))

pipelines.append(('_LDA-PCA',Pipeline([('LDA',LinearDiscriminantAnalysis())])))
pipelines.append(('Scaled_LDA-PCA',Pipeline([('Scaler',StandardScaler()), ('PCA', PCA(n_components=4)), ('LDA',LinearDiscriminantAnalysis())])))
pipelines.append(('MinMax_LDA-PCA',Pipeline([('Scaler',MinMaxScaler()), ('PCA', PCA(n_components=4)), ('LDA',LinearDiscriminantAnalysis())])))
pipelines.append(('MaxAbsScaler_LDA-PCA',Pipeline([('Scaler',MaxAbsScaler()), ('PCA', PCA(n_components=4)), ('LDA',LinearDiscriminantAnalysis())])))
pipelines.append(('RobustScaler_LDA-PCA',Pipeline([('Scaler',RobustScaler()), ('PCA', PCA(n_components=4)), ('LDA',LinearDiscriminantAnalysis())])))
pipelines.append(('QuantileTransformer-Normal_LDA-PCA',Pipeline([('Scaler',QuantileTransformer(output_distribution='normal')), ('PCA', PCA(n_components=4)), ('LDA',LinearDiscriminantAnalysis())])))
pipelines.append(('QuantileTransformer-Uniform_LDA-PCA',Pipeline([('Scaler',QuantileTransformer(output_distribution='uniform')), ('PCA', PCA(n_components=4)), ('LDA',LinearDiscriminantAnalysis())])))
pipelines.append(('PowerTransformer-yeo-johnson_LDA-PCA',Pipeline([('Scaler',PowerTransformer(method='yeo-johnson')), ('PCA', PCA(n_components=4)), ('LDA',LinearDiscriminantAnalysis())])))
pipelines.append(('Normalizer_LDA-PCA',Pipeline([('Scaler',Normalizer()), ('PCA', PCA(n_components=4)), ('LDA',LinearDiscriminantAnalysis())])))



pipelines.append(('_KNN',Pipeline([('KNN',KNeighborsClassifier())])))
pipelines.append(('Scaled_KNN',Pipeline([('Scaler',StandardScaler()),('KNN',KNeighborsClassifier())])))
pipelines.append(('MinMax_KNN',Pipeline([('Scaler',MinMaxScaler()),('KNN',KNeighborsClassifier())])))
pipelines.append(('MaxAbsScaler_KNN',Pipeline([('Scaler',MaxAbsScaler()),('KNN',KNeighborsClassifier())])))
pipelines.append(('RobustScaler_KNN',Pipeline([('Scaler',RobustScaler()),('KNN',KNeighborsClassifier())])))
pipelines.append(('QuantileTransformer-Normal_KNN',Pipeline([('Scaler',QuantileTransformer(output_distribution='normal')),('KNN',KNeighborsClassifier())])))
pipelines.append(('QuantileTransformer-Uniform_KNN',Pipeline([('Scaler',QuantileTransformer(output_distribution='uniform')),('KNN',KNeighborsClassifier())])))
pipelines.append(('PowerTransformer-yeo-johnson_KNN',Pipeline([('Scaler',PowerTransformer(method='yeo-johnson')),('KNN',KNeighborsClassifier())])))
pipelines.append(('Normalizer_KNN',Pipeline([('Scaler',Normalizer()),('KNN',KNeighborsClassifier())])))

pipelines.append(('_KNN-PCA',Pipeline([('KNN',KNeighborsClassifier())])))
pipelines.append(('Scaled_KNN-PCA',Pipeline([('Scaler',StandardScaler()), ('PCA', PCA(n_components=4)), ('KNN',KNeighborsClassifier())])))
pipelines.append(('MinMax_KNN-PCA',Pipeline([('Scaler',MinMaxScaler()), ('PCA', PCA(n_components=4)), ('KNN',KNeighborsClassifier())])))
pipelines.append(('MaxAbsScaler_KNN-PCA',Pipeline([('Scaler',MaxAbsScaler()), ('PCA', PCA(n_components=4)), ('KNN',KNeighborsClassifier())])))
pipelines.append(('RobustScaler_KNN-PCA',Pipeline([('Scaler',RobustScaler()), ('PCA', PCA(n_components=4)), ('KNN',KNeighborsClassifier())])))
pipelines.append(('QuantileTransformer-Normal_KNN-PCA',Pipeline([('Scaler',QuantileTransformer(output_distribution='normal')), ('PCA', PCA(n_components=4)), ('KNN',KNeighborsClassifier())])))
pipelines.append(('QuantileTransformer-Uniform_KNN-PCA',Pipeline([('Scaler',QuantileTransformer(output_distribution='uniform')), ('PCA', PCA(n_components=4)), ('KNN',KNeighborsClassifier())])))
pipelines.append(('PowerTransformer-yeo-johnson_KNN-PCA',Pipeline([('Scaler',PowerTransformer(method='yeo-johnson')), ('PCA', PCA(n_components=4)), ('KNN',KNeighborsClassifier())])))
pipelines.append(('Normalizer_KNN-PCA',Pipeline([('Scaler',Normalizer()), ('PCA', PCA(n_components=4)), ('KNN',KNeighborsClassifier())])))



pipelines.append(('_CART',Pipeline([('CART',DecisionTreeClassifier(random_state=seed))])))
pipelines.append(('Scaled_CART',Pipeline([('Scaler',StandardScaler()),('CART',DecisionTreeClassifier(random_state=seed))])))
pipelines.append(('MinMax_CART',Pipeline([('Scaler',MinMaxScaler()),('CART',DecisionTreeClassifier(random_state=seed))])))
pipelines.append(('MaxAbsScaler_CART',Pipeline([('Scaler',MaxAbsScaler()),('CART',DecisionTreeClassifier(random_state=seed))])))
pipelines.append(('RobustScaler_CART',Pipeline([('Scaler',RobustScaler()),('CART',DecisionTreeClassifier(random_state=seed))])))
pipelines.append(('QuantileTransformer-Normal_CART',Pipeline([('Scaler',QuantileTransformer(output_distribution='normal')),('CART',DecisionTreeClassifier(random_state=seed))])))
pipelines.append(('QuantileTransformer-Uniform_CART',Pipeline([('Scaler',QuantileTransformer(output_distribution='uniform')),('CART',DecisionTreeClassifier(random_state=seed))])))
pipelines.append(('PowerTransformer-yeo-johnson_CART',Pipeline([('Scaler',PowerTransformer(method='yeo-johnson')),('CART',DecisionTreeClassifier(random_state=seed))])))
pipelines.append(('Normalizer_CART',Pipeline([('Scaler',Normalizer()),('CART',DecisionTreeClassifier(random_state=seed))])))

pipelines.append(('_CART-PCA',Pipeline([('CART',DecisionTreeClassifier(random_state=seed))])))
pipelines.append(('Scaled_CART-PCA',Pipeline([('Scaler',StandardScaler()), ('PCA', PCA(n_components=4)), ('CART',DecisionTreeClassifier(random_state=seed))])))
pipelines.append(('MinMax_CART-PCA',Pipeline([('Scaler',MinMaxScaler()), ('PCA', PCA(n_components=4)), ('CART',DecisionTreeClassifier(random_state=seed))])))
pipelines.append(('MaxAbsScaler_CART-PCA',Pipeline([('Scaler',MaxAbsScaler()), ('PCA', PCA(n_components=4)), ('CART',DecisionTreeClassifier(random_state=seed))])))
pipelines.append(('RobustScaler_CART-PCA',Pipeline([('Scaler',RobustScaler()), ('PCA', PCA(n_components=4)), ('CART',DecisionTreeClassifier(random_state=seed))])))
pipelines.append(('QuantileTransformer-Normal_CART-PCA',Pipeline([('Scaler',QuantileTransformer(output_distribution='normal')), ('PCA', PCA(n_components=4)), ('CART',DecisionTreeClassifier(random_state=seed))])))
pipelines.append(('QuantileTransformer-Uniform_CART-PCA',Pipeline([('Scaler',QuantileTransformer(output_distribution='uniform')), ('PCA', PCA(n_components=4)), ('CART',DecisionTreeClassifier(random_state=seed))])))
pipelines.append(('PowerTransformer-yeo-johnson_CART-PCA',Pipeline([('Scaler',PowerTransformer(method='yeo-johnson')), ('PCA', PCA(n_components=4)), ('CART',DecisionTreeClassifier(random_state=seed))])))
pipelines.append(('Normalizer_CART-PCA',Pipeline([('Scaler',Normalizer()), ('PCA', PCA(n_components=4)), ('CART',DecisionTreeClassifier(random_state=seed))])))



pipelines.append(('_NB',Pipeline([('NB',GaussianNB())])))
pipelines.append(('Scaled_NB',Pipeline([('Scaler',StandardScaler()),('NB',GaussianNB())])))
pipelines.append(('MinMax_NB',Pipeline([('Scaler',MinMaxScaler()),('NB',GaussianNB())])))
pipelines.append(('MaxAbsScaler_NB',Pipeline([('Scaler',MaxAbsScaler()),('NB',GaussianNB())])))
pipelines.append(('RobustScaler_NB',Pipeline([('Scaler',RobustScaler()),('NB',GaussianNB())])))
pipelines.append(('QuantileTransformer-Normal_NB',Pipeline([('Scaler',QuantileTransformer(output_distribution='normal')),('NB',GaussianNB())])))
pipelines.append(('QuantileTransformer-Uniform_NB',Pipeline([('Scaler',QuantileTransformer(output_distribution='uniform')),('NB',GaussianNB())])))
pipelines.append(('PowerTransformer-yeo-johnson_NB',Pipeline([('Scaler',PowerTransformer(method='yeo-johnson')),('NB',GaussianNB())])))
pipelines.append(('Normalizer_NB',Pipeline([('Scaler',Normalizer()),('NB',GaussianNB())])))

pipelines.append(('_NB-PCA',Pipeline([('NB',GaussianNB())])))
pipelines.append(('Scaled_NB-PCA',Pipeline([('Scaler',StandardScaler()), ('PCA', PCA(n_components=4)), ('NB',GaussianNB())])))
pipelines.append(('MinMax_NB-PCA',Pipeline([('Scaler',MinMaxScaler()), ('PCA', PCA(n_components=4)), ('NB',GaussianNB())])))
pipelines.append(('MaxAbsScaler_NB-PCA',Pipeline([('Scaler',MaxAbsScaler()), ('PCA', PCA(n_components=4)), ('NB',GaussianNB())])))
pipelines.append(('RobustScaler_NB-PCA',Pipeline([('Scaler',RobustScaler()), ('PCA', PCA(n_components=4)), ('NB',GaussianNB())])))
pipelines.append(('QuantileTransformer-Normal_NB-PCA',Pipeline([('Scaler',QuantileTransformer(output_distribution='normal')), ('PCA', PCA(n_components=4)), ('NB',GaussianNB())])))
pipelines.append(('QuantileTransformer-Uniform_NB-PCA',Pipeline([('Scaler',QuantileTransformer(output_distribution='uniform')), ('PCA', PCA(n_components=4)), ('NB',GaussianNB())])))
pipelines.append(('PowerTransformer-yeo-johnson_NB-PCA',Pipeline([('Scaler',PowerTransformer(method='yeo-johnson')), ('PCA', PCA(n_components=4)), ('NB',GaussianNB())])))
pipelines.append(('Normalizer_NB-PCA',Pipeline([('Scaler',Normalizer()), ('PCA', PCA(n_components=4)), ('NB',GaussianNB())])))



pipelines.append(('_SVM' ,Pipeline([('SVM' , SVC(random_state=seed))])))
pipelines.append(('Scaled_SVM' ,Pipeline([('Scaler',StandardScaler()),('SVM' , SVC(random_state=seed))])))
pipelines.append(('MinMax_SVM',Pipeline([('Scaler',MinMaxScaler()),('SVM',SVC(random_state=seed))])))
pipelines.append(('MaxAbsScaler_SVM',Pipeline([('Scaler',MaxAbsScaler()),('SVM',SVC(random_state=seed))])))
pipelines.append(('RobustScaler_SVM',Pipeline([('Scaler',RobustScaler()),('SVM',SVC(random_state=seed))])))
pipelines.append(('QuantileTransformer-Normal_SVM',Pipeline([('Scaler',QuantileTransformer(output_distribution='normal')),('SVM',SVC(random_state=seed))])))
pipelines.append(('QuantileTransformer-Uniform_SVM',Pipeline([('Scaler',QuantileTransformer(output_distribution='uniform')),('SVM',SVC(random_state=seed))])))
pipelines.append(('PowerTransformer-yeo-johnson_SVM',Pipeline([('Scaler',PowerTransformer(method='yeo-johnson')),('SVM',SVC(random_state=seed))])))
pipelines.append(('Normalizer_SVM',Pipeline([('Scaler',Normalizer()),('SVM',SVC(random_state=seed))])))

pipelines.append(('_SVM-PCA',Pipeline([('SVM',SVC(random_state=seed))])))
pipelines.append(('Scaled_SVM-PCA',Pipeline([('Scaler',StandardScaler()), ('PCA', PCA(n_components=4)), ('SVM',SVC(random_state=seed))])))
pipelines.append(('MinMax_SVM-PCA',Pipeline([('Scaler',MinMaxScaler()), ('PCA', PCA(n_components=4)), ('SVM',SVC(random_state=seed))])))
pipelines.append(('MaxAbsScaler_SVM-PCA',Pipeline([('Scaler',MaxAbsScaler()), ('PCA', PCA(n_components=4)), ('SVM',SVC(random_state=seed))])))
pipelines.append(('RobustScaler_SVM-PCA',Pipeline([('Scaler',RobustScaler()), ('PCA', PCA(n_components=4)), ('SVM',SVC(random_state=seed))])))
pipelines.append(('QuantileTransformer-Normal_SVM-PCA',Pipeline([('Scaler',QuantileTransformer(output_distribution='normal')), ('PCA', PCA(n_components=4)), ('SVM',SVC(random_state=seed))])))
pipelines.append(('QuantileTransformer-Uniform_SVM-PCA',Pipeline([('Scaler',QuantileTransformer(output_distribution='uniform')), ('PCA', PCA(n_components=4)), ('SVM',SVC(random_state=seed))])))
pipelines.append(('PowerTransformer-yeo-johnson_SVM-PCA',Pipeline([('Scaler',PowerTransformer(method='yeo-johnson')), ('PCA', PCA(n_components=4)), ('SVM',SVC(random_state=seed))])))
pipelines.append(('Normalizer_SVM-PCA',Pipeline([('Scaler',Normalizer()), ('PCA', PCA(n_components=4)), ('SVM',SVC(random_state=seed))])))



pipelines.append(('_RF' ,Pipeline([('RF' , RandomForestClassifier(max_depth=3, random_state=seed))])))
pipelines.append(('Scaled_RF' ,Pipeline([('Scaler',StandardScaler()),('RF' , RandomForestClassifier(max_depth=3, random_state=seed))])))
pipelines.append(('MinMax_RF',Pipeline([('Scaler',MinMaxScaler()),('RF',RandomForestClassifier(max_depth=3, random_state=seed))])))
pipelines.append(('MaxAbsScaler_RF',Pipeline([('Scaler',MaxAbsScaler()),('RF',RandomForestClassifier(max_depth=3, random_state=seed))])))
pipelines.append(('RobustScaler_RF',Pipeline([('Scaler',RobustScaler()),('RF',RandomForestClassifier(max_depth=3, random_state=seed))])))
pipelines.append(('QuantileTransformer-Normal_RF',Pipeline([('Scaler',QuantileTransformer(output_distribution='normal')),('RF',RandomForestClassifier(max_depth=3, random_state=seed))])))
pipelines.append(('QuantileTransformer-Uniform_RF',Pipeline([('Scaler',QuantileTransformer(output_distribution='uniform')),('RF',RandomForestClassifier(max_depth=3, random_state=seed))])))
pipelines.append(('PowerTransformer-yeo-johnson_RF',Pipeline([('Scaler',PowerTransformer(method='yeo-johnson')),('RF',RandomForestClassifier(max_depth=3, random_state=seed))])))
pipelines.append(('Normalizer_RF',Pipeline([('Scaler',Normalizer()),('RF',RandomForestClassifier(max_depth=3, random_state=seed))])))

pipelines.append(('_RF-PCA',Pipeline([('RF',RandomForestClassifier(max_depth=3, random_state=seed))])))
pipelines.append(('Scaled_RF-PCA',Pipeline([('Scaler',StandardScaler()), ('PCA', PCA(n_components=4)), ('RF',RandomForestClassifier(max_depth=3, random_state=seed))])))
pipelines.append(('MinMax_RF-PCA',Pipeline([('Scaler',MinMaxScaler()), ('PCA', PCA(n_components=4)), ('RF',RandomForestClassifier(max_depth=3, random_state=seed))])))
pipelines.append(('MaxAbsScaler_RF-PCA',Pipeline([('Scaler',MaxAbsScaler()), ('PCA', PCA(n_components=4)), ('RF',RandomForestClassifier(max_depth=3, random_state=seed))])))
pipelines.append(('RobustScaler_RF-PCA',Pipeline([('Scaler',RobustScaler()), ('PCA', PCA(n_components=4)), ('RF',RandomForestClassifier(max_depth=3, random_state=seed))])))
pipelines.append(('QuantileTransformer-Normal_RF-PCA',Pipeline([('Scaler',QuantileTransformer(output_distribution='normal')), ('PCA', PCA(n_components=4)), ('RF',RandomForestClassifier(max_depth=3, random_state=seed))])))
pipelines.append(('QuantileTransformer-Uniform_RF-PCA',Pipeline([('Scaler',QuantileTransformer(output_distribution='uniform')), ('PCA', PCA(n_components=4)), ('RF',RandomForestClassifier(max_depth=3, random_state=seed))])))
pipelines.append(('PowerTransformer-yeo-johnson_RF-PCA',Pipeline([('Scaler',PowerTransformer(method='yeo-johnson')), ('PCA', PCA(n_components=4)), ('RF',RandomForestClassifier(max_depth=3, random_state=seed))])))
pipelines.append(('Normalizer_RF-PCA',Pipeline([('Scaler',Normalizer()), ('PCA', PCA(n_components=4)), ('RF',RandomForestClassifier(max_depth=3, random_state=seed))])))



pipelines.append(('_MLP',Pipeline([('MLP',MLPClassifier(random_state=seed))])))
pipelines.append(('Scaled_MLP',Pipeline([('Scaler',StandardScaler()),('MLP',MLPClassifier(random_state=seed))])))
pipelines.append(('MinMax_MLP',Pipeline([('Scaler',MinMaxScaler()),('MLP',MLPClassifier(random_state=seed))])))
pipelines.append(('MaxAbsScaler_MLP',Pipeline([('Scaler',MaxAbsScaler()),('MLP',MLPClassifier(random_state=seed))])))
pipelines.append(('RobustScaler_MLP',Pipeline([('Scaler',RobustScaler()),('MLP',MLPClassifier(random_state=seed))])))
pipelines.append(('QuantileTransformer-Normal_MLP',Pipeline([('Scaler',QuantileTransformer(output_distribution='normal')),('MLP',MLPClassifier(random_state=seed))])))
pipelines.append(('QuantileTransformer-Uniform_MLP',Pipeline([('Scaler',QuantileTransformer(output_distribution='uniform')),('MLP',MLPClassifier(random_state=seed))])))
pipelines.append(('PowerTransformer-yeo-johnson_MLP',Pipeline([('Scaler',PowerTransformer(method='yeo-johnson')),('MLP',MLPClassifier(random_state=seed))])))
pipelines.append(('Normalizer_MLP',Pipeline([('Scaler',Normalizer()),('MLP',MLPClassifier(random_state=seed))])))

pipelines.append(('_MLP-PCA',Pipeline([('MLP',MLPClassifier(random_state=seed))])))
pipelines.append(('Scaled_MLP-PCA',Pipeline([('Scaler',StandardScaler()), ('PCA', PCA(n_components=4)), ('MLP',MLPClassifier(random_state=seed))])))
pipelines.append(('MinMax_MLP-PCA',Pipeline([('Scaler',MinMaxScaler()), ('PCA', PCA(n_components=4)), ('MLP',MLPClassifier(random_state=seed))])))
pipelines.append(('MaxAbsScaler_MLP-PCA',Pipeline([('Scaler',MaxAbsScaler()), ('PCA', PCA(n_components=4)), ('MLP',MLPClassifier(random_state=seed))])))
pipelines.append(('RobustScaler_MLP-PCA',Pipeline([('Scaler',RobustScaler()), ('PCA', PCA(n_components=4)), ('MLP',MLPClassifier(random_state=seed))])))
pipelines.append(('QuantileTransformer-Normal_MLP-PCA',Pipeline([('Scaler',QuantileTransformer(output_distribution='normal')), ('PCA', PCA(n_components=4)), ('MLP',MLPClassifier(random_state=seed))])))
pipelines.append(('QuantileTransformer-Uniform_MLP-PCA',Pipeline([('Scaler',QuantileTransformer(output_distribution='uniform')), ('PCA', PCA(n_components=4)), ('MLP',MLPClassifier(random_state=seed))])))
pipelines.append(('PowerTransformer-yeo-johnson_MLP-PCA',Pipeline([('Scaler',PowerTransformer(method='yeo-johnson')), ('PCA', PCA(n_components=4)), ('MLP',MLPClassifier(random_state=seed))])))
pipelines.append(('Normalizer_MLP-PCA',Pipeline([('Scaler',Normalizer()), ('PCA', PCA(n_components=4)), ('MLP',MLPClassifier(random_state=seed))])))




results = []
names = []
test_scores = []
for name, model in pipelines:
    kfold = model_selection.KFold(n_splits=num_folds, random_state=seed)
    cv_results = model_selection.cross_val_score(model, X_train, y_train, cv=kfold, scoring=scoring)
    results.append(cv_results)
    names.append(name)
    msg = "%s: %f (%f)" % (name, cv_results.mean(), cv_results.std())
    print(msg)
    
    # fit on train and predict on test
    model.fit(X_train,y_train)
    test_scores.append(model.score(X_test,y_test))
    
print_results(names, results, test_scores)

_LR: 0.753676 (0.113733)
Scaled_LR: 0.741544 (0.105022)
MinMax_LR: 0.777941 (0.120510)
MaxAbsScaler_LR: 0.777941 (0.120510)
RobustScaler_LR: 0.735294 (0.076785)
QuantileTransformer-Normal_LR: 0.718015 (0.134922)
QuantileTransformer-Uniform_LR: 0.808088 (0.116896)
PowerTransformer-yeo-johnson_LR: 0.759559 (0.089047)
Normalizer_LR: 0.698529 (0.096671)
_LR-PCA: 0.753676 (0.113733)
Scaled_LR-PCA: 0.752574 (0.146544)
MinMax_LR-PCA: 0.752941 (0.145486)
MaxAbsScaler_LR-PCA: 0.758824 (0.142239)
RobustScaler_LR-PCA: 0.733824 (0.130514)
QuantileTransformer-Normal_LR-PCA: 0.741176 (0.155095)
QuantileTransformer-Uniform_LR-PCA: 0.788603 (0.117316)
PowerTransformer-yeo-johnson_LR-PCA: 0.782353 (0.116780)
Normalizer_LR-PCA: 0.631250 (0.068644)
_LDA: 0.698529 (0.132261)
Scaled_LDA: 0.698529 (0.132261)
MinMax_LDA: 0.698529 (0.132261)
MaxAbsScaler_LDA: 0.698529 (0.132261)
RobustScaler_LDA: 0.698529 (0.132261)
QuantileTransformer-Normal_LDA: 0.694485 (0.156079)
QuantileTransformer-Uniform_LDA: 0.741912 



_MLP: 0.778309 (0.125173)




Scaled_MLP: 0.819853 (0.084415)




MinMax_MLP: 0.766544 (0.143234)




MaxAbsScaler_MLP: 0.766544 (0.143234)




RobustScaler_MLP: 0.807721 (0.087606)




QuantileTransformer-Normal_MLP: 0.808456 (0.117215)




QuantileTransformer-Uniform_MLP: 0.807721 (0.105522)




PowerTransformer-yeo-johnson_MLP: 0.838603 (0.110764)




Normalizer_MLP: 0.723529 (0.131778)




_MLP-PCA: 0.778309 (0.125173)




Scaled_MLP-PCA: 0.776838 (0.090579)




MinMax_MLP-PCA: 0.747059 (0.110292)




MaxAbsScaler_MLP-PCA: 0.740809 (0.107743)




RobustScaler_MLP-PCA: 0.770588 (0.089062)




QuantileTransformer-Normal_MLP-PCA: 0.728676 (0.119684)




QuantileTransformer-Uniform_MLP-PCA: 0.776103 (0.103706)




PowerTransformer-yeo-johnson_MLP-PCA: 0.770221 (0.122909)




Normalizer_MLP-PCA: 0.618750 (0.097410)

##############################Results##############################
[0m_LR: 0.753676 (0.113733) [test_score:0.714]
[0mScaled_LR: 0.741544 (0.105022) [test_score:0.833]
[0mMinMax_LR: 0.777941 (0.120510) [test_score:0.762]
[0mMaxAbsScaler_LR: 0.777941 (0.120510) [test_score:0.762]
[0mRobustScaler_LR: 0.735294 (0.076785) [test_score:0.810]
[0mQuantileTransformer-Normal_LR: 0.718015 (0.134922) [test_score:0.810]
[1mQuantileTransformer-Uniform_LR: 0.808088 (0.116896) [test_score:0.786]
[0mPowerTransformer-yeo-johnson_LR: 0.759559 (0.089047) [test_score:0.833]
[0mNormalizer_LR: 0.698529 (0.096671) [test_score:0.690]

[0m_LR-PCA: 0.753676 (0.113733) [test_score:0.714]
[0mScaled_LR-PCA: 0.752574 (0.146544) [test_score:0.738]
[0mMinMax_LR-PCA: 0.752941 (0.145486) [test_score:0.714]
[0mMaxAbsScaler_LR-PCA: 0.758824 (0.142239) [test_score:0.714]
[0mRobustScaler_LR-PCA: 0.733824 (0.130514) [test_score:0.738]
[0mQuantileTransformer-Normal_LR-P



# Hypertune parameters and then check Normalization and Standartization

# Hypertune Random Forest

In [40]:
from sklearn.model_selection import RandomizedSearchCV, GridSearchCV
from sklearn.model_selection import cross_val_score  


# Number of trees in random forest
n_estimators = [int(x) for x in np.linspace(start = 3, stop = 20, num = 3)]
# Number of features to consider at every split
max_features = ['auto', 'sqrt']
# Maximum number of levels in tree
max_depth = [int(x) for x in np.linspace(10, 110, num = 11)]
max_depth.append(None)
# Minimum number of samples required to split a node
min_samples_split = [2, 5, 10]
# Minimum number of samples required at each leaf node
min_samples_leaf = [1, 2, 4]
# Create the random grid
random_grid = {'RF__n_estimators': n_estimators,
               'RF__max_features': max_features,
               'RF__max_depth': max_depth,
               'RF__min_samples_split': min_samples_split,
               'RF__min_samples_leaf': min_samples_leaf,
              }

# Standardize the dataset
pipelines = []
pipelines.append(('_RF' ,Pipeline([('RF' , RandomForestClassifier(random_state=seed))])))
pipelines.append(('Scaled_RF' ,Pipeline([('Scaler',StandardScaler()),('RF' , RandomForestClassifier(random_state=seed))])))
pipelines.append(('MinMax_RF',Pipeline([('Scaler',MinMaxScaler()),('RF',RandomForestClassifier(random_state=seed))])))
pipelines.append(('MaxAbsScaler_RF',Pipeline([('Scaler',MaxAbsScaler()),('RF',RandomForestClassifier(random_state=seed))])))
pipelines.append(('RobustScaler_RF',Pipeline([('Scaler',RobustScaler()),('RF',RandomForestClassifier(random_state=seed))])))
pipelines.append(('QuantileTransformer_RF',Pipeline([('Scaler',QuantileTransformer()),('RF',RandomForestClassifier(random_state=seed))])))
pipelines.append(('Normalizer_RF',Pipeline([('Scaler',Normalizer()),('RF',RandomForestClassifier(random_state=seed))])))

pipelines.append(('_RF-PCA',Pipeline([('RF',RandomForestClassifier(max_depth=3, random_state=seed))])))
pipelines.append(('Scaled_RF-PCA',Pipeline([('Scaler',StandardScaler()), ('PCA', PCA(n_components=4)), ('RF',RandomForestClassifier(max_depth=3, random_state=seed))])))
pipelines.append(('MinMax_RF-PCA',Pipeline([('Scaler',MinMaxScaler()), ('PCA', PCA(n_components=4)), ('RF',RandomForestClassifier(max_depth=3, random_state=seed))])))
pipelines.append(('MaxAbsScaler_RF-PCA',Pipeline([('Scaler',MaxAbsScaler()), ('PCA', PCA(n_components=4)), ('RF',RandomForestClassifier(max_depth=3, random_state=seed))])))
pipelines.append(('RobustScaler_RF-PCA',Pipeline([('Scaler',RobustScaler()), ('PCA', PCA(n_components=4)), ('RF',RandomForestClassifier(max_depth=3, random_state=seed))])))
pipelines.append(('QuantileTransformer_RF-PCA',Pipeline([('Scaler',QuantileTransformer()), ('PCA', PCA(n_components=4)), ('RF',RandomForestClassifier(max_depth=3, random_state=seed))])))
pipelines.append(('Normalizer_RF-PCA',Pipeline([('Scaler',Normalizer()), ('PCA', PCA(n_components=4)), ('RF',RandomForestClassifier(max_depth=3, random_state=seed))])))



results_mean = []
results_std = []
names = []
test_scores = []


for name, model in pipelines:
    kfold = model_selection.KFold(n_splits=10, random_state=seed)
    clf = GridSearchCV(estimator = model, param_grid = random_grid, cv = kfold, verbose=2, n_jobs = -1)
    clf.fit(X_train, y_train)
    best_grid = clf.best_estimator_

    best_mean = clf.cv_results_["mean_test_score"][clf.best_index_]
    best_std = clf.cv_results_["std_test_score"][clf.best_index_]
    results_mean.append(best_mean)
    results_std.append(best_std)
    names.append(name)
    msg = "%s: %f (%f)" % (name, best_mean, best_std)
    print(msg)
        
    test_scores.append(clf.score(X_test, y_test))

print_results2(names, results_mean,results_std, test_scores)

Fitting 10 folds for each of 648 candidates, totalling 6480 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  25 tasks      | elapsed:    7.1s
[Parallel(n_jobs=-1)]: Done 1539 tasks      | elapsed:   12.2s
[Parallel(n_jobs=-1)]: Done 5193 tasks      | elapsed:   24.2s
[Parallel(n_jobs=-1)]: Done 6480 out of 6480 | elapsed:   28.6s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.


_RF: 0.759036 (0.094511)
Test score 0.8333333333333334
Fitting 10 folds for each of 648 candidates, totalling 6480 fits


[Parallel(n_jobs=-1)]: Done  25 tasks      | elapsed:    0.3s
[Parallel(n_jobs=-1)]: Done 1516 tasks      | elapsed:    5.9s
[Parallel(n_jobs=-1)]: Done 4764 tasks      | elapsed:   17.3s
[Parallel(n_jobs=-1)]: Done 6480 out of 6480 | elapsed:   23.5s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.


Scaled_RF: 0.759036 (0.094511)
Test score 0.8333333333333334
Fitting 10 folds for each of 648 candidates, totalling 6480 fits


[Parallel(n_jobs=-1)]: Done 214 tasks      | elapsed:    1.0s
[Parallel(n_jobs=-1)]: Done 2876 tasks      | elapsed:   10.9s
[Parallel(n_jobs=-1)]: Done 6480 out of 6480 | elapsed:   26.0s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.


MinMax_RF: 0.759036 (0.094511)
Test score 0.8333333333333334
Fitting 10 folds for each of 648 candidates, totalling 6480 fits


[Parallel(n_jobs=-1)]: Done 142 tasks      | elapsed:    0.8s
[Parallel(n_jobs=-1)]: Done 1836 tasks      | elapsed:    8.5s
[Parallel(n_jobs=-1)]: Done 4678 tasks      | elapsed:   20.9s
[Parallel(n_jobs=-1)]: Done 6480 out of 6480 | elapsed:   28.2s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.


MaxAbsScaler_RF: 0.759036 (0.094511)
Test score 0.8333333333333334
Fitting 10 folds for each of 648 candidates, totalling 6480 fits


[Parallel(n_jobs=-1)]: Done 106 tasks      | elapsed:    0.8s
[Parallel(n_jobs=-1)]: Done 1316 tasks      | elapsed:    8.2s
[Parallel(n_jobs=-1)]: Done 3346 tasks      | elapsed:   21.4s
[Parallel(n_jobs=-1)]: Done 6176 tasks      | elapsed:   40.2s
[Parallel(n_jobs=-1)]: Done 6480 out of 6480 | elapsed:   42.1s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.


RobustScaler_RF: 0.759036 (0.094511)
Test score 0.8333333333333334
Fitting 10 folds for each of 648 candidates, totalling 6480 fits


[Parallel(n_jobs=-1)]: Done  25 tasks      | elapsed:    1.0s
[Parallel(n_jobs=-1)]: Done 146 tasks      | elapsed:    5.2s
[Parallel(n_jobs=-1)]: Done 349 tasks      | elapsed:   12.1s
[Parallel(n_jobs=-1)]: Done 632 tasks      | elapsed:   21.2s
[Parallel(n_jobs=-1)]: Done 997 tasks      | elapsed:   32.9s
[Parallel(n_jobs=-1)]: Done 1442 tasks      | elapsed:   48.6s
[Parallel(n_jobs=-1)]: Done 1969 tasks      | elapsed:  1.1min
[Parallel(n_jobs=-1)]: Done 2576 tasks      | elapsed:  1.4min
[Parallel(n_jobs=-1)]: Done 3265 tasks      | elapsed:  1.8min
[Parallel(n_jobs=-1)]: Done 4034 tasks      | elapsed:  2.2min
[Parallel(n_jobs=-1)]: Done 4885 tasks      | elapsed:  2.7min
[Parallel(n_jobs=-1)]: Done 5816 tasks      | elapsed:  3.3min
[Parallel(n_jobs=-1)]: Done 6480 out of 6480 | elapsed:  3.7min finished


QuantileTransformer_RF: 0.765060 (0.103615)
Test score 0.8333333333333334
Fitting 10 folds for each of 648 candidates, totalling 6480 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done 124 tasks      | elapsed:    0.7s
[Parallel(n_jobs=-1)]: Done 1576 tasks      | elapsed:    7.5s
[Parallel(n_jobs=-1)]: Done 4012 tasks      | elapsed:   19.7s
[Parallel(n_jobs=-1)]: Done 6480 out of 6480 | elapsed:   30.7s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.


Normalizer_RF: 0.789157 (0.108115)
Test score 0.7380952380952381
Fitting 10 folds for each of 648 candidates, totalling 6480 fits


[Parallel(n_jobs=-1)]: Done 124 tasks      | elapsed:    0.6s
[Parallel(n_jobs=-1)]: Done 1576 tasks      | elapsed:    6.8s
[Parallel(n_jobs=-1)]: Done 4012 tasks      | elapsed:   19.9s
[Parallel(n_jobs=-1)]: Done 6480 out of 6480 | elapsed:   34.4s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.


_RF-PCA: 0.759036 (0.094511)
Test score 0.8333333333333334
Fitting 10 folds for each of 648 candidates, totalling 6480 fits


[Parallel(n_jobs=-1)]: Done  25 tasks      | elapsed:    0.6s
[Parallel(n_jobs=-1)]: Done 612 tasks      | elapsed:    3.9s
[Parallel(n_jobs=-1)]: Done 1830 tasks      | elapsed:   10.6s
[Parallel(n_jobs=-1)]: Done 3528 tasks      | elapsed:   20.0s
[Parallel(n_jobs=-1)]: Done 5718 tasks      | elapsed:   31.1s
[Parallel(n_jobs=-1)]: Done 6480 out of 6480 | elapsed:   34.5s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.


Scaled_RF-PCA: 0.722892 (0.095618)
Test score 0.7142857142857143
Fitting 10 folds for each of 648 candidates, totalling 6480 fits


[Parallel(n_jobs=-1)]: Done 124 tasks      | elapsed:    0.7s
[Parallel(n_jobs=-1)]: Done 1576 tasks      | elapsed:    7.0s
[Parallel(n_jobs=-1)]: Done 4012 tasks      | elapsed:   18.0s
[Parallel(n_jobs=-1)]: Done 6480 out of 6480 | elapsed:   28.6s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.


MinMax_RF-PCA: 0.759036 (0.108783)
Test score 0.7857142857142857
Fitting 10 folds for each of 648 candidates, totalling 6480 fits


[Parallel(n_jobs=-1)]: Done 124 tasks      | elapsed:    0.6s
[Parallel(n_jobs=-1)]: Done 1576 tasks      | elapsed:    6.9s
[Parallel(n_jobs=-1)]: Done 4012 tasks      | elapsed:   17.4s
[Parallel(n_jobs=-1)]: Done 6480 out of 6480 | elapsed:   27.8s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.


MaxAbsScaler_RF-PCA: 0.777108 (0.131756)
Test score 0.8333333333333334
Fitting 10 folds for each of 648 candidates, totalling 6480 fits


[Parallel(n_jobs=-1)]: Done  88 tasks      | elapsed:    0.7s
[Parallel(n_jobs=-1)]: Done 1056 tasks      | elapsed:    7.1s
[Parallel(n_jobs=-1)]: Done 2680 tasks      | elapsed:   17.6s
[Parallel(n_jobs=-1)]: Done 4944 tasks      | elapsed:   31.8s
[Parallel(n_jobs=-1)]: Done 6480 out of 6480 | elapsed:   41.9s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.


RobustScaler_RF-PCA: 0.777108 (0.143657)
Test score 0.7857142857142857
Fitting 10 folds for each of 648 candidates, totalling 6480 fits


[Parallel(n_jobs=-1)]: Done  34 tasks      | elapsed:    1.4s
[Parallel(n_jobs=-1)]: Done 276 tasks      | elapsed:    9.8s
[Parallel(n_jobs=-1)]: Done 682 tasks      | elapsed:   23.8s
[Parallel(n_jobs=-1)]: Done 1248 tasks      | elapsed:   42.1s
[Parallel(n_jobs=-1)]: Done 1978 tasks      | elapsed:  1.1min
[Parallel(n_jobs=-1)]: Done 2868 tasks      | elapsed:  1.6min
[Parallel(n_jobs=-1)]: Done 3922 tasks      | elapsed:  2.2min
[Parallel(n_jobs=-1)]: Done 5136 tasks      | elapsed:  2.9min
[Parallel(n_jobs=-1)]: Done 6480 out of 6480 | elapsed:  3.6min finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.


QuantileTransformer_RF-PCA: 0.728916 (0.106791)
Test score 0.7857142857142857
Fitting 10 folds for each of 648 candidates, totalling 6480 fits


[Parallel(n_jobs=-1)]: Done 142 tasks      | elapsed:    0.7s
[Parallel(n_jobs=-1)]: Done 1836 tasks      | elapsed:    7.5s
[Parallel(n_jobs=-1)]: Done 4678 tasks      | elapsed:   19.5s


Normalizer_RF-PCA: 0.674699 (0.066384)
Test score 0.7619047619047619

##############################Results##############################
[0m_RF: 0.759036 (0.094511)
[0mScaled_RF: 0.759036 (0.094511)
[0mMinMax_RF: 0.759036 (0.094511)
[0mMaxAbsScaler_RF: 0.759036 (0.094511)
[0mRobustScaler_RF: 0.759036 (0.094511)
[0mQuantileTransformer_RF: 0.765060 (0.103615)
[1mNormalizer_RF: 0.789157 (0.108115)

[0m_RF-PCA: 0.759036 (0.094511)
[0mScaled_RF-PCA: 0.722892 (0.095618)
[0mMinMax_RF-PCA: 0.759036 (0.108783)
[0mMaxAbsScaler_RF-PCA: 0.777108 (0.131756)
[0mRobustScaler_RF-PCA: 0.777108 (0.143657)
[0mQuantileTransformer_RF-PCA: 0.728916 (0.106791)
[0mNormalizer_RF-PCA: 0.674699 (0.066384)


[Parallel(n_jobs=-1)]: Done 6480 out of 6480 | elapsed:   27.1s finished


# Hypertune KNN

In [79]:
from sklearn.model_selection import RandomizedSearchCV


n_neighbors  = [int(x) for x in np.linspace(start = 1, stop = 20, num = 2)]
weights  = ["uniform","distance"]
algorithm = ["auto", "ball_tree", "kd_tree", "brute"]
leaf_size =[int(x) for x in np.linspace(start = 5, stop = 50, num = 2)]
p =[int(x) for x in np.linspace(start = 1, stop = 4, num = 1)]
# Create the random grid
random_grid = {'KNN__n_neighbors': n_neighbors,
               'KNN__weights': weights,
               'KNN__algorithm': algorithm,
               'KNN__leaf_size': leaf_size,
               'KNN__p': p,
              }

# Standardize the dataset
pipelines = []
pipelines.append(('_KNN',Pipeline([('KNN',KNeighborsClassifier())])))
pipelines.append(('Scaled_KNN',Pipeline([('Scaler',StandardScaler()),('KNN',KNeighborsClassifier())])))
pipelines.append(('MinMax_KNN',Pipeline([('Scaler',MinMaxScaler()),('KNN',KNeighborsClassifier())])))
pipelines.append(('MaxAbsScaler_KNN',Pipeline([('Scaler',MaxAbsScaler()),('KNN',KNeighborsClassifier())])))
pipelines.append(('RobustScaler_KNN',Pipeline([('Scaler',RobustScaler()),('KNN',KNeighborsClassifier())])))
pipelines.append(('QuantileTransformer-Normal_KNN',Pipeline([('Scaler',QuantileTransformer(output_distribution='normal')),('KNN',KNeighborsClassifier())])))
pipelines.append(('QuantileTransformer-Uniform_KNN',Pipeline([('Scaler',QuantileTransformer(output_distribution='uniform')),('KNN',KNeighborsClassifier())])))
pipelines.append(('PowerTransformer-yeo-johnson_KNN',Pipeline([('Scaler',PowerTransformer(method='yeo-johnson')),('KNN',KNeighborsClassifier())])))
pipelines.append(('Normalizer_KNN',Pipeline([('Scaler',Normalizer()),('KNN',KNeighborsClassifier())])))

pipelines.append(('_KNN-PCA',Pipeline([('KNN',KNeighborsClassifier())])))
pipelines.append(('Scaled_KNN-PCA',Pipeline([('Scaler',StandardScaler()), ('PCA', PCA(n_components=4)), ('KNN',KNeighborsClassifier())])))
pipelines.append(('MinMax_KNN-PCA',Pipeline([('Scaler',MinMaxScaler()), ('PCA', PCA(n_components=4)), ('KNN',KNeighborsClassifier())])))
pipelines.append(('MaxAbsScaler_KNN-PCA',Pipeline([('Scaler',MaxAbsScaler()), ('PCA', PCA(n_components=4)), ('KNN',KNeighborsClassifier())])))
pipelines.append(('RobustScaler_KNN-PCA',Pipeline([('Scaler',RobustScaler()), ('PCA', PCA(n_components=4)), ('KNN',KNeighborsClassifier())])))
pipelines.append(('QuantileTransformer-Normal_KNN-PCA',Pipeline([('Scaler',QuantileTransformer(output_distribution='normal')), ('PCA', PCA(n_components=4)), ('KNN',KNeighborsClassifier())])))
pipelines.append(('QuantileTransformer-Uniform_KNN-PCA',Pipeline([('Scaler',QuantileTransformer(output_distribution='uniform')), ('PCA', PCA(n_components=4)), ('KNN',KNeighborsClassifier())])))
pipelines.append(('PowerTransformer-yeo-johnson_KNN-PCA',Pipeline([('Scaler',PowerTransformer(method='yeo-johnson')), ('PCA', PCA(n_components=4)), ('KNN',KNeighborsClassifier())])))
pipelines.append(('Normalizer_KNN-PCA',Pipeline([('Scaler',Normalizer()), ('PCA', PCA(n_components=4)), ('KNN',KNeighborsClassifier())])))


results_mean = []
results_std = []
names = []
test_scores = []

kfold = model_selection.KFold(n_splits=10, random_state=seed)
for name, model in pipelines:
    clf = GridSearchCV(estimator = model, param_grid = random_grid, cv = kfold, verbose=2, n_jobs = -1)
    clf.fit(X_train, y_train)
    best_grid = clf.best_estimator_

    best_mean = clf.cv_results_["mean_test_score"][clf.best_index_]
    best_std = clf.cv_results_["std_test_score"][clf.best_index_]
    results_mean.append(best_mean)
    results_std.append(best_std)
    names.append(name)
    
    msg = "%s: %f (%f)" % (name, best_mean, best_std)
    print(msg)
    
    test_scores.append(clf.score(X_test, y_test))

print_results2(names, results_mean,results_std, test_scores)

Fitting 10 folds for each of 32 candidates, totalling 320 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done 320 out of 320 | elapsed:    0.3s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.


_KNN: 0.855422 (0.047806)
Fitting 10 folds for each of 32 candidates, totalling 320 fits


[Parallel(n_jobs=-1)]: Done 320 out of 320 | elapsed:    0.3s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.


Scaled_KNN: 0.825301 (0.094589)
Fitting 10 folds for each of 32 candidates, totalling 320 fits


[Parallel(n_jobs=-1)]: Done 320 out of 320 | elapsed:    0.4s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.


MinMax_KNN: 0.843373 (0.061421)
Fitting 10 folds for each of 32 candidates, totalling 320 fits


[Parallel(n_jobs=-1)]: Done 320 out of 320 | elapsed:    0.3s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.


MaxAbsScaler_KNN: 0.843373 (0.061421)
Fitting 10 folds for each of 32 candidates, totalling 320 fits


[Parallel(n_jobs=-1)]: Done 144 out of 320 | elapsed:    0.4s remaining:    0.5s
[Parallel(n_jobs=-1)]: Done 320 out of 320 | elapsed:    0.8s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.


RobustScaler_KNN: 0.837349 (0.073886)
Fitting 10 folds for each of 32 candidates, totalling 320 fits


[Parallel(n_jobs=-1)]: Done  34 tasks      | elapsed:    0.9s
[Parallel(n_jobs=-1)]: Done 276 tasks      | elapsed:    6.5s
[Parallel(n_jobs=-1)]: Done 320 out of 320 | elapsed:    7.4s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.


QuantileTransformer-Normal_KNN: 0.813253 (0.099759)
Fitting 10 folds for each of 32 candidates, totalling 320 fits


[Parallel(n_jobs=-1)]: Done  34 tasks      | elapsed:    1.2s
[Parallel(n_jobs=-1)]: Done 276 tasks      | elapsed:    6.8s
[Parallel(n_jobs=-1)]: Done 320 out of 320 | elapsed:    7.9s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.


QuantileTransformer-Uniform_KNN: 0.891566 (0.050710)
Fitting 10 folds for each of 32 candidates, totalling 320 fits


[Parallel(n_jobs=-1)]: Done  34 tasks      | elapsed:    1.1s
[Parallel(n_jobs=-1)]: Done 276 tasks      | elapsed:    7.4s
[Parallel(n_jobs=-1)]: Done 320 out of 320 | elapsed:    8.3s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.


PowerTransformer-yeo-johnson_KNN: 0.891566 (0.058800)
Fitting 10 folds for each of 32 candidates, totalling 320 fits


[Parallel(n_jobs=-1)]: Done 320 out of 320 | elapsed:    0.4s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.


Normalizer_KNN: 0.801205 (0.091692)
Fitting 10 folds for each of 32 candidates, totalling 320 fits


[Parallel(n_jobs=-1)]: Done 320 out of 320 | elapsed:    0.3s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.


_KNN-PCA: 0.855422 (0.047806)
Fitting 10 folds for each of 32 candidates, totalling 320 fits


[Parallel(n_jobs=-1)]: Done 320 out of 320 | elapsed:    0.4s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.


Scaled_KNN-PCA: 0.819277 (0.077372)
Fitting 10 folds for each of 32 candidates, totalling 320 fits


[Parallel(n_jobs=-1)]: Done 320 out of 320 | elapsed:    0.4s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.


MinMax_KNN-PCA: 0.783133 (0.135372)
Fitting 10 folds for each of 32 candidates, totalling 320 fits


[Parallel(n_jobs=-1)]: Done 320 out of 320 | elapsed:    0.4s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.


MaxAbsScaler_KNN-PCA: 0.783133 (0.129690)
Fitting 10 folds for each of 32 candidates, totalling 320 fits


[Parallel(n_jobs=-1)]: Done 144 out of 320 | elapsed:    0.5s remaining:    0.6s
[Parallel(n_jobs=-1)]: Done 320 out of 320 | elapsed:    1.0s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.


RobustScaler_KNN-PCA: 0.765060 (0.086233)
Fitting 10 folds for each of 32 candidates, totalling 320 fits


[Parallel(n_jobs=-1)]: Done  34 tasks      | elapsed:    1.1s
[Parallel(n_jobs=-1)]: Done 276 tasks      | elapsed:    7.2s
[Parallel(n_jobs=-1)]: Done 320 out of 320 | elapsed:    8.4s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.


QuantileTransformer-Normal_KNN-PCA: 0.734940 (0.121702)
Fitting 10 folds for each of 32 candidates, totalling 320 fits


[Parallel(n_jobs=-1)]: Done  34 tasks      | elapsed:    1.1s
[Parallel(n_jobs=-1)]: Done 276 tasks      | elapsed:    8.2s
[Parallel(n_jobs=-1)]: Done 320 out of 320 | elapsed:    9.4s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.


QuantileTransformer-Uniform_KNN-PCA: 0.765060 (0.115540)
Fitting 10 folds for each of 32 candidates, totalling 320 fits


[Parallel(n_jobs=-1)]: Done  25 tasks      | elapsed:    0.9s
[Parallel(n_jobs=-1)]: Done 146 tasks      | elapsed:    4.9s
[Parallel(n_jobs=-1)]: Done 320 out of 320 | elapsed:   10.8s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.


PowerTransformer-yeo-johnson_KNN-PCA: 0.759036 (0.108579)
Fitting 10 folds for each of 32 candidates, totalling 320 fits
Normalizer_KNN-PCA: 0.716867 (0.068914)

##############################Results##############################
[0m_KNN: 0.855422 (0.047806) [test_score:0.857]
[0mScaled_KNN: 0.825301 (0.094589) [test_score:0.952]
[0mMinMax_KNN: 0.843373 (0.061421) [test_score:0.857]
[0mMaxAbsScaler_KNN: 0.843373 (0.061421) [test_score:0.857]
[0mRobustScaler_KNN: 0.837349 (0.073886) [test_score:0.929]
[0mQuantileTransformer-Normal_KNN: 0.813253 (0.099759) [test_score:0.881]
[1mQuantileTransformer-Uniform_KNN: 0.891566 (0.050710) [test_score:0.905]
[1mPowerTransformer-yeo-johnson_KNN: 0.891566 (0.058800) [test_score:0.905]
[0mNormalizer_KNN: 0.801205 (0.091692) [test_score:0.881]

[0m_KNN-PCA: 0.855422 (0.047806) [test_score:0.857]
[0mScaled_KNN-PCA: 0.819277 (0.077372) [test_score:0.738]
[0mMinMax_KNN-PCA: 0.783133 (0.135372) [test_score:0.762]
[0mMaxAbsScaler_KNN-PCA: 0.78

[Parallel(n_jobs=-1)]: Done 320 out of 320 | elapsed:    0.5s finished


# Hypertune SVM

In [81]:
from sklearn.model_selection import RandomizedSearchCV


C  = [x for x in np.arange(0.1, 2, 0.2)]
kernel   = ["linear", "poly", "rbf", "sigmoid"]

# Create the random grid
random_grid = {'SVM__C': C,
               'SVM__kernel': kernel,
              }

# Standardize the dataset
pipelines = []
pipelines.append(('_SVM' ,Pipeline([('SVM' , SVC(random_state=seed))])))
pipelines.append(('Scaled_SVM' ,Pipeline([('Scaler',StandardScaler()),('SVM' , SVC(random_state=seed))])))
pipelines.append(('MinMax_SVM',Pipeline([('Scaler',MinMaxScaler()),('SVM',SVC(random_state=seed))])))
pipelines.append(('MaxAbsScaler_SVM',Pipeline([('Scaler',MaxAbsScaler()),('SVM',SVC(random_state=seed))])))
pipelines.append(('RobustScaler_SVM',Pipeline([('Scaler',RobustScaler()),('SVM',SVC(random_state=seed))])))
pipelines.append(('QuantileTransformer-Normal_SVM',Pipeline([('Scaler',QuantileTransformer(output_distribution='normal')),('SVM',SVC(random_state=seed))])))
pipelines.append(('QuantileTransformer-Uniform_SVM',Pipeline([('Scaler',QuantileTransformer(output_distribution='uniform')),('SVM',SVC(random_state=seed))])))
pipelines.append(('PowerTransformer-yeo-johnson_SVM',Pipeline([('Scaler',PowerTransformer(method='yeo-johnson')),('SVM',SVC(random_state=seed))])))
pipelines.append(('Normalizer_SVM',Pipeline([('Scaler',Normalizer()),('SVM',SVC(random_state=seed))])))

pipelines.append(('_SVM-PCA',Pipeline([('SVM',SVC(random_state=seed))])))
pipelines.append(('Scaled_SVM-PCA',Pipeline([('Scaler',StandardScaler()), ('PCA', PCA(n_components=4)), ('SVM',SVC(random_state=seed))])))
pipelines.append(('MinMax_SVM-PCA',Pipeline([('Scaler',MinMaxScaler()), ('PCA', PCA(n_components=4)), ('SVM',SVC(random_state=seed))])))
pipelines.append(('MaxAbsScaler_SVM-PCA',Pipeline([('Scaler',MaxAbsScaler()), ('PCA', PCA(n_components=4)), ('SVM',SVC(random_state=seed))])))
pipelines.append(('RobustScaler_SVM-PCA',Pipeline([('Scaler',RobustScaler()), ('PCA', PCA(n_components=4)), ('SVM',SVC(random_state=seed))])))
pipelines.append(('QuantileTransformer-Normal_SVM-PCA',Pipeline([('Scaler',QuantileTransformer(output_distribution='normal')), ('PCA', PCA(n_components=4)), ('SVM',SVC(random_state=seed))])))
pipelines.append(('QuantileTransformer-Uniform_SVM-PCA',Pipeline([('Scaler',QuantileTransformer(output_distribution='uniform')), ('PCA', PCA(n_components=4)), ('SVM',SVC(random_state=seed))])))
pipelines.append(('PowerTransformer-yeo-johnson_SVM-PCA',Pipeline([('Scaler',PowerTransformer(method='yeo-johnson')), ('PCA', PCA(n_components=4)), ('SVM',SVC(random_state=seed))])))
pipelines.append(('Normalizer_SVM-PCA',Pipeline([('Scaler',Normalizer()), ('PCA', PCA(n_components=4)), ('SVM',SVC(random_state=seed))])))




results_mean = []
results_std = []
names = []
test_scores = []


kfold = model_selection.KFold(n_splits=10, random_state=seed)
for name, model in pipelines:
    clf = GridSearchCV(estimator = model, param_grid = random_grid, cv = kfold, verbose=2, n_jobs = -1)
    clf.fit(X_train, y_train)
    best_grid = clf.best_estimator_

    best_mean = clf.cv_results_["mean_test_score"][clf.best_index_]
    best_std = clf.cv_results_["std_test_score"][clf.best_index_]
    results_mean.append(best_mean)
    results_std.append(best_std)
    names.append(name)
    
    msg = "%s: %f (%f)" % (name, best_mean, best_std)
    print(msg)
        
    test_scores.append(clf.score(X_test, y_test))

print_results2(names, results_mean,results_std, test_scores)

Fitting 10 folds for each of 40 candidates, totalling 400 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done 400 out of 400 | elapsed:    0.4s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.


_SVM: 0.783133 (0.114643)
Fitting 10 folds for each of 40 candidates, totalling 400 fits


[Parallel(n_jobs=-1)]: Done 184 out of 400 | elapsed:    0.3s remaining:    0.4s
[Parallel(n_jobs=-1)]: Done 400 out of 400 | elapsed:    0.6s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.


Scaled_SVM: 0.855422 (0.072632)
Fitting 10 folds for each of 40 candidates, totalling 400 fits


[Parallel(n_jobs=-1)]: Done 400 out of 400 | elapsed:    0.5s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.


MinMax_SVM: 0.783133 (0.134634)
Fitting 10 folds for each of 40 candidates, totalling 400 fits


[Parallel(n_jobs=-1)]: Done 400 out of 400 | elapsed:    0.6s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.


MaxAbsScaler_SVM: 0.783133 (0.097068)
Fitting 10 folds for each of 40 candidates, totalling 400 fits


[Parallel(n_jobs=-1)]: Done 124 tasks      | elapsed:    0.6s
[Parallel(n_jobs=-1)]: Done 400 out of 400 | elapsed:    1.3s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.


RobustScaler_SVM: 0.801205 (0.089616)
Fitting 10 folds for each of 40 candidates, totalling 400 fits


[Parallel(n_jobs=-1)]: Done  25 tasks      | elapsed:    0.7s
[Parallel(n_jobs=-1)]: Done 146 tasks      | elapsed:    3.9s
[Parallel(n_jobs=-1)]: Done 349 tasks      | elapsed:    9.1s
[Parallel(n_jobs=-1)]: Done 400 out of 400 | elapsed:   10.5s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.


QuantileTransformer-Normal_SVM: 0.855422 (0.080169)
Fitting 10 folds for each of 40 candidates, totalling 400 fits


[Parallel(n_jobs=-1)]: Done  25 tasks      | elapsed:    0.7s
[Parallel(n_jobs=-1)]: Done 146 tasks      | elapsed:    3.8s
[Parallel(n_jobs=-1)]: Done 349 tasks      | elapsed:    8.8s
[Parallel(n_jobs=-1)]: Done 400 out of 400 | elapsed:    9.9s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.


QuantileTransformer-Uniform_SVM: 0.813253 (0.107352)
Fitting 10 folds for each of 40 candidates, totalling 400 fits


[Parallel(n_jobs=-1)]: Done  34 tasks      | elapsed:    1.2s
[Parallel(n_jobs=-1)]: Done 276 tasks      | elapsed:    7.0s
[Parallel(n_jobs=-1)]: Done 400 out of 400 | elapsed:   10.1s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.


PowerTransformer-yeo-johnson_SVM: 0.867470 (0.096210)
Fitting 10 folds for each of 40 candidates, totalling 400 fits


[Parallel(n_jobs=-1)]: Done 400 out of 400 | elapsed:    0.5s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.


Normalizer_SVM: 0.710843 (0.139746)
Fitting 10 folds for each of 40 candidates, totalling 400 fits


[Parallel(n_jobs=-1)]: Done 184 out of 400 | elapsed:    0.3s remaining:    0.3s
[Parallel(n_jobs=-1)]: Done 400 out of 400 | elapsed:    0.4s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.


_SVM-PCA: 0.783133 (0.114643)
Fitting 10 folds for each of 40 candidates, totalling 400 fits


[Parallel(n_jobs=-1)]: Done 400 out of 400 | elapsed:    1.0s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.


Scaled_SVM-PCA: 0.783133 (0.068430)
Fitting 10 folds for each of 40 candidates, totalling 400 fits


[Parallel(n_jobs=-1)]: Done 400 out of 400 | elapsed:    0.5s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.


MinMax_SVM-PCA: 0.777108 (0.109443)
Fitting 10 folds for each of 40 candidates, totalling 400 fits


[Parallel(n_jobs=-1)]: Done 400 out of 400 | elapsed:    0.5s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.


MaxAbsScaler_SVM-PCA: 0.771084 (0.129133)
Fitting 10 folds for each of 40 candidates, totalling 400 fits


[Parallel(n_jobs=-1)]: Done 142 tasks      | elapsed:    0.6s
[Parallel(n_jobs=-1)]: Done 400 out of 400 | elapsed:    1.3s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.


RobustScaler_SVM-PCA: 0.795181 (0.108223)
Fitting 10 folds for each of 40 candidates, totalling 400 fits


[Parallel(n_jobs=-1)]: Done  34 tasks      | elapsed:    1.0s
[Parallel(n_jobs=-1)]: Done 276 tasks      | elapsed:    6.8s
[Parallel(n_jobs=-1)]: Done 400 out of 400 | elapsed:   10.1s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.


QuantileTransformer-Normal_SVM-PCA: 0.734940 (0.086618)
Fitting 10 folds for each of 40 candidates, totalling 400 fits


[Parallel(n_jobs=-1)]: Done  34 tasks      | elapsed:    1.1s
[Parallel(n_jobs=-1)]: Done 276 tasks      | elapsed:    7.0s
[Parallel(n_jobs=-1)]: Done 400 out of 400 | elapsed:    9.9s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.


QuantileTransformer-Uniform_SVM-PCA: 0.789157 (0.099026)
Fitting 10 folds for each of 40 candidates, totalling 400 fits


[Parallel(n_jobs=-1)]: Done  25 tasks      | elapsed:    0.7s
[Parallel(n_jobs=-1)]: Done 146 tasks      | elapsed:    4.1s
[Parallel(n_jobs=-1)]: Done 349 tasks      | elapsed:   10.1s
[Parallel(n_jobs=-1)]: Done 400 out of 400 | elapsed:   11.6s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.


PowerTransformer-yeo-johnson_SVM-PCA: 0.759036 (0.115976)
Fitting 10 folds for each of 40 candidates, totalling 400 fits
Normalizer_SVM-PCA: 0.632530 (0.088912)

##############################Results##############################
[0m_SVM: 0.783133 (0.114643) [test_score:0.738]
[0mScaled_SVM: 0.855422 (0.072632) [test_score:0.833]
[0mMinMax_SVM: 0.783133 (0.134634) [test_score:0.738]
[0mMaxAbsScaler_SVM: 0.783133 (0.097068) [test_score:0.762]
[0mRobustScaler_SVM: 0.801205 (0.089616) [test_score:0.810]
[0mQuantileTransformer-Normal_SVM: 0.855422 (0.080169) [test_score:0.857]
[0mQuantileTransformer-Uniform_SVM: 0.813253 (0.107352) [test_score:0.762]
[1mPowerTransformer-yeo-johnson_SVM: 0.867470 (0.096210) [test_score:0.881]
[0mNormalizer_SVM: 0.710843 (0.139746) [test_score:0.714]

[0m_SVM-PCA: 0.783133 (0.114643) [test_score:0.738]
[0mScaled_SVM-PCA: 0.783133 (0.068430) [test_score:0.738]
[0mMinMax_SVM-PCA: 0.777108 (0.109443) [test_score:0.690]
[0mMaxAbsScaler_SVM-PCA: 0.77

[Parallel(n_jobs=-1)]: Done 400 out of 400 | elapsed:    0.5s finished


# Hypertune LogisticRegression

In [82]:
from sklearn.model_selection import RandomizedSearchCV


C  = [x for x in np.arange(0.1, 3, 0.2)]
penalty = ["l1", "l2"]
# dual = [True, False]
fit_intercept = [True, False]


# Create the random grid
random_grid = {'LR__C': C,
               'LR__penalty': penalty,
#                'LR__dual': dual,
               'LR__fit_intercept': fit_intercept
              }

# Standardize the dataset
pipelines = []
pipelines.append(('_LR',Pipeline([('LR',LogisticRegression())])))
pipelines.append(('Scaled_LR',Pipeline([('Scaler',StandardScaler()),('LR',LogisticRegression())])))
pipelines.append(('MinMax_LR',Pipeline([('Scaler',MinMaxScaler()),('LR',LogisticRegression())])))
pipelines.append(('MaxAbsScaler_LR',Pipeline([('Scaler',MaxAbsScaler()),('LR',LogisticRegression())])))
pipelines.append(('RobustScaler_LR',Pipeline([('Scaler',RobustScaler()),('LR',LogisticRegression())])))
pipelines.append(('QuantileTransformer-Normal_LR',Pipeline([('Scaler',QuantileTransformer(output_distribution='normal')),('LR',LogisticRegression())])))
pipelines.append(('QuantileTransformer-Uniform_LR',Pipeline([('Scaler',QuantileTransformer(output_distribution='uniform')),('LR',LogisticRegression())])))
pipelines.append(('PowerTransformer-yeo-johnson_LR',Pipeline([('Scaler',PowerTransformer(method='yeo-johnson')),('LR',LogisticRegression())])))
pipelines.append(('Normalizer_LR',Pipeline([('Scaler',Normalizer()),('LR',LogisticRegression())])))

pipelines.append(('_LR-PCA',Pipeline([('LR',LogisticRegression())])))
pipelines.append(('Scaled_LR-PCA',Pipeline([('Scaler',StandardScaler()), ('PCA', PCA(n_components=4)), ('LR',LogisticRegression())])))
pipelines.append(('MinMax_LR-PCA',Pipeline([('Scaler',MinMaxScaler()), ('PCA', PCA(n_components=4)), ('LR',LogisticRegression())])))
pipelines.append(('MaxAbsScaler_LR-PCA',Pipeline([('Scaler',MaxAbsScaler()), ('PCA', PCA(n_components=4)), ('LR',LogisticRegression())])))
pipelines.append(('RobustScaler_LR-PCA',Pipeline([('Scaler',RobustScaler()), ('PCA', PCA(n_components=4)), ('LR',LogisticRegression())])))
pipelines.append(('QuantileTransformer-Normal_LR-PCA',Pipeline([('Scaler',QuantileTransformer(output_distribution='normal')), ('PCA', PCA(n_components=4)), ('LR',LogisticRegression())])))
pipelines.append(('QuantileTransformer-Uniform_LR-PCA',Pipeline([('Scaler',QuantileTransformer(output_distribution='uniform')), ('PCA', PCA(n_components=4)), ('LR',LogisticRegression())])))
pipelines.append(('PowerTransformer-yeo-johnson_LR-PCA',Pipeline([('Scaler',PowerTransformer(method='yeo-johnson')), ('PCA', PCA(n_components=4)), ('LR',LogisticRegression())])))
pipelines.append(('Normalizer_LR-PCA',Pipeline([('Scaler',Normalizer()), ('PCA', PCA(n_components=4)), ('LR',LogisticRegression())])))




results_mean = []
results_std = []
names = []
test_scores = []


kfold = model_selection.KFold(n_splits=10, random_state=seed)
for name, model in pipelines:
    clf = GridSearchCV(estimator = model, param_grid = random_grid, cv = kfold, verbose=2, n_jobs = -1)
    clf.fit(X_train, y_train)
    best_grid = clf.best_estimator_

    best_mean = clf.cv_results_["mean_test_score"][clf.best_index_]
    best_std = clf.cv_results_["std_test_score"][clf.best_index_]
    results_mean.append(best_mean)
    results_std.append(best_std)
    names.append(name)
    
    msg = "%s: %f (%f)" % (name, best_mean, best_std)
    print(msg)
        
    test_scores.append(clf.score(X_test, y_test))

print_results2(names, results_mean,results_std, test_scores)

Fitting 10 folds for each of 60 candidates, totalling 600 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done 600 out of 600 | elapsed:    0.5s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.


_LR: 0.777108 (0.085984)
Fitting 10 folds for each of 60 candidates, totalling 600 fits


[Parallel(n_jobs=-1)]: Done 600 out of 600 | elapsed:    0.8s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.


Scaled_LR: 0.753012 (0.103062)
Fitting 10 folds for each of 60 candidates, totalling 600 fits


[Parallel(n_jobs=-1)]: Done 600 out of 600 | elapsed:    0.6s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.


MinMax_LR: 0.783133 (0.111211)
Fitting 10 folds for each of 60 candidates, totalling 600 fits


[Parallel(n_jobs=-1)]: Done 600 out of 600 | elapsed:    0.7s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.


MaxAbsScaler_LR: 0.789157 (0.111642)
Fitting 10 folds for each of 60 candidates, totalling 600 fits


[Parallel(n_jobs=-1)]: Done 178 tasks      | elapsed:    0.7s
[Parallel(n_jobs=-1)]: Done 600 out of 600 | elapsed:    1.6s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.


RobustScaler_LR: 0.783133 (0.110311)
Fitting 10 folds for each of 60 candidates, totalling 600 fits


[Parallel(n_jobs=-1)]: Done  34 tasks      | elapsed:    0.9s
[Parallel(n_jobs=-1)]: Done 276 tasks      | elapsed:    6.7s
[Parallel(n_jobs=-1)]: Done 600 out of 600 | elapsed:   14.5s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.


QuantileTransformer-Normal_LR: 0.771084 (0.135003)
Fitting 10 folds for each of 60 candidates, totalling 600 fits


[Parallel(n_jobs=-1)]: Done  34 tasks      | elapsed:    1.0s
[Parallel(n_jobs=-1)]: Done 276 tasks      | elapsed:    6.8s
[Parallel(n_jobs=-1)]: Done 600 out of 600 | elapsed:   16.0s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.


QuantileTransformer-Uniform_LR: 0.813253 (0.116741)
Fitting 10 folds for each of 60 candidates, totalling 600 fits


[Parallel(n_jobs=-1)]: Done  25 tasks      | elapsed:    0.8s
[Parallel(n_jobs=-1)]: Done 146 tasks      | elapsed:    4.6s
[Parallel(n_jobs=-1)]: Done 349 tasks      | elapsed:   11.9s
[Parallel(n_jobs=-1)]: Done 600 out of 600 | elapsed:   21.6s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.


PowerTransformer-yeo-johnson_LR: 0.801205 (0.110630)
Fitting 10 folds for each of 60 candidates, totalling 600 fits


[Parallel(n_jobs=-1)]: Done 196 tasks      | elapsed:    0.5s
[Parallel(n_jobs=-1)]: Done 600 out of 600 | elapsed:    0.7s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.


Normalizer_LR: 0.746988 (0.124564)
Fitting 10 folds for each of 60 candidates, totalling 600 fits


[Parallel(n_jobs=-1)]: Done 232 tasks      | elapsed:    0.5s
[Parallel(n_jobs=-1)]: Done 600 out of 600 | elapsed:    0.8s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.


_LR-PCA: 0.777108 (0.085984)
Fitting 10 folds for each of 60 candidates, totalling 600 fits


[Parallel(n_jobs=-1)]: Done 142 tasks      | elapsed:    0.5s
[Parallel(n_jobs=-1)]: Done 600 out of 600 | elapsed:    1.1s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.


Scaled_LR-PCA: 0.777108 (0.124496)
Fitting 10 folds for each of 60 candidates, totalling 600 fits


[Parallel(n_jobs=-1)]: Done 124 tasks      | elapsed:    0.4s
[Parallel(n_jobs=-1)]: Done 600 out of 600 | elapsed:    1.0s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.


MinMax_LR-PCA: 0.765060 (0.124579)
Fitting 10 folds for each of 60 candidates, totalling 600 fits


[Parallel(n_jobs=-1)]: Done 142 tasks      | elapsed:    0.5s
[Parallel(n_jobs=-1)]: Done 600 out of 600 | elapsed:    1.0s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.


MaxAbsScaler_LR-PCA: 0.771084 (0.128618)
Fitting 10 folds for each of 60 candidates, totalling 600 fits


[Parallel(n_jobs=-1)]: Done  52 tasks      | elapsed:    0.3s
[Parallel(n_jobs=-1)]: Done 600 out of 600 | elapsed:    2.5s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.


RobustScaler_LR-PCA: 0.746988 (0.131147)
Fitting 10 folds for each of 60 candidates, totalling 600 fits


[Parallel(n_jobs=-1)]: Done  25 tasks      | elapsed:    1.0s
[Parallel(n_jobs=-1)]: Done 146 tasks      | elapsed:    5.5s
[Parallel(n_jobs=-1)]: Done 349 tasks      | elapsed:   15.3s
[Parallel(n_jobs=-1)]: Done 600 out of 600 | elapsed:   25.1s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.


QuantileTransformer-Normal_LR-PCA: 0.771084 (0.143357)
Fitting 10 folds for each of 60 candidates, totalling 600 fits


[Parallel(n_jobs=-1)]: Done  25 tasks      | elapsed:    1.1s
[Parallel(n_jobs=-1)]: Done 146 tasks      | elapsed:    5.4s
[Parallel(n_jobs=-1)]: Done 349 tasks      | elapsed:   13.1s
[Parallel(n_jobs=-1)]: Done 600 out of 600 | elapsed:   22.1s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.


QuantileTransformer-Uniform_LR-PCA: 0.801205 (0.102417)
Fitting 10 folds for each of 60 candidates, totalling 600 fits


[Parallel(n_jobs=-1)]: Done  25 tasks      | elapsed:    1.0s
[Parallel(n_jobs=-1)]: Done 146 tasks      | elapsed:    6.0s
[Parallel(n_jobs=-1)]: Done 349 tasks      | elapsed:   14.4s
[Parallel(n_jobs=-1)]: Done 600 out of 600 | elapsed:   25.2s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.


PowerTransformer-yeo-johnson_LR-PCA: 0.783133 (0.120667)
Fitting 10 folds for each of 60 candidates, totalling 600 fits


[Parallel(n_jobs=-1)]: Done 142 tasks      | elapsed:    0.6s


Normalizer_LR-PCA: 0.656627 (0.084885)

##############################Results##############################
[0m_LR: 0.777108 (0.085984) [test_score:0.810]
[0mScaled_LR: 0.753012 (0.103062) [test_score:0.762]
[0mMinMax_LR: 0.783133 (0.111211) [test_score:0.762]
[0mMaxAbsScaler_LR: 0.789157 (0.111642) [test_score:0.762]
[0mRobustScaler_LR: 0.783133 (0.110311) [test_score:0.738]
[0mQuantileTransformer-Normal_LR: 0.771084 (0.135003) [test_score:0.810]
[1mQuantileTransformer-Uniform_LR: 0.813253 (0.116741) [test_score:0.762]
[0mPowerTransformer-yeo-johnson_LR: 0.801205 (0.110630) [test_score:0.810]
[0mNormalizer_LR: 0.746988 (0.124564) [test_score:0.690]

[0m_LR-PCA: 0.777108 (0.085984) [test_score:0.810]
[0mScaled_LR-PCA: 0.777108 (0.124496) [test_score:0.738]
[0mMinMax_LR-PCA: 0.765060 (0.124579) [test_score:0.690]
[0mMaxAbsScaler_LR-PCA: 0.771084 (0.128618) [test_score:0.714]
[0mRobustScaler_LR-PCA: 0.746988 (0.131147) [test_score:0.714]
[0mQuantileTransformer-Normal_LR-PC

[Parallel(n_jobs=-1)]: Done 600 out of 600 | elapsed:    1.1s finished


# Hypertune LinearDiscriminantAnalysis

In [46]:
from sklearn.model_selection import RandomizedSearchCV


solver  = ["lsqr", "eigen"]
shrinkage = ["auto",None, 0.1,0.3,0.5,0.7,0.9]
# Create the random grid
random_grid = {'LDA__solver': solver,
               'LDA__shrinkage': shrinkage
              }

# Standardize the dataset
pipelines = []
pipelines.append(('_LDA',Pipeline([('LDA',LinearDiscriminantAnalysis())])))
pipelines.append(('Scaled_LDA',Pipeline([('Scaler',StandardScaler()),('LDA',LinearDiscriminantAnalysis())])))
pipelines.append(('MinMax_LDA',Pipeline([('Scaler',MinMaxScaler()),('LDA',LinearDiscriminantAnalysis())])))
pipelines.append(('MaxAbsScaler_LDA',Pipeline([('Scaler',MaxAbsScaler()),('LDA',LinearDiscriminantAnalysis())])))
pipelines.append(('RobustScaler_LDA',Pipeline([('Scaler',RobustScaler()),('LDA',LinearDiscriminantAnalysis())])))
pipelines.append(('QuantileTransformer_LDA',Pipeline([('Scaler',QuantileTransformer()),('LDA',LinearDiscriminantAnalysis())])))
pipelines.append(('Normalizer_LDA',Pipeline([('Scaler',Normalizer()),('LDA',LinearDiscriminantAnalysis())])))

pipelines.append(('_LDA-PCA',Pipeline([('LDA',LinearDiscriminantAnalysis())])))
pipelines.append(('Scaled_LDA-PCA',Pipeline([('Scaler',StandardScaler()), ('PCA', PCA(n_components=4)), ('LDA',LinearDiscriminantAnalysis())])))
pipelines.append(('MinMax_LDA-PCA',Pipeline([('Scaler',MinMaxScaler()), ('PCA', PCA(n_components=4)), ('LDA',LinearDiscriminantAnalysis())])))
pipelines.append(('MaxAbsScaler_LDA-PCA',Pipeline([('Scaler',MaxAbsScaler()), ('PCA', PCA(n_components=4)), ('LDA',LinearDiscriminantAnalysis())])))
pipelines.append(('RobustScaler_LDA-PCA',Pipeline([('Scaler',RobustScaler()), ('PCA', PCA(n_components=4)), ('LDA',LinearDiscriminantAnalysis())])))
pipelines.append(('QuantileTransformer_LDA-PCA',Pipeline([('Scaler',QuantileTransformer()), ('PCA', PCA(n_components=4)), ('LDA',LinearDiscriminantAnalysis())])))
pipelines.append(('Normalizer_LDA-PCA',Pipeline([('Scaler',Normalizer()), ('PCA', PCA(n_components=4)), ('LDA',LinearDiscriminantAnalysis())])))



results_mean = []
results_std = []
names = []
test_scores = []


kfold = model_selection.KFold(n_splits=10, random_state=seed)
for name, model in pipelines:
    clf = GridSearchCV(estimator = model, param_grid = random_grid, cv = kfold, verbose=2, n_jobs = -1)
    clf.fit(X_train, y_train)
    best_grid = clf.best_estimator_

    best_mean = clf.cv_results_["mean_test_score"][clf.best_index_]
    best_std = clf.cv_results_["std_test_score"][clf.best_index_]
    results_mean.append(best_mean)
    results_std.append(best_std)
    names.append(name)
    
    msg = "%s: %f (%f)" % (name, best_mean, best_std)
    
    print(msg)
        
    test_scores.append(clf.score(X_test, y_test))

print_results2(names, results_mean,results_std, test_scores)

Fitting 10 folds for each of 14 candidates, totalling 140 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done 140 out of 140 | elapsed:    0.1s finished


_LDA: 0.789157 (0.133339)
Test score 0.6904761904761905
Fitting 10 folds for each of 14 candidates, totalling 140 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done 140 out of 140 | elapsed:    0.2s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.


Scaled_LDA: 0.777108 (0.091717)
Test score 0.8095238095238095
Fitting 10 folds for each of 14 candidates, totalling 140 fits


[Parallel(n_jobs=-1)]: Done 140 out of 140 | elapsed:    0.1s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.


MinMax_LDA: 0.777108 (0.095270)
Test score 0.7380952380952381
Fitting 10 folds for each of 14 candidates, totalling 140 fits


[Parallel(n_jobs=-1)]: Done 140 out of 140 | elapsed:    0.1s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.


MaxAbsScaler_LDA: 0.777108 (0.095270)
Test score 0.7380952380952381
Fitting 10 folds for each of 14 candidates, totalling 140 fits


[Parallel(n_jobs=-1)]: Done 140 out of 140 | elapsed:    0.4s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.


RobustScaler_LDA: 0.777108 (0.092079)
Test score 0.7857142857142857
Fitting 10 folds for each of 14 candidates, totalling 140 fits


[Parallel(n_jobs=-1)]: Done  25 tasks      | elapsed:    0.7s
[Parallel(n_jobs=-1)]: Done 140 out of 140 | elapsed:    3.2s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.


QuantileTransformer_LDA: 0.807229 (0.104187)
Test score 0.8333333333333334
Fitting 10 folds for each of 14 candidates, totalling 140 fits


[Parallel(n_jobs=-1)]: Done 140 out of 140 | elapsed:    0.1s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.


Normalizer_LDA: 0.746988 (0.121868)
Test score 0.6904761904761905
Fitting 10 folds for each of 14 candidates, totalling 140 fits


[Parallel(n_jobs=-1)]: Done 140 out of 140 | elapsed:    0.1s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.


_LDA-PCA: 0.789157 (0.133339)
Test score 0.6904761904761905
Fitting 10 folds for each of 14 candidates, totalling 140 fits


[Parallel(n_jobs=-1)]: Done 140 out of 140 | elapsed:    0.2s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.


Scaled_LDA-PCA: 0.771084 (0.136876)
Test score 0.7142857142857143
Fitting 10 folds for each of 14 candidates, totalling 140 fits


[Parallel(n_jobs=-1)]: Done 140 out of 140 | elapsed:    0.1s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.


MinMax_LDA-PCA: 0.771084 (0.128618)
Test score 0.6666666666666666
Fitting 10 folds for each of 14 candidates, totalling 140 fits


[Parallel(n_jobs=-1)]: Done 140 out of 140 | elapsed:    0.1s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.


MaxAbsScaler_LDA-PCA: 0.771084 (0.128618)
Test score 0.6904761904761905
Fitting 10 folds for each of 14 candidates, totalling 140 fits


[Parallel(n_jobs=-1)]: Done 140 out of 140 | elapsed:    0.4s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.


RobustScaler_LDA-PCA: 0.740964 (0.122839)
Test score 0.7142857142857143
Fitting 10 folds for each of 14 candidates, totalling 140 fits


[Parallel(n_jobs=-1)]: Done  34 tasks      | elapsed:    1.0s
[Parallel(n_jobs=-1)]: Done 140 out of 140 | elapsed:    3.1s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.


QuantileTransformer_LDA-PCA: 0.789157 (0.102542)
Test score 0.7619047619047619
Fitting 10 folds for each of 14 candidates, totalling 140 fits
Normalizer_LDA-PCA: 0.656627 (0.086051)
Test score 0.5952380952380952

##############################Results##############################
[0m_LDA: 0.789157 (0.133339)
[0mScaled_LDA: 0.777108 (0.091717)
[0mMinMax_LDA: 0.777108 (0.095270)
[0mMaxAbsScaler_LDA: 0.777108 (0.095270)
[0mRobustScaler_LDA: 0.777108 (0.092079)
[1mQuantileTransformer_LDA: 0.807229 (0.104187)
[0mNormalizer_LDA: 0.746988 (0.121868)

[0m_LDA-PCA: 0.789157 (0.133339)
[0mScaled_LDA-PCA: 0.771084 (0.136876)
[0mMinMax_LDA-PCA: 0.771084 (0.128618)
[0mMaxAbsScaler_LDA-PCA: 0.771084 (0.128618)
[0mRobustScaler_LDA-PCA: 0.740964 (0.122839)
[0mQuantileTransformer_LDA-PCA: 0.789157 (0.102542)
[0mNormalizer_LDA-PCA: 0.656627 (0.086051)


[Parallel(n_jobs=-1)]: Done 140 out of 140 | elapsed:    0.1s finished


# Hypertune Multi Layer Perceptron Classifier

In [47]:
from sklearn.model_selection import RandomizedSearchCV
from sklearn.neural_network import MLPClassifier
import itertools

hidden_layer_sizes = [(x,y) for x,y in itertools.product([x for x in range(1,3)],[x for x in range(5,120,5)])]
activation = [ "tanh", "relu"]
solver = ["lbfgs", "sgd", "adam"]
alpha = [0.1,0.001,0.0001]
learning_rate = ["constant", "invscaling", "adaptive"]
# Create the random grid
random_grid = {'MLP__hidden_layer_sizes': hidden_layer_sizes,
               'MLP__activation': activation,
               'MLP__solver': solver,
               'MLP__alpha': alpha,
               'MLP__learning_rate': learning_rate,
               'MLP__hidden_layer_sizes': hidden_layer_sizes,
              }

# Standardize the dataset
pipelines = []
pipelines.append(('_MLP',Pipeline([('MLP',MLPClassifier(random_state=seed))])))
pipelines.append(('Scaled_MLP',Pipeline([('Scaler',StandardScaler()),('MLP',MLPClassifier(random_state=seed))])))
pipelines.append(('MinMax_MLP',Pipeline([('Scaler',MinMaxScaler()),('MLP',MLPClassifier(random_state=seed))])))
pipelines.append(('MaxAbsScaler_MLP',Pipeline([('Scaler',MaxAbsScaler()),('MLP',MLPClassifier(random_state=seed))])))
pipelines.append(('RobustScaler_MLP',Pipeline([('Scaler',RobustScaler()),('MLP',MLPClassifier(random_state=seed))])))
pipelines.append(('QuantileTransformer_MLP',Pipeline([('Scaler',QuantileTransformer()),('MLP',MLPClassifier(random_state=seed))])))
pipelines.append(('Normalizer_MLP',Pipeline([('Scaler',Normalizer()),('MLP',MLPClassifier(random_state=seed))])))

pipelines.append(('_MLP-PCA',Pipeline([('MLP',MLPClassifier(random_state=seed))])))
pipelines.append(('Scaled_MLP-PCA',Pipeline([('Scaler',StandardScaler()), ('PCA', PCA(n_components=4)), ('MLP',MLPClassifier(random_state=seed))])))
pipelines.append(('MinMax_MLP-PCA',Pipeline([('Scaler',MinMaxScaler()), ('PCA', PCA(n_components=4)), ('MLP',MLPClassifier(random_state=seed))])))
pipelines.append(('MaxAbsScaler_MLP-PCA',Pipeline([('Scaler',MaxAbsScaler()), ('PCA', PCA(n_components=4)), ('MLP',MLPClassifier(random_state=seed))])))
pipelines.append(('RobustScaler_MLP-PCA',Pipeline([('Scaler',RobustScaler()), ('PCA', PCA(n_components=4)), ('MLP',MLPClassifier(random_state=seed))])))
pipelines.append(('QuantileTransformer_MLP-PCA',Pipeline([('Scaler',QuantileTransformer()), ('PCA', PCA(n_components=4)), ('MLP',MLPClassifier(random_state=seed))])))
pipelines.append(('Normalizer_MLP-PCA',Pipeline([('Scaler',Normalizer()), ('PCA', PCA(n_components=4)), ('MLP',MLPClassifier(random_state=seed))])))


results_mean = []
results_std = []
names = []
test_scores = []


kfold = model_selection.KFold(n_splits=10, random_state=seed)
for name, model in pipelines:
    clf = GridSearchCV(estimator = model, param_grid = random_grid, cv = kfold, verbose=2, n_jobs = -1)
    clf.fit(X_train, y_train)
    best_grid = clf.best_estimator_

    best_mean = clf.cv_results_["mean_test_score"][clf.best_index_]
    best_std = clf.cv_results_["std_test_score"][clf.best_index_]
    results_mean.append(best_mean)
    results_std.append(best_std)
    names.append(name)
    
    msg = "%s: %f (%f)" % (name, best_mean, best_std)
    print(msg)
    
    test_scores.append(clf.score(X_test, y_test))

print_results2(names, results_mean,results_std, test_scores)

Fitting 10 folds for each of 2484 candidates, totalling 24840 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  34 tasks      | elapsed:    0.5s
[Parallel(n_jobs=-1)]: Done 276 tasks      | elapsed:    3.9s
[Parallel(n_jobs=-1)]: Done 682 tasks      | elapsed:    9.4s
[Parallel(n_jobs=-1)]: Done 1248 tasks      | elapsed:   17.0s
[Parallel(n_jobs=-1)]: Done 1978 tasks      | elapsed:   28.0s
[Parallel(n_jobs=-1)]: Done 2868 tasks      | elapsed:   41.9s
[Parallel(n_jobs=-1)]: Done 3922 tasks      | elapsed:  1.1min
[Parallel(n_jobs=-1)]: Done 5136 tasks      | elapsed:  1.5min
[Parallel(n_jobs=-1)]: Done 6514 tasks      | elapsed:  1.9min
[Parallel(n_jobs=-1)]: Done 8052 tasks      | elapsed:  2.5min
[Parallel(n_jobs=-1)]: Done 9754 tasks      | elapsed:  3.1min
[Parallel(n_jobs=-1)]: Done 11616 tasks      | elapsed:  3.8min
[Parallel(n_jobs=-1)]: Done 14162 tasks      | elapsed:  4.6min
[Parallel(n_jobs=-1)]: Done 17858 tasks      | elapsed:  5.8min
[Parallel(n_jobs=-1)]: Done 22558 tasks   

_MLP: 0.849398 (0.066538)
Fitting 10 folds for each of 2484 candidates, totalling 24840 fits


[Parallel(n_jobs=-1)]: Done  34 tasks      | elapsed:    0.6s
[Parallel(n_jobs=-1)]: Done 276 tasks      | elapsed:    5.0s
[Parallel(n_jobs=-1)]: Done 682 tasks      | elapsed:   13.3s
[Parallel(n_jobs=-1)]: Done 1248 tasks      | elapsed:   24.5s
[Parallel(n_jobs=-1)]: Done 1978 tasks      | elapsed:   40.5s
[Parallel(n_jobs=-1)]: Done 2868 tasks      | elapsed:  1.0min
[Parallel(n_jobs=-1)]: Done 3922 tasks      | elapsed:  1.6min
[Parallel(n_jobs=-1)]: Done 5136 tasks      | elapsed:  2.0min
[Parallel(n_jobs=-1)]: Done 6514 tasks      | elapsed:  2.4min
[Parallel(n_jobs=-1)]: Done 8052 tasks      | elapsed:  3.1min
[Parallel(n_jobs=-1)]: Done 9754 tasks      | elapsed:  3.6min


KeyboardInterrupt: 

# Try on other datasets

In [7]:
from sklearn.datasets import load_iris
from sklearn.datasets import fetch_covtype, fetch_olivetti_faces

fetched_dataset = fetch_olivetti_faces()
# iris = load_iris()
X, y = fetched_dataset.data, fetched_dataset.target

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20, random_state=seed)
X_train

downloading Olivetti faces from https://ndownloader.figshare.com/files/5976027 to C:\Users\User\scikit_learn_data


array([[0.5082645 , 0.60330576, 0.6198347 , ..., 0.33471075, 0.3429752 ,
        0.3429752 ],
       [0.78099173, 0.7768595 , 0.77272725, ..., 0.1694215 , 0.1694215 ,
        0.1694215 ],
       [0.59504133, 0.661157  , 0.69008267, ..., 0.17355372, 0.20661157,
        0.17355372],
       ...,
       [0.45454547, 0.3677686 , 0.23966943, ..., 0.446281  , 0.45041323,
        0.45454547],
       [0.14876033, 0.14876033, 0.14876033, ..., 0.4876033 , 0.46694216,
        0.27272728],
       [0.61157024, 0.72727275, 0.74380165, ..., 0.37190083, 0.47933885,
        0.6694215 ]], dtype=float32)

In [147]:
# Print train as Dataframe
# pd.DataFrame(X_train, columns=iris.feature_names)

In [8]:
num_folds=10
scoring='accuracy'
models = []
models.append(('LR',  LogisticRegression()))
models.append(('LDA', LinearDiscriminantAnalysis()))
models.append(('KNN', KNeighborsClassifier()))
models.append(('CART',DecisionTreeClassifier(random_state=seed)))
models.append(('NB',  GaussianNB()))
models.append(('SVM', SVC(random_state=seed)))
models.append(('RF',  RandomForestClassifier(max_depth=3, random_state=seed)))
# evaluate each model in turn
results = []
names = []
for name, model in models:
    kfold = model_selection.KFold(n_splits=num_folds, random_state=seed)
    cv_results = model_selection.cross_val_score(model, X_train, y_train, cv=kfold, scoring=scoring)
    results.append(cv_results)
    names.append(name)
    msg = "%s: %f (%f)" % (name, cv_results.mean(), cv_results.std())
    print(msg)
    

LR: 0.965625 (0.025958)




LDA: 0.975000 (0.027243)
KNN: 0.740625 (0.102746)
CART: 0.468750 (0.069877)
NB: 0.759375 (0.092755)
SVM: 0.021875 (0.044305)
RF: 0.246875 (0.088884)


In [11]:
# Standardize the dataset
pipelines = []
pipelines.append(('ScaledLR',Pipeline([('Scaler',StandardScaler()),('LR',LogisticRegression())])))
pipelines.append(('ScaledLDA',Pipeline([('Scaler',StandardScaler()),('LDA',LinearDiscriminantAnalysis())])))
pipelines.append(('ScaledKNN',Pipeline([('Scaler',StandardScaler()),('KNN',KNeighborsClassifier())])))
pipelines.append(('ScaledCART',Pipeline([('Scaler',StandardScaler()),('CART',DecisionTreeClassifier(random_state=seed))])))
pipelines.append(('ScaledNB',Pipeline([('Scaler',StandardScaler()),('NB',GaussianNB())])))
pipelines.append(('ScaledSVM' ,Pipeline([('Scaler',StandardScaler()),('SVM' , SVC(random_state=seed))])))
pipelines.append(('ScaledRF' ,Pipeline([('Scaler',StandardScaler()),('RF' , RandomForestClassifier(max_depth=3, random_state=seed))])))
results = []
names = []
for name, model in pipelines:
    kfold = model_selection.KFold(n_splits=num_folds, random_state=seed)
    cv_results = model_selection.cross_val_score(model, X_train, y_train, cv=kfold, scoring=scoring)
    results.append(cv_results)
    names.append(name)
    msg = "%s: %f (%f)" % (name, cv_results.mean(), cv_results.std())
    print(msg)

ScaledLR: 0.934375 (0.066218)




ScaledLDA: 0.975000 (0.027243)
ScaledKNN: 0.725000 (0.093541)
ScaledCART: 0.475000 (0.075000)
ScaledNB: 0.759375 (0.092755)
ScaledSVM: 0.881250 (0.065252)
ScaledRF: 0.246875 (0.088884)


In [15]:
from __future__ import print_function

from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import Normalizer

import numpy as np

import matplotlib as mpl
from matplotlib import pyplot as plt
from matplotlib import cm

from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import minmax_scale
from sklearn.preprocessing import MaxAbsScaler
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import RobustScaler
from sklearn.preprocessing import Normalizer
from sklearn.preprocessing import QuantileTransformer
from sklearn.preprocessing import PowerTransformer

from sklearn.datasets import fetch_california_housing



# std_scale = StandardScaler().fit(X_train)
distributions = [
    ('Data after standard scaling',
        StandardScaler()),
    ('Data after min-max scaling',
        MinMaxScaler()),
    ('Data after max-abs scaling',
        MaxAbsScaler()),
    ('Data after robust scaling',
        RobustScaler(quantile_range=(25, 75))),
    ('Data after power transformation (Yeo-Johnson)',
     PowerTransformer(method='yeo-johnson')),
#     ('Data after power transformation (Box-Cox)',
#      PowerTransformer(method='box-cox')),
    ('Data after quantile transformation (gaussian pdf)',
        QuantileTransformer(output_distribution='normal')
        ),
    ('Data after quantile transformation (uniform pdf)',
        QuantileTransformer(output_distribution='uniform')
        ),
    ('Data after sample-wise L2 normalizing',
        Normalizer()),
]



print("No normalization or standartization")
svc_scaled = SVC(C=1.5, random_state=seed)
fit_std = svc_scaled.fit(X_train, y_train)
pred_train_std = svc_scaled.predict(X_train)

print('\nPrediction accuracy for the training dataset')
print('{:.2%}'.format(metrics.accuracy_score(y_train, pred_train_std)))
pred_test_std = svc_scaled.predict(X_test)

print('\nPrediction accuracy for the test dataset')
print('{:.2%}\n'.format(metrics.accuracy_score(y_test, pred_test_std)))


print("#"*50)

    
for name, dist in distributions:
    print(name)
    std_scale = dist.fit(X_train)
    X_train_std = std_scale.transform(X_train)
    X_test_std = std_scale.transform(X_test)

    # on standardized data
    svc_scaled = SVC(C=1.5, random_state=seed)
    fit_std = svc_scaled.fit(X_train_std, y_train)
    pred_train_std = svc_scaled.predict(X_train_std)

    print('\nPrediction accuracy for the training dataset')
    print('{:.2%}'.format(metrics.accuracy_score(y_train, pred_train_std)))
    
    pred_test_std = svc_scaled.predict(X_test_std)

    print('\nPrediction accuracy for the test dataset')
    print('{:.2%}\n'.format(metrics.accuracy_score(y_test, pred_test_std)))
    
    
    print("#"*50)


No normalization or standartization

Prediction accuracy for the training dataset
12.50%

Prediction accuracy for the test dataset
0.00%

##################################################
Data after standard scaling

Prediction accuracy for the training dataset
100.00%

Prediction accuracy for the test dataset
98.75%

##################################################
Data after min-max scaling

Prediction accuracy for the training dataset
12.81%

Prediction accuracy for the test dataset
0.00%

##################################################
Data after max-abs scaling

Prediction accuracy for the training dataset
12.50%

Prediction accuracy for the test dataset
0.00%

##################################################
Data after robust scaling

Prediction accuracy for the training dataset
100.00%

Prediction accuracy for the test dataset
98.75%

##################################################
Data after power transformation (Yeo-Johnson)

Prediction accuracy for the training dat

In [25]:
from sklearn.neural_network import MLPClassifier
from sklearn.decomposition import PCA

# Standardize the dataset
pipelines = []
pipelines.append(('_LR',Pipeline([('LR',LogisticRegression())])))
pipelines.append(('Scaled_LR',Pipeline([('Scaler',StandardScaler()),('LR',LogisticRegression())])))
pipelines.append(('MinMax_LR',Pipeline([('Scaler',MinMaxScaler()),('LR',LogisticRegression())])))
pipelines.append(('MaxAbsScaler_LR',Pipeline([('Scaler',MaxAbsScaler()),('LR',LogisticRegression())])))
pipelines.append(('RobustScaler_LR',Pipeline([('Scaler',RobustScaler()),('LR',LogisticRegression())])))
pipelines.append(('QuantileTransformer_LR',Pipeline([('Scaler',QuantileTransformer()),('LR',LogisticRegression())])))
pipelines.append(('Normalizer_LR',Pipeline([('Scaler',Normalizer()),('LR',LogisticRegression())])))

pipelines.append(('_LDA',Pipeline([('LDA',LinearDiscriminantAnalysis())])))
pipelines.append(('Scaled_LDA',Pipeline([('Scaler',StandardScaler()),('LDA',LinearDiscriminantAnalysis())])))
pipelines.append(('MinMax_LDA',Pipeline([('Scaler',MinMaxScaler()),('LDA',LinearDiscriminantAnalysis())])))
pipelines.append(('MaxAbsScaler_LDA',Pipeline([('Scaler',MaxAbsScaler()),('LDA',LinearDiscriminantAnalysis())])))
pipelines.append(('RobustScaler_LDA',Pipeline([('Scaler',RobustScaler()),('LDA',LinearDiscriminantAnalysis())])))
pipelines.append(('QuantileTransformer_LDA',Pipeline([('Scaler',QuantileTransformer()),('LDA',LinearDiscriminantAnalysis())])))
pipelines.append(('Normalizer_LDA',Pipeline([('Scaler',Normalizer()),('LDA',LinearDiscriminantAnalysis())])))

pipelines.append(('_KNN',Pipeline([('KNN',KNeighborsClassifier())])))
pipelines.append(('Scaled_KNN',Pipeline([('Scaler',StandardScaler()),('KNN',KNeighborsClassifier())])))
pipelines.append(('MinMax_KNN',Pipeline([('Scaler',MinMaxScaler()),('KNN',KNeighborsClassifier())])))
pipelines.append(('MaxAbsScaler_KNN',Pipeline([('Scaler',MaxAbsScaler()),('KNN',KNeighborsClassifier())])))
pipelines.append(('RobustScaler_KNN',Pipeline([('Scaler',RobustScaler()),('KNN',KNeighborsClassifier())])))
pipelines.append(('QuantileTransformer_KNN',Pipeline([('Scaler',QuantileTransformer()),('KNN',KNeighborsClassifier())])))
pipelines.append(('Normalizer_KNN',Pipeline([('Scaler',Normalizer()),('KNN',KNeighborsClassifier())])))

pipelines.append(('_CART',Pipeline([('CART',DecisionTreeClassifier(random_state=seed))])))
pipelines.append(('Scaled_CART',Pipeline([('Scaler',StandardScaler()),('CART',DecisionTreeClassifier(random_state=seed))])))
pipelines.append(('MinMax_CART',Pipeline([('Scaler',MinMaxScaler()),('CART',DecisionTreeClassifier(random_state=seed))])))
pipelines.append(('MaxAbsScaler_CART',Pipeline([('Scaler',MaxAbsScaler()),('CART',DecisionTreeClassifier(random_state=seed))])))
pipelines.append(('RobustScaler_CART',Pipeline([('Scaler',RobustScaler()),('CART',DecisionTreeClassifier(random_state=seed))])))
pipelines.append(('QuantileTransformer_CART',Pipeline([('Scaler',QuantileTransformer()),('CART',DecisionTreeClassifier(random_state=seed))])))
pipelines.append(('Normalizer_CART',Pipeline([('Scaler',Normalizer()),('CART',DecisionTreeClassifier(random_state=seed))])))

pipelines.append(('_NB',Pipeline([('NB',GaussianNB())])))
pipelines.append(('Scaled_NB',Pipeline([('Scaler',StandardScaler()),('NB',GaussianNB())])))
pipelines.append(('MinMax_NB',Pipeline([('Scaler',MinMaxScaler()),('NB',GaussianNB())])))
pipelines.append(('MaxAbsScaler_NB',Pipeline([('Scaler',MaxAbsScaler()),('NB',GaussianNB())])))
pipelines.append(('RobustScaler_NB',Pipeline([('Scaler',RobustScaler()),('NB',GaussianNB())])))
pipelines.append(('QuantileTransformer_NB',Pipeline([('Scaler',QuantileTransformer()),('NB',GaussianNB())])))
pipelines.append(('Normalizer_NB',Pipeline([('Scaler',Normalizer()),('NB',GaussianNB())])))

pipelines.append(('_NB_PCA',Pipeline([('NB',GaussianNB())])))
pipelines.append(('Scaled_NB_PCA',Pipeline([('Scaler',StandardScaler()), ('PCA', PCA(n_components=2)), ('NB',GaussianNB())])))
pipelines.append(('MinMax_NB_PCA',Pipeline([('Scaler',MinMaxScaler()), ('PCA', PCA(n_components=2)), ('NB',GaussianNB())])))
pipelines.append(('MaxAbsScaler_NB_PCA',Pipeline([('Scaler',MaxAbsScaler()), ('PCA', PCA(n_components=2)), ('NB',GaussianNB())])))
pipelines.append(('RobustScaler_NB_PCA',Pipeline([('Scaler',RobustScaler()), ('PCA', PCA(n_components=2)), ('NB',GaussianNB())])))
pipelines.append(('QuantileTransformer_NB_PCA',Pipeline([('Scaler',QuantileTransformer()), ('PCA', PCA(n_components=2)), ('NB',GaussianNB())])))
pipelines.append(('Normalizer_NB_PCA',Pipeline([('Scaler',Normalizer()), ('PCA', PCA(n_components=2)), ('NB',GaussianNB())])))


pipelines.append(('_SVM' ,Pipeline([('SVM' , SVC(random_state=seed))])))
pipelines.append(('Scaled_SVM' ,Pipeline([('Scaler',StandardScaler()),('SVM' , SVC(random_state=seed))])))
pipelines.append(('MinMax_SVM',Pipeline([('Scaler',MinMaxScaler()),('SVM',SVC(random_state=seed))])))
pipelines.append(('MaxAbsScaler_SVM',Pipeline([('Scaler',MaxAbsScaler()),('SVM',SVC(random_state=seed))])))
pipelines.append(('RobustScaler_SVM',Pipeline([('Scaler',RobustScaler()),('SVM',SVC(random_state=seed))])))
pipelines.append(('QuantileTransformer_SVM',Pipeline([('Scaler',QuantileTransformer()), ('SVM',SVC(random_state=seed))])))
pipelines.append(('Normalizer_SVM',Pipeline([('Scaler',Normalizer()), ('SVM',SVC(random_state=seed))])))

pipelines.append(('_RF' ,Pipeline([('RF' , RandomForestClassifier(max_depth=3, random_state=seed))])))
pipelines.append(('Scaled_RF' ,Pipeline([('Scaler',StandardScaler()),('RF' , RandomForestClassifier(max_depth=3, random_state=seed))])))
pipelines.append(('MinMax_RF',Pipeline([('Scaler',MinMaxScaler()),('RF',RandomForestClassifier(max_depth=3, random_state=seed))])))
pipelines.append(('MaxAbsScaler_RF',Pipeline([('Scaler',MaxAbsScaler()),('RF',RandomForestClassifier(max_depth=3, random_state=seed))])))
pipelines.append(('RobustScaler_RF',Pipeline([('Scaler',RobustScaler()),('RF',RandomForestClassifier(max_depth=3, random_state=seed))])))
pipelines.append(('QuantileTransformer_RF',Pipeline([('Scaler',QuantileTransformer()),('RF',RandomForestClassifier(max_depth=3, random_state=seed))])))
pipelines.append(('Normalizer_RF',Pipeline([('Scaler',Normalizer()),('RF',RandomForestClassifier(max_depth=3, random_state=seed))])))

pipelines.append(('_MLP',Pipeline([('MLP',MLPClassifier(random_state=seed))])))
pipelines.append(('Scaled_MLP',Pipeline([('Scaler',StandardScaler()),('MLP',MLPClassifier(random_state=seed))])))
pipelines.append(('MinMax_MLP',Pipeline([('Scaler',MinMaxScaler()),('MLP',MLPClassifier(random_state=seed))])))
pipelines.append(('MaxAbsScaler_MLP',Pipeline([('Scaler',MaxAbsScaler()),('MLP',MLPClassifier(random_state=seed))])))
pipelines.append(('RobustScaler_MLP',Pipeline([('Scaler',RobustScaler()),('MLP',MLPClassifier(random_state=seed))])))
pipelines.append(('QuantileTransformer_MLP',Pipeline([('Scaler',QuantileTransformer()),('MLP',MLPClassifier(random_state=seed))])))
pipelines.append(('Normalizer_MLP',Pipeline([('Scaler',Normalizer()),('MLP',MLPClassifier(random_state=seed))])))



results = []
names = []
for name, model in pipelines:
    kfold = model_selection.KFold(n_splits=num_folds, random_state=seed)
    cv_results = model_selection.cross_val_score(model, X_train, y_train, cv=kfold, scoring=scoring)
    results.append(cv_results)
    names.append(name)
    msg = "%s: %f (%f)" % (name, cv_results.mean(), cv_results.std())
    print(msg)
    
print_results(names, results)

KeyboardInterrupt: 

In [19]:
print_results(names, results)


##############################Results##############################
[0m_LR: 0.965625 (0.025958)
[0mScaled_LR: 0.934375 (0.066218)
[0mMinMax_LR: 0.968750 (0.019764)
[0mMaxAbsScaler_LR: 0.965625 (0.025958)
[1mRobustScaler_LR: 0.978125 (0.020010)
[0mQuantileTransformer_LR: 0.971875 (0.021875)
[0mNormalizer_LR: 0.071875 (0.031406)

[1m_LDA: 0.975000 (0.027243)
[0mScaled_LDA: 0.975000 (0.027243)
[0mMinMax_LDA: 0.975000 (0.027243)
[0mMaxAbsScaler_LDA: 0.975000 (0.027243)
[0mRobustScaler_LDA: 0.975000 (0.027243)
[0mQuantileTransformer_LDA: 0.975000 (0.018750)
[0mNormalizer_LDA: 0.956250 (0.031869)

[1m_KNN: 0.740625 (0.102746)
[0mScaled_KNN: 0.725000 (0.093541)
[0mMinMax_KNN: 0.728125 (0.105558)
[0mMaxAbsScaler_KNN: 0.725000 (0.091430)
[0mRobustScaler_KNN: 0.734375 (0.079365)
[0mQuantileTransformer_KNN: 0.725000 (0.100584)
[0mNormalizer_KNN: 0.706250 (0.103833)

[0m_CART: 0.468750 (0.069877)
[0mScaled_CART: 0.475000 (0.075000)
[0mMinMax_CART: 0.471875 (0.071875)
[0mM

In [20]:
from sklearn.model_selection import RandomizedSearchCV, GridSearchCV
from sklearn.model_selection import cross_val_score  


# Number of trees in random forest
n_estimators = [int(x) for x in np.linspace(start = 3, stop = 20, num = 3)]
# Number of features to consider at every split
max_features = ['auto', 'sqrt']
# Maximum number of levels in tree
max_depth = [int(x) for x in np.linspace(10, 110, num = 11)]
max_depth.append(None)
# Minimum number of samples required to split a node
min_samples_split = [2, 5, 10]
# Minimum number of samples required at each leaf node
min_samples_leaf = [1, 2, 4]
# Create the random grid
random_grid = {'RF__n_estimators': n_estimators,
               'RF__max_features': max_features,
               'RF__max_depth': max_depth,
               'RF__min_samples_split': min_samples_split,
               'RF__min_samples_leaf': min_samples_leaf,
              }

# Standardize the dataset
pipelines = []
pipelines.append(('_RF' ,Pipeline([('RF' , RandomForestClassifier(random_state=seed))])))
pipelines.append(('Scaled_RF' ,Pipeline([('Scaler',StandardScaler()),('RF' , RandomForestClassifier(random_state=seed))])))
pipelines.append(('MinMax_RF',Pipeline([('Scaler',MinMaxScaler()),('RF',RandomForestClassifier(random_state=seed))])))
pipelines.append(('MaxAbsScaler_RF',Pipeline([('Scaler',MaxAbsScaler()),('RF',RandomForestClassifier(random_state=seed))])))
pipelines.append(('RobustScaler_RF',Pipeline([('Scaler',RobustScaler()),('RF',RandomForestClassifier(random_state=seed))])))
pipelines.append(('QuantileTransformer_RF',Pipeline([('Scaler',QuantileTransformer()),('RF',RandomForestClassifier(random_state=seed))])))
pipelines.append(('Normalizer_RF',Pipeline([('Scaler',Normalizer()),('RF',RandomForestClassifier(random_state=seed))])))


results_mean = []
results_std = []
names = []

for name, model in pipelines:
    kfold = model_selection.KFold(n_splits=10, random_state=seed)
    clf = GridSearchCV(estimator = model, param_grid = random_grid, cv = kfold, verbose=2, n_jobs = -1)
    clf.fit(X_train, y_train)
    best_grid = clf.best_estimator_

    best_mean = clf.cv_results_["mean_test_score"][clf.best_index_]
    best_std = clf.cv_results_["std_test_score"][clf.best_index_]
    results_mean.append(best_mean)
    results_std.append(best_std)
    names.append(name)
    msg = "%s: %f (%f)" % (name, best_mean, best_std)
    print(msg)
    print("Test score", clf.score(X_test, y_test))


print_results2(names, results_mean,results_std)

Fitting 10 folds for each of 648 candidates, totalling 6480 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  25 tasks      | elapsed:    2.5s
[Parallel(n_jobs=-1)]: Done 146 tasks      | elapsed:    8.0s
[Parallel(n_jobs=-1)]: Done 349 tasks      | elapsed:   17.1s
[Parallel(n_jobs=-1)]: Done 632 tasks      | elapsed:   32.7s
[Parallel(n_jobs=-1)]: Done 997 tasks      | elapsed:   56.0s
[Parallel(n_jobs=-1)]: Done 1442 tasks      | elapsed:  1.4min
[Parallel(n_jobs=-1)]: Done 1969 tasks      | elapsed:  2.0min
[Parallel(n_jobs=-1)]: Done 2576 tasks      | elapsed:  2.6min
[Parallel(n_jobs=-1)]: Done 3265 tasks      | elapsed:  3.0min
[Parallel(n_jobs=-1)]: Done 4034 tasks      | elapsed:  3.5min
[Parallel(n_jobs=-1)]: Done 4885 tasks      | elapsed:  4.1min
[Parallel(n_jobs=-1)]: Done 5816 tasks      | elapsed:  4.8min
[Parallel(n_jobs=-1)]: Done 6480 out of 6480 | elapsed:  5.2min finished


_RF: 0.850000 (0.058962)
Test score 0.925
Fitting 10 folds for each of 648 candidates, totalling 6480 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  34 tasks      | elapsed:    1.4s
[Parallel(n_jobs=-1)]: Done 276 tasks      | elapsed:   10.6s
[Parallel(n_jobs=-1)]: Done 660 tasks      | elapsed:   27.8s
[Parallel(n_jobs=-1)]: Done 943 tasks      | elapsed:   43.9s
[Parallel(n_jobs=-1)]: Done 1308 tasks      | elapsed:  1.1min
[Parallel(n_jobs=-1)]: Done 1753 tasks      | elapsed:  1.5min
[Parallel(n_jobs=-1)]: Done 2280 tasks      | elapsed:  2.0min
[Parallel(n_jobs=-1)]: Done 2887 tasks      | elapsed:  2.6min
[Parallel(n_jobs=-1)]: Done 3576 tasks      | elapsed:  3.2min
[Parallel(n_jobs=-1)]: Done 4345 tasks      | elapsed:  3.9min
[Parallel(n_jobs=-1)]: Done 5196 tasks      | elapsed:  4.7min
[Parallel(n_jobs=-1)]: Done 6127 tasks      | elapsed:  5.6min
[Parallel(n_jobs=-1)]: Done 6480 out of 6480 | elapsed:  6.0min finished


Scaled_RF: 0.853125 (0.059375)
Test score 0.925
Fitting 10 folds for each of 648 candidates, totalling 6480 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  34 tasks      | elapsed:    1.3s
[Parallel(n_jobs=-1)]: Done 276 tasks      | elapsed:   10.3s
[Parallel(n_jobs=-1)]: Done 682 tasks      | elapsed:   27.4s
[Parallel(n_jobs=-1)]: Done 1248 tasks      | elapsed:   54.0s
[Parallel(n_jobs=-1)]: Done 1978 tasks      | elapsed:  1.5min
[Parallel(n_jobs=-1)]: Done 2699 tasks      | elapsed:  2.1min
[Parallel(n_jobs=-1)]: Done 3226 tasks      | elapsed:  2.6min
[Parallel(n_jobs=-1)]: Done 3833 tasks      | elapsed:  3.1min
[Parallel(n_jobs=-1)]: Done 4522 tasks      | elapsed:  3.7min
[Parallel(n_jobs=-1)]: Done 5291 tasks      | elapsed:  4.3min
[Parallel(n_jobs=-1)]: Done 6142 tasks      | elapsed:  5.1min
[Parallel(n_jobs=-1)]: Done 6480 out of 6480 | elapsed:  5.4min finished


MinMax_RF: 0.843750 (0.057622)
Test score 0.925
Fitting 10 folds for each of 648 candidates, totalling 6480 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  34 tasks      | elapsed:    1.3s
[Parallel(n_jobs=-1)]: Done 276 tasks      | elapsed:   10.2s
[Parallel(n_jobs=-1)]: Done 682 tasks      | elapsed:   26.2s
[Parallel(n_jobs=-1)]: Done 1248 tasks      | elapsed:   53.9s
[Parallel(n_jobs=-1)]: Done 1978 tasks      | elapsed:  1.5min
[Parallel(n_jobs=-1)]: Done 2849 tasks      | elapsed:  2.2min
[Parallel(n_jobs=-1)]: Done 3376 tasks      | elapsed:  2.7min
[Parallel(n_jobs=-1)]: Done 3983 tasks      | elapsed:  3.2min
[Parallel(n_jobs=-1)]: Done 4672 tasks      | elapsed:  3.8min
[Parallel(n_jobs=-1)]: Done 5441 tasks      | elapsed:  4.6min
[Parallel(n_jobs=-1)]: Done 6292 tasks      | elapsed:  5.8min
[Parallel(n_jobs=-1)]: Done 6480 out of 6480 | elapsed:  6.0min finished


MaxAbsScaler_RF: 0.850000 (0.058962)
Test score 0.925
Fitting 10 folds for each of 648 candidates, totalling 6480 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  25 tasks      | elapsed:    5.3s
[Parallel(n_jobs=-1)]: Done 146 tasks      | elapsed:   26.2s
[Parallel(n_jobs=-1)]: Done 349 tasks      | elapsed:  1.0min
[Parallel(n_jobs=-1)]: Done 632 tasks      | elapsed:  1.8min
[Parallel(n_jobs=-1)]: Done 997 tasks      | elapsed:  2.9min
[Parallel(n_jobs=-1)]: Done 1442 tasks      | elapsed:  4.3min
[Parallel(n_jobs=-1)]: Done 1969 tasks      | elapsed:  5.9min
[Parallel(n_jobs=-1)]: Done 2576 tasks      | elapsed:  7.8min
[Parallel(n_jobs=-1)]: Done 3265 tasks      | elapsed:  9.8min
[Parallel(n_jobs=-1)]: Done 4034 tasks      | elapsed: 12.2min
[Parallel(n_jobs=-1)]: Done 4885 tasks      | elapsed: 14.8min
[Parallel(n_jobs=-1)]: Done 5816 tasks      | elapsed: 17.7min
[Parallel(n_jobs=-1)]: Done 6480 out of 6480 | elapsed: 19.7min finished


RobustScaler_RF: 0.850000 (0.058962)
Test score 0.925
Fitting 10 folds for each of 648 candidates, totalling 6480 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  25 tasks      | elapsed:   56.7s
[Parallel(n_jobs=-1)]: Done 146 tasks      | elapsed:  4.6min
[Parallel(n_jobs=-1)]: Done 349 tasks      | elapsed: 10.8min
[Parallel(n_jobs=-1)]: Done 632 tasks      | elapsed: 19.6min
[Parallel(n_jobs=-1)]: Done 997 tasks      | elapsed: 31.0min
[Parallel(n_jobs=-1)]: Done 1442 tasks      | elapsed: 44.7min
[Parallel(n_jobs=-1)]: Done 1969 tasks      | elapsed: 60.8min
[Parallel(n_jobs=-1)]: Done 2576 tasks      | elapsed: 79.4min
[Parallel(n_jobs=-1)]: Done 3265 tasks      | elapsed: 100.5min
[Parallel(n_jobs=-1)]: Done 4034 tasks      | elapsed: 124.2min
[Parallel(n_jobs=-1)]: Done 4885 tasks      | elapsed: 150.3min
[Parallel(n_jobs=-1)]: Done 5816 tasks      | elapsed: 178.5min
[Parallel(n_jobs=-1)]: Done 6480 out of 6480 | elapsed: 198.4min finished


QuantileTransformer_RF: 0.825000 (0.050775)
Test score 0.9
Fitting 10 folds for each of 648 candidates, totalling 6480 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  34 tasks      | elapsed:    1.4s
[Parallel(n_jobs=-1)]: Done 276 tasks      | elapsed:   11.3s
[Parallel(n_jobs=-1)]: Done 645 tasks      | elapsed:   30.4s
[Parallel(n_jobs=-1)]: Done 928 tasks      | elapsed:   49.2s
[Parallel(n_jobs=-1)]: Done 1293 tasks      | elapsed:  1.2min
[Parallel(n_jobs=-1)]: Done 1738 tasks      | elapsed:  1.6min
[Parallel(n_jobs=-1)]: Done 2265 tasks      | elapsed:  2.2min
[Parallel(n_jobs=-1)]: Done 2872 tasks      | elapsed:  2.8min
[Parallel(n_jobs=-1)]: Done 3561 tasks      | elapsed:  3.5min
[Parallel(n_jobs=-1)]: Done 4330 tasks      | elapsed:  4.3min
[Parallel(n_jobs=-1)]: Done 5181 tasks      | elapsed:  5.1min
[Parallel(n_jobs=-1)]: Done 6112 tasks      | elapsed:  6.1min
[Parallel(n_jobs=-1)]: Done 6480 out of 6480 | elapsed:  6.4min finished


Normalizer_RF: 0.825000 (0.081729)
Test score 0.875

##############################Results##############################
[0m_RF: 0.850000 (0.058962)
[1mScaled_RF: 0.853125 (0.059375)
[0mMinMax_RF: 0.843750 (0.057622)
[0mMaxAbsScaler_RF: 0.850000 (0.058962)
[0mRobustScaler_RF: 0.850000 (0.058962)
[0mQuantileTransformer_RF: 0.825000 (0.050775)
[0mNormalizer_RF: 0.825000 (0.081729)


In [21]:
from sklearn.model_selection import RandomizedSearchCV


C  = [x for x in np.arange(0.1, 2, 0.2)]
kernel   = ["linear", "poly", "rbf", "sigmoid"]

# Create the random grid
random_grid = {'SVM__C': C,
               'SVM__kernel': kernel,
              }

# Standardize the dataset
pipelines = []
pipelines.append(('_SVM' ,Pipeline([('SVM' , SVC(random_state=seed))])))
pipelines.append(('Scaled_SVM' ,Pipeline([('Scaler',StandardScaler()),('SVM' , SVC(random_state=seed))])))
pipelines.append(('MinMax_SVM',Pipeline([('Scaler',MinMaxScaler()),('SVM',SVC(random_state=seed))])))
pipelines.append(('MaxAbsScaler_SVM',Pipeline([('Scaler',MaxAbsScaler()),('SVM',SVC(random_state=seed))])))
pipelines.append(('RobustScaler_SVM',Pipeline([('Scaler',RobustScaler()),('SVM',SVC(random_state=seed))])))
pipelines.append(('QuantileTransformer_SVM',Pipeline([('Scaler',QuantileTransformer()),('SVM',SVC(random_state=seed))])))
pipelines.append(('Normalizer_SVM',Pipeline([('Scaler',Normalizer()),('SVM',SVC(random_state=seed))])))


results_mean = []
results_std = []
names = []

kfold = model_selection.KFold(n_splits=10, random_state=seed)
for name, model in pipelines:
    clf = GridSearchCV(estimator = model, param_grid = random_grid, cv = kfold, verbose=2, n_jobs = -1)
    clf.fit(X_train, y_train)
    best_grid = clf.best_estimator_

    best_mean = clf.cv_results_["mean_test_score"][clf.best_index_]
    best_std = clf.cv_results_["std_test_score"][clf.best_index_]
    results_mean.append(best_mean)
    results_std.append(best_std)
    names.append(name)
    
    msg = "%s: %f (%f)" % (name, best_mean, best_std)
    print(msg)
    print("Test score", clf.score(X_test, y_test))

print_results2(names, results_mean,results_std)

Fitting 10 folds for each of 40 candidates, totalling 400 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  25 tasks      | elapsed:    7.7s
[Parallel(n_jobs=-1)]: Done 146 tasks      | elapsed:   35.9s
[Parallel(n_jobs=-1)]: Done 349 tasks      | elapsed:  1.5min
[Parallel(n_jobs=-1)]: Done 400 out of 400 | elapsed:  1.6min finished


_SVM: 0.962500 (0.033657)
Test score 0.9875
Fitting 10 folds for each of 40 candidates, totalling 400 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  25 tasks      | elapsed:    7.2s
[Parallel(n_jobs=-1)]: Done 146 tasks      | elapsed:   35.9s
[Parallel(n_jobs=-1)]: Done 349 tasks      | elapsed:  1.4min
[Parallel(n_jobs=-1)]: Done 400 out of 400 | elapsed:  1.6min finished


Scaled_SVM: 0.953125 (0.037630)
Test score 0.9875
Fitting 10 folds for each of 40 candidates, totalling 400 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  25 tasks      | elapsed:    7.7s
[Parallel(n_jobs=-1)]: Done 146 tasks      | elapsed:   36.4s
[Parallel(n_jobs=-1)]: Done 349 tasks      | elapsed:  1.4min
[Parallel(n_jobs=-1)]: Done 400 out of 400 | elapsed:  1.6min finished


MinMax_SVM: 0.956250 (0.034799)
Test score 0.9875
Fitting 10 folds for each of 40 candidates, totalling 400 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  25 tasks      | elapsed:    7.8s
[Parallel(n_jobs=-1)]: Done 146 tasks      | elapsed:   36.6s
[Parallel(n_jobs=-1)]: Done 349 tasks      | elapsed:  1.4min
[Parallel(n_jobs=-1)]: Done 400 out of 400 | elapsed:  1.6min finished


MaxAbsScaler_SVM: 0.962500 (0.033657)
Test score 0.9875
Fitting 10 folds for each of 40 candidates, totalling 400 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  25 tasks      | elapsed:   12.1s
[Parallel(n_jobs=-1)]: Done 146 tasks      | elapsed:   53.9s
[Parallel(n_jobs=-1)]: Done 349 tasks      | elapsed:  2.2min
[Parallel(n_jobs=-1)]: Done 400 out of 400 | elapsed:  2.5min finished


RobustScaler_SVM: 0.959375 (0.034375)
Test score 0.9875
Fitting 10 folds for each of 40 candidates, totalling 400 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  25 tasks      | elapsed:  1.0min
[Parallel(n_jobs=-1)]: Done 146 tasks      | elapsed:  5.0min
[Parallel(n_jobs=-1)]: Done 349 tasks      | elapsed: 11.8min
[Parallel(n_jobs=-1)]: Done 400 out of 400 | elapsed: 13.4min finished


QuantileTransformer_SVM: 0.959375 (0.039652)
Test score 0.9875
Fitting 10 folds for each of 40 candidates, totalling 400 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  25 tasks      | elapsed:    8.1s
[Parallel(n_jobs=-1)]: Done 146 tasks      | elapsed:   37.8s
[Parallel(n_jobs=-1)]: Done 349 tasks      | elapsed:  1.4min
[Parallel(n_jobs=-1)]: Done 400 out of 400 | elapsed:  1.6min finished


Normalizer_SVM: 0.028125 (0.038145)
Test score 0.0

##############################Results##############################
[1m_SVM: 0.962500 (0.033657)
[0mScaled_SVM: 0.953125 (0.037630)
[0mMinMax_SVM: 0.956250 (0.034799)
[1mMaxAbsScaler_SVM: 0.962500 (0.033657)
[0mRobustScaler_SVM: 0.959375 (0.034375)
[0mQuantileTransformer_SVM: 0.959375 (0.039652)
[0mNormalizer_SVM: 0.028125 (0.038145)


In [22]:
from sklearn.model_selection import RandomizedSearchCV


n_neighbors  = [int(x) for x in np.linspace(start = 1, stop = 20, num = 2)]
weights  = ["uniform","distance"]
algorithm = ["auto", "ball_tree", "kd_tree", "brute"]
leaf_size =[int(x) for x in np.linspace(start = 5, stop = 50, num = 2)]
p =[int(x) for x in np.linspace(start = 1, stop = 4, num = 1)]
# Create the random grid
random_grid = {'KNN__n_neighbors': n_neighbors,
               'KNN__weights': weights,
               'KNN__algorithm': algorithm,
               'KNN__leaf_size': leaf_size,
               'KNN__p': p,
              }

# Standardize the dataset
pipelines = []
pipelines.append(('_KNN',Pipeline([('KNN',KNeighborsClassifier())])))
pipelines.append(('Scaled_KNN',Pipeline([('Scaler',StandardScaler()),('KNN',KNeighborsClassifier())])))
pipelines.append(('MinMax_KNN',Pipeline([('Scaler',MinMaxScaler()),('KNN',KNeighborsClassifier())])))
pipelines.append(('MaxAbsScaler_KNN',Pipeline([('Scaler',MaxAbsScaler()),('KNN',KNeighborsClassifier())])))
pipelines.append(('RobustScaler_KNN',Pipeline([('Scaler',RobustScaler()),('KNN',KNeighborsClassifier())])))
pipelines.append(('QuantileTransformer_KNN',Pipeline([('Scaler',QuantileTransformer()),('KNN',KNeighborsClassifier())])))
pipelines.append(('Normalizer_KNN',Pipeline([('Scaler',Normalizer()),('KNN',KNeighborsClassifier())])))


results_mean = []
results_std = []
names = []

kfold = model_selection.KFold(n_splits=10, random_state=seed)
for name, model in pipelines:
    clf = GridSearchCV(estimator = model, param_grid = random_grid, cv = kfold, verbose=2, n_jobs = -1)
    clf.fit(X_train, y_train)
    best_grid = clf.best_estimator_

    best_mean = clf.cv_results_["mean_test_score"][clf.best_index_]
    best_std = clf.cv_results_["std_test_score"][clf.best_index_]
    results_mean.append(best_mean)
    results_std.append(best_std)
    names.append(name)
    
    msg = "%s: %f (%f)" % (name, best_mean, best_std)
    print(msg)
    print("Test score", clf.score(X_test, y_test))

print_results2(names, results_mean,results_std)

Fitting 10 folds for each of 32 candidates, totalling 320 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  25 tasks      | elapsed:    3.3s
[Parallel(n_jobs=-1)]: Done 146 tasks      | elapsed:   19.6s
[Parallel(n_jobs=-1)]: Done 320 out of 320 | elapsed:   41.4s finished


_KNN: 0.921875 (0.048914)
Test score 0.9875
Fitting 10 folds for each of 32 candidates, totalling 320 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  25 tasks      | elapsed:    3.7s
[Parallel(n_jobs=-1)]: Done 146 tasks      | elapsed:   21.3s
[Parallel(n_jobs=-1)]: Done 320 out of 320 | elapsed:   45.0s finished


Scaled_KNN: 0.925000 (0.046771)
Test score 0.9875
Fitting 10 folds for each of 32 candidates, totalling 320 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  25 tasks      | elapsed:    3.6s
[Parallel(n_jobs=-1)]: Done 146 tasks      | elapsed:   20.0s
[Parallel(n_jobs=-1)]: Done 320 out of 320 | elapsed:   43.0s finished


MinMax_KNN: 0.918750 (0.042390)
Test score 0.9875
Fitting 10 folds for each of 32 candidates, totalling 320 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  25 tasks      | elapsed:    4.1s
[Parallel(n_jobs=-1)]: Done 146 tasks      | elapsed:   21.3s
[Parallel(n_jobs=-1)]: Done 320 out of 320 | elapsed:   44.8s finished


MaxAbsScaler_KNN: 0.918750 (0.046771)
Test score 0.9875
Fitting 10 folds for each of 32 candidates, totalling 320 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  25 tasks      | elapsed:    8.6s
[Parallel(n_jobs=-1)]: Done 146 tasks      | elapsed:   41.2s
[Parallel(n_jobs=-1)]: Done 320 out of 320 | elapsed:  1.4min finished


RobustScaler_KNN: 0.925000 (0.046771)
Test score 0.9875
Fitting 10 folds for each of 32 candidates, totalling 320 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  25 tasks      | elapsed:  1.0min
[Parallel(n_jobs=-1)]: Done 146 tasks      | elapsed:  4.8min
[Parallel(n_jobs=-1)]: Done 320 out of 320 | elapsed: 10.2min finished


QuantileTransformer_KNN: 0.925000 (0.048814)
Test score 0.9625
Fitting 10 folds for each of 32 candidates, totalling 320 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  25 tasks      | elapsed:    2.9s
[Parallel(n_jobs=-1)]: Done 146 tasks      | elapsed:   18.7s
[Parallel(n_jobs=-1)]: Done 320 out of 320 | elapsed:   41.1s finished


Normalizer_KNN: 0.912500 (0.053765)
Test score 0.9875

##############################Results##############################
[0m_KNN: 0.921875 (0.048914)
[1mScaled_KNN: 0.925000 (0.046771)
[0mMinMax_KNN: 0.918750 (0.042390)
[0mMaxAbsScaler_KNN: 0.918750 (0.046771)
[1mRobustScaler_KNN: 0.925000 (0.046771)
[1mQuantileTransformer_KNN: 0.925000 (0.048814)
[0mNormalizer_KNN: 0.912500 (0.053765)


In [23]:
from sklearn.model_selection import RandomizedSearchCV
from sklearn.neural_network import MLPClassifier
import itertools

hidden_layer_sizes = [(x,y) for x,y in itertools.product([x for x in range(1,3)],[x for x in range(5,120,5)])]
activation = [ "tanh", "relu"]
solver = ["lbfgs", "sgd", "adam"]
alpha = [0.1,0.001,0.0001]
learning_rate = ["constant", "invscaling", "adaptive"]
# Create the random grid
random_grid = {'MLP__hidden_layer_sizes': hidden_layer_sizes,
               'MLP__activation': activation,
               'MLP__solver': solver,
               'MLP__alpha': alpha,
               'MLP__learning_rate': learning_rate,
               'MLP__hidden_layer_sizes': hidden_layer_sizes,
              }

# Standardize the dataset
pipelines = []
pipelines.append(('_MLP',Pipeline([('MLP',MLPClassifier(random_state=seed))])))
pipelines.append(('Scaled_MLP',Pipeline([('Scaler',StandardScaler()),('MLP',MLPClassifier(random_state=seed))])))
pipelines.append(('MinMax_MLP',Pipeline([('Scaler',MinMaxScaler()),('MLP',MLPClassifier(random_state=seed))])))
pipelines.append(('MaxAbsScaler_MLP',Pipeline([('Scaler',MaxAbsScaler()),('MLP',MLPClassifier(random_state=seed))])))
pipelines.append(('RobustScaler_MLP',Pipeline([('Scaler',RobustScaler()),('MLP',MLPClassifier(random_state=seed))])))
pipelines.append(('QuantileTransformer_MLP',Pipeline([('Scaler',QuantileTransformer()),('MLP',MLPClassifier(random_state=seed))])))
pipelines.append(('Normalizer_MLP',Pipeline([('Scaler',Normalizer()),('MLP',MLPClassifier(random_state=seed))])))


results_mean = []
results_std = []
names = []

kfold = model_selection.KFold(n_splits=10, random_state=seed)
for name, model in pipelines:
    clf = GridSearchCV(estimator = model, param_grid = random_grid, cv = kfold, verbose=2, n_jobs = -1)
    clf.fit(X_train, y_train)
    best_grid = clf.best_estimator_

    best_mean = clf.cv_results_["mean_test_score"][clf.best_index_]
    best_std = clf.cv_results_["std_test_score"][clf.best_index_]
    results_mean.append(best_mean)
    results_std.append(best_std)
    names.append(name)
    
    msg = "%s: %f (%f)" % (name, best_mean, best_std)
    print(msg)

print_results2(names, results_mean,results_std)

Fitting 10 folds for each of 2484 candidates, totalling 24840 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  25 tasks      | elapsed:   24.3s
[Parallel(n_jobs=-1)]: Done 146 tasks      | elapsed:  1.5min
[Parallel(n_jobs=-1)]: Done 349 tasks      | elapsed:  3.2min
[Parallel(n_jobs=-1)]: Done 632 tasks      | elapsed:  5.9min
[Parallel(n_jobs=-1)]: Done 997 tasks      | elapsed:  9.6min
[Parallel(n_jobs=-1)]: Done 1442 tasks      | elapsed: 14.1min
[Parallel(n_jobs=-1)]: Done 1969 tasks      | elapsed: 19.5min
[Parallel(n_jobs=-1)]: Done 2576 tasks      | elapsed: 27.2min
[Parallel(n_jobs=-1)]: Done 3265 tasks      | elapsed: 36.7min
[Parallel(n_jobs=-1)]: Done 4034 tasks      | elapsed: 46.8min
[Parallel(n_jobs=-1)]: Done 4885 tasks      | elapsed: 55.1min
[Parallel(n_jobs=-1)]: Done 5816 tasks      | elapsed: 63.2min
[Parallel(n_jobs=-1)]: Done 6829 tasks      | elapsed: 74.7min


KeyboardInterrupt: 