In [27]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns; sns.set_style('white')  # plot formatting
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import StratifiedKFold


# Algorithms
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.svm import SVC, LinearSVC
from sklearn.metrics import accuracy_score
from sklearn.neighbors import KNeighborsClassifier

# Dummy dataset
from sklearn import datasets
iris = datasets.load_iris()
X = iris.data 
Y = (iris.target > 1.5).reshape(-1,1).astype(np.float)
Y[Y==0] = -1
X_and_Y = np.hstack((X, Y))
np.random.seed(1)
np.random.shuffle(X_and_Y) 
Y = np.ravel(Y)

In [28]:
X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.20, random_state=42)

In [29]:
y_train.shape

(120,)

In [15]:
pipe = Pipeline([('std', StandardScaler()),
                 ('classifier1', LinearDiscriminantAnalysis()),
                 ('classifier2', SVC())])
                 #('classifier3', KNeighborsClassifier())])

# Parameters to search
search_space = [{'classifier1': [LinearDiscriminantAnalysis()],
                 'classifier1__solver': ['svd', 'lsqr', 'eigen']},
                {'classifier2': [SVC()],
                 'classifier2__kernel': ['linear'],
                 'classifier2__C': np.logspace(-4, 4, 9)
                #{'classifier3': [KNeighborsClassifier()],
                 #'classifier3__weights': ['uniform', 'distance']
                 #'classifier3__n_neighbors': [i for i in range(1,11)]
                }]

# Create grid search 
clf1 = GridSearchCV(pipe, search_space, cv=StratifiedKFold(n_splits=5), 
                   scoring='accuracy', refit=True,
                   verbose=0)

# Fit grid search
best_model = clf1.fit(X_train, y_train)

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


NotImplementedError: transform not implemented for 'lsqr' solver (use 'svd' or 'eigen').

In [30]:
pipe_LDA = Pipeline([('std', StandardScaler()),
                 ('classifier', LinearDiscriminantAnalysis())])
search_space1 = [{'classifier': [LinearDiscriminantAnalysis()],
                 'classifier__solver': ['svd', 'lsqr', 'eigen']}]
# Create grid search 
clf = GridSearchCV(pipe_LDA, search_space1, cv=StratifiedKFold(n_splits=5), 
                   scoring='accuracy', refit=True,
                   verbose=0)

# Fit grid search
best_model_LDA = clf.fit(X_train, y_train)



In [33]:
#best_model.cv_results_

In [34]:
#best_model.cv_results_['params'][ np.argmin(best_model.cv_results_['rank_test_score']) ]

In [37]:
pipe_SVC = Pipeline([('std', StandardScaler()),
                     ('classifier', SVC())])
search_space2 = [{'classifier': [SVC()],
                 'classifier__kernel': ['linear'],
                 'classifier__C': np.logspace(-4, 4, 9)}]
# Create grid search 
clf2 = GridSearchCV(pipe_SVC, search_space2, cv=StratifiedKFold(n_splits=5), 
                   scoring='accuracy', refit=True,
                   verbose=0)

# Fit grid search
best_model_SVC = clf2.fit(X_train, y_train)



In [43]:
#best_model_SVC.cv_results_

In [44]:
#best_model_SVC.cv_results_['params'][ np.argmin(best_model.cv_results_['rank_test_score']) ]

In [46]:
pipe_KNN = Pipeline([('std', StandardScaler()),
                     ('classifier', KNeighborsClassifier())])
search_space3 = [{'classifier': [KNeighborsClassifier()],
                 'classifier__weights': ['uniform', 'distance'],
                 'classifier__n_neighbors': [i for i in range(1,11)]
                }]
# Create grid search 
clf3 = GridSearchCV(pipe_KNN, search_space3, cv=StratifiedKFold(n_splits=5), 
                   scoring='accuracy', refit=True,
                   verbose=0)

# Fit grid search
best_model_KNN = clf3.fit(X_train, y_train)



In [47]:
best_model_KNN.cv_results_

{'mean_fit_time': array([0.00178347, 0.00118465, 0.0013021 , 0.00118756, 0.00126448,
        0.00115385, 0.00134077, 0.00126505, 0.0012918 , 0.00121393,
        0.00130429, 0.00134568, 0.0011827 , 0.00090613, 0.00126739,
        0.0015523 , 0.00135183, 0.00120382, 0.00143065, 0.00106401]),
 'std_fit_time': array([7.45811693e-04, 1.95715800e-04, 1.40705314e-04, 1.88876396e-04,
        2.91053746e-04, 1.27997394e-04, 1.89815367e-04, 1.84831149e-04,
        2.13963448e-04, 1.72142679e-04, 1.82691718e-04, 2.08108824e-04,
        2.00935954e-04, 1.56912032e-05, 5.08870629e-04, 6.81896143e-05,
        4.64359586e-05, 4.00092803e-05, 2.46391897e-04, 3.61099559e-05]),
 'mean_score_time': array([0.00228748, 0.00115423, 0.0022337 , 0.00103803, 0.00303683,
        0.0013082 , 0.00206499, 0.00113835, 0.00223904, 0.00111351,
        0.00240417, 0.00114951, 0.00205665, 0.00085135, 0.00233655,
        0.00147886, 0.00228167, 0.00117021, 0.0024806 , 0.00104427]),
 'std_score_time': array([5.39083073e-

In [50]:
best_n = best_model_KNN.cv_results_['params'][ np.argmin(best_model.cv_results_['rank_test_score']) ]['classifier__n_neighbors']

{'classifier': KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
                      metric_params=None, n_jobs=None, n_neighbors=9, p=2,
                      weights='distance'),
 'classifier__n_neighbors': 1,
 'classifier__weights': 'uniform'}