In [28]:
from sklearn import preprocessing
import pandas as pd
import numpy as np
from sklearn import model_selection
from sklearn import neighbors, metrics
from sklearn.model_selection import KFold

data = pd.read_csv("winequality-red.csv", sep=";")
X = data[data.columns[:-1]].values
y = data["quality"].values
y_class = np.where(y<6, 0, 1)
X_train, X_test, y_train, y_test = model_selection.train_test_split(X, y_class, test_size=0.3)
std_scale = preprocessing.StandardScaler().fit(X_train)
X_train_std = std_scale.transform(X_train)
X_test_std = std_scale.transform(X_test)
param_grid = {'n_neighbors':[3,5,7,9,11,13,15]}
score = 'accuracy'

In [29]:
class MyGridSearchCv:
    def __init__(self, param_grid, cv=1, scoring='accuracy'):
        self.cv = cv
        self.param_grid = param_grid
        self.scoring = scoring
        self.cv_results_ = {}

    def fit(self, X_train, y_train):
        k_fold = KFold(n_splits=self.cv)
        for k in self.param_grid:
            for train_indices, test_indices in k_fold.split(X_train):
                knn = neighbors.KNeighborsClassifier(k)
                knn.fit(train_indices, y_train)
                self.cv_results_[]


In [30]:
clf = model_selection.GridSearchCV(neighbors.KNeighborsClassifier(), param_grid, cv=5, scoring=score)
clf.fit(X_train_std, y_train)
print(clf.best_params_)
print("Résultats de la validation croisée :")
for mean, std, params in zip(
        clf.cv_results_['mean_test_score'],
        clf.cv_results_['std_test_score'],
        clf.cv_results_['params']
    ):

    print("{} = {:.3f} (+/-{:.03f}) for {}".format(
        score,
        mean,
        std*2,
        params
    ) )

y_pred = clf.predict(X_test_std)
print("\nSur le jeu de test : {:.3f}".format(metrics.accuracy_score(y_test, y_pred)))

{'n_neighbors': 13}
Résultats de la validation croisée :
accuracy = 0.687 (+/-0.023) for {'n_neighbors': 3}
accuracy = 0.702 (+/-0.024) for {'n_neighbors': 5}
accuracy = 0.708 (+/-0.029) for {'n_neighbors': 7}
accuracy = 0.710 (+/-0.027) for {'n_neighbors': 9}
accuracy = 0.710 (+/-0.028) for {'n_neighbors': 11}
accuracy = 0.715 (+/-0.055) for {'n_neighbors': 13}
accuracy = 0.710 (+/-0.029) for {'n_neighbors': 15}

Sur le jeu de test : 0.742
