In [16]:
from itertools import product
import numpy as np
import pickle
from sklearn.metrics import accuracy_score
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC

def save_best (best_params_overall, y_pred_best, y_test):

    params_best_file = 'params_best.pkl'
    file = open(params_best_file, 'w')
    pickle.dump(best_params_overall, file)
    file.close()

    y_pred_best_file = 'y_pred_best.pkl'
    file = open(y_pred_best_file, 'w')
    pickle.dump(y_pred_best, file)
    file.close()

    y_test_file = 'y_test_best.pkl'
    file = open(y_test_file, 'w')
    pickle.dump(y_test, file)
    file.close()

components = [ 25, 50, 100 ]
y = pickle.load(open('target.pkl', 'r'))

best_accuracy_overall = 0
best_params_overall = None

for comp in components:
    
    print '-------------------------------------------------------'
    print
    print 'Training and optimisation report with {0} principal component'.format(comp)
    print
    filename = 'data-{0}-components.pkl'.format(comp)
    X = pickle.load(open(filename, 'r'))

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.14, random_state=0)

    y_pred = np.empty(len(y_test), dtype=np.int)
    
    n_neighbors_param = [50, 100, 200]
    
    for n in n_neighbors_param:
        
        print 'KNN with {0} neighbours'.format(n)
        
        knn_clf = KNeighborsClassifier(n_neighbors=int('{}'.format(n)))
        knn_clf.fit(X_train, y_train)
        
        kneighbors = knn_clf.kneighbors(X_test, return_distance=False)
        
        best_accuracy = 0
        best_params = []
        
        c_param = [1,10,100,1000]     
        degree_param = [2,3,4]
        gamma_param = [1e-3, 1e-4]
        kernel_param = ['poly', 'rbf']

        for params in product(c_param, degree_param, gamma_param, kernel_param):
            
            c = params[0]
            degree = params[1]
            gamma = params[2]
            kernel = params[3]
        
            for idx, indices in enumerate(kneighbors):
                neighbors = [X_train[i] for i in indices]
                neighbors_labels = [y_train[i] for i in indices]

                if len(set(neighbors_labels)) == 1:
                    y_pred[idx] = neighbors_labels[0]
                else:
                    svm_clf = SVC(kernel='{}'.format(kernel), degree=int('{}'.format(degree)), gamma=float('{}'.format(gamma)), C=int('{}'.format(c)), random_state=0)
                    svm_clf.fit(neighbors, neighbors_labels)
                    label = svm_clf.predict(X_test[idx].reshape(1, -1))
                    y_pred[idx] = label
            
            accuracy = accuracy_score(y_test, y_pred)
            if accuracy >= best_accuracy:
                best_accuracy = accuracy
                best_params.append((c, degree, gamma, kernel))
                
                if best_accuracy > best_accuracy_overall:
                    
                    best_accuracy_overall = best_accuracy
                    best_params_overall = (comp, n, c, degree, gamma, kernel)
                    
                    save_best (best_params_overall, y_pred, y_test)
        
        print 'best accuracy', best_accuracy
        print 'best parameters', best_params
        print

    
    print '-------------------------------------------------------'
     
 

-------------------------------------------------------

Training and optimisation report with 25 principal component

KNN with 50 neighbours
best accuracy 0.981022344659
best parameters [(1, 2, 0.001, 'poly'), (1, 2, 0.0001, 'poly'), (1, 3, 0.001, 'poly'), (1, 3, 0.0001, 'poly'), (10, 3, 0.001, 'poly'), (10, 3, 0.0001, 'poly'), (100, 3, 0.001, 'poly'), (100, 3, 0.0001, 'poly'), (1000, 3, 0.001, 'poly'), (1000, 3, 0.0001, 'poly')]

KNN with 100 neighbours
best accuracy 0.982246709519
best parameters [(1, 2, 0.001, 'poly'), (1, 2, 0.0001, 'poly'), (1, 3, 0.001, 'poly'), (1, 3, 0.0001, 'poly'), (10, 3, 0.001, 'poly'), (10, 3, 0.0001, 'poly'), (100, 3, 0.001, 'poly'), (100, 3, 0.0001, 'poly'), (1000, 3, 0.001, 'poly'), (1000, 3, 0.0001, 'poly')]

KNN with 200 neighbours
best accuracy 0.980818283849
best parameters [(1, 2, 0.001, 'poly'), (1, 2, 0.0001, 'poly'), (10, 2, 0.001, 'poly'), (10, 2, 0.0001, 'poly'), (100, 2, 0.001, 'poly'), (100, 2, 0.0001, 'poly'), (1000, 2, 0.001, 'poly'), (10