In [5]:
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import scipy.io
from sklearn.decomposition import PCA

In [6]:
def _load_images(mat_path):
    """Return the transposed 'images' matrix stored in the MATLAB file ``mat_path``."""
    mat_contents = scipy.io.loadmat(mat_path)
    # Transposed because the rest of the notebook indexes samples along
    # axis 0 (labels are built from ``shape[0]``).
    return mat_contents['images'].T


training = _load_images('ReducedImagesForTraining.mat')
testing = _load_images('ReducedImagesForTesting.mat')

In [21]:
def nearest_points(A, B, k):
    """Return, for every row of ``B``, the indices of its ``k`` nearest rows of ``A``.

    Squared Euclidean distances are obtained from the expansion
    ``|a - b|^2 = |a|^2 + |b|^2 - 2 a.b`` so the whole distance matrix comes
    from a single matrix product.

    Parameters
    ----------
    A : array-like, shape (n_a, n_features)
        Reference points.
    B : array-like, shape (n_b, n_features)
        Query points.
    k : int
        Number of neighbours to return for each query point.

    Returns
    -------
    numpy.ndarray of int, shape (n_b, min(k, n_a))
        Row ``j`` holds indices into ``A`` ordered from the closest to the
        farthest neighbour of ``B[j]``.
    """
    # Work in float64: with integer inputs (e.g. uint8 pixel values) the
    # squares and the dot product wrap around and produce bogus distances.
    A = np.asarray(A, dtype=np.float64)
    B = np.asarray(B, dtype=np.float64)
    sq_norm_a = np.sum(A ** 2, axis=1)[:, np.newaxis]  # column, shape (n_a, 1)
    sq_norm_b = np.sum(B ** 2, axis=1)                 # shape (n_b,)
    sq_dist = sq_norm_a + sq_norm_b - 2. * np.dot(A, B.T)
    # Sorting down each column ranks the rows of A for one query point.
    # 'mergesort' is a stable kind, so equal distances always resolve to the
    # lowest index instead of depending on the sort implementation.
    return np.argsort(sq_dist, axis=0, kind='mergesort')[:k, :].T
    
def KNN(trainData, testData, trainClass, testClass, k):
    """Classify ``testData`` by majority vote among the ``k`` nearest training rows.

    Parameters
    ----------
    trainData : array-like, shape (n_train, n_features)
        Training samples, one per row.
    testData : array-like, shape (n_test, n_features)
        Samples to classify, one per row.
    trainClass : array-like, shape (n_train,)
        Label of every training sample.
    testClass : array-like, shape (n_test,)
        True label of every test sample.
    k : int
        Number of neighbours that take part in the vote.

    Returns
    -------
    error : int
        Number of misclassified test samples.
    error_list : numpy.ndarray of int
        Row indices (into ``testData``) of the misclassified samples.
    """
    trainClass = np.asarray(trainClass)
    testClass = np.asarray(testClass)
    # Encode the labels as 0..n_labels-1 so that np.bincount always receives
    # non-negative integers, whatever the original label dtype is (the labels
    # used in this notebook are floats). ``labels`` is sorted, so a tied vote
    # still goes to the smallest label, as np.argmax picks the first maximum.
    labels, encoded = np.unique(trainClass, return_inverse=True)
    nearest = nearest_points(trainData, testData, k)
    predicted = np.empty(nearest.shape[0], dtype=labels.dtype)
    for i, neighbours in enumerate(nearest):
        votes = np.bincount(encoded[neighbours], minlength=labels.size)
        predicted[i] = labels[np.argmax(votes)]
    # Collect all mismatches at once instead of growing an array per error.
    error_list = np.flatnonzero(predicted != testClass)
    return int(error_list.size), error_list

def cross_validation(data, dataClass, k, n_folds=10, seed=None):
    """Estimate k-NN accuracy with shuffled ``n_folds``-fold cross-validation.

    Parameters
    ----------
    data : array-like, shape (n_samples, n_features)
        Samples, one per row.
    dataClass : array-like, shape (n_samples,)
        Label of every sample.
    k : int
        Number of neighbours passed on to ``KNN``.
    n_folds : int, optional
        Number of folds (default 10).
    seed : int or None, optional
        Seed for the shuffle. ``None`` (default) draws from numpy's global
        random state, so results differ between runs unless that state has
        been seeded by the caller.

    Returns
    -------
    float
        Percentage of samples classified correctly, every sample being tested
        exactly once.

    Raises
    ------
    ValueError
        If there are fewer than two samples or fewer than two folds.
    """
    data = np.asarray(data)
    dataClass = np.asarray(dataClass)
    size = data.shape[0]
    if size < 2:
        raise ValueError('cross_validation needs at least two samples')
    if n_folds < 2:
        raise ValueError('cross_validation needs at least two folds')

    indices = np.arange(size)
    if seed is None:
        np.random.shuffle(indices)
    else:
        np.random.RandomState(seed).shuffle(indices)

    # array_split spreads the remainder over the first folds, so no sample is
    # left untested when size is not a multiple of n_folds (untested samples
    # would otherwise be counted as correct in the score below).
    folds = np.array_split(indices, n_folds)
    err = 0
    for i, test_idx in enumerate(folds):
        if test_idx.size == 0:
            # More folds than samples: nothing to evaluate in this fold.
            continue
        train_idx = np.concatenate(folds[:i] + folds[i + 1:])
        er, _ = KNN(data[train_idx], data[test_idx],
                    dataClass[train_idx], dataClass[test_idx], k)
        err += er
    return float(size - err) / size * 100

In [22]:
# Every run of consecutive rows shares one label: groups of 5 rows in the
# training set and groups of 2 rows in the test set, numbered from 1.
# NOTE(review): this assumes the rows of both files are ordered by class --
# confirm against the data files.
TRAIN_ROWS_PER_CLASS = 5
TEST_ROWS_PER_CLASS = 2

# Integer floor division keeps the labels integral; float labels are not
# valid input for np.bincount, which is used for the neighbour vote.
trainClass = np.arange(training.shape[0]) // TRAIN_ROWS_PER_CLASS + 1
testClass = np.arange(testing.shape[0]) // TEST_ROWS_PER_CLASS + 1

In [23]:
# Pool both sets into a single sample matrix (rows stacked, training first)
# with a matching label vector, so cross-validation can draw folds from all rows.
data = np.vstack([training, testing])
classes = np.hstack([trainClass, testClass])

In [24]:
# Neighbour counts and explained-variance targets to compare.
K_VALUES = range(1, 8)
PCA_VARIANCE_RATIOS = (0.65, 0.85, 0.91, 0.99)

# The projections do not depend on k, so each PCA is fitted once here instead
# of once per value of k.
# NOTE(review): PCA is fitted on the whole data set, i.e. including the rows
# that later serve as validation folds -- confirm this is intended.
projections = []
for ratio in PCA_VARIANCE_RATIOS:
    pca = PCA(ratio)
    pca.fit(data)
    data_pc = pca.transform(data)
    projections.append((ratio, pca.n_components_, data_pc))

for k in K_VALUES:
    print('For k =', k)
    score = cross_validation(data, classes, k)
    print('Result for raw data:', score)

    for ratio, n_components, projected in projections:
        print('Components: ', n_components)
        score = cross_validation(projected, classes, k)
        print('PCA({}):'.format(ratio), score)
    print('')

For k = 1
Result for raw data: 50.57142857142857
Components:  4
PCA(0.65): 33.14285714285714
Components:  19
PCA(0.85): 47.714285714285715
Components:  38
PCA(0.91): 49.142857142857146
Components:  192
PCA(0.99): 51.42857142857142

For k = 2
Result for raw data: 43.142857142857146
Components:  4
PCA(0.65): 26.0
Components:  19
PCA(0.85): 40.85714285714286
Components:  38
PCA(0.91): 43.714285714285715
Components:  192
PCA(0.99): 42.857142857142854

For k = 3
Result for raw data: 42.0
Components:  4
PCA(0.65): 24.857142857142858
Components:  19
PCA(0.85): 42.0
Components:  38
PCA(0.91): 45.42857142857143
Components:  192
PCA(0.99): 40.285714285714285

For k = 4
Result for raw data: 39.714285714285715
Components:  4
PCA(0.65): 23.714285714285715
Components:  19
PCA(0.85): 35.14285714285714
Components:  38
PCA(0.91): 37.714285714285715
Components:  192
PCA(0.99): 38.0

For k = 5
Result for raw data: 34.285714285714285
Components:  4
PCA(0.65): 19.428571428571427
Components:  19
PCA(0.85): 