# Dimensionality Reduction and K-Nearest Neighbors (KNN) Classification on EEG Data

In [1]:
import scipy.io
import librosa
import numpy as np
import matplotlib.pyplot as plt


def _stft_features(signal, n_fft, hop_length, win_length, freq_rows):
    """Return the selected STFT rows of one 1-D signal as a single column."""
    spectrum = librosa.stft(
        signal, n_fft=n_fft, hop_length=hop_length, win_length=win_length
    )
    # Transpose before flattening so the column is ordered frame by frame:
    # every kept row of frame 0, then every kept row of frame 1, and so on.
    return spectrum[freq_rows, :].T.reshape(-1, 1)


def data(x, n_fft=64, hop_length=48, win_length=64, freq_rows=slice(3, 8),
         n_channels=3):
    """Build an STFT feature matrix with one column per slice of the last axis.

    For every index ``j`` along the third axis of ``x`` and every channel
    ``i`` in ``range(n_channels)``, the STFT of ``x[:, i, j]`` is computed,
    only the rows selected by ``freq_rows`` are kept, and the result is
    flattened into a column.  The columns of all channels are stacked
    vertically, and the resulting vectors are placed side by side.

    Args:
        x: 3-D array indexed as ``x[:, channel, j]``; presumably
            (samples, channels, trials) -- confirm against the data file.
        n_fft: FFT size passed to ``librosa.stft``.
        hop_length: Hop length passed to ``librosa.stft``.
        win_length: Window length passed to ``librosa.stft``.
        freq_rows: Index (slice or sequence) of STFT rows to keep.
        n_channels: Number of leading entries of the second axis to use.

    Returns:
        Array of shape ``(n_channels * n_rows * n_frames, x.shape[2])``.

    NOTE(review): the STFT coefficients are used as returned by
    ``librosa.stft`` (complex-valued); no magnitude is taken here.  Confirm
    that this is intended, since everything downstream then operates on
    complex numbers.
    """
    n_trials = x.shape[2]
    trial_columns = [
        np.vstack([
            _stft_features(
                x[:, channel, trial], n_fft, hop_length, win_length, freq_rows
            )
            for channel in range(n_channels)
        ])
        for trial in range(n_trials)
    ]
    return np.hstack(trial_columns)


def reduced_dim_data(X, k=25):
    """Return the ``k`` leading principal directions of ``X`` as columns.

    Args:
        X: 2-D array whose rows are variables (features) and whose columns
            are observations, as expected by ``np.cov``.
        k: Number of principal directions to keep.  If ``k`` exceeds the
            number of features, all directions are returned.

    Returns:
        Array of shape ``(n_features, min(k, n_features))`` whose columns are
        orthonormal eigenvectors of the covariance matrix, ordered by
        decreasing eigenvalue.
    """
    # np.cov subtracts each row's mean itself, so no explicit centring of X
    # is needed before calling it.
    cov_matrix = np.atleast_2d(np.cov(X))

    # A covariance matrix is symmetric (Hermitian for complex data).  eigh
    # exploits that: eigenvalues are guaranteed real and the eigenvectors
    # orthonormal, which the general-purpose eig does not guarantee when the
    # matrix is rank deficient (fewer observations than features).
    _, eigenvectors = np.linalg.eigh(cov_matrix)

    # eigh returns eigenvalues in ascending order; reverse the columns so the
    # direction with the largest variance comes first.
    return eigenvectors[:, ::-1][:, :k]


def k_Neighbours(train, test, y_train, k=3):
    """Return the labels of the ``k`` nearest training points of each test point.

    Distances are Euclidean norms between columns of ``test`` and columns of
    ``train``.

    Args:
        train: 2-D array with one training point per column.
        test: 2-D array with one test point per column (same number of rows
            as ``train``).
        y_train: Labels of the training points; any shape that flattens to
            one numeric label per training column (e.g. ``(n, 1)`` or ``(n,)``).
        k: Number of neighbours to return per test point.

    Returns:
        Float array of shape ``(n_test, k)``; row ``i`` holds the labels of
        the ``k`` training points closest to test column ``i``, nearest first.

    Raises:
        ValueError: If ``k`` is not between 1 and the number of training
            points, or if the number of labels does not match ``train``.
    """
    train = np.asarray(train)
    test = np.asarray(test)
    # Flatten so that indexing yields scalars rather than size-1 arrays.
    labels = np.asarray(y_train).ravel()

    n_train = train.shape[1]
    if labels.shape[0] != n_train:
        raise ValueError(
            f"expected {n_train} training labels, got {labels.shape[0]}"
        )
    if not 1 <= k <= n_train:
        raise ValueError(
            f"k must be between 1 and the number of training points "
            f"({n_train}), got {k}"
        )

    neighbour_labels = np.zeros((test.shape[1], k))
    for i, point in enumerate(test.T):
        # Distance from this test point to every training column at once.
        distances = np.linalg.norm(train - point[:, None], axis=0)
        # Stable sort keeps ties in training order, making results repeatable.
        nearest = np.argsort(distances, kind="stable")[:k]
        neighbour_labels[i] = labels[nearest]

    return neighbour_labels


if __name__ == '__main__':
    # Read the train/test arrays out of the MATLAB file.
    file = scipy.io.loadmat('/content/eeg.mat')
    x_test, x_train = file["x_te"], file["x_train"]
    y_test, y_train = file["y_te"], file["y_train"]

    # STFT feature matrices for both splits.
    train_input = data(x_train)
    test_input = data(x_test)

    # The projection basis is computed from the training features only and
    # then applied to both splits.
    V = reduced_dim_data(train_input)
    train_input_reduced = V.T @ train_input
    test_input_reduced = V.T @ test_input

    neighbour_labels = k_Neighbours(
        train_input_reduced, test_input_reduced, y_train
    ).astype(int)

    # Majority vote between labels 1 and 2; anything that is not a strict
    # majority of 1s is predicted as 2.
    num_ones = np.count_nonzero(neighbour_labels == 1, axis=1)
    num_twos = np.count_nonzero(neighbour_labels == 2, axis=1)
    ones_win = num_ones > num_twos
    y_predicted = np.zeros((len(neighbour_labels), 1))
    y_predicted[ones_win] = 1
    y_predicted[~ones_win] = 2

    # A prediction is correct where it differs from the true label by zero.
    difference = abs(y_predicted - y_test)
    num_zeros = np.count_nonzero(difference == 0)
    accuracy = (num_zeros / len(y_test)) * 100

    print("accuracy is ", accuracy)


accuracy is  60.71428571428571


The table below shows the classification accuracy (%) for different numbers of nearest neighbours K (rows) and different numbers of retained principal components (columns).


\begin{array}{|c|l|l|l|l|l|l|l|}
\hline
\textbf{K/PCs} & {\textbf{10}} & {\textbf{15}} & {\textbf{20}} & {\textbf{25}} & {\textbf{30}} & {\textbf{35}} & {\textbf{40}} \\ \hline
\textbf{3}     & 53.57                            & 67.85                            & 64.28                            & 50                               & 60.71                            & 53.5                             & 50                               \\ \hline
\textbf{7}     & 46.42                            & 50                               & 42.8                             & 50                               & 50                               & 50                               & 53.5                             \\ \hline
\textbf{15}    & 39.28                            & 42.85                            & 46.4                             & 32.14                            & 42.85                            & 46.4                             & 50                               \\ \hline
\end{array}