In [68]:
import scipy.io as s
import numpy as np
import random
import math
from numpy import linalg as LA
import numpy.linalg as lin
import pandas as pd

In [69]:
eeg = s.loadmat('eeg.mat')
x_test = np.asarray(eeg['x_te'])
x_train = np.asarray(eeg['x_train'])
y_test = np.asarray(eeg['y_te'])
y_train = np.asarray(eeg['y_train'])

In [70]:
# Get DFT matrix
def get_F(N):
    F = np.zeros((N, N), dtype = complex) 
    for i in range(0, N):
        for k in range(0, N):
            F[i][k] = math.cos(2 * math.pi * i * k / N) - math.sin(2 * math.pi * i * k / N)*1j
    return F

In [71]:
def get_X(data, blackman_values, length, N):
    end = 0
    col = 0
    val = np.zeros(N)
    X = np.zeros((N, length))
    for i in range(0, data.shape[0], int(3 * N / 4)):
        end = i + N
        if(end > data.size):
            end = data.size
        val[0:int(end - i)] = data[i : i + N]
        X[: ,col] = val * blackman_values
        val = np.zeros(N)
        col = col + 1
    return X

In [72]:
def window(data, hopsize):
    return int(np.ceil(data.shape[0]/ hopsize))

Set hop size, overlap is 75 %

In [73]:
N = 64
hopsize = 48
F = np.zeros((N, N), dtype=np.complex)
F = get_F(N)
blackman_values = np.blackman(N)
train_len = window(x_train, hopsize)
test_len = window(x_test, hopsize)

In [74]:
def get_STFT(data, length):
    channel1 = data[:,0,:]
    channel2 = data[:,1,:]
    channel3 = data[:,2,:]
    X1 = np.zeros((N, length + 1, data.shape[2]))
    X2 = np.zeros((N, length + 1, data.shape[2]))
    X3 = np.zeros((N, length + 1, data.shape[2]))
    print(X1.shape)
    for i in range(data.shape[2]):
        X1[:,:,i] = get_X(channel1[:,i], blackman_values, length + 1, N)
        X2[:,:,i] = get_X(channel2[:,i], blackman_values, length + 1, N)
        X3[:,:,i] = get_X(channel3[:,i], blackman_values, length + 1, N)
    FX1 = np.zeros((X1.shape), dtype = np.complex)
    FX2 = np.zeros((X2.shape), dtype = np.complex)
    FX3 = np.zeros((X3.shape), dtype = np.complex)
    for i in range(0, data.shape[2]):
        FX1[:, :, i] = np.dot(F, X1[:, :, i])
        FX2[:, :, i] = np.dot(F, X2[:, :, i])
        FX3[:, :, i] = np.dot(F, X3[:, :, i])
    
    return FX1, FX2, FX3
    

Only interested in MU Wave

In [75]:
def get_Muwave(FX1, FX2, FX3, data):
    FX1 = FX1[2:7, :, :]
    FX2 = FX2[2:7, :, :]
    FX3 = FX3[2:7, :, :]
    Mu_wave = np.zeros((5*17*3, data.shape[2]), dtype = np.complex)
    for i in range(data.shape[2]):
        input_sample = np.append(FX1[:,:,i].reshape(FX1.shape[0]* FX1.shape[1]), np.append(FX2[:,:,i].reshape(FX2.shape[0]* FX2.shape[1]), FX3[:,:,i].reshape(FX3.shape[0]* FX3.shape[1])))
        Mu_wave[:,i] = input_sample
    return Mu_wave

Added empty last column

In [76]:
FX1_train, FX2_train, FX3_train = get_STFT(x_train, train_len)
FX1_test, FX2_test, FX3_test = get_STFT(x_test, test_len)

(64, 17, 112)
(64, 17, 28)


In [77]:
FX1_train.shape
FX1_test.shape

(64, 17, 28)

In [78]:
Muwave_train =  get_Muwave(FX1_train, FX2_train, FX3_train, x_train)
Muwave_test =  get_Muwave(FX1_test, FX2_test, FX3_test, x_test)

In [79]:
Muwave_train.shape

(255, 112)

In [80]:
# Calculates the covariance for a given matrix

def cov_matrix(matrix):
    sample = matrix.shape[0] -1
    centered_matrix = (np.transpose(matrix) - np.mean(matrix, axis = 1)).transpose()
    covmat = np.dot(centered_matrix, centered_matrix.transpose()) / sample
    return covmat

Doing the PCA

In [81]:
def PCA(train, test, M):
    cov_X = cov_matrix(train)
    e_vals, e_vecs = LA.eig(cov_X)
    train_mpc = np.dot(e_vecs[:,0:M].transpose(), train)
    test_mpc = np.dot(e_vecs[:,0:M].transpose(), test)
    return train_mpc, test_mpc

Random Projection

In [82]:
def random_projection(L, M, train_mpc, test_mpc):
    A = np.random.rand(L, M)
    for i in range (0, L):
        A[i,:] = A[i,:] / lin.norm(A[i,:])
    train_mpc = train_mpc.real
    test_mpc = test_mpc.real
    train_proj = np.dot(A, train_mpc)
    test_proj = np.dot(A, test_mpc)
    return np.sign(train_proj), np.sign(test_proj)

Hamming distance is the sum of different values

In [83]:
def hamming(a, b):
    return np.count_nonzero(a != b)

Accuracy value for predited and Ground Truth

In [84]:
def accuracy(predict, ground_truth):
    acc = len([i for i, j in zip(predict, ground_truth) if i == j])/28
    return acc

Stores the hamming Distance between points

In [85]:
def hamm_distance(x_train, x_test, train_proj, test_proj):
    hamm_distance = np.zeros((x_test.shape[2], x_train.shape[2]))
    for i in range(x_test.shape[2]):
        for j in range(x_train.shape[2]):
            hamm_distance[i][j] = hamming(train_proj[:,j], test_proj[:,i])
    return hamm_distance

In [86]:
def kNN(Muwave_train, Muwave_test, M, L, k):
    
    # 1) PCA
    
    train_mpc, test_mpc = PCA(Muwave_train, Muwave_test, M
                             )
    # 2) Random projection keeping sign

    train_proj, test_proj = random_projection(L, M, train_mpc, test_mpc)
    
    # 3) Calculating Hamming Distances
    
    hamm_dist = hamm_distance(x_train, x_test, train_proj, test_proj)
    
    # 4) Sorting and getting indexes accordind cluster 

    k_index = np.zeros((y_test.size, k))
    predict = np.zeros(y_test.size)
    for i in range (y_test.size):
        k_index[i] = hamm_dist[i,:].argsort()[:k]
    
    k_index = k_index.astype(int)
    
    # 5) For that index getting the class and checking the maximum class
    
    for i in range (y_test.size):
        all_class = y_train[k_index[i]].reshape(k)
        count = np.bincount(all_class)
        max_class = np.argmax(count)
        #print (max_class)
        predict[i] = max_class

    # 6) Getting accuracy 
    
    acc = accuracy(predict, y_test)
    return acc

In [87]:
acc = kNN(Muwave_train, Muwave_test, 10, 10, 3)
acc

0.7142857142857143

In [88]:
a = np.empty((10, 10, 10)) 
_M = []
_L = []
_k = []
a = []
for M in range(3, 10):
    for L in range(3, 10):
        for k in range(3, 10):
            #a[M][L][k] = kNN(Muwave_train, Muwave_test, M, L, k)
            val =  kNN(Muwave_train, Muwave_test, M, L, k)
            a.append(val)
            _M.append(M)
            _L.append(M)
            _k.append(k)

For M, k, L in (3, 9) . Here is the table of accuracies. KNN using Hamming distance is nice. It sure is fast

In [89]:

d = pd.DataFrame()
d['M'] = _M
d['L'] = _L
d['k'] = _k
d['acc'] = a
print("Max acc = " , d['acc'].max())
print("Min acc = " ,d['acc'].min())
d

Max acc =  0.821428571429
Min acc =  0.214285714286


Unnamed: 0,M,L,k,acc
0,3,3,3,0.464286
1,3,3,4,0.500000
2,3,3,5,0.607143
3,3,3,6,0.500000
4,3,3,7,0.500000
5,3,3,8,0.500000
6,3,3,9,0.500000
7,3,3,3,0.392857
8,3,3,4,0.500000
9,3,3,5,0.464286
