# Brain Games 3-classification using Neural Network

In [149]:
%matplotlib inline
import numpy as np
import scipy.io
import sklearn
import scipy.cluster
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
from scipy import signal
import time
from sklearn.neural_network import MLPClassifier
from sklearn.utils import shuffle

In [150]:
csvData = []
csvData.append(np.genfromtxt('DoingnothingbutnotblankingoutJustthinkspontaneouslyNoparticularTriggerEvent.csv', delimiter=','))
csvData.append(np.genfromtxt('2min1-100additionsubtraction.csv', delimiter=','))
csvData.append(np.genfromtxt('2min1-50multiplication.csv', delimiter=','))
print(csvData[0].shape)
sample_time = 120
sample_len = csvData[0].shape[0]
window_len = 0.2 #how long is each window in terms of seconds
window_num = (int)(sample_time//window_len) 
data_per_window = (int)(sample_len//(window_num)) #data_per_window = datapoints per window = sample_len // (window_num)
#assert(data_per_window % 2 == 0)

print("There are",data_per_window, "datapoints per window, and there are", window_num, "windows.")

(30577, 6)
There are 51 datapoints per window, and there are 599 windows.


In [151]:
channel = []
for d in csvData:
    channel.append(d[1:,1:])
    #print(d.shape)
    #plt.plot(rawData[:,:])
    #plt.xlim((0,rawData.shape[0]))
    #plt.title('A Very Representative Feature VS Full Time')
    #plt.figure(figsize = (16,9))
print(np.asarray(channel).shape)

(3, 30576, 5)


In [152]:
residual = sample_len % data_per_window
print("There are", residual, "points left out by segmentation, now cutting them from both ends...")
residualStart = residual // 2
residualEnd = residual - residualStart

pruned = []
for c in channel:
    pruned.append(c[residualStart : -residualEnd+1,:])
pruned = np.asarray(pruned)
print(pruned.shape)
for p in pruned:
    assert np.asarray(p).shape[0] % data_per_window == 0

There are 28 points left out by segmentation, now cutting them from both ends...
(3, 30549, 5)


In [153]:
def stft(nfft, data):
    #nfft should be the closest power of 2 to data_per_window
    f,t,Zxx = scipy.signal.stft(data, nperseg=data_per_window, return_onesided=False, padded=True, nfft=nfft)
    #assert (Zxx.shape[1] == 2*window_num*len(csvData) + 1)
    #print("There are", Zxx.shape[1],"time window captured by STFT...")
    Zxx = Zxx.T #Zxx = new data matrix --> time windows are our new data points and frequency are the features
    #We sacrificed segements of data to expand feature size from 1 to 256
    #print(Zxx.shape)
    #print("Parsing each DFT to extract the actual coefficients used in the trucated Fourier Series...")
    for i in range(0,Zxx.shape[0]):
        chnl = Zxx[i]
        chnl[0] = chnl[0].real
        chnl[1:nfft//2] = 1*chnl[1:nfft//2].real
        chnl[nfft//2] = chnl[nfft//2]
        chnl[nfft//2 + 1:] = (1)*chnl[nfft//2 + 1:].imag
    assert ((Zxx.real == Zxx).all())
    #print("Coefficient extracted!")
    Zxx = Zxx.real #The imaginary compoenent is 0 by now, we are just getting rid of the j for PCA
    #Since PCA from sklearn doesn't handle complex data, let us simply use the weight of thecosine part of the frequency space
    return Zxx




In [154]:
nfft = 64 #depending on the window size
#We again reduce the data size and increase the features from 5 to 5*nfft
post_stft_dataset = []
for p in pruned:
    expanded_features_dataset = []
    print("Parsing each DFT to extract the actual coefficients used in the trucated Fourier Series...")
    for i in range(p.shape[1]):
        channel = p[:,i]
        stft_results = stft(nfft, channel)
        expanded_features_dataset.append(stft_results)
    expanded_features_dataset = np.asarray(expanded_features_dataset)
    hstacked = expanded_features_dataset[0]
    for expanded_channel in expanded_features_dataset[1:]:
        hstacked = np.hstack((hstacked, expanded_channel))
    post_stft_dataset.append(hstacked)
    print("Coefficient extracted!")
post_stft_dataset = np.asarray(post_stft_dataset)
print(post_stft_dataset.shape)

Parsing each DFT to extract the actual coefficients used in the trucated Fourier Series...
Coefficient extracted!
Parsing each DFT to extract the actual coefficients used in the trucated Fourier Series...
Coefficient extracted!
Parsing each DFT to extract the actual coefficients used in the trucated Fourier Series...
Coefficient extracted!
(3, 1176, 320)


In [155]:
def train_test_split(dataset, label, ratio):
    split = (int)(dataset.shape[0] * ratio)
    y = label * np.ones(dataset.shape[0])
    return dataset[:split], dataset[split:], y[:split], y[split:]
def conglomerates(three_d_matrix):
    vstacked = three_d_matrix[0]
    for m in three_d_matrix[1:]:
        vstacked = np.vstack((vstacked, m))
    return vstacked

In [156]:
X_train, X_test, y_train, y_test = [],[],[],[]
for i in range(post_stft_dataset.shape[0]):
    xtr, xt, ytr, yt = train_test_split(post_stft_dataset[i],i,0.8)
    X_train.append(xtr)
    X_test.append(xt)
    y_train.append(ytr)
    y_test.append(yt)
X_train = conglomerates(np.asarray(X_train))
X_test = conglomerates(np.asarray(X_test))
y_train = np.asarray(y_train).flatten()
y_test = np.asarray(y_test).flatten()

assert X_train.shape[0] == y_train.shape[0]
assert X_test.shape[0] == y_test.shape[0]

In [157]:
mlp = MLPClassifier(hidden_layer_sizes=(200,20), max_iter=50, alpha=1e-5,
                    solver='adam', verbose=10, tol=1e-4, random_state=1,
                    learning_rate_init=.1)

In [158]:
mlp.fit(X_train, y_train)
print("Training set score: %f" % mlp.score(X_train, y_train))
print("Test set score: %f" % mlp.score(X_test, y_test))


Iteration 1, loss = inf
Iteration 2, loss = 1.10595624
Iteration 3, loss = 1.10398726
Iteration 4, loss = 1.10061907
Iteration 5, loss = 1.10261097
Iteration 6, loss = 1.10428943
Iteration 7, loss = 1.10435145
Iteration 8, loss = 1.09970913
Iteration 9, loss = 1.09998477
Iteration 10, loss = 1.10071772
Iteration 11, loss = 1.10251488
Iteration 12, loss = 1.10019768
Iteration 13, loss = 1.10207406
Iteration 14, loss = 1.10099891
Iteration 15, loss = 1.10466568
Iteration 16, loss = 1.09978070
Iteration 17, loss = 1.10142957
Iteration 18, loss = 1.10180977
Iteration 19, loss = 1.10058691
Training loss did not improve more than tol=0.000100 for 10 consecutive epochs. Stopping.
Training set score: 0.333333
Test set score: 0.333333
