# Individual Identification Based on MEG Resting State Connectivity Matrices
## License  
This file is part of the project megFingerprinting. All of megFingerprinting code is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. megFingerprinting is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with megFingerprinting. If not, see <https://www.gnu.org/licenses/>.

In [205]:
# Get them libraries going
import keras
from keras.layers import Dense, Dropout
from keras.models import Sequential
from keras.wrappers.scikit_learn import KerasClassifier
import matplotlib.pyplot as plt
import numpy as np
from os import listdir
from os.path import isfile, join
import pandas as pd
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, OneHotEncoder, StandardScaler

### Get feature matrix from csv files

In [206]:
# Wrangle the dataset into one big matrix. For now I will use the first 50 subjects

def prune_subject_csv(filename):
    '''
    Build one subject's feature row from the connectivity csv exported by MATLAB.

    The csv is read once with the columns source, target, corr and freq_band. For every
    frequency band the rows are pivoted into a source x target matrix and only the entries
    whose source label sorts strictly before the target label are kept. That single filter
    removes the auto-correlations (source == target) and the mirrored half of the symmetric
    matrix. The bands are laid side by side and the subject label is stored in the last column.
    Args:
        filename (string): Path of the csv matrix. Characters 25 to 27 of this string are parsed
            as the subject label (presumably this relies on the 'output/csv_matrices/' prefix
            plus the file naming scheme; confirm against the data)
    Returns:
        sub_feat (np.array): Subject's features followed by the label, shape (1, 6 * 2278 + 1)
    Raises:
        ValueError: If a band does not yield exactly 2278 unique pairs, or if the label
            characters of filename are not an integer
    '''
    freq_bands = ['delta', 'theta', 'alpha', 'beta', 'gamma', 'hgamma']
    n_pairs = (67 * 68) // 2 # Unique off-diagonal entries of a 68 x 68 symmetric matrix (2278)
    sub_feat = np.zeros([1, n_pairs * len(freq_bands) + 1]) # Unique values per band + subject label

    # The file does not change between bands, so read it once instead of once per band
    all_bands = pd.read_csv(filename, names = ['source', 'target', 'corr', 'freq_band'])
    for x, iFreq in enumerate(freq_bands):
        band_rows = all_bands.loc[all_bands['freq_band'] == iFreq]
        # Strip whitespace from the string cells so that labels pivot consistently
        aec_matrix = band_rows.replace(r'\s', '', regex = True).pivot_table(index = 'source', columns = 'target', values = 'corr')
        df_out = aec_matrix.stack()
        # Strict "<" drops the diagonal and the duplicated triangle in one pass
        df_out = df_out[df_out.index.get_level_values(0) < df_out.index.get_level_values(1)]
        if len(df_out) != n_pairs:
            raise ValueError('Expected %d unique pairs for band %s in %s, found %d'
                             % (n_pairs, iFreq, filename, len(df_out)))
        sub_feat[0, x * n_pairs : (x + 1) * n_pairs] = df_out.values
    sub_feat[0, -1] = int(filename[25:28])
    return sub_feat

# Load the first n_subs subjects: one training row and one validation row per subject
csv_dir = 'output/csv_matrices/'
onlyfiles = [f for f in listdir(csv_dir) if isfile(join(csv_dir, f))]
n_subs = 50 # Change here to get number of participants!
n_columns = int(((67 * 68)/2)*6+1) # Unique correlations over the 6 bands + subject label
sub_train = np.zeros([n_subs, n_columns])
sub_valid = np.zeros([n_subs, n_columns])
iv = 0 # Next free row of sub_valid
it = 0 # Next free row of sub_train
for iFile in sorted(onlyfiles)[:n_subs * 2]:
    sub = join(csv_dir, iFile)
    # Character 39 of the path is 'v' for validation files; everything else is treated as training
    is_validation = sub[39] == 'v'
    features = prune_subject_csv(sub)
    if is_validation:
        sub_valid[iv, :] = features
        iv += 1
    else:
        sub_train[it, :] = features
        it += 1

### Separate matrix into dependent and independent variables and preprocess it

In [207]:
# Wrangle arrays; no feature scaling because the features are correlations
X_train = sub_train[:, :-1]
X_test = sub_valid[:, :-1]

# Encoding the dependent variable (validation and training)
# Both encoders are fit on the training labels only and reused for the validation labels, so a
# given subject always lands in the same one-hot column. Fitting separate encoders only gives
# matching columns when both sets happen to contain exactly the same subjects; with this
# version a validation subject that is absent from training raises a ValueError instead.
# The codes are computed from the label column without assigning back into it, so sub_train and
# sub_valid keep the original subject labels.
labelencoder_y_train = LabelEncoder()
train_codes = labelencoder_y_train.fit_transform(sub_train[:, -1])
labelencoder_y_valid = labelencoder_y_train # Same mapping for validation; name kept for later cells
valid_codes = labelencoder_y_valid.transform(sub_valid[:, -1])

onehotencoder = OneHotEncoder()
y_train = onehotencoder.fit_transform(train_codes.reshape(-1, 1)).toarray()
y_test = onehotencoder.transform(valid_codes.reshape(-1, 1)).toarray()

# Drop the first one-hot column, leaving one column fewer than the number of subjects
# NOTE(review): the "dummy variable trap" concerns one-hot predictors in linear models, not
# classification targets. After this drop the first encoded subject has an all-zero target row
# and argmax cannot tell it apart from the second one. Keeping every column would require the
# output layer, currently sized n_subs - 1, to have one unit per subject.
y_train = y_train[:, 1:]
y_test = y_test[:, 1:]

### Building the Artificial Neural Network

In [208]:
# Initialising the ANN
classifier = Sequential()
# As a rule of thumb: hidden layer can have (input + output)/2 nodes.
# X_train.shape[0] is the number of training rows, used here in place of the output count.
n_neurons = (X_train.shape[0] + X_train.shape[1]) / 2
n_inputs = X_train.shape[1] # Taken from the data instead of a hard-coded feature count
n_outputs = y_train.shape[1] # One unit per target column, whatever the label encoding produced

# Adding the input layer and the hidden layers, each followed by dropout
classifier.add(Dense(int(n_neurons), kernel_initializer = 'uniform', activation = 'relu', input_dim = n_inputs))
classifier.add(Dropout(rate = 0.5))
classifier.add(Dense(int(n_neurons/2), kernel_initializer = 'uniform', activation = 'relu'))
classifier.add(Dropout(rate = 0.5))
classifier.add(Dense(int(n_neurons/8), kernel_initializer = 'uniform', activation = 'relu'))
classifier.add(Dropout(rate = 0.5))
classifier.add(Dense(int(n_neurons/16), kernel_initializer = 'uniform', activation = 'relu'))
classifier.add(Dropout(rate = 0.5))

# Adding the output layer
classifier.add(Dense(n_outputs, kernel_initializer = 'uniform', activation = 'softmax'))

# Compiling the ANN
classifier.compile(optimizer = 'rmsprop', loss = 'categorical_crossentropy', metrics = ['accuracy'])

# Fit the ANN to the training set
# max() keeps the batch size at 1 or more when n_subs is smaller than 10
batch_size = max(1, int(n_subs/10))
classifier.fit(X_train, y_train, batch_size = batch_size, epochs = 50, verbose = 2) # Verbose 2 avoids the sliding bar, verbose = 0 no output at all

Epoch 1/50
 - 10s - loss: 14.5057 - acc: 0.0000e+00
Epoch 2/50
 - 7s - loss: 15.6024 - acc: 0.0000e+00
Epoch 3/50
 - 7s - loss: 15.4734 - acc: 0.0200
Epoch 4/50
 - 7s - loss: 15.4734 - acc: 0.0200
Epoch 5/50
 - 7s - loss: 15.4734 - acc: 0.0200
Epoch 6/50
 - 7s - loss: 15.4734 - acc: 0.0200
Epoch 7/50
 - 7s - loss: 15.2736 - acc: 0.0200
Epoch 8/50
 - 7s - loss: 15.4734 - acc: 0.0200
Epoch 9/50
 - 7s - loss: 15.4734 - acc: 0.0200
Epoch 10/50
 - 7s - loss: 15.4734 - acc: 0.0200
Epoch 11/50
 - 7s - loss: 15.4734 - acc: 0.0200
Epoch 12/50
 - 7s - loss: 15.4734 - acc: 0.0200
Epoch 13/50
 - 7s - loss: 15.4734 - acc: 0.0200
Epoch 14/50
 - 7s - loss: 15.4734 - acc: 0.0200
Epoch 15/50
 - 7s - loss: 15.4734 - acc: 0.0200
Epoch 16/50
 - 7s - loss: 15.4734 - acc: 0.0200
Epoch 17/50
 - 7s - loss: 15.4734 - acc: 0.0200
Epoch 18/50
 - 7s - loss: 15.4734 - acc: 0.0200
Epoch 19/50
 - 7s - loss: 15.4734 - acc: 0.0200
Epoch 20/50
 - 7s - loss: 15.4734 - acc: 0.0200
Epoch 21/50
 - 7s - loss: 15.4734 - acc:

<keras.callbacks.History at 0x7fcc4c7919b0>

### Evaluating the model

In [209]:
# Network outputs for every row of the validation features
y_pred = classifier.predict(X_test)

# Accuracy (not a confusion matrix): share of rows where the highest-scoring output column
# is also the highest column of the encoded target
accuracy_score(np.argmax(y_test, axis = 1), np.argmax(y_pred, axis = 1))

0.02

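As of 12/08/18, the model is not learning: accuracy is at chance level (0.02) on both the training set and the validation set, so this is a failure to fit rather than overfitting.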