# Abdelrahman Yehia
# Mahmoud Hossam

In [None]:
# Imports
import numpy as np  # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import matplotlib.pyplot as plt
import os
from scipy import signal 
from scipy.io import loadmat
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report
from sklearn.svm import SVC
from tensorflow.keras.models  import Sequential
from tensorflow.keras.layers import Conv2D, Activation, Dropout, Dense, MaxPooling2D, Flatten
from tensorflow.keras.optimizers import SGD, Adam, Adadelta
import tensorflow as tf

In [None]:
# Extracting the MAT files (IPython shell commands):
# the three training archives are unpacked into ./train and the test archive
# into ./test, relative to the current working directory.
!unzip "/kaggle/input/decoding-the-human-brain/train_01_06.zip"  -d ./train
!unzip "/kaggle/input/decoding-the-human-brain/train_07_12.zip"  -d ./train
!unzip "/kaggle/input/decoding-the-human-brain/train_13_16.zip"  -d ./train
!unzip "/kaggle/input/decoding-the-human-brain/test_17_23.zip"  -d ./test

# A walkthrough of the data
The training data contains 9414 trials from 16 participants.

In [None]:
# Run only if features.csv is not found or when implementing new features
# Read MAT files
train_mats = []
for dirname, _, filenames in os.walk('/kaggle/working/train/data'):
    # Sort so the trial order does not depend on the directory listing order.
    for filename in sorted(filenames):
        if not filename.endswith('.mat'):
            continue  # ignore anything that is not a MAT file
        mat_path = os.path.join(dirname, filename)
        print("Loading: ", mat_path)
        train_mats.append(loadmat(mat_path))

if not train_mats:
    # Fail with an explicit message instead of an IndexError further down.
    raise FileNotFoundError(
        "No MAT files found under /kaggle/working/train/data; "
        "run the extraction cell first."
    )

# Merging data: one concatenate call per array copies every trial exactly
# once, instead of re-copying the growing result on each loop iteration.
train_data = np.concatenate([mat['X'] for mat in train_mats], axis=0)
train_labl = np.concatenate([mat['y'] for mat in train_mats], axis=0)

del train_mats # Free RAM pls 
print('Done')
print(train_data.shape, type(train_data))
print(train_labl.shape, type(train_labl))

In [None]:
# Plotting the signal from the first channel of the first trial
# (train_data[0][0] selects index 0 on the first two axes).
plt.figure(figsize=(16,4))
plt.plot(train_data[0][0])
plt.xlabel('Time')
plt.ylabel('Value')


In [None]:
# The power spectral density (PSD) of the same signal, estimated with
# Welch's method assuming a sampling frequency of 250 Hz.
f,psd = signal.welch(train_data[0][0], fs=250.0)
plt.figure(figsize=(16,4))
# Logarithmic y-axis so that small spectral components stay visible.
plt.semilogy(f, psd)
plt.xlabel('frequency [Hz]')
plt.ylabel('PSD [V**2/Hz]')
plt.show()

# Preprocessing
The data description mentions that all preprocessing tasks were applied using mne-tools. However, we will just perform some simple preprocessing steps from a machine learning perspective.

In [None]:
# Are there any null values?
# Any NaN in an array propagates into its sum, so testing the sum is enough
# and no element-wise mask has to be built.
for array in (train_data, train_labl):
    total = np.sum(array)
    print(np.isnan(total))

In [None]:
# Wrong Labels?
# List the distinct values present in the label array (sorted), so that any
# unexpected label value is immediately visible.
labels = np.unique(train_labl).tolist()
print(labels)

# Filtering

In [None]:
# Sample frequency (Hz): the same 250 Hz rate that is used for the Welch PSD
# and inside apply_filters, so the plotted response matches the data.
samp_freq = 250
notch_freq = 50 # Frequency to be removed from the signal (Hz)
quality_factor = 20.0

# Design a notch filter using signal.iirnotch
b_notch, a_notch = signal.iirnotch(notch_freq, quality_factor, samp_freq)

# Compute the frequency response of the designed filter.
# Because fs is passed, freqz already returns the frequencies in Hz, so they
# must not be rescaled again before plotting.
freq, h = signal.freqz(b_notch, a_notch, fs=samp_freq)

fig = plt.figure(figsize=(8, 6))

# Plot magnitude response of the filter (in dB)
plt.plot(freq, 20 * np.log10(abs(h)),
         'r', label='Notch filter', linewidth=2)
plt.xlabel('Frequency [Hz]', fontsize=20)
plt.ylabel('Magnitude [dB]', fontsize=20)
plt.title('Notch Filter', fontsize=20)
plt.grid()

In [None]:
from scipy.signal import butter, lfilter,iirnotch, medfilt
def apply_filters(data, sfreq=250.0, lowpass_hz=100.0, notch_hz=50.0,
                  notch_q=30.0, order=5):
    """Low-pass filter the signal and then remove the power-line frequency.

    Both filters are applied with ``lfilter`` along the last axis of
    ``data``, so a single 1-D signal or an array of stacked signals can be
    passed. With the default arguments the behaviour is the original one:
    a 5th-order Butterworth low-pass at 100 Hz followed by a 50 Hz notch
    (Q = 30), both designed for a 250 Hz sampling rate.

    Parameters
    ----------
    data : array_like
        Signal(s) to filter; time runs along the last axis.
    sfreq : float, optional
        Sampling frequency of ``data`` in Hz.
    lowpass_hz : float, optional
        Cut-off frequency of the low-pass filter in Hz.
    notch_hz : float, optional
        Frequency removed by the notch filter in Hz.
    notch_q : float, optional
        Quality factor of the notch filter.
    order : int, optional
        Order of the Butterworth low-pass filter.

    Returns
    -------
    numpy.ndarray
        Filtered signal(s), same shape as ``data``.
    """
    # butter expects the cut-off normalised by the Nyquist frequency
    # (100 Hz / (250 Hz / 2) = 0.8 with the defaults).
    nyquist = sfreq / 2.0
    b_low, a_low = butter(order, lowpass_hz / nyquist, btype='low')

    # Notch filter at the power-line frequency.
    b_not, a_not = iirnotch(notch_hz, notch_q, sfreq)

    filtered = lfilter(b_low, a_low, data)      # Applying Low-pass filter
    filtered = lfilter(b_not, a_not, filtered)  # Applying Notch filter

    return filtered

In [None]:
# Plotting: original signal (top) versus filtered signal (bottom)
# x positions: 375 evenly spaced values in [0, 1].
# NOTE(review): this is a normalised axis, not seconds; 375 presumably equals
# the number of samples per trial - confirm against train_data.shape.
n = np.linspace(0, 1, 375)
fig = plt.figure(figsize=(8, 6))
plt.subplot(211)
plt.plot(n,train_data[0][0], color='r', linewidth=2)
plt.xlabel('Time', fontsize=20)
plt.ylabel('Magnitude', fontsize=18)
plt.title('Original Signal', fontsize=20)

# Apply the low-pass and notch filters (apply_filters, based on lfilter)
# to the same signal
outputSignal = apply_filters(train_data[0][0])

# Second subplot: filtered version of the signal
plt.subplot(212)
  
# Plot the output of apply_filters
plt.plot(n, outputSignal)
plt.xlabel('Time', fontsize=20)
plt.ylabel('Magnitude', fontsize=18)
plt.title('Filtered Signal', fontsize=20)
plt.subplots_adjust(hspace=0.5)
fig.tight_layout()
plt.show()

# Feature Extraction

In [None]:
def create_features(XX, tmin, tmax, sfreq, tmin_original=-0.5,
                    first_channel=162):
    """Create the 2-D feature matrix for one block of trials.

    Steps:
    - keep the channels from index ``first_channel`` onwards and pass them
      through ``apply_filters``;
    - restrict the time window of the data to [tmin, tmax] sec;
    - concatenate the remaining timeseries of each trial into one long
      vector;
    - normalize each feature independently across trials (z-scoring).

    Parameters
    ----------
    XX : numpy.ndarray
        3-D array indexed as (trial, channel, time sample).
    tmin, tmax : float
        Start and end of the time window to keep, in seconds.
    sfreq : float
        Sampling frequency in Hz, used to convert seconds to sample indices.
    tmin_original : float, optional
        Time in seconds that corresponds to the first sample of ``XX``.
    first_channel : int, optional
        Index of the first channel to keep; earlier channels are dropped.

    Returns
    -------
    numpy.ndarray
        Array of shape (n_trials, n_kept_channels * n_window_samples).

    Raises
    ------
    ValueError
        If the requested time window is empty or lies outside the data.
    """
    nsamples, nx, ny = XX.shape

    # apply_filters filters along the last (time) axis, so every trial and
    # channel can be filtered in a single call instead of a Python loop.
    x1_filt50hz = apply_filters(XX[:, first_channel:, :])
    print(x1_filt50hz.shape)

    print("Applying the desired time window.")

    # Built-in int is used because the np.int alias was removed from NumPy.
    beginning = int(np.round((tmin - tmin_original) * sfreq))
    end = int(np.round((tmax - tmin_original) * sfreq))
    if not 0 <= beginning < end <= ny:
        raise ValueError(
            "Time window [%s, %s] sec maps to samples [%d, %d), which is "
            "empty or outside the %d available samples."
            % (tmin, tmax, beginning, end, ny)
        )
    XX = x1_filt50hz[:, :, beginning:end].copy()

    print("2D Reshaping: concatenating all %d timeseries." % XX.shape[1])
    XX = XX.reshape(XX.shape[0], XX.shape[1] * XX.shape[2])

    print("Features Normalization.")
    XX -= XX.mean(0)
    std = XX.std(0)
    # Features with zero variance are left centred instead of being divided
    # by zero.
    std[std == 0] = 1.0
    XX = np.nan_to_num(XX / std)

    return XX

In [None]:
print("DecMeg2014: https://www.kaggle.com/c/decoding-the-human-brain")
subjects_train = range(1, 17) # use range(1, 17) for all subjects
print("Training on subjects", list(subjects_train))

# We throw away all the MEG data outside the first 0.5sec from when
# the visual stimulus start:
tmin = 0.0
tmax = 0.500
print("Restricting MEG data to the interval [%s, %s]sec." % (tmin, tmax))

X_train = []
y_train = []
X_test = []
ids_test = []

subject_samples = []
print("Creating the trainset.")
for subject in subjects_train:
    filename = '/kaggle/working/train/data/train_subject%02d.mat' % subject
    print("Loading", filename)
    data = loadmat(filename, squeeze_me=True)
    XX = data['X']
    yy = data['y']
    sfreq = data['sfreq']
    tmin_original = data['tmin']
    print("Dataset summary:")
    print("XX:", XX.shape)
    print("yy:", yy.shape)
    print("sfreq:", sfreq)

    # Forward the start time stored in the file instead of silently relying
    # on the default of create_features.
    XX = create_features(XX, tmin, tmax, sfreq, tmin_original=tmin_original)

    X_train.append(XX)
    y_train.append(yy)

# Stack the per-subject feature matrices / label vectors into single arrays.
X_train = np.vstack(X_train)
y_train = np.concatenate(y_train)
print("Trainset:", X_train.shape)

print("Creating the testset.")
subjects_test = range(17, 24)
for subject in subjects_test:
    filename = '/kaggle/working/test/data/test_subject%02d.mat' % subject
    print("Loading", filename)
    data = loadmat(filename, squeeze_me=True)
    XX = data['X']
    ids = data['Id']
    sfreq = data['sfreq']
    tmin_original = data['tmin']
    print("Dataset summary:")
    print("XX:", XX.shape)
    print("ids:", ids.shape)
    print("sfreq:", sfreq)

    # Same feature pipeline as for the training subjects.
    XX = create_features(XX, tmin, tmax, sfreq, tmin_original=tmin_original)

    X_test.append(XX)
    ids_test.append(ids)

X_test = np.vstack(X_test)
ids_test = np.concatenate(ids_test)
print("Testset:", X_test.shape)

In [None]:
# Sanity check: shape of the assembled training feature matrix
print(X_train.shape)

In [None]:
# Same shape check as in the previous cell (repeated)
print(X_train.shape)

# Model Training

In [None]:
# Support vector classifier with scikit-learn's default settings,
# fitted on the training features and labels.
clf = SVC()
print("Training.")
clf.fit(X_train, y_train)

In [None]:
# Predict a label for every row of the test feature matrix.
print("Predicting.")
y_pred = clf.predict(X_test)

In [None]:
# NOTE: this report is computed on the training data itself, so it shows how
# well the model fits the training set, not how well it generalises.
print(classification_report(y_train,np.round(clf.predict(X_train))))

filename_submission = "Submission.csv"
print(filename_submission)
print("Creating submission file", filename_submission) 
# The context manager closes the file even if one of the writes fails.
with open(filename_submission, "w") as submission_file:
    submission_file.write("Id,Prediction\n")
    # One "Id,Prediction" row per test trial.
    for sample_id, prediction in zip(ids_test, y_pred):
        submission_file.write(
            str(sample_id) + "," + str(np.round(prediction)) + "\n"
        )