# Set-up

## Installing libraries and libcudnn8

In [None]:
import os

# Google Drive file id interpolated into the download URLs below.
FILEID = "1h4FWB5fw7sBDCSM-EENK1UadqKSCqg24"

# Entries of the current working directory; used to skip the download when the
# MI_EEG_ClassMeth folder is already present.
contents = os.listdir(os.getcwd())

if 'MI_EEG_ClassMeth' not in contents:
    # Two-step Google Drive download: the inner wget stores the session cookies
    # and extracts the "confirm" token from the response, the outer wget reuses
    # both to fetch the file as MI_EEG_ClassMeth.zip; the cookie file is removed
    # afterwards.
    !wget --load-cookies /tmp/cookies.txt "https://docs.google.com/uc?export=download&confirm=$(wget --quiet --save-cookies /tmp/cookies.txt --keep-session-cookies --no-check-certificate 'https://docs.google.com/uc?export=download&id='$FILEID -O- | sed -rn 's/.*confirm=([0-9A-Za-z_]+).*/\1\n/p')&id="$FILEID -O MI_EEG_ClassMeth.zip && rm -rf /tmp/cookies.txt
    # Presumably unpacks into the MI_EEG_ClassMeth folder checked above - verify.
    !unzip MI_EEG_ClassMeth.zip
else:
    print("MI_EEG_ClassMeth already downloaded!")

# System package: cuDNN 8 pinned to the CUDA 11.2 build.
!apt-get install --allow-change-held-packages libcudnn8=8.1.1.33-1+cuda11.2 -y
# Python dependencies.
# NOTE(review): none of these are version-pinned, so a fresh run may resolve
# to different releases than the ones the experiment was developed with.
!pip install -U git+https://github.com/UN-GCPDS/python-gcpds.databases
!pip install mne
!pip install pickle5
!pip install gcpds.utils
!pip install scikeras[tensorflow]

## Import libraries

In [None]:
# freq filter 
from MI_EEG_ClassMeth.FeatExtraction import TimeFrequencyRpr

#EEG montage
from gcpds.utils.mne_handler import get_best_montage

# general
import numpy as np
from scipy.signal import resample
import pickle5 as pickle
import warnings
import mne
from time import time
warnings.filterwarnings('ignore')

# tensorflow
import tensorflow as tf
import tensorflow_probability as tfp
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, Activation, Dropout, Conv2D, AveragePooling2D, BatchNormalization, Input, Flatten
from tensorflow.keras.constraints import max_norm
from tensorflow.keras.layers import Layer
from tensorflow.keras.regularizers import L1L2

# scikeras
from scikeras.wrappers import KerasClassifier
from sklearn.model_selection import GridSearchCV,StratifiedShuffleSplit
from sklearn.metrics import make_scorer
from sklearn.metrics import accuracy_score, cohen_kappa_score, roc_auc_score

## Define functions

In [None]:
def kappa(y_true, y_pred):
    """Cohen's kappa for 2-D (sample, class) target/prediction arrays.

    Both arrays are collapsed to class indices with an argmax along axis 1
    and the resulting label vectors are passed to ``cohen_kappa_score``.
    """
    true_labels = np.argmax(y_true, axis=1)
    predicted_labels = np.argmax(y_pred, axis=1)
    return cohen_kappa_score(true_labels, predicted_labels)

## PAIN dataset

In [None]:
def load_PAIN(db,sbj,f_bank,vwt,new_fs):
    """Load one subject of the PAIN (BMOP motor) dataset and preprocess it.

    Steps: read ``{db}BMOP_Motor_S<sbj>.pkl``, drop the channels that the best
    matching standard MNE montage does not contain, drop the last remaining
    row of ``X``, wrap the trials in an ``mne.EpochsArray``, apply
    ``TimeFrequencyRpr`` and optionally resample along the time axis.

    Parameters
    ----------
    db : str
        Folder prefix of the dataset; it is concatenated directly with the
        file name, so it must end with a path separator.
    sbj : int
        Subject number; zero-padded to two digits in the file name.
    f_bank, vwt : array-like
        Forwarded unchanged to ``TimeFrequencyRpr`` (presumably frequency
        bands in Hz and time windows in seconds - confirm against that class).
    new_fs : float
        Target sampling rate; resampling is skipped when it equals the
        recording's own rate.

    Returns
    -------
    X : ndarray
        Preprocessed trials (singleton axes squeezed out).
    y : ndarray
        Labels shifted by -2, so the event codes 2 ('pain/high') and
        3 ('resting') become 0 and 1.
    age, sex : ndarray
        Flattened subject metadata as stored in the file.
    fs : float
        The ORIGINAL sampling rate stored in the file, not ``new_fs``.
    """
    channels_names = np.array(['Fp1','Fp2',
                      'F3','F4','C3','C4','P3','P4','O1','O2','F7','F8',
                      'T7','T8','P7','P8','Fz','Cz','Pz','Oz',
                      'FC1','FC2','CP1','CP2','FC5','FC6','CP5','CP6',
                      'TP9','TP10','LE','RE','P1','P2','C1','C2',
                      'FT9','FT10','AF3','AF4','FC3','FC4','CP3','CP4','PO3','PO4',
                      'F5','F6','C5','C6','P5','P6','PO9','Iz','FT7','FT8',
                      'TP7','TP8','PO7','PO8','Fpz','PO10','CPz','POz',
                      'Ne','Ma','Ext','ECG'])

    # NOTE(security): unpickling executes arbitrary code; only load dataset
    # files that come from a trusted source.
    with open(f'{db}BMOP_Motor_S{int(sbj):02d}.pkl', 'rb') as f:
        data = pickle.load(f)

    X = data['X']  # trials, channels, time
    y = data['y']
    sex = data['sex'].ravel()
    age = data['age'].ravel()
    fs = float(data['fs'])

    tf_repr = TimeFrequencyRpr(sfreq = fs, f_bank = f_bank, vwt = vwt)

    # Read electrode positions to load the best standard montage-MNE
    best_montages = get_best_montage(channels_names)
    montage = best_montages.iloc[0]['montage']
    missing_channels = set(best_montages.iloc[0]['missings channels'])

    # Integer positions of the channels the montage lacks. Built from a
    # membership mask so that an empty "missing" list yields an empty index
    # array instead of failing on a 2-D slice of an empty array.
    channels_to_remove = np.flatnonzero(
        [name in missing_channels for name in channels_names])

    # Delete the missing channels from the names and from the data, keeping
    # the positions of the remaining ones aligned.
    channels_names = np.delete(channels_names, channels_to_remove)
    X = np.delete(X, channels_to_remove, axis=1)

    # The number of channel names does not match the channel dimension of X,
    # thus the last channel is discarded because it has weird amplitudes
    X = X[:,:-1,:]

    info = mne.create_info(list(channels_names), sfreq=fs, ch_types="eeg")
    info.set_montage(montage)

    event_id = {
        'pain/high':2,
        'resting':3,
        }

    # One event per trial: [sample index, previous value, event code].
    events = [[i, 1, cls[0]] for i, cls in enumerate(y)]
    tmin = 0

    epochs = mne.EpochsArray(X, info, events=events, tmin=tmin, event_id=event_id)
    X = epochs.get_data()
    y = y-2
    X = np.squeeze(tf_repr.transform(X))

    # Resampling
    if new_fs != fs:
        X = resample(X, int((X.shape[-1]/fs)*new_fs), axis = -1)
    return X,y,age,sex,fs

## Define the model (EEGNet)

In [None]:
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, Activation, Dropout
from tensorflow.keras.layers import Conv2D, AveragePooling2D
from tensorflow.keras.layers import SeparableConv2D, DepthwiseConv2D
from tensorflow.keras.layers import BatchNormalization
from tensorflow.keras.layers import SpatialDropout2D
from tensorflow.keras.layers import Input, Flatten
from tensorflow.keras.constraints import max_norm

def EEGNet(nb_classes, Chans = 64, Samples = 128,
             dropoutRate = 0.5, kernLength = 64, F1 = 8,
             D = 2, F2 = 16, norm_rate = 0.25, dropoutType = 'Dropout'):
    """Build the EEGNet Keras model.

    Parameters
    ----------
    nb_classes : int
        Number of units of the final softmax layer.
    Chans, Samples : int
        Input height and width; the model input has shape
        ``(Chans, Samples, 1)``.
    dropoutRate : float
        Rate used by both dropout layers.
    kernLength : int
        Width of the first (1 x kernLength) convolution.
    F1, D, F2 : int
        Filters of the first convolution, depth multiplier of the depthwise
        convolution and filters of the separable convolution.
    norm_rate : float
        Max-norm constraint applied to the dense kernel.
    dropoutType : str
        Either ``'Dropout'`` or ``'SpatialDropout2D'``.

    Returns
    -------
    Model mapping the input tensor to the softmax activations.

    Raises
    ------
    ValueError
        If ``dropoutType`` is neither of the two accepted strings.
    """
    # Resolve the dropout layer class from its string name.
    if dropoutType == 'Dropout':
        drop_layer = Dropout
    elif dropoutType == 'SpatialDropout2D':
        drop_layer = SpatialDropout2D
    else:
        raise ValueError('dropoutType must be one of SpatialDropout2D or Dropout, passed as a string.')

    in_shape = (Chans, Samples, 1)
    eeg_input = Input(shape = in_shape)

    # ---- Block 1: convolution along the time axis, then a depthwise
    # convolution spanning all channels.
    x = Conv2D(F1, (1, kernLength), padding = 'same', name='Conv2D_1',
               input_shape = in_shape, use_bias = False)(eeg_input)
    x = BatchNormalization()(x)
    x = DepthwiseConv2D((Chans, 1), use_bias = False,
                        name='Depth_wise_Conv2D_1',
                        depth_multiplier = D,
                        depthwise_constraint = max_norm(1.))(x)
    x = BatchNormalization()(x)
    x = Activation('elu')(x)
    x = AveragePooling2D((1, 4))(x)
    x = drop_layer(dropoutRate)(x)

    # ---- Block 2: separable convolution followed by pooling.
    x = SeparableConv2D(F2, (1, 16), name='Separable_Conv2D_1',
                        use_bias = False, padding = 'same')(x)
    x = BatchNormalization()(x)
    x = Activation('elu')(x)
    x = AveragePooling2D((1, 8))(x)
    x = drop_layer(dropoutRate)(x)

    # ---- Classification head.
    x = Flatten(name = 'flatten')(x)
    logits = Dense(nb_classes, name = 'output',
                   kernel_constraint = max_norm(norm_rate))(x)
    probabilities = Activation('softmax', name = 'out_activation')(logits)

    return Model(inputs=eeg_input, outputs=probabilities)

# Experiment

## Experiment configuration 

In [None]:
import os 
# Random seed shared by TensorFlow, the KerasClassifier and the CV splitter.
seed=23
# Number of shuffle-split repetitions used by the grid search.
folds=5
# Training epochs for every model fit.
epochs_train = 500

# Name of the output folder (weights and CV results are written under it).
save_folder = 'GroupsEEGNet_60Hz'

# Highest subject id; subject ids are generated as 1..n_subjects.
n_subjects = 51

In [None]:
import os
# Output directory: <current working directory>/<save_folder>.
PATH = '{}/{}'.format(os.getcwd(), save_folder)

## Run experiment

In [None]:
from sklearn.preprocessing import MinMaxScaler, RobustScaler
from scipy.stats import ks_2samp
from scipy.spatial.distance import squareform

# Seed TensorFlow's global random generator for reproducibility.
tf.random.set_seed(seed)

# NOTE(review): not referenced again in the visible part of the notebook.
accs_dict = {}

# Subject ids 1..n_subjects, then drop index 17, i.e. subject 18.
subjects = np.arange(n_subjects)+1
subjects = np.delete(subjects, 17)

# Dataset folder; load_PAIN concatenates it directly with the file name, so
# the trailing slash is required.
db = '../input/brain-mediators-of-pain-motor/'

# Number of output classes of the classifier.
num_class = 2

# Keyword arguments shared by every load_PAIN call; 'sbj' is filled in per
# subject. f_bank and vwt are forwarded to TimeFrequencyRpr (presumably a
# 4-60 Hz band and a 0.5-2.5 s window - confirm), new_fs is the resampling
# target rate.
load_args = dict(db = db,
            f_bank = np.asarray([[4., 60.]]),
            vwt = np.asarray([[0.5,2.5]]),
            new_fs = 256.0)

load_args['sbj'] = 1 

# Load subject 1 once up front; X_train is rebuilt per group in the
# experiment loop.
X_train, y_train, age, sex, fs = load_PAIN(**load_args)

### Groups definition

In [None]:
# Partition of the subjects into four groups; one model is trained per group.
# Each entry holds a display name, the subject ids to load and a colour label
# (presumably the colour used for the group in a figure - confirm).
# Together the lists cover subjects 1..51 except subject 18.
groups = {
    "G1": {
        "Name": "Group1",
        "Subjects": [2, 3, 7, 11, 24, 35, 36, 37, 38, 39, 40, 41, 44, 50],
        "Color": "Dark Blue"
    },
    "G2": {
        "Name": "Group2",
        "Subjects": [1, 4, 5, 6, 8, 10, 12, 13, 14, 17, 19, 26, 32, 33, 34, 42, 45, 46, 47, 49, 51],
        "Color": "Light Blue"
    },
    "G3": {
        "Name": "Group3",
        "Subjects": [9, 15, 16, 20, 21, 23, 25, 27, 28, 29, 30],
        "Color": "Yellow"
    },
    "G4": {
        "Name": "Group4",
        "Subjects": [22, 31, 43, 48],
        "Color": "Brown"
    }
}

In [None]:
from sklearn.model_selection import LeaveOneGroupOut

# Train and tune one EEGNet per subject group: pool the trials of all subjects
# of the group, grid-search F1 / kernLength with stratified shuffle splits and
# store the best model's weights plus the full CV results.
print("Starting experiment...\n")

t=time()
groups_keys = list(groups.keys())

# Create the output folder up front so an unwritable path fails before the
# long training runs; an already existing folder is fine, any other OS error
# is now reported instead of being swallowed.
full_path = os.path.join(PATH)
os.makedirs(full_path, exist_ok=True)

for group in groups_keys:
    group_name = groups[group]["Name"]
    
    print("------------------------------------------------------------------------------------------\n")
    print(f"                               {group_name} starting...")
    print("------------------------------------------------------------------------------------------\n")
    
    group_subs = groups[group]["Subjects"]
    print(f"Loading {group_name} subjects\n")
    
    # Kept for compatibility: the key was reserved for per-trial subject ids
    # (leave-one-subject-out CV), which is currently not used.
    groups[group]["Groups"] = []
    
    # Collect the per-subject arrays and concatenate once at the end.
    X_parts, Y_parts = [], []
    for sbj in group_subs:
        print(f"Loading subject: {sbj}\n")
        load_args['sbj'] = sbj 

        X_sbj, Y_sbj, _, _, _ = load_PAIN(**load_args)
        X_parts.append(X_sbj)
        Y_parts.append(Y_sbj)
        print("\n")
    
    X_train = np.concatenate(X_parts, axis = 0)
    Y_train = np.concatenate(Y_parts, axis = 0)
    Y_train = tf.keras.utils.to_categorical(Y_train,num_classes=num_class)

    # ----build model
    clf = KerasClassifier(
            EEGNet,
            random_state=seed,

            # ----model hyperparameters
            nb_classes=num_class, 
            Chans = X_train.shape[1], 
            Samples = X_train.shape[2],
            dropoutRate = 0.5,
            kernLength = X_train.shape[2],  # overridden by the grid below
            F1 = 4, D = 4, F2 = 32,

            # ----model config
            verbose=0,
            batch_size=500, # full batch only while a training split has <= 500 trials
            loss=tf.keras.losses.CategoricalCrossentropy(),
            optimizer="adam",
            # scikeras routes "<prefix>__<name>" parameters; the previous
            # spelling "optimizer_learning__rate" matched no prefix, so the
            # value never reached Adam.
            # NOTE(review): earlier results were therefore obtained with
            # Adam's default learning rate - confirm 0.1 is really intended.
            optimizer__learning_rate=0.1,
            metrics = ['accuracy'],
            epochs = epochs_train
        )
    # ----search params
    param_grid =  {
                    'F1':[4,8],
                    'kernLength':[64, 128],
                    }

    # ----Gridsearch
    scoring = {"AUC": 'roc_auc', "Accuracy": make_scorer(accuracy_score),'Kappa':make_scorer(kappa)}

    # ----find best params with gridsearch; the model is refit on all the
    # group data with the best-accuracy parameters.
    cv = GridSearchCV(clf,param_grid,cv=StratifiedShuffleSplit(n_splits = folds, test_size = 0.2, random_state = seed),
                             verbose=0,n_jobs=1, scoring=scoring, refit="Accuracy")
    cv.fit(X_train,Y_train)

    # ----best score (elapsed time is measured from the start of the experiment)
    print('Group', group_name,'Accuracy',cv.best_score_,'elapsed time',time()-t)
    print('---------')

    # Store the winning index next to the results so it survives pickling.
    cv.cv_results_['best_index_'] = cv.best_index_

    cv.best_estimator_.model_.save_weights(full_path + f'/{group_name}_weights.h5')
    with open(full_path + f'/{group_name}.p','wb') as f:
        pickle.dump(cv.cv_results_,f)

In [None]:
import shutil
# Zip the results folder into <save_folder>.zip in the working directory.
# Uses the configured folder name and PATH instead of a hard-coded
# /kaggle/working path, so the cell also works outside Kaggle.
shutil.make_archive(save_folder, 'zip', PATH)