# MultiSPI DL Classification

This notebook implements a simple deep learning classifier that receives all the SPIs of a subject and predicts the subject's label (AVGP or NVGP).

In [None]:
%reload_ext autoreload
%autoreload 2

In [None]:
import os
# os.environ['CUDA_LAUNCH_BLOCKING'] = '1'
os.environ['KERAS_BACKEND'] = 'torch'

import keras
import numpy as np
import pandas as pd
from sklearn.preprocessing import OneHotEncoder, LabelEncoder
from sklearn.model_selection import train_test_split

In [None]:
# Load the SPI table, keep one NaN-free SPI, and reshape it to wide form
# (one row per subject/label pair).

data = pd.read_csv('data/Julia2018/spis_dosenbach2010_network_fast.csv')

# Per SPI, count missing values over all remaining columns; an SPI is flagged
# as soon as that count is non-zero.
na_mask = data.groupby('spi').apply(
    lambda spi_rows: spi_rows.isna().sum().sum(),
    include_groups=False,
).gt(0)
na_spis = na_mask.loc[na_mask].index

# Keep only the TOP featureset using svm ...
is_selected_spi = data['spi'] == "phase_multitaper_max_fs-1_fmin-0_fmax-0-5"
data = data[is_selected_spi]

# ... and discard it again if it was flagged as containing missing values.
has_missing = data['spi'].isin(na_spis)
data = data[~has_missing]

# Wide format: index is (subject, label); the two-level column labels produced
# by the pivot are flattened into single '_'-joined strings.
data = data.pivot(index=['subject', 'label'], columns=['spi'])
data.columns = data.columns.map('_'.join)

In [None]:

# Feature matrix: the raw values of the wide table, rows in the order of its index.
X = data.values

# Class names are stored in the 'label' level of the row index.
label_level = data.index.get_level_values('label')
labels = label_level.values

# Integer-encode the class names and keep y as a single column.
# (A one-hot target via OneHotEncoder is the alternative; it would need the
# non-sparse categorical loss/metric instead of the sparse ones.)
y_encoder = LabelEncoder()
encoded_labels = y_encoder.fit_transform(labels)
y = encoded_labels.reshape(-1, 1)

In [None]:
class ProgressBar(keras.callbacks.Callback):
    """Keras callback that reports training progress on one tqdm bar.

    The bar advances once per epoch and shows the most recent epoch logs
    (whatever Keras passes to ``on_epoch_end``) as its postfix.

    Args:
        n_epochs: Total number of epochs, used as the bar's ``total``.
            ``None`` gives a bar without a known total.
    """

    def __init__(self, n_epochs=None):
        # Run the base-class initialiser so any state keras.callbacks.Callback
        # sets up exists before Keras starts driving this callback.
        super().__init__()

        # Imported lazily so tqdm is only needed when the callback is used.
        from tqdm.auto import tqdm
        self.n_epochs = n_epochs
        self.pbar = tqdm(total=n_epochs, unit='epoch', dynamic_ncols=True, leave=False)

    def on_train_begin(self, logs=None):
        """Reset the bar to zero at the start of training."""
        self.pbar.reset()

    def on_epoch_end(self, epoch, logs=None):
        """Show the epoch logs and move the bar to ``epoch + 1``."""
        self.pbar.set_postfix(logs)
        # `epoch` is zero-based; advance by the difference so the counter ends
        # up at epoch + 1 regardless of its previous position.
        self.pbar.update(epoch - self.pbar.n + 1)

    def on_train_end(self, logs=None):
        """Close the bar once training stops (including early stopping)."""
        self.pbar.close()


# build model
def build_model(n_features=None, learning_rate=.01):
    """Build and compile the dense two-class classifier.

    Architecture: input -> Dense(2 * n_features, relu) -> Dense(16, relu)
    -> Dense(2, softmax). Compiled with sparse categorical cross-entropy, so
    targets are expected as integer class indices rather than one-hot rows.

    Args:
        n_features: Number of input features. Defaults to the column count of
            the notebook-level feature matrix ``X``, which is what the
            original zero-argument call used.
        learning_rate: Learning rate of the Adam optimizer.

    Returns:
        The compiled ``keras.Sequential`` model.
    """
    if n_features is None:
        # Backward-compatible default: size the network from the global X.
        n_features = X.shape[1]

    model = keras.Sequential(
        [
            keras.layers.Input(shape=(n_features,)),
            keras.layers.Dense(n_features * 2, activation='relu'),
            keras.layers.Dense(16, activation='relu'),
            keras.layers.Dense(2, activation='softmax'),
        ]
    )

    model.compile(
        loss=keras.losses.SparseCategoricalCrossentropy(),
        optimizer=keras.optimizers.Adam(learning_rate=learning_rate),
        metrics=[
            keras.metrics.SparseCategoricalAccuracy(name='accuracy'),
        ],
    )

    return model

# evaluate model
# Repeated hold-out evaluation: each run draws a fresh stratified 75/25 split,
# trains a new model, and records its accuracy on the held-out quarter.
N_RUNS = 10
MAX_EPOCHS = 10000  # shared by fit() and the progress bar so they cannot drift apart
LOG_PATH = 'tmp/keras_logs/spi_v1.csv'

# Make sure the log directory exists before the CSV logger tries to open the file.
os.makedirs(os.path.dirname(LOG_PATH), exist_ok=True)

scores = []
for run in range(1, N_RUNS + 1):
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, stratify=y)

    model = build_model()

    model.fit(
        X_train, y_train,
        epochs=MAX_EPOCHS,
        verbose=0,
        shuffle=True,
        callbacks=[
            # No validation data is passed to fit(), so stopping is driven by
            # the training loss.
            keras.callbacks.EarlyStopping(monitor='loss', patience=1000),
            # The first run starts a fresh file; later runs append to it, so
            # the log keeps every run instead of only the last one.
            keras.callbacks.CSVLogger(LOG_PATH, append=run > 1),
            ProgressBar(n_epochs=MAX_EPOCHS)
        ]
    )
    score = model.evaluate(X_test, y_test, return_dict=True, verbose=0)
    scores.append(score['accuracy'])

    print(f'test accuracy (run_{run:02}):', score['accuracy'])

print('mean accuracy:', np.mean(scores))