In [None]:
import collections
import matplotlib.pyplot as plt
import numpy as np

import tensorflow as tf

from tensorflow.keras import layers
import os


In [None]:
import os
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D  # noqa
import mne
import numpy as np
import mne

In [None]:
from pyeeglab import    TUHEEGArtifactDataset, SinglePickleCache, Pipeline, CommonChannelSet, \
                        LowestFrequency, ToDataframe, DynamicWindow, BinarizedSpearmanCorrelation, \
                        ToNumpy, JoinedPreprocessor

In [None]:
from pyeeglab import  BandPassFrequency

In [None]:
from sklearn.metrics import accuracy_score, confusion_matrix, f1_score, precision_score, recall_score

In [None]:
import tensorflow as tf
# Ask TensorFlow to grow GPU memory on demand for every visible GPU.
# A RuntimeError raised while configuring the devices is reported and
# otherwise ignored, so the notebook keeps running.
gpus = tf.config.experimental.list_physical_devices('GPU')
try:
    for device in gpus:
        tf.config.experimental.set_memory_growth(device, True)
except RuntimeError as err:
    print(err)

from tensorflow.keras import Model, Input
from tensorflow.keras.layers import Dense, Concatenate, Reshape, Flatten, LSTM
from tensorflow.keras.utils import to_categorical
from sklearn.metrics import accuracy_score, confusion_matrix
from sklearn.model_selection import StratifiedKFold
from spektral.layers import GraphAttention
from spektral.utils import nx_to_adj, nx_to_node_features, add_eye
import numpy as np

import os
import sys
from tensorflow.keras import Model, Input
from tensorflow.keras.layers import Dense, Concatenate, Reshape, Flatten, Conv2D, MaxPool2D, LSTM
from tensorflow.keras.utils import to_categorical
from sklearn.metrics import accuracy_score, confusion_matrix
from sklearn.model_selection import StratifiedKFold
import numpy as np
from pyeeglab import CorrelationToAdjacency, Bandpower, GraphWithFeatures, Kurtosis, AbsoluteArea, ZeroCrossing, StaticWindowOverlap

In [None]:
from keras import backend as K
def f1_metric(y_true, y_pred):
    """Batch-wise F1 score built from Keras backend ops.

    Tensors are clipped to [0, 1] and rounded before being counted, and the
    counts are summed over every element of the batch (no per-class
    averaging). ``K.epsilon()`` is added to each denominator so that an empty
    batch statistic does not divide by zero.

    Args:
        y_true: Ground-truth tensor (presumably one-hot targets shaped like
            ``y_pred`` -- confirm against the caller).
        y_pred: Predicted scores.

    Returns:
        Scalar tensor ``2 * P * R / (P + R + eps)`` where ``P`` is the batch
        precision and ``R`` the batch recall.
    """
    def _count_ones(tensor):
        # Clip to [0, 1], round to the nearest integer, then sum everything.
        return K.sum(K.round(K.clip(tensor, 0, 1)))

    eps = K.epsilon()
    true_positives = _count_ones(y_true * y_pred)
    precision_value = true_positives / (_count_ones(y_pred) + eps)
    recall_value = true_positives / (_count_ones(y_true) + eps)
    return 2*((precision_value*recall_value)/(precision_value+recall_value+eps))

In [None]:
# Create the TUHEEGArtifactDataset handle pointing at the local EDF directory.
dataset = TUHEEGArtifactDataset('./data/tuh_eeg_artifact/v1.0.0/edf')

In [None]:
# Attach a SinglePickleCache rooted at ./export to the dataset handle.
dataset.set_cache_manager(SinglePickleCache('./export'))

In [None]:
# Preprocessing recipe for the first experiment: a pyeeglab Pipeline built from
# the steps listed below (band-pass arguments 0.1 and 47, DynamicWindow(16)).
# NOTE(review): the exact semantics of each step live in pyeeglab -- confirm
# there before changing the order or the arguments.
preprocessing = Pipeline([
    CommonChannelSet(),
    LowestFrequency(),
    BandPassFrequency(0.1, 47),
    ToDataframe(),
    DynamicWindow(16),
    BinarizedSpearmanCorrelation(),
    ToNumpy()
])

In [None]:
# Build a fresh dataset handle and attach the pickle cache to it before the
# pipeline is applied in the next cell.
dataset = TUHEEGArtifactDataset('./data/tuh_eeg_artifact/v1.0.0/edf')
dataset.set_cache_manager(SinglePickleCache('./export'))

In [None]:
# Apply the pipeline, set the minimum event duration to 3 and load the data.
# NOTE(review): `dataset` is rebound from the dataset handle to the result of
# load() (indexed by 'data' / 'labels' below), so this cell cannot be re-run
# without first re-creating the handle.
dataset = dataset.set_pipeline(preprocessing).set_minimum_event_duration(3).load()

In [None]:
# Pull the samples and their labels out of the loaded dataset.
data, labels = dataset['data'], dataset['labels']

In [None]:
# Collapse the labels to two classes: label 4 -> 0, every other label -> 1.
# The result is built from dataset['labels'] with np.where instead of masked
# in-place writes, so the loaded dataset is left untouched and re-running this
# cell gives the same answer (the in-place version turned every label into 1
# on a second run, because no 4 was left to map to 0).
# NOTE(review): which class the id 4 stands for is not visible here -- confirm
# against dataset['labels_encoder'].
labels = np.where(np.asarray(dataset['labels']) == 4, 0, 1)

In [None]:
# Model dimensions derived from the first sample:
#   adjs        -- length of its first axis (one CNN branch is built per entry)
#   input_shape -- remaining axes, i.e. the shape of one branch input
#   classes     -- number of distinct label values
_first_sample_shape = np.array(data[0]).shape
adjs, input_shape = _first_sample_shape[0], _first_sample_shape[1:]
classes = len(set(labels))


In [None]:
# Regroup the samples into one array per branch: entry k holds the k-th matrix
# of every sample, reshaped to (*input_shape, 1) so Conv2D gets a channel axis.
# The source is dataset['data'] rather than `data` itself, so re-running this
# cell rebuilds the same list instead of regrouping already-regrouped data.
data = [
    np.array([sample[k].reshape((*input_shape, 1)) for sample in dataset['data']])
    for k in range(adjs)
]


In [None]:
# Cross-validation setup: shuffled, stratified folds plus the running averages
# that the evaluation loop accumulates into.
n_splits = 5
total_acc = total_f1 = 0
skf = StratifiedKFold(shuffle=True, n_splits=n_splits)

In [None]:
# Stratified cross-validation of the multi-branch CNN + LSTM classifier.
# The running averages are reset here (not only in the setup cell) so that
# re-running this cell does not add to the totals of a previous run.
total_acc = 0
total_f1 = 0
for train_idx, test_idx in skf.split(data[0], labels):
    # Every branch input is indexed with the same fold indices.
    x_train, y_train = [d[train_idx] for d in data], labels[train_idx]
    x_test, y_test = [d[test_idx] for d in data], labels[test_idx]

    y_train_cat = to_categorical(y_train)
    y_test_cat = to_categorical(y_test)

    # One independent Conv2D -> MaxPool2D -> Flatten branch per input matrix.
    cnns = []
    for _ in range(adjs):
        input_a = Input((*input_shape, 1))
        x = Conv2D(8, 3)(input_a)
        x = MaxPool2D(2)(x)
        x = Flatten()(x)
        x = Model(inputs=[input_a], outputs=x)
        cnns.append(x)

    # Concatenate the branch features, then fold them back into a sequence of
    # len(cnns) steps so the LSTM reads one branch per step.
    combine = Concatenate()([x.output for x in cnns])
    reshape = Reshape((len(cnns), cnns[0].output_shape[1]))(combine)
    lstm = LSTM(32)(reshape)
    z = Dense(classes, activation='softmax')(lstm)

    model = Model(inputs=[x.input for x in cnns], outputs=z)
    model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy', f1_metric])
    model.fit(x_train, y_train_cat, batch_size=32, epochs=50, shuffle=True, validation_split=0.1)
    y_pred = model.predict(x_test).argmax(axis=-1)
    acc = accuracy_score(y_test, y_pred)
    f1 = f1_score(y_test, y_pred)
    print("Fold accuracy: {:2.2f}".format(acc))
    matrix = confusion_matrix(y_test, y_pred)
    print("Fold confusion matrix: \n {}".format(matrix))
    # Each fold contributes 1/n_splits, so the totals end up as fold means.
    total_acc += acc/n_splits
    total_f1 += f1/n_splits

print("Total model accuracy: {:2.2f}".format(total_acc))
print("Total model f1: {:2.2f}".format(total_f1))

In [None]:
# Second experiment: pipeline without the band-pass step, DynamicWindow(4),
# and a minimum event duration of 6.
preprocessing = Pipeline([
    CommonChannelSet(), LowestFrequency(), ToDataframe(),
    DynamicWindow(4), BinarizedSpearmanCorrelation(), ToNumpy(),
])
dataset = TUHEEGArtifactDataset('./data/tuh_eeg_artifact/v1.0.0/edf')
dataset.set_cache_manager(SinglePickleCache('./export'))
dataset = (
    dataset
    .set_pipeline(preprocessing)
    .set_minimum_event_duration(6)
    .load()
)

In [None]:
# Prepare the second experiment: binary labels, model dimensions, per-branch
# inputs and the cross-validation splitter.
data = dataset['data']
# Label 4 -> 0, everything else -> 1. np.where builds a new array, so
# dataset['labels'] is not mutated and re-running this cell is safe (the
# masked in-place writes turned every label into 1 on a second run).
# NOTE(review): the meaning of class id 4 is not visible here -- confirm
# against dataset['labels_encoder'].
labels = np.where(np.asarray(dataset['labels']) == 4, 0, 1)
adjs = np.array(data[0]).shape[0]
classes = len(set(labels))
input_shape = np.array(data[0]).shape[1:]
# Entry k holds the k-th matrix of every sample with a trailing channel axis.
data = [
    np.array([sample[k].reshape((*input_shape, 1)) for sample in dataset['data']])
    for k in range(adjs)
]
total_acc = 0
total_f1 = 0
n_splits = 5
skf = StratifiedKFold(n_splits=n_splits, shuffle=True)

In [None]:
# Stratified cross-validation of the multi-branch CNN + LSTM classifier on the
# second experiment's data.
# The running averages are reset here (not only in the setup cell) so that
# re-running this cell does not add to the totals of a previous run.
total_acc = 0
total_f1 = 0
for train_idx, test_idx in skf.split(data[0], labels):
    # Every branch input is indexed with the same fold indices.
    x_train, y_train = [d[train_idx] for d in data], labels[train_idx]
    x_test, y_test = [d[test_idx] for d in data], labels[test_idx]

    y_train_cat = to_categorical(y_train)
    y_test_cat = to_categorical(y_test)

    # One independent Conv2D -> MaxPool2D -> Flatten branch per input matrix.
    cnns = []
    for _ in range(adjs):
        input_a = Input((*input_shape, 1))
        x = Conv2D(8, 3)(input_a)
        x = MaxPool2D(2)(x)
        x = Flatten()(x)
        x = Model(inputs=[input_a], outputs=x)
        cnns.append(x)

    # Concatenate the branch features, then fold them back into a sequence of
    # len(cnns) steps so the LSTM reads one branch per step.
    combine = Concatenate()([x.output for x in cnns])
    reshape = Reshape((len(cnns), cnns[0].output_shape[1]))(combine)
    lstm = LSTM(32)(reshape)
    z = Dense(classes, activation='softmax')(lstm)

    model = Model(inputs=[x.input for x in cnns], outputs=z)
    model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy', f1_metric])
    model.fit(x_train, y_train_cat, batch_size=32, epochs=50, shuffle=True, validation_split=0.1)
    y_pred = model.predict(x_test).argmax(axis=-1)
    acc = accuracy_score(y_test, y_pred)
    f1 = f1_score(y_test, y_pred)
    print("Fold accuracy: {:2.2f}".format(acc))
    matrix = confusion_matrix(y_test, y_pred)
    print("Fold confusion matrix: \n {}".format(matrix))
    # Each fold contributes 1/n_splits, so the totals end up as fold means.
    total_acc += acc/n_splits
    total_f1 += f1/n_splits

print("Total model accuracy: {:2.2f}".format(total_acc))
print("Total model f1: {:2.2f}".format(total_f1))

In [None]:
# Third experiment: CommonChannelSet is given the two T1/T2 reference channel
# names, band-pass arguments 0.1 and 47, DynamicWindow(8), minimum event
# duration 3.
preprocessing = Pipeline([
    CommonChannelSet(['EEG T1-REF', 'EEG T2-REF']),
    LowestFrequency(), BandPassFrequency(0.1, 47), ToDataframe(),
    DynamicWindow(8), BinarizedSpearmanCorrelation(), ToNumpy(),
])
dataset = TUHEEGArtifactDataset('./data/tuh_eeg_artifact/v1.0.0/edf')
dataset.set_cache_manager(SinglePickleCache('./export'))
dataset = (
    dataset
    .set_pipeline(preprocessing)
    .set_minimum_event_duration(3)
    .load()
)

In [None]:
# Prepare the third experiment. Labels are used as loaded (no binarisation),
# so `classes` is the number of distinct label values in the dataset.
data, labels = dataset['data'], dataset['labels']
_first_sample_shape = np.array(data[0]).shape
adjs, input_shape = _first_sample_shape[0], _first_sample_shape[1:]
classes = len(set(labels))
# inputs[k] collects the k-th matrix of every sample with a channel axis added.
inputs = [
    [sample[k].reshape((*input_shape, 1)) for sample in data]
    for k in range(adjs)
]
data = [np.array(branch) for branch in inputs]
n_splits = 5
total_acc = total_f1 = 0
skf = StratifiedKFold(shuffle=True, n_splits=n_splits)

In [None]:
import xgboost as xgb

In [None]:
# 2-fold stratified cross-validation of the multi-branch CNN + LSTM model
# (25 epochs, 25% of each training fold held out for validation).
n_splits = 2
total_acc = total_f1 = 0
skf = StratifiedKFold(shuffle=True, n_splits=n_splits)
i = 0
for train_idx, test_idx in skf.split(data[0], labels):
    i += 1
    # Every branch input is indexed with the same fold indices.
    x_train = [d[train_idx] for d in data]
    y_train = labels[train_idx]
    x_test = [d[test_idx] for d in data]
    y_test = labels[test_idx]

    y_train_cat, y_test_cat = to_categorical(y_train), to_categorical(y_test)

    # One independent Conv2D -> MaxPool2D -> Flatten branch per input matrix.
    cnns = []
    for _ in range(adjs):
        input_a = Input((*input_shape, 1))
        features = Conv2D(8, 3)(input_a)
        features = MaxPool2D(2)(features)
        features = Flatten()(features)
        x = Model(inputs=[input_a], outputs=features)
        cnns.append(x)

    # Concatenated branch features are reshaped into len(cnns) LSTM steps.
    combine = Concatenate()([branch.output for branch in cnns])
    reshape = Reshape((len(cnns), cnns[0].output_shape[1]))(combine)
    lstm = LSTM(32)(reshape)
    z = Dense(classes, activation='softmax')(lstm)

    model = Model(inputs=[branch.input for branch in cnns], outputs=z)
    model.compile(
        optimizer='adam',
        loss='categorical_crossentropy',
        metrics=['accuracy', f1_metric],
    )
    model.fit(
        x_train,
        y_train_cat,
        batch_size=32,
        epochs=25,
        shuffle=True,
        validation_split=0.25,
    )
    y_pred = model.predict(x_test).argmax(axis=-1)
    acc = accuracy_score(y_test, y_pred)
    f1 = f1_score(y_test, y_pred, average='weighted')
    matrix = confusion_matrix(y_test, y_pred)
    print("Fold accuracy: {:2.2f}".format(acc))
    print("Fold f1: {:2.2f}".format(f1))
    print("Fold confusion matrix: \n {}".format(matrix))
    # Each fold contributes 1/n_splits, so the totals end up as fold means.
    total_acc = total_acc + acc/n_splits
    total_f1 = total_f1 + f1/n_splits

print("Total model accuracy: {:2.2f}".format(total_acc))
print("Total model f1: {:2.2f}".format(total_f1))

In [None]:
# Fourth experiment: same pipeline as the previous one (T1/T2 reference
# channel names, band-pass arguments 0.1 and 47, DynamicWindow(8)) but with a
# minimum event duration of 6.
preprocessing = Pipeline([
    CommonChannelSet(['EEG T1-REF', 'EEG T2-REF']),
    LowestFrequency(), BandPassFrequency(0.1, 47), ToDataframe(),
    DynamicWindow(8), BinarizedSpearmanCorrelation(), ToNumpy(),
])
dataset = TUHEEGArtifactDataset('./data/tuh_eeg_artifact/v1.0.0/edf')
dataset.set_cache_manager(SinglePickleCache('./export'))
dataset = (
    dataset
    .set_pipeline(preprocessing)
    .set_minimum_event_duration(6)
    .load()
)

In [None]:
# Prepare the fourth experiment. Labels are used as loaded (no binarisation),
# so `classes` is the number of distinct label values in the dataset.
data, labels = dataset['data'], dataset['labels']
_first_sample_shape = np.array(data[0]).shape
adjs, input_shape = _first_sample_shape[0], _first_sample_shape[1:]
classes = len(set(labels))
# inputs[k] collects the k-th matrix of every sample with a channel axis added.
inputs = [
    [sample[k].reshape((*input_shape, 1)) for sample in data]
    for k in range(adjs)
]
data = [np.array(branch) for branch in inputs]
n_splits = 5
total_acc = total_f1 = 0
skf = StratifiedKFold(shuffle=True, n_splits=n_splits)

In [None]:
# 2-fold stratified cross-validation of the multi-branch CNN + LSTM model
# (25 epochs, 10% of each training fold held out for validation).
n_splits = 2
total_acc = total_f1 = 0
skf = StratifiedKFold(shuffle=True, n_splits=n_splits)
i = 0
for train_idx, test_idx in skf.split(data[0], labels):
    i += 1
    # Every branch input is indexed with the same fold indices.
    x_train = [d[train_idx] for d in data]
    y_train = labels[train_idx]
    x_test = [d[test_idx] for d in data]
    y_test = labels[test_idx]

    y_train_cat, y_test_cat = to_categorical(y_train), to_categorical(y_test)

    # One independent Conv2D -> MaxPool2D -> Flatten branch per input matrix.
    cnns = []
    for _ in range(adjs):
        input_a = Input((*input_shape, 1))
        features = Conv2D(8, 3)(input_a)
        features = MaxPool2D(2)(features)
        features = Flatten()(features)
        x = Model(inputs=[input_a], outputs=features)
        cnns.append(x)

    # Concatenated branch features are reshaped into len(cnns) LSTM steps.
    combine = Concatenate()([branch.output for branch in cnns])
    reshape = Reshape((len(cnns), cnns[0].output_shape[1]))(combine)
    lstm = LSTM(32)(reshape)
    z = Dense(classes, activation='softmax')(lstm)

    model = Model(inputs=[branch.input for branch in cnns], outputs=z)
    model.compile(
        optimizer='adam',
        loss='categorical_crossentropy',
        metrics=['accuracy', f1_metric],
    )
    model.fit(
        x_train,
        y_train_cat,
        batch_size=32,
        epochs=25,
        shuffle=True,
        validation_split=0.1,
    )
    y_pred = model.predict(x_test).argmax(axis=-1)
    acc = accuracy_score(y_test, y_pred)
    f1 = f1_score(y_test, y_pred, average='weighted')
    matrix = confusion_matrix(y_test, y_pred)
    print("Fold accuracy: {:2.2f}".format(acc))
    print("Fold f1: {:2.2f}".format(f1))
    print("Fold confusion matrix: \n {}".format(matrix))
    # Each fold contributes 1/n_splits, so the totals end up as fold means.
    total_acc = total_acc + acc/n_splits
    total_f1 = total_f1 + f1/n_splits

print("Total model accuracy: {:2.2f}".format(total_acc))
print("Total model f1: {:2.2f}".format(total_f1))

In [None]:
# 2-fold stratified cross-validation of the multi-branch CNN whose
# concatenated features feed the softmax layer directly (no LSTM stage).
n_splits = 2
total_acc = total_f1 = 0
skf = StratifiedKFold(shuffle=True, n_splits=n_splits)
i = 0
for train_idx, test_idx in skf.split(data[0], labels):
    i += 1
    # Every branch input is indexed with the same fold indices.
    x_train = [d[train_idx] for d in data]
    y_train = labels[train_idx]
    x_test = [d[test_idx] for d in data]
    y_test = labels[test_idx]

    y_train_cat, y_test_cat = to_categorical(y_train), to_categorical(y_test)

    # One independent Conv2D -> MaxPool2D -> Flatten branch per input matrix.
    cnns = []
    for _ in range(adjs):
        input_a = Input((*input_shape, 1))
        features = Conv2D(8, 3)(input_a)
        features = MaxPool2D(2)(features)
        features = Flatten()(features)
        x = Model(inputs=[input_a], outputs=features)
        cnns.append(x)

    combine = Concatenate()([branch.output for branch in cnns])
    z = Dense(classes, activation='softmax')(combine)

    model = Model(inputs=[branch.input for branch in cnns], outputs=z)
    model.compile(
        optimizer='adam',
        loss='categorical_crossentropy',
        metrics=['accuracy', f1_metric],
    )
    model.fit(
        x_train,
        y_train_cat,
        batch_size=32,
        epochs=25,
        shuffle=True,
        validation_split=0.1,
    )
    y_pred = model.predict(x_test).argmax(axis=-1)
    acc = accuracy_score(y_test, y_pred)
    f1 = f1_score(y_test, y_pred, average='weighted')
    matrix = confusion_matrix(y_test, y_pred)
    print("Fold accuracy: {:2.2f}".format(acc))
    print("Fold f1: {:2.2f}".format(f1))
    print("Fold confusion matrix: \n {}".format(matrix))
    # Each fold contributes 1/n_splits, so the totals end up as fold means.
    total_acc = total_acc + acc/n_splits
    total_f1 = total_f1 + f1/n_splits

print("Total model accuracy: {:2.2f}".format(total_acc))
print("Total model f1: {:2.2f}".format(total_f1))

In [None]:
import matplotlib.pyplot as plt

In [None]:
# Loss curves of the most recently fitted Keras model (the last CV fold).
training_loss = model.history.history['loss']
# 'val_loss' is measured on the validation_split hold-out taken from the
# training fold, not on the test fold, so the curve is labelled as validation
# loss. The variable keeps its original name for cells that may still read it.
test_loss = model.history.history['val_loss']

# Create count of the number of epochs
epoch_count = range(1, len(training_loss) + 1)

# Visualize loss history
plt.plot(epoch_count, training_loss, 'r--')
plt.plot(epoch_count, test_loss, 'b-')
plt.legend(['Training Loss', 'Validation Loss'])
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.show();


In [None]:
import pandas as pd

In [None]:
# Write the confusion matrix of the last evaluated fold to results2.csv, using
# dataset['labels_encoder'] as the column labels.
pd.DataFrame(matrix, columns=dataset['labels_encoder']).to_csv('results2.csv')

In [None]:
from tensorflow.keras.utils import plot_model

In [None]:
import pydot
import graphviz

In [None]:
# Render the architecture of the current Keras model to a PNG, laid out
# left-to-right with tensor shapes shown and layer names hidden.
dot_img_file = 'model_1.png'
plot_model(
    model,
    to_file=dot_img_file,
    show_shapes=True,
    show_layer_names=False,
    rankdir='LR',
)


In [None]:
# Persist the current Keras model (the one fitted in the last CV fold) to disk.
model.save('my_model.h5')

In [None]:
# Rebind `data` and `labels` to the raw arrays of the loaded dataset.
# NOTE: from here on `data` is no longer the per-branch list used by the CNN
# cells above; the tree-model cells below flatten this raw array instead.
data, labels = dataset['data'], dataset['labels']

In [None]:
# Flatten every sample into a single feature vector for the tree models.
# -1 lets NumPy infer the feature count from all trailing axes; the previous
# product used data.shape[2] twice, which is only right when the last two
# axes have equal length. np.asarray also accepts a list of samples.
data_reformated = np.asarray(data).reshape((len(data), -1))

In [None]:
# Display the (samples, features) shape of the flattened data.
data_reformated.shape

In [None]:
import xgboost as xgb

In [None]:
from sklearn.model_selection import train_test_split

In [None]:
# Hold out 20% of the flattened samples as a test set; random_state=123 makes
# this split repeatable.
X_train, X_test, y_train, y_test = train_test_split(data_reformated, labels, test_size=0.2, random_state=123)


In [None]:
# Wrap the train and test splits, with their labels, in XGBoost DMatrix objects.
D_train = xgb.DMatrix(X_train, label=y_train)
D_test = xgb.DMatrix(X_test, label=y_test)


In [None]:
# Training configuration for the XGBoost booster.
steps = 100  # The number of training iterations

# 'multi:softprob' with num_class set to the number of distinct labels.
param = {
    'eta': 0.3,
    'max_depth': 4,
    'objective': 'multi:softprob',
    'num_class': len(set(labels)),
}


In [None]:
# Train the XGBoost booster for `steps` rounds.
# NOTE: `model` is rebound here from the Keras model to the XGBoost booster.
model = xgb.train(param, D_train, steps)

In [None]:
from sklearn.metrics import precision_score, recall_score, accuracy_score

# Predict on the test DMatrix and keep, for every row of the prediction
# output, the index of its largest entry as the predicted class.
preds = model.predict(D_test)
best_preds = np.asarray(list(map(np.argmax, preds)))

In [None]:
from catboost import CatBoostClassifier

In [None]:
# CatBoost multi-class classifier: 50 iterations, learning rate 1, verbose
# training output. `model` is rebound to this classifier.
model = CatBoostClassifier(
    iterations=50,
    learning_rate=1,
    loss_function='MultiClass',
    verbose=True,
)

In [None]:
# Fit the CatBoost classifier on the held-in split.
model.fit(X_train, y_train)

In [None]:
# Store the CatBoost predictions under their own name. Assigning them to
# y_test overwrote the ground-truth labels, so every metric computed afterwards
# compared one model's predictions with another model's predictions.
# ravel() flattens the result in case predict returns a column vector
# (presumably shape (n, 1) for MultiClass -- confirm in the CatBoost docs).
catboost_preds = np.asarray(model.predict(X_test)).ravel()

In [None]:
# Weighted F1 of the XGBoost class predictions (best_preds) against y_test.
print("f1 = {}".format(f1_score(y_test, best_preds, average='weighted')))

In [None]:
# Accuracy of the XGBoost class predictions (best_preds) against y_test.
# The printed label used to read "f1 = " although the value is accuracy_score.
print("accuracy = {}".format(accuracy_score(y_test, best_preds)))

In [None]:
# 5-fold stratified cross-validation of a CatBoost multi-class classifier on
# the flattened features. A fresh classifier is fitted for every fold.
n_splits = 5
total_acc = total_f1 = 0
skf = StratifiedKFold(shuffle=True, n_splits=n_splits)
i = 0
for train_idx, test_idx in skf.split(data_reformated, labels):
    i += 1
    X_train, y_train = data_reformated[train_idx], labels[train_idx]
    X_test, y_test = data_reformated[test_idx], labels[test_idx]
    model = CatBoostClassifier(
        iterations=50,
        learning_rate=1,
        loss_function='MultiClass',
    )
    model.fit(X_train, y_train, verbose=False)
    y_pred = model.predict(X_test)
    acc = accuracy_score(y_test, y_pred)
    f1 = f1_score(y_test, y_pred, average='weighted')
    matrix = confusion_matrix(y_test, y_pred)
    print("Fold accuracy: {:2.2f}".format(acc))
    print("Fold f1: {:2.2f}".format(f1))
    print("Fold confusion matrix: \n {}".format(matrix))
    # Each fold contributes 1/n_splits, so the totals end up as fold means.
    total_acc = total_acc + acc/n_splits
    total_f1 = total_f1 + f1/n_splits

print("Total model accuracy: {:2.2f}".format(total_acc))
print("Total model f1: {:2.2f}".format(total_f1))

In [None]:
# 5-fold stratified cross-validation of an XGBoost multi-class booster on the
# flattened features, using default booster settings apart from the objective.
n_splits = 5
total_acc = total_f1 = 0
skf = StratifiedKFold(shuffle=True, n_splits=n_splits)
i = 0
for train_idx, test_idx in skf.split(data_reformated, labels):
    i += 1
    X_train, y_train = data_reformated[train_idx], labels[train_idx]
    X_test, y_test = data_reformated[test_idx], labels[test_idx]
    D_train = xgb.DMatrix(X_train, label=y_train)
    D_test = xgb.DMatrix(X_test, label=y_test)
    param = {
        'objective': 'multi:softprob',
        'num_class': len(set(labels)),
    }

    steps = 100  # The number of training iterations
    model = xgb.train(param, D_train, steps)
    preds = model.predict(D_test)
    # Index of the largest entry of every prediction row = predicted class.
    y_pred = np.asarray(list(map(np.argmax, preds)))
    acc = accuracy_score(y_test, y_pred)
    f1 = f1_score(y_test, y_pred, average='weighted')
    matrix = confusion_matrix(y_test, y_pred)
    print("Fold accuracy: {:2.2f}".format(acc))
    print("Fold f1: {:2.2f}".format(f1))
    print("Fold confusion matrix: \n {}".format(matrix))
    # Each fold contributes 1/n_splits, so the totals end up as fold means.
    total_acc = total_acc + acc/n_splits
    total_f1 = total_f1 + f1/n_splits

print("Total model accuracy: {:2.2f}".format(total_acc))
print("Total model f1: {:2.2f}".format(total_f1))

In [None]:
from sklearn.metrics import f1_score, make_scorer

# Weighted-F1 scorer object for scikit-learn model selection.
# NOTE: this rebinds `f1`, which the CV loops in this notebook use for the
# per-fold F1 value; running one of those loops afterwards replaces the scorer.
f1 = make_scorer(f1_score , average='weighted')

In [None]:
from sklearn.model_selection import GridSearchCV

# Exhaustive grid search over XGBClassifier hyper-parameters, scored with the
# weighted-F1 scorer `f1` under 3-fold CV on four workers.
# The grid has 6 * 8 * 4 * 5 * 4 = 3840 combinations, i.e. 11520 CV fits.
# X_train / y_train are whatever the most recently executed cell left behind.
clf = xgb.XGBClassifier()
parameters = {
    "eta": [0.05, 0.10, 0.15, 0.20, 0.25, 0.30],
    "max_depth": [3, 4, 5, 6, 8, 10, 12, 15],
    "min_child_weight": [1, 3, 5, 7],
    "gamma": [0.0, 0.1, 0.2, 0.3, 0.4],
    "colsample_bytree": [0.3, 0.4, 0.5, 0.7],
}

grid = GridSearchCV(
    estimator=clf,
    param_grid=parameters,
    scoring=f1,
    cv=3,
    n_jobs=4,
)

grid.fit(X_train, y_train)


In [None]:
# 5-fold stratified cross-validation of the multi-branch CNN (no LSTM stage),
# with half of every training fold held out for validation.
#
# The per-branch inputs are rebuilt from dataset['data'] here. An earlier cell
# rebinds `data` to the raw dataset array, so indexing `data[0]` and iterating
# `for d in data` no longer referred to per-branch arrays and this cell failed
# when the notebook was run top to bottom.
cnn_inputs = [
    np.array([sample[k].reshape((*input_shape, 1)) for sample in dataset['data']])
    for k in range(adjs)
]
total_acc = 0
total_f1 = 0
n_splits = 5
skf = StratifiedKFold(n_splits=n_splits, shuffle=True)
i = 0
for train_idx, test_idx in skf.split(cnn_inputs[0], labels):
    i+=1
    # Every branch input is indexed with the same fold indices.
    x_train, y_train = [d[train_idx] for d in cnn_inputs], labels[train_idx]
    x_test, y_test = [d[test_idx] for d in cnn_inputs], labels[test_idx]

    y_train_cat = to_categorical(y_train)
    y_test_cat = to_categorical(y_test)

    # One independent Conv2D -> MaxPool2D -> Flatten branch per input matrix.
    cnns = []
    for _ in range(adjs):
        input_a = Input((*input_shape, 1))
        x = Conv2D(8, 3)(input_a)
        x = MaxPool2D(2)(x)
        x = Flatten()(x)
        x = Model(inputs=[input_a], outputs=x)
        cnns.append(x)

    combine = Concatenate()([x.output for x in cnns])
    z = Dense(classes, activation='softmax')(combine)

    model = Model(inputs=[x.input for x in cnns], outputs=z)
    model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy', f1_metric])
    model.fit(x_train, y_train_cat, batch_size=32, epochs=25, shuffle=True, validation_split=0.5)
    y_pred = model.predict(x_test).argmax(axis=-1)
    acc = accuracy_score(y_test, y_pred)
    f1 = f1_score(y_test, y_pred, average='weighted')
    print("Fold accuracy: {:2.2f}".format(acc))
    print("Fold f1: {:2.2f}".format(f1))
    matrix = confusion_matrix(y_test, y_pred)
    print("Fold confusion matrix: \n {}".format(matrix))
    # Each fold contributes 1/n_splits, so the totals end up as fold means.
    total_acc += acc/n_splits
    total_f1 += f1/n_splits

print("Total model accuracy: {:2.2f}".format(total_acc))
print("Total model f1: {:2.2f}".format(total_f1))