## Collect Training Data

In [None]:
from tkinter import *
import pandas as pd

# Labelling window: a single checkbox whose state (0 / 1) is the current label.
root = Tk()
var = IntVar()
button = Checkbutton(root, text='not confusing -> unpressed / confusing -> pressed', onvalue=1, offvalue=0, variable=var)
# check_flag is compared with the first 'Delta' value of training_data.csv by
# check_label(); save_idx is the numeric suffix of the next file it writes.
check_flag, save_idx = False, 56
# Parse the answer with int() rather than eval(): eval() would execute any
# expression typed at the prompt, int() only accepts a number.
label = int(input('Give initial label: 0: not confusing / 1: confusing'))

def check_label():
    """Poll ``training_data.csv`` and save a labelled copy of a new recording.

    The function re-arms itself on ``button`` every 2000 ms. On each tick it
    reads the eight band columns of ``training_data.csv`` and compares the
    first ``Delta`` value with the module-level ``check_flag``:

    * equal to ``int(check_flag)``: nothing happens;
    * otherwise, if the checkbox value differs from ``label``: nothing is
      saved, ``label`` takes the checkbox value and ``check_flag`` flips;
    * otherwise: ``check_flag`` flips, the first row is dropped, a ``Label``
      column filled with ``label`` is appended and the frame is written to
      ``training_data_<save_idx>.csv``; ``save_idx`` is then incremented.

    The re-arm happens in a ``finally`` clause so that a failing tick (for
    example a CSV that cannot be read at that moment) does not stop the
    polling loop; the exception itself still propagates to Tk.
    """
    global label, check_flag, save_idx

    band_columns = ['Delta', 'Theta', 'Alpha1', 'Alpha2', 'Beta1', 'Beta2', 'Gamma1', 'Gamma2']
    try:
        button_value = var.get()
        training_data = pd.read_csv('training_data.csv')[band_columns]

        if int(training_data['Delta'][0]) == int(check_flag):
            return

        # Both remaining outcomes flip the flag.
        check_flag = not check_flag

        if button_value != label:
            # The checkbox changed: adopt the new label and skip this recording.
            label = button_value
            return

        # Drop the first row (its Delta value is the one compared with
        # check_flag above) and tag every remaining row with the label.
        labelled = training_data.drop([0]).assign(Label=label)
        labelled.to_csv(f'training_data_{save_idx}.csv')
        save_idx += 1
    finally:
        button.after(2000, check_label)

# Show the checkbox and schedule the first check_label() call after 1000 ms.
button.pack()
button.after(1000, check_label)
root.update_idletasks()
# Run the Tk event loop; check_label() keeps rescheduling itself from inside it.
root.mainloop()

## Combine Training Data

Assumption: all files contain the same number of data points.

In [None]:
import pandas as pd
import os

# Columns kept from every per-session CSV file.
session_columns = ['Delta', 'Theta', 'Alpha1', 'Alpha2', 'Beta1', 'Beta2', 'Gamma1', 'Gamma2', 'Label']


def _load_session(csv_path):
    """Read one session CSV and prepend a 0-based ``timepoint`` column.

    The timepoints are generated from the file's own row count, so a file that
    is shorter or longer than the first one is neither padded with NaN nor
    truncated.
    """
    session = pd.read_csv(csv_path, encoding='utf-8')[session_columns]
    stamps = pd.DataFrame({'timepoint': range(len(session))})
    return stamps.join(session)


# Files are numbered consecutively starting at 26; scanning stops at the first
# missing index, and at index 999 at the latest.
sessions = [_load_session('training_data_26.csv')]
for i in range(27, 1000):
    csv_path = f'training_data_{i}.csv'
    if not os.path.isfile(csv_path):
        break
    sessions.append(_load_session(csv_path))

# Concatenate once at the end instead of re-copying the growing frame on every
# iteration. Each session keeps its own 0..n-1 index, so index values repeat.
df = pd.concat(sessions)

# Timepoint frame of the first session, kept under its original name.
timepoint = sessions[0][['timepoint']]

## Data Exploration

### Import Packages

In [None]:
import numpy as np
import pandas as pd
import seaborn as sns
from matplotlib import pyplot as plt
from sklearn.model_selection import train_test_split
import xgboost

### Check DataFrame

In [None]:
# Bare expression: displays the combined DataFrame as the cell output.
df

### Correlation Matrix

In [None]:
# Correlation between the columns of df, drawn as an annotated heat map
# on a 15 x 15 figure.
cor_matrix = df.corr()
plt.figure(figsize=(15, 15))
sns.heatmap(data=cor_matrix, annot=True)

### Observe EEG Wave Difference for Confusing / Non-Confusing Data

In [None]:
def confusing_wave_plot(df_confused, df_understand):
    """Plot every band for both groups on a 4 x 2 grid of axes.

    Each panel shows one band against ``timepoint``: ``df_confused`` is drawn
    first and ``df_understand`` second, both with ``errorbar=None``.

    Args:
        df_confused: frame with a ``timepoint`` column and the eight band columns.
        df_understand: frame with the same columns.
    """
    sns.set(rc={'figure.figsize':(30,8.27)})
    fig, axs = plt.subplots(4, 2)

    # Grid position -> band, listed in drawing order (row by row).
    panels = (
        ((0, 0), "Alpha1"), ((0, 1), "Alpha2"),
        ((1, 0), "Beta1"), ((1, 1), "Beta2"),
        ((2, 0), "Gamma1"), ((2, 1), "Gamma2"),
        ((3, 0), "Delta"), ((3, 1), "Theta"),
    )
    for (row, col), band in panels:
        for group in (df_confused, df_understand):
            sns.lineplot(data=group, x="timepoint", y=band, errorbar=None, ax=axs[row, col])

    fig.legend(labels=['confused','not confused'], loc="lower center", ncol=2)
    fig.suptitle('Confused vs Not Confused')

    plt.show()

In [None]:
# Rows labelled 1 form the "confused" subset.
query = 'Label == 1'
df_confused = df.query(query)

# Rows labelled 0 form the "not confused" subset.
query = 'Label == 0'
df_understand = df.query(query)

# Draw both subsets on the 4 x 2 grid.
confusing_wave_plot(df_confused, df_understand)

In [None]:
def confusing_wave_plot_v(df_confused, df_understand):
    """Plot every band for both groups on eight vertically stacked axes.

    Each axis shows one band against ``timepoint``: ``df_confused`` is drawn
    first and ``df_understand`` second, both with ``errorbar=None``.

    Args:
        df_confused: frame with a ``timepoint`` column and the eight band columns.
        df_understand: frame with the same columns.
    """
    sns.set(rc={'figure.figsize':(20,20)})
    fig, axs = plt.subplots(8, 1)

    # Bands from the top axis to the bottom one.
    bands = ("Alpha1", "Alpha2", "Beta1", "Beta2", "Gamma1", "Gamma2", "Delta", "Theta")
    for position, band in enumerate(bands):
        for group in (df_confused, df_understand):
            sns.lineplot(data=group, x="timepoint", y=band, errorbar=None, ax=axs[position])

    fig.legend(labels=['confused','not confused'], loc="lower center", ncol=2)
    fig.suptitle('Confused vs Not Confused')

    plt.show()

In [None]:
# Rows labelled 1 form the "confused" subset.
query = 'Label == 1'
df_confused = df.query(query)

# Rows labelled 0 form the "not confused" subset.
query = 'Label == 0'
df_understand = df.query(query)

# Draw both subsets on the eight stacked axes.
confusing_wave_plot_v(df_confused, df_understand)

## Training - XGBoost

In [None]:
# Feature matrix: the eight band columns. Target: the Label column.
X = df[['Delta', 'Theta', 'Alpha1', 'Alpha2', 'Beta1', 'Beta2', 'Gamma1', 'Gamma2']]
Y = df.Label

# Overall shapes, then the number of rows labelled 0 and labelled 1.
print(X.shape, Y.shape)
print(Y[Y == 0].shape, Y[Y == 1].shape)

In [None]:
def classify(X, Y, save_path='./xgb_model_m.json', load=False):
    """Fit an XGBoost classifier, save it, and print/plot its evaluation.

    Steps, in order: stratified 80/20 split (``random_state=42``), fit on the
    training part, predict the held-out part, save the model to ``save_path``,
    print the hold-out accuracy, plot a tree, print the 5-fold stratified
    cross-validation accuracy over the full ``X``/``Y``, and plot the feature
    importance.

    Args:
        X: feature table.
        Y: labels, also used to stratify the split.
        save_path: file the fitted model is saved to (and loaded from when
            ``load`` is true).
        load: when true, start from ``XGBClassifier().load_model(save_path)``
            instead of a freshly configured classifier.

    Returns:
        The fitted classifier.
    """
    from matplotlib.pylab import rcParams
    from sklearn.metrics import accuracy_score
    from sklearn.model_selection import StratifiedKFold
    from sklearn.model_selection import cross_val_score
    from xgboost import plot_importance
    from xgboost import plot_tree

    seed = 42
    x_train, x_test, y_train, y_test = train_test_split(
        X, Y, test_size=0.2, random_state=seed, stratify=Y)

    if load:
        # NOTE(review): fit() is still called below after load_model() --
        # confirm whether a loaded model is really meant to be fitted again.
        model = xgboost.XGBClassifier()
        model.load_model(save_path)
    else:
        # NOTE(review): both learning_rate and eta are passed with different
        # values; XGBoost documents eta as an alias of learning_rate, so
        # confirm which value is intended.
        model = xgboost.XGBClassifier(base_score=0.5, learning_rate=0.1, max_depth=6, objective='binary:logistic', eta=0.01)

    model.fit(x_train, y_train)

    predictions = [round(value) for value in model.predict(x_test)]

    model.save_model(save_path)

    holdout_accuracy = accuracy_score(y_test, predictions)
    print('Accuracy: %.2f%%' % (holdout_accuracy * 100.0))

    # Tree plot on a large canvas.
    rcParams['figure.figsize'] = 80, 50
    plot_tree(model)
    plt.show()

    # Cross-validation over the complete data set.
    folds = StratifiedKFold(n_splits=5)
    scores = cross_val_score(model, X, Y, cv=folds)
    print('Cross Validation Accuracy: %.2f%% (%.2f%%)' % (scores.mean() * 100, scores.std() * 100))
    print('')
    print('---------------------------------------------------------------------')
    print('')

    # Feature importance on a small canvas.
    print('Feature Importance')
    rcParams['figure.figsize'] = 5, 5
    plot_importance(model)
    plt.show()

    return model

In [None]:
# Train with the default save path and load=False; the returned model is the cell output.
classify(X, Y)

## Inference - XGBoost

In [None]:
import numpy as np

def inference(X, model):
    """Return the rounded mean of ``model.predict(X)``.

    Args:
        X: samples handed to ``model.predict`` unchanged.
        model: any object with a ``predict`` method returning one value per row.

    Returns:
        ``round(sum(predictions) / len(predictions))``.
    """
    votes = model.predict(X)
    mean_vote = np.sum(votes) / len(votes)
    return round(mean_vote)

In [None]:

import pandas as pd
from tkinter import *
import xgboost
import os

# Indicator window. Despite their names, screen_width / screen_height hold the
# window size used later: a third of the screen width and half its height.
root = Tk()
root.title('Confusing Detector')
screen_width = root.winfo_screenwidth() / 3
screen_height = root.winfo_screenheight() / 2

# Full-screen-sized frame whose background colour shows the result; starts green.
frame = Frame(root, width=root.winfo_screenwidth(), height=root.winfo_screenheight(), bg='green')

# State shared with main(): number of results printed so far, and the rounded
# modification time main() compares the data file against.
print_num = 0
prev_time = 0

def main(model, eeg_path='./realtime_wave.csv'):
    """Poll ``eeg_path``, classify its content and colour ``frame`` accordingly.

    The function reschedules itself on ``frame``: after 500 ms when the file's
    rounded modification time equals ``prev_time``, after 1000 ms otherwise
    (including after an error). When the file has changed, its eight band
    columns are passed to ``inference``; the result is printed without a
    newline (a line break follows every 80 results) and the frame turns green
    for a falsy result, red otherwise.

    Args:
        model: object handed to ``inference`` as the model.
        eeg_path: CSV file to watch. It is forwarded on every reschedule, so a
            non-default path stays in effect for the whole polling loop.
    """
    import logging

    global prev_time, print_num

    delay_ms = 1000
    try:
        curr_time = round(os.path.getmtime(eeg_path))
        if curr_time == prev_time:
            # Nothing new since the last processed version: look again sooner.
            delay_ms = 500
            return

        # Step 1: read data
        X = pd.read_csv(eeg_path)[['Delta', 'Theta', 'Alpha1', 'Alpha2', 'Beta1', 'Beta2', 'Gamma1', 'Gamma2']]

        # Step 2: preprocess data -- none is applied here

        # Step 3: run the model
        result = inference(X, model)

        # Mark this version of the file as processed only now, so that a
        # failed read or inference is retried on the next tick.
        prev_time = curr_time

        # Step 4: print the result, 80 results per line
        print(result, end='')
        print_num += 1
        if print_num % 80 == 0:
            print()

        frame['bg'] = 'red' if result else 'green'
    except Exception:
        # Best effort: one bad tick must not stop the loop. The traceback is
        # kept available at debug level instead of being discarded.
        logging.getLogger(__name__).debug('realtime update skipped', exc_info=True)
    finally:
        # Step 5: repeat these steps from time to time
        frame.after(delay_ms, main, model, eeg_path)

# Load the classifier from the JSON file that classify() writes by default.
model = xgboost.XGBClassifier()
model.load_model('./xgb_model_m.json')

# Show the frame and schedule the first main() call after 1000 ms.
frame.pack()
frame.after(1000, main, model)
root.update_idletasks()
# The window is hidden while its geometry is set and shown again afterwards.
root.deiconify()
root.withdraw()
# Size: screen_width x screen_height; position: x = 2 * screen_width - 20, y = 10.
root.geometry('%dx%d+%d+%d' % (screen_width, screen_height, screen_width * 2 - 20, 10))

root.deiconify()
root.mainloop()

## Training - LSTM

### Normalization & split data

In [None]:
from sklearn.preprocessing import StandardScaler

X_ma = df[['Delta','Theta','Alpha1','Alpha2','Beta1','Beta2','Gamma1','Gamma2']]
Y_ma = df.Label
n_features = X_ma.shape[1]

# Split BEFORE scaling: fitting the scaler on all rows would let the mean and
# variance of the test rows influence the training inputs (data leakage).
# random_state and stratification are the same as before.
x_train_raw, x_test_raw, y_train, y_test = train_test_split(
    X_ma, Y_ma, test_size=0.2, random_state=42, stratify=Y_ma)

# The fitted scaler is kept in `scaler` so the same transformation can be
# applied to other data later.
scaler = StandardScaler().fit(x_train_raw)

# Shape (samples, n_features, 1), matching the model input.
x_train = scaler.transform(x_train_raw).reshape(-1, n_features, 1)
x_test = scaler.transform(x_test_raw).reshape(-1, n_features, 1)

# Full data set scaled with the training statistics, kept under the original names.
x_lstm = scaler.transform(X_ma)
y_lstm = Y_ma

x_train.shape, x_test.shape,y_train.shape,y_test.shape

### Construct model

In [None]:
import tensorflow.compat.v1 as tf
from tensorflow.keras.layers import Dense, Activation, Flatten, concatenate, Input, Dropout, LSTM, Bidirectional,BatchNormalization,PReLU,ReLU,Reshape
from tensorflow.keras.models import load_model
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
from tensorflow import keras

# One sample has shape (n_features, 1).
inputs = tf.keras.Input(shape=(n_features,1))

# 64-unit ReLU layer with an L2 kernel regulariser (default settings).
Dense1 = Dense(64, activation = 'relu',kernel_regularizer=keras.regularizers.l2())(inputs)

# Two bidirectional LSTM layers (256 then 128 units) that return full
# sequences, each followed by 30 % dropout.
lstm_1=  Bidirectional(LSTM(256, return_sequences = True))(Dense1)
drop = Dropout(0.3)(lstm_1)
lstm_3=  Bidirectional(LSTM(128, return_sequences = True))(drop)
drop2 = Dropout(0.3)(lstm_3)

# Flatten the sequence output for the dense head.
flat = Flatten()(drop2)

# Dense head ending in a single sigmoid unit.
Dense_2 = Dense(128, activation = 'relu')(flat)
outputs = Dense(1, activation='sigmoid')(Dense_2)

model = tf.keras.Model(inputs, outputs)

model.summary()

### Main training function

In [None]:
def train_model(model,x_train, y_train,x_test,y_test, save_to, epoch):
    """Compile and fit ``model``, then plot its accuracy and loss histories.

    The model is compiled with Adam (learning rate 0.001), binary
    cross-entropy and the accuracy metric, and fitted with batch size 20 using
    ``(x_test, y_test)`` as validation data. Three callbacks are active:
    early stopping on ``val_loss`` (patience 10), a checkpoint that writes the
    best ``val_accuracy`` model to ``save_to + 'best_model.h5'``, and a
    learning-rate schedule of ``0.001 * exp(-epoch / 10)``.

    Args:
        model: Keras model to train.
        x_train, y_train: training inputs and targets.
        x_test, y_test: data passed as ``validation_data``.
        save_to: prefix put directly in front of ``'best_model.h5'``.
        epoch: maximum number of epochs.

    Returns:
        ``(model, history)`` where ``history`` is the object returned by ``fit``.
    """
    from matplotlib.pylab import rcParams

    optimizer = keras.optimizers.Adam(learning_rate=0.001)

    stopper = EarlyStopping(monitor='val_loss', mode='min', verbose=1, patience=10)
    checkpoint = ModelCheckpoint(save_to + 'best_model.h5', monitor='val_accuracy', mode='max', verbose=1, save_best_only=True)
    decay = tf.keras.callbacks.LearningRateScheduler(lambda step: 0.001 * np.exp(-step / 10.))

    model.compile(optimizer=optimizer,
                  loss=['binary_crossentropy'],
                  metrics=['accuracy'])

    history = model.fit(
        x_train, y_train,
        batch_size=20,
        epochs=epoch,
        validation_data=(x_test, y_test),
        callbacks=[stopper, checkpoint, decay],
        verbose=0,
    )

    rcParams['figure.figsize'] = 5,5

    # One figure per metric: (training key, validation key, title, y label).
    curves = (
        ('accuracy', 'val_accuracy', 'model accuracy', 'accuracy'),
        ('loss', 'val_loss', 'model loss', 'loss'),
    )
    for train_key, val_key, title, y_label in curves:
        plt.plot(history.history[train_key])
        plt.plot(history.history[val_key])
        plt.title(title)
        plt.ylabel(y_label)
        plt.xlabel('epoch')
        plt.legend(['train', 'test'], loc='upper left')
        plt.show()

    return model,history
        
# Train for at most 100 epochs; the checkpoint is written to './best_model.h5'.
model,history = train_model(model, x_train, y_train,x_test, y_test, save_to= './', epoch = 100)

## Inference - LSTM

In [None]:
import numpy as np

def inference(X, model):
    """Print the mean of the first prediction column and return it rounded.

    Args:
        X: samples handed to ``model.predict`` unchanged.
        model: object whose ``predict`` returns a 2-D array; only column 0 is used.

    Returns:
        ``round`` of the mean of column 0.
    """
    scores = model.predict(X)[:, 0]
    prob = np.sum(scores) / len(scores)
    print(prob)
    return round(prob)

In [None]:
import pandas as pd
from tkinter import *
import os

# Indicator window. Despite their names, screen_width / screen_height hold the
# window size used later: a third of the screen width and half its height.
root = Tk()
root.title('Confusing Detector')
screen_width = root.winfo_screenwidth() / 3
screen_height = root.winfo_screenheight() / 2

# Full-screen-sized frame whose background colour shows the result; starts green.
frame = Frame(root, width=root.winfo_screenwidth(), height=root.winfo_screenheight(), bg='green')

# State shared with main(): number of results printed so far, and the rounded
# modification time main() compares the data file against.
print_num = 0
prev_time = 0

def main(model, eeg_path='./realtime_wave.csv'):
    """Poll ``eeg_path``, classify its content and colour ``frame`` accordingly.

    The function reschedules itself on ``frame`` every 500 ms. When the file's
    rounded modification time differs from ``prev_time``, its eight band
    columns are standardised, reshaped to ``(-1, n_features, 1)`` and passed
    to ``inference``; the result is printed without a newline (a line break
    follows every 80 results) and the frame turns green for a falsy result,
    red otherwise.

    Args:
        model: object handed to ``inference`` as the model.
        eeg_path: CSV file to watch. It is forwarded on every reschedule, so a
            non-default path stays in effect for the whole polling loop.
    """
    import logging

    global prev_time, print_num

    try:
        curr_time = round(os.path.getmtime(eeg_path))
        if curr_time == prev_time:
            return

        # Step 1: read data
        X = pd.read_csv(eeg_path)[['Delta', 'Theta', 'Alpha1', 'Alpha2', 'Beta1', 'Beta2', 'Gamma1', 'Gamma2']]

        # Step 2: preprocess data
        # NOTE(review): the scaler is fitted on the rows of this file alone, so
        # the scaling differs from whatever was applied to the training data --
        # confirm whether the training-time scaler should be reused here.
        X_lstm = StandardScaler().fit_transform(X)
        n_features = X.shape[1]
        X_inference = np.array(X_lstm).reshape(-1, n_features, 1)

        # Step 3: run the model
        result = inference(X_inference, model)

        # Mark this version of the file as processed only now, so that a
        # failed read or inference is retried on the next tick.
        prev_time = curr_time

        # Step 4: print the result, 80 results per line
        print(result, end='')
        print_num += 1
        if print_num % 80 == 0:
            print()

        frame['bg'] = 'red' if result else 'green'
    except Exception:
        # Best effort: one bad tick must not stop the loop. The traceback is
        # kept available at debug level instead of being discarded.
        logging.getLogger(__name__).debug('realtime update skipped', exc_info=True)
    finally:
        # Step 5: repeat these steps from time to time
        frame.after(500, main, model, eeg_path)

# Rebuild the network layer by layer before loading weights into it.
# NOTE(review): presumably this must stay identical to the architecture that
# produced best_model.h5 -- confirm whenever the training cell changes.
inputs = tf.keras.Input(shape=(n_features,1))
Dense1 = Dense(64, activation = 'relu',kernel_regularizer=keras.regularizers.l2())(inputs)
lstm_1=  Bidirectional(LSTM(256, return_sequences = True))(Dense1)
drop = Dropout(0.3)(lstm_1)
lstm_3=  Bidirectional(LSTM(128, return_sequences = True))(drop)
drop2 = Dropout(0.3)(lstm_3)
flat = Flatten()(drop2)
Dense_2 = Dense(128, activation = 'relu')(flat)
outputs = Dense(1, activation='sigmoid')(Dense_2)
model = tf.keras.Model(inputs, outputs)
# Load the weights from the checkpoint file in the working directory.
model.load_weights("./best_model.h5")

# Show the frame and schedule the first main() call after 1000 ms.
frame.pack()
frame.after(1000, main, model)
root.update_idletasks()
# The window is hidden while its geometry is set and shown again afterwards.
root.deiconify()
root.withdraw()
# Size: screen_width x screen_height; position: x = 2 * screen_width - 20, y = 10.
root.geometry('%dx%d+%d+%d' % (screen_width, screen_height, screen_width * 2 - 20, 10))

root.deiconify()
root.mainloop()