In [None]:
from mne.io import read_raw_edf
import pandas as pd
from matplotlib import pyplot as plt
import numpy as np
import plotly.express as px

def check_activity_equal(fileindex):
    """Check that the EDF 'Activity' channel agrees with the CSV activity column.

    The 'Activity' channel of ``data/{fileindex}.edf`` is averaged over
    consecutive chunks of 5000 samples (the final chunk may be shorter) and
    compared against the 'Activity (Mean, 10s)' column of
    ``data/{fileindex}.csv``.
    # NOTE(review): 5000 samples per 10 s row presumably means 500 Hz — confirm.

    Args:
        fileindex: Index used to build both the ``.edf`` and ``.csv`` paths.

    Returns:
        A 3-tuple ``(ok, computed, expected)``. ``ok`` is False when the two
        vectors differ in length or are not ``np.allclose`` with ``atol=.001``
        (a message is printed in either case). ``computed`` is a list of
        chunk means; ``expected`` is the CSV column as a NumPy array.
    """
    window = 5000
    signal = read_edf_by_fileindex(fileindex).get_data(picks='Activity')[0]
    computed_activity_means_from_edf = [
        signal[start:start + window].mean()
        for start in range(0, len(signal), window)
    ]
    del signal  # drop the reference to the full-resolution samples early

    csv_table = pd.read_csv(f'data/{fileindex}.csv')
    true_activity_means_from_csv = csv_table['Activity (Mean, 10s)'].to_numpy()

    same_length = len(computed_activity_means_from_edf) == len(true_activity_means_from_csv)
    if not same_length:
        print('Activity vector lengths not equal!')
        return False, computed_activity_means_from_edf, true_activity_means_from_csv

    if np.allclose(computed_activity_means_from_edf, true_activity_means_from_csv, atol=.001):
        return True, computed_activity_means_from_edf, true_activity_means_from_csv

    print('Activity vectors not equal within atol of .001')
    return False, computed_activity_means_from_edf, true_activity_means_from_csv
def check_emg_equal(fileindex):
    """Check that the EDF 'EMG' channel agrees with the CSV 'EEG 2' column.

    The 'EMG' channel of ``data/{fileindex}.edf`` (``read_edf_by_fileindex``
    renames 'EEG 2' to 'EMG') is averaged over consecutive chunks of 5000
    samples (the final chunk may be shorter) and compared against the
    'EEG 2 (Mean, 10s)' column of ``data/{fileindex}.csv`` divided by 1000000.

    Args:
        fileindex: Index used to build both the ``.edf`` and ``.csv`` paths.

    Returns:
        A 3-tuple ``(ok, computed, expected)``. ``ok`` is False when the two
        vectors differ in length or are not ``np.allclose`` within ``atol``
        (a message is printed in either case). ``computed`` is a list of
        chunk means; ``expected`` is the scaled CSV column as a NumPy array.
    """
    # Single source of truth for the tolerance so the printed message can
    # never disagree with the value actually passed to np.allclose.
    # NOTE(review): the CSV values are divided by 1e6 (presumably uV -> V), so
    # an absolute tolerance of .01 may be much larger than the signal itself
    # and make this check pass trivially — confirm the intended tolerance.
    atol = .01
    csv_filepath = f'data/{fileindex}.csv'
    raw = read_edf_by_fileindex(fileindex)
    data = raw.get_data(picks='EMG')[0]
    computed_emg_means_from_edf = []
    for i in range(0,len(data),5000):
        computed_emg_means_from_edf.append(data[i:i+5000].mean())
    del data
    true_emg_means_from_csv = pd.read_csv(csv_filepath)['EEG 2 (Mean, 10s)'].to_numpy()/1000000
    if(len(computed_emg_means_from_edf) != len(true_emg_means_from_csv)):
        print('EMG vector lengths not equal!')
        return False,computed_emg_means_from_edf,true_emg_means_from_csv
    if(not np.allclose(computed_emg_means_from_edf,true_emg_means_from_csv,atol=atol)):
        print(f'EMG vectors not equal within atol of {atol}')
        return False,computed_emg_means_from_edf,true_emg_means_from_csv
    return True,computed_emg_means_from_edf,true_emg_means_from_csv
def read_edf_by_fileindex(fileindex):
    """Open ``data/{fileindex}.edf`` and normalise its channel names and types.

    'EEG 1' is renamed to 'EEG' and 'EEG 2' to 'EMG'; the two are then typed
    as 'eeg' and 'emg', and every other listed channel as 'misc'.

    Args:
        fileindex: Index used to build the ``.edf`` path.

    Returns:
        The object returned by ``read_raw_edf`` after renaming and re-typing.
    """
    channel_renames = {'EEG 1': 'EEG', 'EEG 2': 'EMG'}
    channel_types = {
        'Activity': 'misc',
        'EEG': 'eeg',
        'EMG': 'emg',
        'HD BattVoltage': 'misc',
        'On Time': 'misc',
        'Signal Strength': 'misc',
        'Temperature': 'misc',
    }
    raw = read_raw_edf(f'data/{fileindex}.edf', verbose=False)
    raw.rename_channels(channel_renames)
    # Types are keyed by the post-rename names, so this must follow the rename.
    raw.set_channel_types(channel_types)
    return raw

In [None]:
# Run both EDF-vs-CSV consistency checks for file indices 0..31 and print
# one line per file: index, activity check result, EMG check result.
# Only the boolean from each check is kept; the returned vectors are discarded.
for i in range(32):
    activity,_,_ = check_activity_equal(i)
    emg,_,_ = check_emg_equal(i)
    print(i,activity,emg)

"""
    Here we check whether files are aligned via proxy activity signals. For each csv file received from med school, there are computed...
    Only fileindex 14 returns False.
"""

In [None]:
# Inspect file 14 (reported above as the only failing recording): find the
# window where the EDF-derived mean and the CSV mean differ the most, then
# plot both series around that window.
eq,a,b = check_activity_equal(14)
largest = 0
largest_index = -1
for i,(x,y) in enumerate(zip(a,b)):
    if(abs(x-y)>largest):
        largest = abs(x-y)
        largest_index = i
        print(largest)
# A bare expression in the middle of a cell is not displayed, so print the
# final result explicitly.
print(largest, largest_index)
# Clamp the left edge at 0: a negative start would be interpreted as an
# offset from the end of the sequence and select the wrong (or no) windows
# whenever the largest difference lies within the first 10 windows.
window_start = max(largest_index-10, 0)
window_stop = largest_index+10
plt.plot(a[window_start:window_stop])
plt.plot(b[window_start:window_stop])

In [None]:
# format X,Y
# X: the EEG trace of file 0 as a 1-D array. get_data returns a 2-D array, so
# the single picked channel is selected with [0] (as the check_* helpers do);
# otherwise sample slicing such as X[:4000000] would act on the channel axis.
X = read_edf_by_fileindex(0).get_data(picks='EEG')[0]
Y = pd.read_csv('data/0.csv')['label']
# Map each label to an arbitrary plotting level so it can be overlaid on the
# EEG trace. A mapping is used instead of assigning floats into the label
# column in place, which depends on the column accepting mixed str/float
# values. Labels other than P/S/W become NaN.
label_levels = {'P': .005, 'S': .003, 'W': .0045}
# Each CSV row covers 5000 samples, so repeat every level 5000 times to get
# one value per sample.
y = np.repeat(Y.map(label_levels).to_numpy(dtype=float), 5000)
# y = pd.Categorical(y).codes
del Y

In [None]:
# Overlay the EEG trace and the per-sample label levels in an interactive
# plotly figure: first 4,000,000 samples, keeping every 20th one.
# np.ravel makes the slice act on samples even if X is still 2-D
# (channels, samples); for a 1-D X it is a no-op.
plot_df = pd.DataFrame([np.ravel(X)[:4000000:20],y[:4000000:20]],index=['X','label']).T
fig = px.line(data_frame=plot_df,y=['X','label'])
fig.update_layout(xaxis=dict(rangeslider=dict(visible=True),
                             type="linear"))
# Initial zoom: the first 2250 plotted points; the range slider shows the rest.
fig['layout']['xaxis'].update(range=[0,2250])
fig.show(renderer='browser')

In [None]:
# Use this class to standardize the values of the features.
from sklearn.preprocessing import StandardScaler

# Labels of file 0, encoded as integer category codes.
Y = pd.read_csv('data/0.csv')['label']
y = pd.Categorical(Y).codes

# EEG trace of file 0, transposed so the scaler sees it as a single column.
X = read_edf_by_fileindex(0).get_data(picks='EEG').T
print(X.mean(),X.var())

# Fit on the whole recording, show the fitted statistics, then standardize.
# NOTE(review): the scaler is fitted before any train/test split is made.
scaler = StandardScaler().fit(X)
print(scaler.mean_)
print(scaler.var_)
X = scaler.transform(X)
print(X.mean(),X.var())

In [None]:
# Cut the standardized signal into consecutive 5000-sample windows and drop
# singleton axes so each window becomes one row.
# NOTE(review): if len(X) is not a multiple of 5000 the last window is
# shorter than the others — confirm the recordings always divide evenly.
X_win = np.array(
    [X[start:start+5000] for start in range(0,len(X),5000)]
).squeeze()


In [None]:
# Use this class to create a multilayer perceptron (MLP) classifier.
from sklearn.neural_network import MLPClassifier

# One hidden layer of 512 ReLU units trained with Adam; 20% of the training
# data is held out internally for early stopping.
# NOTE(review): no random_state is passed, so runs are not reproducible.
clf = MLPClassifier(
    hidden_layer_sizes=(512,),
    activation='relu',
    solver='adam',
    verbose=True,
    validation_fraction=.2,
    early_stopping=True,
)

In [None]:
from sklearn.model_selection import train_test_split
# Split the single-recording windows (X_win) and their labels (y) built in
# the cells above, so this cell runs on a fresh kernel.
# First split: 80% train+val / 20% test, stratified by label.
X_train,X_test,y_train,y_test = train_test_split(X_win,y,test_size=.2,random_state=42,stratify=y)
# Second split: 20% of the remaining data becomes the validation set
# (overall 64% train / 16% val / 20% test).
X_train,X_val,y_train,y_val = train_test_split(X_train,y_train,test_size=.2,random_state=42,stratify=y_train)

In [None]:
# Train the scikit-learn MLP on the training split.
# NOTE(review): f1_score, precision_score and recall_score are imported here
# but not used in this cell.
from sklearn.metrics import f1_score, precision_score, recall_score
clf.fit(X_train,y_train)

In [None]:
# Plot the loss curve recorded by the fitted MLP classifier.
plt.plot(clf.loss_curve_)

In [None]:
# Predict class codes for the held-out test windows.
y_pred = clf.predict(X_test)

In [None]:
# Fraction of test windows whose predicted code matches the true code.
from sklearn.metrics import accuracy_score
accuracy_score(y_test,y_pred)

In [None]:
# Display the raw predictions for a quick look.
y_pred

In [None]:
import tensorflow as tf
from tensorflow import keras

BATCH_SIZE = 64
EPOCHS = 512

# Baseline Keras network: one 100-unit ReLU layer, 50% dropout and a 3-way
# softmax output. The input width is taken from the training windows.
model = keras.Sequential([
    keras.layers.Dense(100, activation='relu', input_shape=(X_train.shape[-1],)),
    keras.layers.Dropout(0.5),
    keras.layers.Dense(3, activation='softmax'),
])

# Targets are fed one-hot encoded, hence categorical cross-entropy on
# probabilities (from_logits=False matches the softmax output layer).
model.compile(
    optimizer=keras.optimizers.Adam(learning_rate=1e-3),
    loss=keras.losses.CategoricalCrossentropy(from_logits=False),
    metrics=[
        keras.metrics.CategoricalAccuracy(name='categorical_accuracy'),
        keras.metrics.Precision(name='precision'),
        keras.metrics.Recall(name='recall'),
        keras.metrics.AUC(name='auc'),
    ],
)

# Train with three val_loss-driven callbacks: keep the best checkpoint on
# disk, halve the learning rate after 20 stagnant epochs (floor 0.0001), and
# stop after 50 stagnant epochs while restoring the best weights.
baseline_history = model.fit(
    X_train,
    tf.one_hot(y_train, depth=3),
    batch_size=BATCH_SIZE,
    epochs=EPOCHS,
    validation_data=(X_val, tf.one_hot(y_val, depth=3)),
    callbacks=[
        keras.callbacks.ModelCheckpoint(
            "best_model.h5", save_best_only=True, monitor="val_loss", verbose=1
        ),
        keras.callbacks.ReduceLROnPlateau(
            monitor="val_loss", factor=0.5, patience=20, min_lr=0.0001
        ),
        keras.callbacks.EarlyStopping(
            monitor="val_loss", patience=50, verbose=1, mode='min',
            restore_best_weights=True,
        ),
    ],
)

In [None]:
# Display the metric names reported by the compiled model.
model.metrics_names

In [None]:
# List the metric keys recorded in the training history.
for key in baseline_history.history:
    print(key)
# Plot the recorded categorical accuracy across training.
plt.plot(baseline_history.history['categorical_accuracy'])
# Evaluate on the held-out test split, one-hot encoding the labels to match
# the training targets; the result is displayed as the cell output.
baseline_results = model.evaluate(X_test, tf.one_hot(y_test,depth=3),
                                    batch_size=64, verbose=0)
baseline_results

In [None]:
# Display the current label vector for a quick look.
y

In [None]:
# Use this class to standardize the values of the features.
# Imported once here rather than on every loop iteration.
from sklearn.preprocessing import StandardScaler

# Build per-file window matrices and label vectors for file indices 0..9.
Xs_win = []
ys = []
for i in range(10):
    X = read_edf_by_fileindex(i).get_data(picks='EEG').T
    Y = pd.read_csv(f'data/{i}.csv')['label']
    # NOTE(review): categories are inferred per file, so a file that lacks
    # one of the labels would get different integer codes than the others —
    # confirm every file contains the full label set.
    y = pd.Categorical(Y).codes
    ys.append(y)
    del Y
    # Standardize each recording with its own mean/variance and print the
    # statistics before and after as a sanity check.
    print(X.mean(),X.var())
    scaler = StandardScaler()
    scaler.fit(X)
    print(scaler.mean_)
    print(scaler.var_)
    X = scaler.transform(X)
    print(X.mean(),X.var())
    # Cut into consecutive 5000-sample windows. The window start index lives
    # inside the comprehension so it no longer overwrites the file index `i`.
    X_win = np.array([X[start:start+5000] for start in range(0,len(X),5000)]).squeeze()
    Xs_win.append(X_win)

In [None]:
# Stack the per-file windows and labels into one dataset. A single
# concatenate over the whole list avoids re-copying the growing array on
# every step and does not hard-code the number of files.
Xs = np.concatenate(Xs_win)
del Xs_win  # free the per-file copies
Ys = np.concatenate(ys)
del ys

In [None]:
# Use this class to create a multilayer perceptron (MLP) classifier.
from sklearn.neural_network import MLPClassifier

# Fresh classifier for the multi-file dataset: one hidden layer of 512 ReLU
# units trained with Adam; 20% of the training data is held out internally
# for early stopping.
# NOTE(review): no random_state is passed, so runs are not reproducible.
clf = MLPClassifier(
    hidden_layer_sizes=(512,),
    activation='relu',
    solver='adam',
    verbose=True,
    validation_fraction=.2,
    early_stopping=True,
)

In [None]:
from sklearn.model_selection import train_test_split

# First split: 80% train+val / 20% test, stratified by label.
X_train, X_test, y_train, y_test = train_test_split(
    Xs, Ys, test_size=.2, random_state=42, stratify=Ys
)
# The combined arrays are no longer needed once the splits exist.
del Xs, Ys
# Second split: 20% of the remaining data becomes the validation set
# (overall 64% train / 16% val / 20% test).
X_train, X_val, y_train, y_val = train_test_split(
    X_train, y_train, test_size=.2, random_state=42, stratify=y_train
)

In [None]:
# Train the scikit-learn MLP on the multi-file training split.
# NOTE(review): f1_score, precision_score and recall_score are imported here
# but not used in this cell.
from sklearn.metrics import f1_score, precision_score, recall_score
clf.fit(X_train,y_train)