In [229]:
import pandas as pd
from utils import get_dfs, extract_classes
from sklearn.model_selection import train_test_split
from sklearn import preprocessing
from keras import backend as K
from keras.utils import to_categorical
from tensorflow import keras
# from model import run_model


In [311]:
import tensorflow as tf
import tensorflow.keras.layers as tfl

def get_model(input_shape, num_classes):
    inputs = tf.keras.Input(shape=(input_shape[1], input_shape[2]))

    model = tfl.Dropout(0.05)(inputs)
    model = tfl.Conv1D(filters=16, kernel_size=120, strides=6, padding="same")(model)
    model = tfl.ReLU()(model)
    model = tfl.MaxPool1D(pool_size=2, strides=2, padding='same')(model)

    model = tfl.Conv1D(filters=32, kernel_size=3, strides=1, padding="same")(model)
    model = tfl.ReLU()(model)
    model = tfl.MaxPool1D(pool_size=2, strides=2, padding='same')(model)

    model = tfl.Conv1D(filters=80, kernel_size=3, strides=1, padding="same")(model)
    model = tfl.ReLU()(model)
    model = tfl.MaxPool1D(pool_size=2, strides=2, padding='same')(model)

    model = tfl.Conv1D(filters=80, kernel_size=3, strides=1, padding="same")(model)
    model = tfl.ReLU()(model)
    model = tfl.MaxPool1D(pool_size=2, strides=2, padding='same')(model)

    model = tfl.Conv1D(filters=80, kernel_size=3, strides=1, padding="same")(model)
    model = tfl.ReLU()(model)
    model = tfl.MaxPool1D(pool_size=2, strides=2, padding='same')(model)

    model = tfl.Conv1D(filters=80, kernel_size=3, strides=1, padding="same")(model)
    model = tfl.ReLU()(model)
    model = tfl.MaxPool1D(pool_size=2, strides=2, padding='same')(model)

    model = tfl.Conv1D(filters=80, kernel_size=3, strides=1, padding="same")(model)
    model = tfl.ReLU()(model)
    model = tfl.MaxPool1D(pool_size=2, strides=2, padding='same')(model)

    model = tfl.Conv1D(filters=80, kernel_size=3, strides=1, padding="same")(model)
    model = tfl.ReLU()(model)
    model = tfl.MaxPool1D(pool_size=2, strides=2, padding='same')(model)

    model = tfl.LSTM(128, return_sequences=True)(model)
    model = tfl.ReLU()(model)

    model = tfl.LSTM(128, return_sequences=True)(model)
    model = tfl.ReLU()(model)

    model = tfl.LSTM(128, return_sequences=True)(model)
    model = tfl.ReLU()(model)

    prediction_layer = tfl.Dense(num_classes, activation='softmax')
    # prediction_layer = tfl.Dropout(0.5)(prediction_layer)

    outputs = prediction_layer(model)
    model = tf.keras.Model(inputs, outputs)

    return model


def run_model(dataset_train, dataset_val, input_shape, num_classes, epochs=10):
    es_callback = keras.callbacks.EarlyStopping(monitor="val_loss", min_delta=0, patience=5)
    model = get_model(input_shape, num_classes)
    model.compile(optimizer='adam',
                  loss='sparse_categorical_crossentropy',
                  metrics=['accuracy', 'mean_absolute_error', 'categorical_accuracy', 'categorical_crossentropy'])

    path_checkpoint = "model_checkpoint.h5"

    modelckpt_callback = keras.callbacks.ModelCheckpoint(
        monitor="sparse_categorical_crossentropy",
        filepath=path_checkpoint,
        verbose=1,
        save_weights_only=True,
        save_best_only=True,
    )

    hist = model.fit(dataset_train, epochs=epochs, validation_data=dataset_val)

    return model

In [82]:
hdf_file_path = [
    "../data/20221029-171117.hdf5",
    "../data/20221029-192231.hdf5",
    "../data/20221029-200201.hdf5",
    "../data/20221029-202757.hdf5",
    "../data/othmane_assis_EEG_20221029-231521.hdf5"
]

result_filtered = None
result_events = None

for path in hdf_file_path:
    _, df_events, df_filtered, _ = get_dfs(path)
    if result_filtered is None:
        result_filtered = df_filtered
    else:
        result_filtered = pd.concat([result_filtered, df_filtered])

    if result_events is None:
        result_events = df_events
    else:
        result_events = pd.concat([result_events, df_events])
# Clean events to have only 3 classes
result_events.replace('"repos"', 'neutral', inplace=True)
result_events.replace(['"calme"', '"lent"'], 'positive', inplace=True)
result_events.replace(['"rapide"', '"agite"'], 'negative', inplace=True)
result_events = result_events[result_events.data != '"fin"']
result_events = result_events[result_events.data != '"calme"']
result_events = result_events[result_events.data != '"agite"']
result_events = result_events[result_events.data != '"interuption"']

In [83]:
result_filtered

Unnamed: 0,Fpz
2022-10-29 17:11:18.410303,-4.440892e-16
2022-10-29 17:11:18.420303,2.575171e+00
2022-10-29 17:11:18.430303,5.821741e+00
2022-10-29 17:11:18.440303,2.818556e+00
2022-10-29 17:11:18.450303,-4.639568e-01
...,...
2022-10-29 23:23:28.451257,-1.243556e+00
2022-10-29 23:23:28.461257,1.930139e+00
2022-10-29 23:23:28.471257,2.632404e+00
2022-10-29 23:23:28.481257,2.252475e+00


In [84]:
result_events

Unnamed: 0,label,data
2022-10-29 17:11:28.111000000,start,neutral
2022-10-29 17:12:16.822699951,start,negative
2022-10-29 17:13:23.859899902,start,neutral
2022-10-29 17:14:01.688500000,start,positive
2022-10-29 17:14:58.447899902,start,negative
...,...,...
2022-10-29 23:22:04.156000000,start,neutral
2022-10-29 23:22:23.637600098,start,negative
2022-10-29 23:22:45.314100098,start,positive
2022-10-29 23:23:04.535300049,start,neutral


In [105]:
print(result_events.data.unique())

['neutral' 'negative' 'positive']


In [420]:
def prepare_ts_for_training(ts_df, event_df):
    df_lag = ts_df.copy()
    for i in range(1, 100):
        df_lag = df_lag.merge(ts_df.shift(i), how='inner', left_index=True, right_index=True, suffixes=('',f'_{i:02d}'))
    ts_df = df_lag.dropna().copy()
    del df_lag

    res = None
    classes = extract_classes(event_df)
    tmp_event_df = event_df.copy()
    for c in range(len(classes)):
        start_events = event_df.loc[(event_df.data == classes[c])]
        ref_start_date = start_events.index[0]
        start = event_df.loc[(event_df.data == classes[c]) & (event_df.label == 'start')].index[0]
        end = event_df.loc[(event_df.data == classes[c]) & (event_df.label == 'stop')].index

        if len(end) == 0:
            end = ts_df.index[-1]
        else:
            end = end[0]

        if c < len(classes) - 2:
            tmp_event_df = tmp_event_df.drop(tmp_event_df.index[0])

        current = ts_df[start:end].head(30000)
        current['class'] = classes[c].replace('"', '')
        if res is None:
            res = current
        else:
            res = pd.concat([res, current])
    # res['date'] = res.index
    return res


In [421]:
final_df = prepare_ts_for_training(df_filtered, df_events)

In [422]:
final_df

Unnamed: 0,Fpz,Fpz_01,Fpz_02,Fpz_03,Fpz_04,Fpz_05,Fpz_06,Fpz_07,Fpz_08,Fpz_09,...,Fpz_91,Fpz_92,Fpz_93,Fpz_94,Fpz_95,Fpz_96,Fpz_97,Fpz_98,Fpz_99,class
2022-10-29 23:16:35.221257,-2.009272,-2.830380,0.678026,3.171745,-0.076711,-1.959629,-0.344298,-0.569524,-0.715359,1.636246,...,4.751460,5.493750,5.991572,4.044888,2.897071,3.491017,4.123820,5.224463,5.655671,repos
2022-10-29 23:16:35.231257,-1.542927,-2.009272,-2.830380,0.678026,3.171745,-0.076711,-1.959629,-0.344298,-0.569524,-0.715359,...,6.356289,4.751460,5.493750,5.991572,4.044888,2.897071,3.491017,4.123820,5.224463,repos
2022-10-29 23:16:35.241257,-2.104169,-1.542927,-2.009272,-2.830380,0.678026,3.171745,-0.076711,-1.959629,-0.344298,-0.569524,...,8.057441,6.356289,4.751460,5.493750,5.991572,4.044888,2.897071,3.491017,4.123820,repos
2022-10-29 23:16:35.251257,-3.855607,-2.104169,-1.542927,-2.009272,-2.830380,0.678026,3.171745,-0.076711,-1.959629,-0.344298,...,6.376958,8.057441,6.356289,4.751460,5.493750,5.991572,4.044888,2.897071,3.491017,repos
2022-10-29 23:16:35.261257,-7.218886,-3.855607,-2.104169,-1.542927,-2.009272,-2.830380,0.678026,3.171745,-0.076711,-1.959629,...,4.146209,6.376958,8.057441,6.356289,4.751460,5.493750,5.991572,4.044888,2.897071,repos
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2022-10-29 23:21:35.171257,-31.862460,-31.087578,-30.445404,-29.558095,-29.643976,-29.933575,-30.581057,-33.726111,-35.695717,-36.243121,...,27.861026,28.292434,28.726718,29.163887,29.603950,30.046916,30.492794,30.941594,31.393322,repos
2022-10-29 23:21:35.181257,-31.800383,-31.862460,-31.087578,-30.445404,-29.558095,-29.643976,-29.933575,-30.581057,-33.726111,-35.695717,...,27.432485,27.861026,28.292434,28.726718,29.163887,29.603950,30.046916,30.492794,30.941594,repos
2022-10-29 23:21:35.191257,-31.376425,-31.800383,-31.862460,-31.087578,-30.445404,-29.558095,-29.643976,-29.933575,-30.581057,-33.726111,...,27.006802,27.432485,27.861026,28.292434,28.726718,29.163887,29.603950,30.046916,30.492794,repos
2022-10-29 23:21:35.201257,-30.381883,-31.376425,-31.800383,-31.862460,-31.087578,-30.445404,-29.558095,-29.643976,-29.933575,-30.581057,...,26.583968,27.006802,27.432485,27.861026,28.292434,28.726718,29.163887,29.603950,30.046916,repos


In [424]:
split_fraction = 0.715
train_split = int(split_fraction * int(final_df.shape[0]))
step = 100

past = 8000
future = 3000
learning_rate = 0.001
batch_size = 15000
epochs = 10

start = past + future
end = start + train_split


def normalize(data, train_split):
    data_mean = data[:train_split].mean(axis=0)
    data_std = data[:train_split].std(axis=0)
    return (data - data_mean) / data_std


In [425]:
def load_ds():
    feature_keys = final_df.columns
    print(feature_keys)
    selected_features = [feature_keys[i] for i in range(len(feature_keys))]
    features = final_df[selected_features]

    le = preprocessing.LabelEncoder()
    features['class'] = le.fit_transform(features['class'])
    
    # features.index = final_df['class']
    # display(features.head())
    
    features = normalize(features.values, train_split)
    features = pd.DataFrame(features)
    features[features.columns[-1]] = le.fit_transform(final_df['class'])
    # display(features.head())

    train_data = features.loc[0 : train_split - 1]
    val_data = features.loc[train_split:]


    start = past + future
    end = start + train_split

    x_train = train_data[[i for i in range(len(feature_keys) - 1)]].values
    y_train = features.iloc[start:end][features.columns[-1]]

    sequence_length = int(past / step)

    dataset_train = keras.preprocessing.timeseries_dataset_from_array(
        x_train,
        y_train,
        sequence_length=sequence_length,
        sampling_rate=step,
        batch_size=batch_size,
    )

    sequence_length = int(past / step)


    x_end = len(val_data) - past - future
    label_start = train_split + past + future
    x_val = val_data.iloc[:x_end][[i for i in range(len(feature_keys) - 1)]].values
    y_val = features.iloc[label_start:][features.columns[-1]]
    # display(x_val.shape)
    dataset_val = keras.preprocessing.timeseries_dataset_from_array(
    x_val,
    y_val,
    sequence_length=sequence_length,
    sampling_rate=step,
    batch_size=batch_size,)
    return dataset_train, dataset_val

In [426]:
dataset_train, dataset_val = load_ds()


for batch in dataset_train.take(1):
    inputs, targets = batch

print("Input shape:", inputs.numpy().shape)
print("Target shape:", targets.numpy().shape)

Index(['Fpz', 'Fpz_01', 'Fpz_02', 'Fpz_03', 'Fpz_04', 'Fpz_05', 'Fpz_06',
       'Fpz_07', 'Fpz_08', 'Fpz_09',
       ...
       'Fpz_91', 'Fpz_92', 'Fpz_93', 'Fpz_94', 'Fpz_95', 'Fpz_96', 'Fpz_97',
       'Fpz_98', 'Fpz_99', 'class'],
      dtype='object', length=101)
Input shape: (15000, 80, 100)
Target shape: (15000,)


In [427]:
# inputs = keras.layers.Input(shape=(inputs.shape[1], inputs.shape[2]))
# lstm_out = keras.layers.LSTM(32)(inputs)
# outputs = keras.layers.Dense(3)(lstm_out)

# model = keras.Model(inputs=inputs, outputs=outputs)
# model.compile(optimizer=keras.optimizers.Adam(learning_rate=learning_rate), loss="mse")
# model.summary()




inputs = tf.keras.Input(shape=(inputs.shape[1], inputs.shape[2]))

model = tfl.Dropout(0.05)(inputs)
model = tfl.Conv1D(filters=16, kernel_size=120, strides=6, padding="same")(inputs)
model = tfl.ReLU()(model)
model = tfl.MaxPool1D(pool_size=2, strides=2, padding='same')(model)

model = tfl.Conv1D(filters=32, kernel_size=3, strides=1, padding="same")(model)
model = tfl.ReLU()(model)
model = tfl.MaxPool1D(pool_size=2, strides=2, padding='same')(model)

model = tfl.Conv1D(filters=80, kernel_size=3, strides=1, padding="same")(model)
model = tfl.ReLU()(model)
model = tfl.MaxPool1D(pool_size=2, strides=2, padding='same')(model)

model = tfl.Conv1D(filters=80, kernel_size=3, strides=1, padding="same")(model)
model = tfl.ReLU()(model)
model = tfl.MaxPool1D(pool_size=2, strides=2, padding='same')(model)

model = tfl.Conv1D(filters=80, kernel_size=3, strides=1, padding="same")(model)
model = tfl.ReLU()(model)
model = tfl.MaxPool1D(pool_size=2, strides=2, padding='same')(model)

model = tfl.Conv1D(filters=80, kernel_size=3, strides=1, padding="same")(model)
model = tfl.ReLU()(model)
model = tfl.MaxPool1D(pool_size=2, strides=2, padding='same')(model)

model = tfl.Conv1D(filters=80, kernel_size=3, strides=1, padding="same")(model)
model = tfl.ReLU()(model)
model = tfl.MaxPool1D(pool_size=2, strides=2, padding='same')(model)

model = tfl.Conv1D(filters=80, kernel_size=3, strides=1, padding="same")(model)
model = tfl.ReLU()(model)
model = tfl.MaxPool1D(pool_size=2, strides=2, padding='same')(model)

model = tfl.LSTM(128, return_sequences=True)(model)
model = tfl.ReLU()(model)

model = tfl.LSTM(128, return_sequences=True)(model)
model = tfl.ReLU()(model)

model = tfl.LSTM(128, return_sequences=True)(model)
model = tfl.ReLU()(model)

outputs = tfl.Dense(3, activation='softmax')(model)
# prediction_layer = tfl.Dropout(0.5)(prediction_layer)

model = keras.Model(inputs=inputs, outputs=outputs)
model.compile(optimizer=keras.optimizers.Adam(learning_rate=learning_rate), loss="sparse_categorical_crossentropy", metrics=['accuracy'])
model.summary()


Model: "model_80"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_97 (InputLayer)       [(None, 80, 100)]         0         
                                                                 
 conv1d_557 (Conv1D)         (None, 14, 16)            192016    
                                                                 
 re_lu_745 (ReLU)            (None, 14, 16)            0         
                                                                 
 max_pooling1d_553 (MaxPooli  (None, 7, 16)            0         
 ng1D)                                                           
                                                                 
 conv1d_558 (Conv1D)         (None, 7, 32)             1568      
                                                                 
 re_lu_746 (ReLU)            (None, 7, 32)             0         
                                                          

In [428]:
path_checkpoint = "model_checkpoint.h5"
es_callback = keras.callbacks.EarlyStopping(monitor="categorical_crossentropy", min_delta=0, patience=5)

modelckpt_callback = keras.callbacks.ModelCheckpoint(
    monitor="categorical_crossentropy",
    filepath=path_checkpoint,
    verbose=1,
    save_weights_only=True,
    save_best_only=True,
)

history = model.fit(
    dataset_train,
    epochs=epochs,
    validation_data=dataset_val,
)


Epoch 1/10
 2/29 [=>............................] - ETA: 2:23 - loss: 1.0987 - accuracy: 0.1333    