In [1]:
import os
import pickle
import scipy.signal
from scipy import fft
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

In [2]:
DATA_PATH = os.path.realpath("../data/WESAD")

In [3]:
class Subject:

    def __init__(self, main_path, subject_number):
        self.name = f'S{subject_number}'
        self.subject_keys = ['signal', 'label', 'subject']
        self.signal_keys = ['chest', 'wrist']
        self.chest_keys = ['ACC', 'ECG', 'EMG', 'EDA', 'Temp', 'Resp']
        self.wrist_keys = ['ACC', 'BVP', 'EDA', 'TEMP']
        with open(os.path.join(main_path, self.name) + '/' + self.name + '.pkl', 'rb') as file:
            self.data = pickle.load(file, encoding='latin1')
        self.labels = self.data['label']

    def get_wrist_data(self):
        data = self.data['signal']['wrist']
        return data

    def get_chest_data(self):
        return self.data['signal']['chest']
    
    def get_subject_dataframe(self):
        wrist_data = self.get_wrist_data()
        bvp_signal = wrist_data['BVP'][:,0]
        eda_signal = wrist_data['EDA'][:,0]
        acc_x_signal = wrist_data['ACC'][:,0]
        acc_y_signal = wrist_data['ACC'][:,1]
        acc_z_signal = wrist_data['ACC'][:,2]
        temp_signal = wrist_data['TEMP'][:,0]
        # Upsampling data to match BVP data sampling rate using fourier method as described in Paper/dataset
        eda_upsampled = scipy.signal.resample(eda_signal, len(bvp_signal))
        temp_upsampled = scipy.signal.resample(temp_signal, len(bvp_signal))
        acc_x_upsampled = scipy.signal.resample(acc_x_signal, len(bvp_signal))
        acc_y_upsampled = scipy.signal.resample(acc_y_signal, len(bvp_signal))
        acc_z_upsampled = scipy.signal.resample(acc_z_signal, len(bvp_signal))
        label_df = pd.DataFrame(self.labels, columns=['label'])
        label_df.index = [(1 / 700) * i for i in range(len(label_df))] # 700 is the sampling rate of the label
        label_df.index = pd.to_datetime(label_df.index, unit='s')
        data_arrays = zip(bvp_signal, eda_upsampled, acc_x_upsampled, acc_y_upsampled, acc_z_upsampled, temp_upsampled)
        df = pd.DataFrame(data=data_arrays, columns=['BVP', 'EDA', 'ACC_x', 'ACC_y', 'ACC_z', 'TEMP'])
        df.index = [(1 / 64) * i for i in range(len(df))] # 64 = sampling rate of BVP
        df.index = pd.to_datetime(df.index, unit='s')
        df = df.join(label_df)
        df['label'] = df['label'].fillna(method='ffill')
        df.reset_index(drop=True, inplace=True)
        df.drop(df[df['label'].isin([0.0, 4.0, 5.0, 6.0, 7.0])].index, inplace=True)
        df['label'] = df['label'].replace([1.0, 2.0, 3.0], [0, 1, 0])
        df.reset_index(drop=True, inplace=True)
        return df


In [4]:
subjects = [2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 14, 15, 16, 17]
subjects_data = {}
for subject_num in subjects:
    subject = Subject(DATA_PATH, subject_num)
    subjects_data[subject.name] = subject.get_subject_dataframe()


In [326]:
subjects_data['S10']

Unnamed: 0,BVP,EDA,ACC_x,ACC_y,ACC_z,TEMP,label
0,-80.81,0.373519,24.000000,1.191478e-14,59.000000,33.250000,0.0
1,-72.98,0.370807,24.540895,4.696265e-01,58.750794,33.249310,0.0
2,-59.87,0.368285,24.000000,1.000000e+00,58.000000,33.248502,0.0
3,-41.01,0.366042,23.296980,1.633370e+00,57.531157,33.247577,0.0
4,-17.43,0.364157,24.000000,2.000000e+00,57.000000,33.246538,0.0
...,...,...,...,...,...,...,...
145723,-2.43,1.609115,48.672575,-2.504169e+01,-30.020864,32.929964,1.0
145724,-3.92,1.608968,49.000000,-2.500000e+01,-30.000000,32.929971,1.0
145725,-5.92,1.608838,49.326011,-2.495741e+01,-29.984345,32.929979,1.0
145726,-8.47,1.608725,49.000000,-2.500000e+01,-30.000000,32.929987,1.0


In [327]:
s10 = Subject(DATA_PATH, '10')
s10_wrist_data = s10.get_wrist_data()
bvp_signal = s10_wrist_data['BVP'][:,0]
eda_signal = s10_wrist_data['EDA'][:,0]
acc_x_signal = s10_wrist_data['ACC'][:,0]
acc_y_signal = s10_wrist_data['ACC'][:,1]
acc_z_signal = s10_wrist_data['ACC'][:,2]
temp_signal = s10_wrist_data['TEMP'][:,0]
labels = s10.labels

In [328]:
# Upsampling data to match BVP data sampling rate using fourier method as described in Paper/dataset
eda_upsampled = scipy.signal.resample(eda_signal, len(bvp_signal))
temp_upsampled = scipy.signal.resample(temp_signal, len(bvp_signal))
acc_x_upsampled = scipy.signal.resample(acc_x_signal, len(bvp_signal))
acc_y_upsampled = scipy.signal.resample(acc_y_signal, len(bvp_signal))
acc_z_upsampled = scipy.signal.resample(acc_z_signal, len(bvp_signal))

In [329]:
label_df = pd.DataFrame(s10.labels, columns=['label'])
label_df.index = [(1 / 700) * i for i in range(len(label_df))] # 700 is the sampling rate of the label
label_df.index = pd.to_datetime(label_df.index, unit='s')

In [330]:
data_arrays = zip(bvp_signal, eda_upsampled, acc_x_upsampled, acc_y_upsampled, acc_z_upsampled, temp_upsampled)
df = pd.DataFrame(data=data_arrays, columns=['BVP', 'EDA', 'ACC_x', 'ACC_y', 'ACC_z', 'TEMP'])
df.index = [(1 / 64) * i for i in range(len(df))] # 64 = sampling rate of BVP
df.index = pd.to_datetime(df.index, unit='s')
df = df.join(label_df)
df['label'] = df['label'].fillna(method='ffill')
df.reset_index(drop=True, inplace=True)


In [331]:
df.drop(df[df['label'].isin([0.0, 4.0, 5.0, 6.0, 7.0])].index, inplace=True)

In [332]:
df['label'] = df['label'].replace([1.0, 2.0, 3.0], [0, 1, 0])

In [333]:
df.reset_index(drop=True, inplace=True)

##### Anwendung von Fourier Transformation (scipy.fft) am Beispiel: https://realpython.com/python-scipy-fft/
Wichtige Parameter:
1. **SAMPLE_RATE** (in Hertz) determines how many data points the signal uses to represent the sine wave per second. So if the signal had a sample rate of 10 Hz and was a five-second sine wave, then it would have 10 * 5 = 50 data points.
2. **DURATION** (in Seconds) is the length of the generated sample. -> Frage hierzu: können wir überhaupt duration in seconds nehmen, wenn wir gar nicht mit den Zeitstempeln arbeiten, oder ist das irrelvant?

Für *fftfreq* wird N (= SAMPLE_RATE * DURATION) und 1/SAMPLE_RATE

### Window

In [12]:
signal_subwindow_dict = {
    'ACC_x': 7,
    'ACC_y': 7,
    'ACC_z': 7,
    'BVP': 30,
    'EDA': 30,
    'TEMP': 35
    }

In [13]:
# most frequent element in list
def most_common(lst):
    return max(set(lst), key=lst.count)

In [14]:
def create_windows(df: pd.DataFrame) -> tuple[pd.DataFrame,list]:

    window_len = 64 * 60 # fs = 64 and window length in seconds = 60
    windows, labels = zip(*[(df[i:i+window_len], int(most_common(df['label'][i:i+window_len].to_list()))) for i in range(0,df.shape[0],window_len)])
    return windows, labels

In [15]:
def create_subwindows(df: pd.DataFrame, signal_subwindow_len: int, signal_name: str) -> list:
    subwindow_len = 64 * signal_subwindow_len # fs = 64 and sub-window length in seconds = 30
    window_len = 64 * 60 # fs = 64 and window length in seconds = 60
    window_shift = int(64 * 0.25) # fs = 64 and window shift in seconds = 0.25
    subwindows = []

    for i in range(0, window_len, window_shift):
        if i + subwindow_len <= window_len:
            subwindow = df[signal_name][i:i+subwindow_len]
            subwindows.append(subwindow)
    return subwindows
            

In [16]:
def fft_subwindows(subwindows: list, duration: int, f_s: int) -> list:
    freqs= []
    yfs = []
    for subwindow in subwindows:
        y = np.array(subwindow)
        yf = scipy.fft.fft(y)
        N = f_s * duration
        freq = scipy.fft.fftfreq(N, 1/f_s)
        freqs.append(freq)
        yfs.append(yf)
    return freqs, yfs
    

In [None]:
        #plt.plot(freq, np.abs(yf))
        #plt.plot(freq, yf)
        #ax.set_xlabel('Frequency in Hertz [Hz]')
        #ax.set_ylabel('Frequency Domain (Spectrum) Magnitude')
        #ax.set_xlim(0, 64)

In [17]:
def average_window(subwindows_fft: list) -> list:
    # len_list_yfs = [len(subwindows_fft[i]) for i in range(len(subwindows_fft))]
    len_yfs = len(subwindows_fft[0])
    avg_yfs = []
    for i in range(len_yfs):
        i_yfs = []
        for yf in subwindows_fft:
            try:
                i_yfs.append(yf[i])
            except IndexError:
                pass
        avg_yfs.append(sum(i_yfs)/len(i_yfs))
    return avg_yfs

In [23]:
subjects_data['S2']

Unnamed: 0,BVP,EDA,ACC_x,ACC_y,ACC_z,TEMP,label
0,-33.58,1.640539,65.000000,12.000000,21.000000,35.810000,0.0
1,-17.37,1.642583,68.376142,16.105082,13.134411,35.810036,0.0
2,-1.16,1.644539,70.000000,16.000000,13.000000,35.810071,0.0
3,13.32,1.646320,71.921919,6.385358,20.973699,35.810102,0.0
4,25.33,1.647845,68.000000,-4.000000,29.000000,35.810130,0.0
...,...,...,...,...,...,...,...
135739,-37.93,0.214876,27.845314,56.636610,3.227253,34.072293,0.0
135740,-33.57,0.214718,28.000000,57.000000,4.000000,34.072078,0.0
135741,-28.23,0.214575,28.600147,57.012350,4.384087,34.071745,0.0
135742,-22.98,0.214449,29.000000,57.000000,4.000000,34.071290,0.0


In [24]:
#{S10: X:[], y:[])}
subjects_preprosessed_data = {}
for subject_name, subject_df in subjects_data.items():
    subjects_preprosessed_data[subject_name] = {}
    windows, labels = create_windows(subject_df)
    yfs_per_min_for_signal = {}
    X = []
    for i in range(0,len(windows) - 1):
        for signal in signal_subwindow_dict.keys():

            duration_in_sec = signal_subwindow_dict[signal]

            subwindows = create_subwindows(windows[i], signal_subwindow_len=duration_in_sec, signal_name=signal)
            freqs, yfs = fft_subwindows(subwindows, duration_in_sec, 64)
            yfs_average = average_window(yfs)[:210]
            yfs_per_min_for_signal[signal] = yfs_average
            
        X.append(pd.DataFrame(yfs_per_min_for_signal).T)
    y = list(labels[:len(windows)-1])
    subjects_preprosessed_data[subject_name]['X'] = X
    subjects_preprosessed_data[subject_name]['y'] = y


In [31]:
all_subjects_X = []
all_subjects_y = []
for subject_name, subject_data in subjects_preprosessed_data.items():
    all_subjects_X.append(subject_data['X'])
    all_subjects_y.append(subject_data['y'])


# Model

In [36]:
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.model_selection import KFold
from sklearn.model_selection import LeaveOneGroupOut

In [41]:
all_subjects_X

[[                          0                      1                       2    \
  ACC_x  14738.100574+0.000000j  24.452126-218.462132j   13.335670-116.993358j   
  ACC_y   1980.292895+0.000000j   2.330467+306.322547j -0.359543+0151.3957090j   
  ACC_z  16688.338053+0.000000j -15.585632+172.887127j  -9.905585+092.6309790j   
  BVP     17.4200830+0.0000000j   80.095522-34.449109j -204.525358-103.092277j   
  EDA     2413.453386+0.000000j   19.775094-46.561065j   6.456479-027.5502330j   
  TEMP   80212.930132+0.000000j -7.68610-0011.0363600j -2.05810500-6.12822400j   
  
                            3                       4                      5    \
  ACC_x   7.551783-081.2990690j   4.125133-061.4544220j  2.662953-048.8300160j   
  ACC_y -1.206010+0100.4576780j  -1.830450+075.8036340j -1.947804+060.9507170j   
  ACC_z  -4.888711+064.9177320j  -1.877266+048.5735840j -1.068785+038.0350460j   
  BVP    188.138163+242.705589j  126.607618+164.531875j  60.471420-206.198505j   
  EDA     1.9

In [75]:
# Define the model architecture
num_signals = 6
num_output_class = 2
model = tf.keras.Sequential()
#input_shape = 14 Signale (bei uns max. 6) X 210 Inputs (aus Tabelle nach Fourier)
model.add(tf.keras.layers.InputLayer(input_shape=[num_signals, 210, 1]))
model.add(tf.keras.layers.Conv2D(filters=64 ,activation='relu', kernel_size=(1,3), strides=1, padding='same')) 
#hier fehlen: Output shape, was bringt mir param#, kernel_size != Kernels, Size?
model.add(tf.keras.layers.Dropout(rate=0.3)) # portion gibts nicht? und es ist 'rate' gefordert???
model.add(tf.keras.layers.Conv2D(filters=64, activation='relu', kernel_size=(1,3), strides=1, padding='same'))
model.add(tf.keras.layers.MaxPooling2D(pool_size=(1,2)))
model.add(tf.keras.layers.Dropout(rate=0.3))
model.add(tf.keras.layers.Conv2D(filters=64, activation='relu', kernel_size=(1,3), strides=1, padding='same'))
model.add(tf.keras.layers.Dropout(rate=0.3))
model.add(tf.keras.layers.MaxPooling2D(pool_size=(1,2)))
model.add(tf.keras.layers.Dropout(rate=0.3))
model.add(tf.keras.layers.Flatten())
model.add(tf.keras.layers.Dense(units=128, activation='relu', kernel_initializer='glorot_uniform'))
model.add(tf.keras.layers.Dropout(rate=0.3))
model.add(tf.keras.layers.Dense(units=64, activation='relu', kernel_initializer='glorot_uniform'))
model.add(tf.keras.layers.Dropout(rate=0.3))
#Anzahl der Units = Anzahl der Klassen (2 - non-stress vs stress)
model.add(tf.keras.layers.Dense(units=num_output_class, activation='softmax', kernel_initializer='glorot_uniform'))


In [84]:
groups_set = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14]

test_loss = []
test_acc = []

for i in groups_set:
    test_index = groups_set[i]
    train_index = [x for x in groups_set if x != test_index]
    print(train_index, test_index)

    X_train = np.concatenate(np.array([all_subjects_X[x] for x in train_index]))
    y_train = np.concatenate(np.array([all_subjects_y[y] for y in train_index]))
    X_test = all_subjects_X[test_index]
    y_test = all_subjects_y[test_index]

    X_train = np.asarray(X_train)
    y_train = np.asarray(y_train)
    X_test = np.asarray(X_test)
    y_test = np.asarray(y_test)

    tf.keras.backend.clear_session()

    y_train = tf.keras.utils.to_categorical(y_train, num_output_class)
    y_test = tf.keras.utils.to_categorical(y_test, num_output_class)

    # Define the model architecture
    num_signals = 6
    num_output_class = 2
    model = tf.keras.Sequential()
    #input_shape = 14 Signale (bei uns max. 6) X 210 Inputs (aus Tabelle nach Fourier)
    model.add(tf.keras.layers.InputLayer(input_shape=[num_signals, 210, 1]))
    model.add(tf.keras.layers.Conv2D(filters=64 ,activation='relu', kernel_size=(1,3), strides=1, padding='same')) 
    #hier fehlen: Output shape, was bringt mir param#, kernel_size != Kernels, Size?
    model.add(tf.keras.layers.Dropout(rate=0.3)) # portion gibts nicht? und es ist 'rate' gefordert???
    model.add(tf.keras.layers.Conv2D(filters=64, activation='relu', kernel_size=(1,3), strides=1, padding='same'))
    model.add(tf.keras.layers.MaxPooling2D(pool_size=(1,2)))
    model.add(tf.keras.layers.Dropout(rate=0.3))
    model.add(tf.keras.layers.Conv2D(filters=64, activation='relu', kernel_size=(1,3), strides=1, padding='same'))
    model.add(tf.keras.layers.Dropout(rate=0.3))
    model.add(tf.keras.layers.MaxPooling2D(pool_size=(1,2)))
    model.add(tf.keras.layers.Dropout(rate=0.3))
    model.add(tf.keras.layers.Flatten())
    model.add(tf.keras.layers.Dense(units=128, activation='relu', kernel_initializer='glorot_uniform'))
    model.add(tf.keras.layers.Dropout(rate=0.3))
    model.add(tf.keras.layers.Dense(units=64, activation='relu', kernel_initializer='glorot_uniform'))
    model.add(tf.keras.layers.Dropout(rate=0.3))
    #Anzahl der Units = Anzahl der Klassen (2 - non-stress vs stress)
    model.add(tf.keras.layers.Dense(units=num_output_class, activation='sigmoid', kernel_initializer='glorot_uniform'))

    model.compile(optimizer="rmsprop",loss='binary_crossentropy', metrics=['accuracy'])

    checkpoint = tf.keras.callbacks.ModelCheckpoint(
        "wesad_binary.h5",      # Path to save the model file
        monitor="val_loss", # The metric name to monitor
        save_best_only=True # If True, it only saves the "best" model according to the quantity monitored 
    )
    
    early_stopping = tf.keras.callbacks.EarlyStopping(
        monitor="val_loss", # Quantity to be monitored.
        min_delta=0.01,     # Minimum change in the monitored quantity to qualify as an improvement, i.e. an absolute change of less than min_delta, will count as no improvement.
        patience=5,        # Number of epochs with no improvement after which training will be stopped.
    )

    history = model.fit(
    x=X_train, 
    y=y_train,
    epochs=10, 
    batch_size=50,
    #callbacks = [checkpoint, early_stopping]
)

    score = model.evaluate(X_test, y_test, verbose=0) 
    print('Test loss:', round(score[0], 2)) 
    print('Test accuracy:', round(score[1], 2))

    test_loss.append(score[0])
    test_acc.append(score[1])

    

[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14] 0
Epoch 1/10


  X_train = np.concatenate(np.array([all_subjects_X[x] for x in train_index]))
  y_train = np.concatenate(np.array([all_subjects_y[y] for y in train_index]))


Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Test loss: 0.68
Test accuracy: 0.71
[0, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14] 1
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Test loss: 0.69
Test accuracy: 0.69
[0, 1, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14] 2
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Test loss: 0.66
Test accuracy: 0.72
[0, 1, 2, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14] 3
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Test loss: 0.66
Test accuracy: 0.72
[0, 1, 2, 3, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14] 4
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Test loss: 0.69
Test accuracy: 0.72
[0, 1, 2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 14] 5
Epoch 1/10
Epoch 2/10
Epoc

In [82]:
test_acc

[0.7142857313156128,
 0.5428571701049805,
 0.7222222089767456,
 0.7222222089767456,
 0.7222222089767456,
 0.7222222089767456,
 0.7222222089767456,
 0.5555555820465088,
 0.7027027010917664,
 0.7027027010917664,
 0.7027027010917664,
 0.7027027010917664,
 0.7027027010917664,
 0.7027027010917664,
 0.7027027010917664]

In [85]:
test_acc

[0.7142857313156128,
 0.6857143044471741,
 0.7222222089767456,
 0.7222222089767456,
 0.7222222089767456,
 0.7222222089767456,
 0.7222222089767456,
 0.6944444179534912,
 0.7837837934494019,
 0.3243243098258972,
 0.7027027010917664,
 0.7027027010917664,
 0.7027027010917664,
 0.7027027010917664,
 0.7027027010917664]

In [81]:
average_acc = sum(test_acc)/len(test_acc)

In [43]:
groups = np.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14])
logo = LeaveOneGroupOut()
logo.get_n_splits(all_subjects_X, all_subjects_y, groups)


for train_index, test_index in logo.split(all_subjects_X, all_subjects_y, groups):

    # Define the model architecture
    num_signals = 6
    num_output_class = 2
    model = tf.keras.Sequential()
    #input_shape = 14 Signale (bei uns max. 6) X 210 Inputs (aus Tabelle nach Fourier)
    model.add(tf.keras.layers.InputLayer(input_shape=[num_signals, 210, 1]))
    model.add(tf.keras.layers.Conv2D(filters=64 ,activation='relu', kernel_size=(1,3), strides=1, padding='same')) 
    #hier fehlen: Output shape, was bringt mir param#, kernel_size != Kernels, Size?
    model.add(tf.keras.layers.Dropout(rate=0.3)) # portion gibts nicht? und es ist 'rate' gefordert???
    model.add(tf.keras.layers.Conv2D(filters=64, activation='relu', kernel_size=(1,3), strides=1, padding='same'))
    model.add(tf.keras.layers.MaxPooling2D(pool_size=(1,2)))
    model.add(tf.keras.layers.Dropout(rate=0.3))
    model.add(tf.keras.layers.Conv2D(filters=64, activation='relu', kernel_size=(1,3), strides=1, padding='same'))
    model.add(tf.keras.layers.Dropout(rate=0.3))
    model.add(tf.keras.layers.MaxPooling2D(pool_size=(1,2)))
    model.add(tf.keras.layers.Dropout(rate=0.3))
    model.add(tf.keras.layers.Flatten())
    model.add(tf.keras.layers.Dense(units=128, activation='relu', kernel_initializer='glorot_uniform'))
    model.add(tf.keras.layers.Dropout(rate=0.3))
    model.add(tf.keras.layers.Dense(units=64, activation='relu', kernel_initializer='glorot_uniform'))
    model.add(tf.keras.layers.Dropout(rate=0.3))
    #Anzahl der Units = Anzahl der Klassen (2 - non-stress vs stress)
    model.add(tf.keras.layers.Dense(units=num_output_class, activation='softmax', kernel_initializer='glorot_uniform'))

    tf.keras.backend.clear_session()

    model.compile(optimizer="rmsprop",loss='binary_crossentropy', metrics=['accuracy'])

    X_train = np.asarray(X_train)
    y_train = np.asarray(y_train)
    X_test = np.asarray(X_test)
    y_test = np.asarray(y_test)
    X_val = np.asarray(X_val)
    y_val = np.asarray(y_val)


y_train = tf.keras.utils.to_categorical(y_train, num_output_class)
y_test = tf.keras.utils.to_categorical(y_test, num_output_class)
y_val = tf.keras.utils.to_categorical(y_val, num_output_class)


    print("TRAIN:", train_index, "TEST:", test_index)
    X_train, X_test = all_subjects_X[train_index], all_subjects_X[test_index]
    y_train, y_test = all_subjects_y[train_index], all_subjects_y[test_index]
    print(X_train, X_test, y_train, y_test)

    # X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

    # X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.25, random_state=1) # 0.25 x 0.8 = 0.2

    # X_train = np.asarray(X_train)
    # y_train = np.asarray(y_train)
    # X_test = np.asarray(X_test)
    # y_test = np.asarray(y_test)
    # X_val = np.asarray(X_val)
    # y_val = np.asarray(y_val)


    # y_train = tf.keras.utils.to_categorical(y_train, num_output_class)
    # y_test = tf.keras.utils.to_categorical(y_test, num_output_class)
    # y_val = tf.keras.utils.to_categorical(y_val, num_output_class)

TRAIN: [ 1  2  3  4  5  6  7  8  9 10 11 12 13 14] TEST: [0]


TypeError: only integer scalar arrays can be converted to a scalar index

In [299]:
model.summary()

Model: "sequential_11"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d_33 (Conv2D)          (None, 6, 210, 64)        256       
                                                                 
 dropout_66 (Dropout)        (None, 6, 210, 64)        0         
                                                                 
 conv2d_34 (Conv2D)          (None, 6, 210, 64)        12352     
                                                                 
 max_pooling2d_22 (MaxPoolin  (None, 6, 105, 64)       0         
 g2D)                                                            
                                                                 
 dropout_67 (Dropout)        (None, 6, 105, 64)        0         
                                                                 
 conv2d_35 (Conv2D)          (None, 6, 105, 64)        12352     
                                                     

In [52]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.25, random_state=1) # 0.25 x 0.8 = 0.2

X_train = np.asarray(X_train)
y_train = np.asarray(y_train)
X_test = np.asarray(X_test)
y_test = np.asarray(y_test)
X_val = np.asarray(X_val)
y_val = np.asarray(y_val)


y_train = tf.keras.utils.to_categorical(y_train, num_output_class)
y_test = tf.keras.utils.to_categorical(y_test, num_output_class)
y_val = tf.keras.utils.to_categorical(y_val, num_output_class)

In [53]:
print(len(y))
print(len(y_train))
print(len(y_test))
print(len(y_val))

37
21
8
8


In [301]:
model.compile(optimizer="rmsprop",loss='binary_crossentropy', metrics=['accuracy'])

In [302]:
checkpoint = tf.keras.callbacks.ModelCheckpoint(
    "wesad_binary.h5",      # Path to save the model file
    monitor="val_loss", # The metric name to monitor
    save_best_only=True # If True, it only saves the "best" model according to the quantity monitored 
)

In [303]:
early_stopping = tf.keras.callbacks.EarlyStopping(
    monitor="val_loss", # Quantity to be monitored.
    min_delta=0.01,     # Minimum change in the monitored quantity to qualify as an improvement, i.e. an absolute change of less than min_delta, will count as no improvement.
    patience=5,        # Number of epochs with no improvement after which training will be stopped.
)

In [304]:
history = model.fit(
    x=X_train, 
    y=y_train,
    validation_data=(X_val, y_val),
    epochs=10, 
    batch_size=50,
    callbacks = [checkpoint, early_stopping]
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [305]:
score = model.evaluate(X_test, y_test, verbose=0) 
print('Test loss:', round(score[0], 2)) 
print('Test accuracy:', round(score[1], 2))

Test loss: 4.04
Test accuracy: 0.75


# Fragen

- Wie und wann Labels entfernen?