In [1]:
import pandas as pd
import numpy as np
import keras
from tqdm.notebook import tqdm
from IPython.display import display
import sklearn
from matplotlib import pyplot as plt
from keras.layers import LSTM, Dropout, Dense, BatchNormalization, Flatten, Input
from sklearn.metrics import roc_curve
from sklearn.model_selection import train_test_split
from sklearn.dummy import DummyClassifier
from sklearn.ensemble import RandomForestClassifier
#https://machinelearningmastery.com/how-to-develop-rnn-models-for-human-activity-recognition-time-series-classification/

To do:

- Load alle trials i stedet for data (all_readings = np.load('all_readings.npy', allow_pickle=True))
- Ryd op i de trials.
- Generér uniform trials (3D array med [batch, samples,features])
    - Altså, cellen med #Make all trials same length - Save this cell
    - batch er alle dine træningspunkter
    - samples er dine time series (små udklip af eye tracking)
    - features er... ja, features.
- Få LSTM til at virke med din data - skidt være med metrics.
- Når det virker kan du rode rundt med ordentlig klassifikation
- Evt separer saccades og fixations?

In [2]:
# --- Experiment configuration -------------------------------------------------

# Label to predict; the alternative value is 'age'.
target = 'gender'
# When True, the data-loading cell drops the last 52 trials.
even_gender = False

# Number of consecutive rows per sample window (also the minimum trial length).
sample_length = 10

# Training settings.
epochs = 40
batch_size = 32
verbose = True

# Seed for NumPy's global RNG, also reused as random_state for scikit-learn calls.
seed = 42
np.random.seed(seed)

In [None]:
# Read every recorded trial and keep only those strictly longer than one sample window.
# NOTE(review): allow_pickle=True unpickles arbitrary objects -- only load files from a trusted source.
data = [
    trial
    for trial in np.load('all_readings.npy', allow_pickle=True)
    if len(trial) > sample_length
]

# Optionally trim the last 52 trials (presumably to even out the gender classes -- TODO confirm).
if even_gender:
    data = data[:-52]

In [None]:
def make_trials_uniform(trials):
    """Cut every trial into non-overlapping windows of ``sample_length`` rows.

    Each trial is passed through ``cleanup_reading`` and its feature frame is
    sliced into consecutive, non-overlapping windows. Every complete window is
    kept, including the final one; rows left over after the last complete
    window are discarded. Trials with fewer than ``sample_length`` cleaned rows
    contribute nothing.

    Parameters
    ----------
    trials : iterable
        Raw trial frames, each accepted by ``cleanup_reading``.

    Returns
    -------
    tuple of np.ndarray
        ``(X, y)`` where ``X`` stacks the windows (one window per entry along
        the first axis) and ``y`` holds, for each window, the first label row
        of the trial the window came from.
    """
    uniform_X = []
    uniform_y = []
    for trial_idx, trial in enumerate(tqdm(trials)):
        # Preview the cleaned features for the first trial only.
        X, y = cleanup_reading(trial, show=(trial_idx == 0))

        n_windows = len(X) // sample_length
        if n_windows == 0:
            continue

        # Convert once per trial instead of slicing the DataFrame per window.
        features = X.to_numpy()
        # The label row is taken from the first row and reused for every window.
        label = y.values[0]

        for window_idx in range(n_windows):
            start = window_idx * sample_length
            uniform_X.append(features[start:start + sample_length])
            uniform_y.append(label)

    return np.array(uniform_X), np.array(uniform_y)

def experimental(trial):
    """Reduce a cleaned trial to two features of its ``eventType == 0`` rows.

    Keeps only rows whose ``eventType`` equals 0, drops the ``eventType``
    column, treats zeros as missing values, removes columns that are entirely
    missing, and returns the ``duration`` and ``meanPupilDiameter`` columns.

    Parameters
    ----------
    trial : pd.DataFrame
        Frame containing at least ``eventType``, ``duration`` and
        ``meanPupilDiameter``.

    Returns
    -------
    pd.DataFrame
        The two selected columns for the retained rows.

    Raises
    ------
    KeyError
        If either selected column is absent, including when it was removed
        for being entirely zero/missing.
    """
    selected = trial.loc[trial['eventType'] == 0].drop(columns='eventType')
    # Zeros become NaN so that all-zero columns can be dropped as "all missing".
    selected = selected.replace(0, np.nan).dropna(axis=1, how="all")
    return selected[['duration', 'meanPupilDiameter']]

    

def cleanup_reading(trial, show=False):
    """Clean one raw trial and split it into feature and label frames.

    Steps: drop the first column, keep only rows where ``eye == 'right'``,
    remove the ``eye``, ``eventIdxLeft`` and ``eventIdxRight`` columns, encode
    ``eventType`` as 1 for ``'saccade'`` and 0 for ``'fixation'``, fill
    missing values with 0, and reset the row index.

    Parameters
    ----------
    trial : pd.DataFrame
        Raw trial frame. The last two columns are returned separately as the
        labels (presumably gender and age -- TODO confirm against the data).
    show : bool, default False
        When True, print a heading and display the head of the cleaned frame.

    Returns
    -------
    tuple of pd.DataFrame
        ``(features, labels)``: every cleaned column except the last two, and
        the last two columns.
    """
    # Keyword `columns=` is used because passing the axis positionally
    # (`drop('eye', 1)`) is rejected by pandas >= 2.0.
    trial = (
        trial[trial.columns[1:]]
        .loc[lambda frame: frame['eye'] == 'right']
        .drop(columns=['eye', 'eventIdxLeft', 'eventIdxRight'])
        .copy()  # explicit copy so the column assignment below cannot touch the caller's frame
    )
    trial['eventType'] = trial['eventType'].replace({'saccade': 1, 'fixation': 0})
    trial = trial.fillna(0)

    #trial = experimental(trial)

    # Capture the column names before reset_index() adds its own 'index' column,
    # so that column never ends up in the returned frames.
    feats = trial.columns
    trial = trial.reset_index()

    if show:
        print("Your features")
        display(trial.head())

    return trial[feats[:-2]], trial[feats[-2:]]

In [None]:
# Shuffle the trial order in place, window every trial, then shuffle the windows.
np.random.shuffle(data)
X, y = sklearn.utils.shuffle(*make_trials_uniform(data), random_state=seed)

# Shift the labels down by one and keep a single label column:
# column 0 when the target is gender, column 1 otherwise.
y = (y - 1)[:, 0 if target == 'gender' else 1].reshape(-1, 1)
print(f'X shape: {X.shape}, y shape: {y.shape}')

# NOTE(review): the split is done per window, so windows cut from the same trial
# can land in both train and test -- possible leakage; consider splitting by trial.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=seed)

In [None]:
# LSTM binary classifier: one window in, one sigmoid probability out.
model = keras.Sequential([
    # Per-sample input shape, i.e. everything after the batch axis of X_train.
    Input(shape=X_train.shape[1:]),
    LSTM(10),
    Dense(100, activation='relu'),
    Dropout(0.5),
    Dense(1, activation="sigmoid"),
])

# Compile exactly once: a second compile() simply overrides the first.
# The 'accuracy' metric name yields the 'accuracy' / 'val_accuracy' history
# keys that the training-curve cell reads.
model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

In [None]:
# Stop training once the validation loss has not improved for 30 consecutive epochs.
callback = keras.callbacks.EarlyStopping(monitor='val_loss', patience=30)

# 20% of the training set is held out for validation during fitting.
history = model.fit(
    X_train,
    y_train,
    validation_split=0.2,
    epochs=epochs,
    batch_size=batch_size,
    callbacks=[callback],
    verbose=int(verbose),  # honour the notebook-level verbosity switch
)

In [None]:
# Training curves: solid lines for the training metrics, dashed for validation.
for history_key, fmt_args, legend_label in (
    ('loss', (), "loss"),
    ('accuracy', (), "acc"),
    ('val_loss', ('--',), "val loss"),
    ('val_accuracy', ('--',), "val acc"),
):
    plt.plot(history.history[history_key], *fmt_args, label=legend_label);
plt.legend();

In [None]:
# Score the held-out windows with the LSTM. roc_curve sweeps a threshold over the
# scores, so it must receive the continuous sigmoid outputs; feeding it
# already-thresholded predictions collapses the curve to a single operating point.
y_pred = model.predict(X_test).ravel()
fpr, tpr, thresholds = roc_curve(y_test.ravel(), y_pred, pos_label=1)

# Baseline for comparison: scikit-learn's DummyClassifier with default settings.
dummy = DummyClassifier()
dummy.fit(X_train, y_train)
dum_y_pred = dummy.predict(X_test).ravel()
dum_fpr, dum_tpr, dum_thresholds = roc_curve(y_test.ravel(), dum_y_pred, pos_label=1)

# rfc = RandomForestClassifier()
# rfc.fit(X_train, y_train)
# rfc_y_pred= rfc.predict(X_test).ravel()
# rfc_fpr, rfc_tpr, rfc_thresholds = roc_curve(y_test-1, rfc_y_pred-1)

In [None]:
# fig = plt.figure(figsize=(8,6))
# ROC of the LSTM (solid) against the dummy baseline (dashed purple).
plt.title("ROC curve for LSTM")
plt.plot(fpr, tpr, label="LSTM")
plt.plot(dum_fpr, dum_tpr, '--', color='purple', label="Dummy")
plt.legend()

In [None]:
# Count how many test windows are predicted as class 0 and class 1.
# The sigmoid outputs are thresholded at 0.5; a plain astype(int) would truncate
# every probability below 1.0 to 0 and report a single class.
np.bincount((model.predict(X_test).ravel() >= 0.5).astype(int), minlength=2)

In [None]:
# Overlay the training samples at indices 0 and 2 on one figure for a quick visual comparison.
plt.plot(X_train[0])
plt.plot(X_train[2])

In [None]:
# Labels of the training samples at indices 0 and 2 (the same indices plotted from X_train).
y_train[0], y_train[2]

In [None]:
# Display the training label array.
y_train