First take at EEG - hand motion data

Will try to fit to first subject's data only

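- training on series 1-6, testing on series 7-8 (approximated in the code by a row-count split: the first `n_train` rows are used for training, the remaining rows for testing)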
- TODO train on data downsampled to every 10th point for faster computation
  - is this harmful for training/prediction?
- TODO should the min/max scaling be per series?
- TODO lahead is currently in "points". So needs to be changed depending on downsampling.
- TODO model "subtract" output is measured as squared error (with target = zeros)
  - can I do better? something like "binary cross-entropy"?

## some parameters

In [None]:
n_train = 120000 # number of (already downsampled) rows used for training; the remaining rows are used for testing

lahead = 10 # length of the rolling windows, in (downsampled) points; 60 yields no classification results
batch_size = 2**10 # also tried: 2**4; smaller batches lead to less loss of data when truncating non-multiples of batch_size

downsample_pts = 10 # keep every downsample_pts-th row of each loaded series

## import libraries

In [None]:
from matplotlib import pyplot as plt
import pandas as pd
import time

# https://keras.io/layers/recurrent/#lstm
from keras.models import Sequential
from keras.layers import Dense, LSTM, Lambda, Dropout, Embedding, Flatten, Subtract, Dot

# https://keras.io/layers/recurrent/#lstm
from keras.models import Model
from keras.layers import Input, RepeatVector, TimeDistributed, Concatenate

import numpy as np

from sklearn.preprocessing import MinMaxScaler

## load data

In [None]:
def my_load(subj_ids:list, series_ids:list):
    """
    Read the raw per-series CSV files and stack them into two dataframes.

    For every (subject, series) pair, in the order given, the `*_data.csv`
    file (features) and then the `*_events.csv` file (targets) are read,
    indexed by their `id` column, cast to int16 and downsampled by keeping
    every `downsample_pts`-th row.

    Returns a tuple (features, targets); each is the row-wise concatenation
    of the frames of all (subject, series) pairs.
    """
    def _read(fn):
        frame = pd.read_csv(fn).set_index('id').astype('int16')
        return frame[::downsample_pts] # downsample

    features_all, targets_all = [], []
    for subj in subj_ids:
        for series in series_ids:
            features_all.append(_read('data/raw/train/subj%i_series%i_data.csv'%(subj, series)))
            targets_all.append(_read('data/raw/train/subj%i_series%i_events.csv'%(subj, series)))

    return pd.concat(features_all, axis=0), pd.concat(targets_all, axis=0)

In [None]:
# load series 1..8 of subject 1
train_features, train_targets = my_load(subj_ids=[1], series_ids=list(range(1, 9)))
train_features.shape, train_targets.shape

In [None]:
# peek at the first two rows of the loaded features
train_features.head(n=2)

In [None]:
# peek at the first two rows of the loaded targets
train_targets.head(n=2)

## preprocess features

e.g. scale to [0,1], stride, truncate, etc

In [None]:
def stride_df(df, n_back):
    """
    Build rolling windows of `n_back` consecutive rows, e.g. as LSTM input.

    Returns a numpy array of shape (len(df) - n_back + 1, n_back, n_columns).
    Along axis 1 the rows are in chronological order: index 0 is the oldest
    row of the window and index n_back-1 is the latest.
    """
    # lagged[k] is the frame delayed by k rows (its first k rows are NaN)
    lagged = [df.shift(lag).values for lag in range(n_back)]

    # axes are (row, column, lag); the first n_back-1 rows only have
    # incomplete windows, so they are dropped
    windows = np.stack(lagged, axis=2)[(n_back-1):, :, :]

    # reorder to (row, lag, column) and reverse the lag axis so that the
    # oldest row of each window comes first
    windows = np.swapaxes(windows, 1, 2)
    return windows[:, ::-1, :]

def stride_df_2(x):
    """Rolling windows of length `lahead` over `x` (see stride_df)."""
    return stride_df(x, lahead)

In [None]:
def my_truncate(df):
    """
    Keep the largest trailing number of rows that is a multiple of batch_size,
    i.e. drop the leftover rows from the start of the dataframe.
    """
    n_rows = df.shape[0]
    leftover = n_rows % batch_size
    return df.tail(n_rows - leftover)

In [None]:
def preprocess(x_train, y_train):
    """
    Turn a features/targets dataframe pair into batch-aligned rolling windows.

    Steps:
      1. min/max-scale every feature column (targets are left as they are)
      2. build rolling windows of `lahead` rows for features and targets
      3. drop the leading windows so that the number of windows is a
         multiple of `batch_size`

    Returns (x_roll, y_roll, z_roll): the windowed features and targets as
    numpy arrays, and a dataframe of the targets aligned with the last row
    of each window, which keeps the pandas index.

    NOTE(review): the scaler is fitted on whatever `x_train` is passed in, so
    calling this on the test split rescales with the test split's own min/max
    - confirm this is intended.
    """
    scaled = MinMaxScaler().fit_transform(x_train)
    feats = pd.DataFrame(scaled, columns=x_train.columns, index=x_train.index)
    targs = y_train
    print('train_pre', feats.shape, targs.shape)

    # rolling windows (numpy arrays, the pandas index is lost here)
    x_roll = stride_df_2(feats)
    y_roll = stride_df_2(targs)

    # "meta" dataframe that keeps the pandas index: skip the first lahead-1
    # rows, for which no complete window exists
    z_roll = y_train.apply(lambda col: col.iloc[(lahead-1):])
    print('train_roll 1', x_roll.shape, y_roll.shape, z_roll.shape)

    # drop the leading windows that do not fill a whole batch
    to_drop = x_roll.shape[0] % batch_size
    print('drop non-multiple', to_drop)
    x_roll, y_roll = x_roll[to_drop:], y_roll[to_drop:]
    z_roll = my_truncate(z_roll)
    print('train_roll 2', x_roll.shape, y_roll.shape, z_roll.shape)

    return x_roll, y_roll, z_roll

In [None]:
# training split: the first n_train rows of the loaded data
x_train = train_features.head(n=n_train).copy()
y_train = train_targets.head(n=n_train).copy()
print('x_train, y_train', x_train.shape, y_train.shape)

xtrain_roll, ytrain_roll, ztrain_roll = preprocess(x_train, y_train)
# guard against every window having been truncated away by preprocess
assert xtrain_roll.shape[0] > 0
xtrain_roll.shape, ytrain_roll.shape, ztrain_roll.shape

In [None]:
# peek at the first rows of the (unscaled) training features
x_train.head()

In [None]:
# plot the 'Fp1' and 'Fp2' feature columns over the training split
x_train[['Fp1', 'Fp2']].plot(figsize=(20,3))
plt.show()

## fit model: AE coupled with regression on target

In [None]:
def create_coupled():
    """
    Build and compile the coupled model.

    The model has two LSTM autoencoders, one over the feature windows and one
    over the target windows, plus a dense regressor that maps the encoded
    features onto the encoded targets.

    Inputs (by name): 'raw_features', 'raw_targets'.
    Outputs (by name):
      - 'reconstructed_features': decoded feature window
      - 'reconstructed_targets':  decoded target window
      - 'regressed_output': dot product of (regressed encoding - target
        encoding) with itself, i.e. the squared distance between the two
        encodings; it is meant to be trained towards zero

    Reads the globals `xtrain_roll` (for the number of features), `lahead`
    and `batch_size`. Returns the model compiled with MAE loss and Adam.
    """
    latent_dim = 15
    n_feat = xtrain_roll.shape[2]
    n_targ = 1

    # features encoder
    feat_raw = Input(shape=(lahead, n_feat, ), name='raw_features')
    feat_enc = LSTM(
        latent_dim,
        batch_size=batch_size,
        return_sequences=False,
        activation='tanh',
        name='encoded_features',
    )(feat_raw)

    # features decoder: repeat the code once per time step, then unroll it
    feat_rep = RepeatVector(lahead, input_shape=(latent_dim, ))(feat_enc)
    feat_seq = LSTM(
        latent_dim,
        batch_size=batch_size,
        return_sequences=True,
        dropout=0.2,
        activation='tanh',
    )(feat_rep)
    feat_rec = TimeDistributed(
        Dense(n_feat, activation='linear'),
        name='reconstructed_features',
    )(feat_seq)

    # target encoder
    targ_raw = Input(shape=(lahead, n_targ, ), name='raw_targets')
    targ_enc = LSTM(
        latent_dim,
        batch_size=batch_size,
        return_sequences=False,
        activation='tanh',
        name='encoded_targets',
    )(targ_raw)

    # target decoder (layers are named so they can be looked up later)
    targ_rep = RepeatVector(lahead, input_shape=(latent_dim, ), name='targ_dec_1')(targ_enc)
    targ_seq = LSTM(
        latent_dim,
        batch_size=batch_size,
        return_sequences=True,
        dropout=0.2,
        activation='tanh',
        name='targ_dec_2',
    )(targ_rep)
    targ_rec = TimeDistributed(
        Dense(n_targ, activation='linear'),
        name='reconstructed_targets',
    )(targ_seq)

    # internal regressor: encoded features -> encoded targets
    hidden = Dense(100, activation='relu', name='pre_targ_dec_1')(feat_enc)
    regressed = Dense(latent_dim, activation='linear', name='pre_targ_dec_2')(hidden) # match shape of targ_enc

    # difference between the regressed and the actual target encoding,
    # reduced to a single number by a dot product with itself
    diff = Subtract()([regressed, targ_enc])
    to_be_zero = Dot(axes=-1, name='regressed_output')([diff, diff])

    # create model
    model_all = Model(
        inputs=[feat_raw, targ_raw],
        outputs=[feat_rec, targ_rec, to_be_zero],
    )
    model_all.compile(loss='mae', optimizer='adam')
    return model_all

In [None]:
# build the coupled model and print its layer summary
mod2 = create_coupled()
mod2.summary()

In [None]:
# actual fit
print(time.ctime(),'fit start')
history = mod2.fit(
         {   # model inputs, keyed by the name of the Input layer
             'raw_features': xtrain_roll,
             'raw_targets': ytrain_roll[:,:,:1], # first target column only (the model is built with len_targ = 1)
         },
         {   # training targets, keyed by the name of the output layer
             'reconstructed_features': xtrain_roll,
             'reconstructed_targets': ytrain_roll[:,:,:1],
             'regressed_output': ytrain_roll[:,-1,0]*0, # zeros
         },
         batch_size=batch_size,
         epochs=3000,
         initial_epoch = 17, # NOTE(review): looks like a leftover from resuming an earlier run - confirm it is intended for a freshly built model
         verbose=2,
         #validation_data=None,
         shuffle=False
    )
print(time.ctime(),'fit end')

In [None]:
# plot the training loss recorded by fit, one point per epoch
# (to ignore the first few points, which are large relative to the others, slice the list with [5:])
plt.plot(history.history['loss'], label='loss')
# no validation data is passed to fit above, so there is no 'val_loss' curve to plot
plt.legend()
plt.show()

## save model

## extract prediction model from features to target

In [None]:
# https://github.com/keras-team/keras/blob/master/examples/mnist_transfer_cnn.py#L89

# Re-use the layers of mod2 to build a features-only predictor:
# features encoder -> internal regressor -> target decoder
i1 = Input(shape=(lahead, xtrain_roll.shape[2]), name='raw_features')
m1 = mod2.get_layer(name='encoded_features')(i1)

m3 = m1
for _layer_name in [
    'pre_targ_dec_1',
    'pre_targ_dec_2',
    'targ_dec_1',
    'targ_dec_2',
    'reconstructed_targets',
]:
    m3 = mod2.get_layer(name=_layer_name)(m3)

mod3 = Model(inputs=[i1], outputs=[m3])
mod3.compile(loss='mae', optimizer='adam')

mod3.summary()

## plot trained result

In [None]:
def my_predict(np_in, index):
    """
    Predict the targets with `mod3`, plot them against the actual targets and
    return the predictions.

    np_in: dict with the windowed arrays under 'raw_features' and
           'raw_targets'; the whole dict is handed to `mod3.predict`, and
           'raw_targets' supplies the 'actual' curve of the plot
    index: pandas index with one entry per window

    Returns a dataframe indexed by 'id' with a single 'prediction' column:
    the prediction for the last time step of each window, target column 0.
    """
    # make prediction
    reconstructed = mod3.predict(np_in, batch_size=batch_size)

    # plot target reconstruction (last time step of every window)
    target_col = 0
    actual = pd.Series(np_in['raw_targets'][:,-1,target_col], index=index).astype('int16')
    pred = pd.Series(reconstructed[:,-1,target_col], index=index)
    comparison = pd.DataFrame({'actual': actual, 'pred': pred})
    comparison.plot(figsize=(20,3), alpha=0.5)
    plt.title('target %i'%(target_col))
    plt.legend()
    plt.show()

    # prepare output
    out = pd.DataFrame({
        'prediction': reconstructed[:,-1,0].squeeze(),
        'id': index,
    })
    return out.set_index(['id'])

#------------------------------------

# predict on the training windows and plot prediction vs actual target
ytrain_pred = my_predict(
    {   'raw_features': xtrain_roll,
        'raw_targets':  ytrain_roll, # read by my_predict for the 'actual' curve; mod3 itself only declares the 'raw_features' input
    },
    ztrain_roll.index,
)
ytrain_pred.shape

## predict on test data

In [None]:
# test split: every loaded row after the first n_train rows
n_test = train_features.shape[0] - n_train
# tail(n) with a negative n returns all rows except the first |n|, which would
# silently overlap the training split; fail loudly instead
if n_test <= 0:
    raise ValueError(
        'n_train=%i leaves no rows for testing (%i rows loaded)'
        % (n_train, train_features.shape[0])
    )
x_test = train_features.tail(n=n_test).copy()
y_test = train_targets.tail(n=n_test).copy()
print('x_test, y_test', x_test.shape, y_test.shape)

xtest_roll, ytest_roll, ztest_roll = preprocess(x_test, y_test)
# same guard as for the training split: preprocess truncates to a multiple of
# batch_size, which can leave no windows at all
assert xtest_roll.shape[0] > 0
xtest_roll.shape, ytest_roll.shape, ztest_roll.shape

In [None]:
# peek at the first rows of the (unscaled) test features
x_test.head()

In [None]:
# predict on the test windows and plot prediction vs actual target
ytest_pred = my_predict(
    {   'raw_features': xtest_roll,
        'raw_targets':  ytest_roll, # read by my_predict for the 'actual' curve; mod3 itself only declares the 'raw_features' input
    },
    ztest_roll.index,
)
ytest_pred.shape