First take at EEG - hand motion data

Will try to fit to first subject's data only

- training on series 1-6, testing on series 7-8
  - downsampled every 10 points for faster computation
  - on gpu
    - Epoch 169/3000    - 2s    - loss: 0.0494
        - reconstructed_features_loss: 0.0445
        - reconstructed_targets_loss: 0.0037
        - regressed_output_loss: 0.0012
    - the above is for batch-size = 2**14 ~ 16'000 and loss = MAE
    - Epoch 1103/3000 - 2s - loss: 0.0383
        - reconstructed_features_loss: 0.0361
        - reconstructed_targets_loss: 0.0018
        - regressed_output_loss: 3.5944e-04

    - TODO check if better to just use multi-core with smaller batch-size ~ 32

- TODO is downsampling harmful for training/prediction?
  - downsample_pts = 1 kills the kernel
  
- TODO should the min/max scaling be per series?
- TODO lahead is currently expressed in "points", so it has to be adjusted whenever the downsampling factor changes.
- TODO model "subtract" output is measured as squared error (with target = zeros)
  - can I do better? something like "binary cross-entropy"?
- TODO does AE on target make sense?

## check gpu usage

In [None]:
# print the devices that TensorFlow reports as locally available
from tensorflow.python.client import device_lib
print(device_lib.list_local_devices())

## some parameters

In [None]:
# n_train: number of points for training, as opposed to testing
# lahead: window size (in points) used when striding the data into rolling windows
# batch_size: passed to the keras fit/predict calls .. the preprocessing also drops the leading
#             rows that are not a multiple of batch_size, so smaller batches lose less data
# downsample_pts: 1 for no downsampling, 10 to keep every 10th point
#---------------------------------------------------------
# set 1
# n_train, lahead, batch_size, downsample_pts = 120000, 10, 2**14, 10

# set 2 (the active configuration)
n_train, lahead, batch_size, downsample_pts = 1200000, 100, 2**18, 1

# set 3:
# training each subject / series separately
# Requires smaller batch_size since each series is only around 1000 pts when downsampled by 10
# n_train, lahead, batch_size, downsample_pts = 120000, 10, 2**4, 10

## import libraries

In [None]:
from matplotlib import pyplot as plt
import pandas as pd
import time

# https://keras.io/layers/recurrent/#lstm
from keras.models import Sequential
from keras.layers import Dense, LSTM, Lambda, Dropout, Embedding, Flatten, Subtract, Dot, Activation

# https://keras.io/layers/recurrent/#lstm
from keras.models import Model
from keras.layers import Input, RepeatVector, TimeDistributed, Concatenate

import numpy as np

from sklearn.preprocessing import MinMaxScaler

## load data

In [None]:
def my_load(subj_ids:list, series_ids:list):
    """
    Read the data and events CSV files for every (subject, series) pair.

    Each file is tagged with its subj_id / series_id, indexed by
    (subj_id, series_id, id), cast to int16 and thinned by keeping every
    ``downsample_pts``-th row (``downsample_pts`` is a notebook-level setting).

    Returns a tuple (features_all, targets_all) of concatenated dataframes.
    """
    frames = {'features': [], 'targets': []}
    for subj in subj_ids:
        for series in series_ids:
            sources = (
                ('features', 'data/raw/train/subj%i_series%i_data.csv'%(subj, series)),
                ('targets', 'data/raw/train/subj%i_series%i_events.csv'%(subj, series)),
            )
            for kind, path in sources:
                print('status', subj, series, kind)
                frame = pd.read_csv(path)
                frame['subj_id'] = subj
                frame['series_id'] = series
                frame = frame.set_index(['subj_id', 'series_id', 'id']).astype('int16')
                # downsample
                frames[kind].append(frame[::downsample_pts])

    features_all = pd.concat(frames['features'], axis=0)
    targets_all = pd.concat(frames['targets'], axis=0)
    return features_all, targets_all

In [None]:
# load series 1..8 of subject 1 and show the resulting shapes
train_features, train_targets = my_load(subj_ids = [1], series_ids = [x+1 for x in range(8)])
train_features.shape, train_targets.shape

In [None]:
# peek at the first two rows of the loaded features
train_features.head(n=2)

In [None]:
# peek at the first two rows of the loaded targets
train_targets.head(n=2)

## preprocess features

e.g. scale to [0, 1], stride into rolling windows, truncate to a multiple of the batch size, etc.

In [None]:
def stride_df(df, n_back):
    """
    Build rolling windows of ``n_back`` consecutive rows for the LSTM.

    Returns a numpy array shaped (rows - n_back + 1, n_back, columns) in which
    window ``j`` holds rows ``j .. j + n_back - 1`` of ``df``, oldest row first.
    The result is empty along the first axis when ``df`` has fewer than
    ``n_back`` rows.
    """
    # one shifted copy of the frame per lag: at row t, lag k holds row t - k
    lagged = [df.shift(lag).values for lag in range(n_back)]
    windows = np.stack(lagged, axis=2)
    # the first n_back - 1 rows only contain the padding introduced by shift
    windows = windows[(n_back - 1):]
    # (rows, columns, lags) -> (rows, lags, columns)
    windows = windows.transpose(0, 2, 1)
    # reverse the lag axis so that index 0 is the oldest point and the last index the latest
    return windows[:, ::-1, :]


def stride_df_2(x):
    """Stride ``x`` with the notebook-level ``lahead`` window size."""
    return stride_df(x, lahead)

In [None]:
def my_truncate(df):
    """
    Drop the leading rows of ``df`` so that the number of remaining rows
    is a multiple of the notebook-level ``batch_size``.
    """
    n_extra = df.shape[0] % batch_size
    return df.iloc[n_extra:]

In [None]:
def preprocess(x_train, y_train):
    """
    Turn raw feature / target dataframes into windowed numpy arrays for the model.

    Steps, each applied per (subj_id, series_id) group:
      1. min/max-scale the features with MinMaxScaler (targets are left untouched),
      2. stride features and targets into rolling windows of length ``lahead``,
    followed by a concatenation of all groups and the removal of the leading
    windows that do not fit into a multiple of ``batch_size``.

    Parameters
    ----------
    x_train : feature dataframe whose index contains the levels subj_id and series_id
    y_train : target dataframe with the same index levels

    Returns
    -------
    (xtrain_roll, ytrain_roll, ztrain_roll): two numpy arrays of windows and a
    dataframe derived from ``y_train`` that keeps the pandas index.
    """
    # a single scaler object, refit on every group through fit_transform below
    scaler = MinMaxScaler()
    
    def wrap_pd_df(xxx, func):
        """Apply ``func`` to ``xxx`` and re-attach the columns and index of ``xxx``."""
        return pd.DataFrame(
                 func(xxx), 
                 columns=xxx.columns, 
                 index=xxx.index
               )

    print('min/max start')
    # xtrain_pre = x_train.groupby(['subj_id', 'series_id']).apply(lambda xxx: scaler.fit_transform(xxx))
    xtrain_pre = ( x_train.groupby(['subj_id', 'series_id'])
                          .apply(lambda xxx: wrap_pd_df(xxx, lambda yyy: scaler.fit_transform(yyy)))
                 )
    ytrain_pre = y_train

    print('train_pre', xtrain_pre.shape, ytrain_pre.shape)
    #--------------------------------------
    # windows are built per group so that no window spans two different series
    # xtrain_roll = stride_df_2(xtrain_pre)
    # ytrain_roll = stride_df_2(ytrain_pre)
    xtrain_roll = (xtrain_pre.groupby(['subj_id', 'series_id'])
                             .apply(stride_df_2)
                             # .apply(lambda xxx: wrap_pd_df(xxx, stride_df_2))
                  )
    ytrain_roll = (ytrain_pre.groupby(['subj_id', 'series_id'])
                             .apply(stride_df_2)
                             # .apply(lambda xxx: wrap_pd_df(xxx, stride_df_2))
                  )

    # "meta" dataframe that will still contain the pandas index (above *_roll variables are numpy matrices)
    # the first lahead-1 rows of every group are skipped, matching the rows dropped by stride_df
    ztrain_roll = y_train.groupby(['subj_id', 'series_id']).apply(lambda group: group.iloc[(lahead-1):])

    print('train_roll 1', xtrain_roll.shape, ytrain_roll.shape, ztrain_roll.shape)
    #return xtrain_roll, ytrain_roll, ztrain_roll

    # NOTE(review): the string literal below is disabled code (per-group truncation), kept as a no-op statement
    """
    # drop non-batchsize-multiple per subject/series pair
    for (subj_id, series_id), group in xtrain_roll.groupby(['subj_id', 'series_id']):
        to_drop = group.values[0].shape[0] % batch_size
        print(subj_id, series_id, 'drop non-multiple', to_drop)
        assert to_drop < 1000

        xtrain_roll.loc[subj_id, series_id] = xtrain_roll.loc[subj_id, series_id][(to_drop):]
        ytrain_roll.loc[subj_id, series_id] = ytrain_roll.loc[subj_id, series_id][(to_drop):]
       
    ztrain_roll = ztrain_roll.groupby(['subj_id', 'series_id']).apply(my_truncate)
    print('train_roll 2', xtrain_roll.shape, ytrain_roll.shape, ztrain_roll.shape)
    """
    
    # aggregate all strided matrices (one array per group) into a single array
    xtrain_roll = np.concatenate(xtrain_roll.values, axis=0)
    ytrain_roll = np.concatenate(ytrain_roll.values, axis=0)
    
    # drop non-batchsize-multiple, once for all
    # the same number of leading rows is removed from the arrays and (via my_truncate) from the meta dataframe
    to_drop = xtrain_roll.shape[0]%batch_size
    print('drop non-multiple', to_drop)
    xtrain_roll = xtrain_roll[(to_drop):]
    ytrain_roll = ytrain_roll[(to_drop):]
    ztrain_roll = my_truncate(ztrain_roll)
    print('train_roll 2', xtrain_roll.shape, ytrain_roll.shape, ztrain_roll.shape)
    
    return xtrain_roll, ytrain_roll, ztrain_roll

In [None]:
# the first n_train rows form the training split
x_train = train_features.head(n=n_train).copy()
y_train = train_targets.head(n=n_train).copy()
print('x_train, y_train', x_train.shape, y_train.shape)

# scale, window and truncate; fail early if the truncation to a batch_size multiple left no windows
xtrain_roll, ytrain_roll, ztrain_roll = preprocess(x_train, y_train)
assert xtrain_roll.shape[0] > 0
xtrain_roll.shape, ytrain_roll.shape, ztrain_roll.shape

In [None]:
# training features as loaded, i.e. before preprocess() scaled them
x_train.head(n=2)

In [None]:
# plot the unscaled Fp1 / Fp2 columns over the whole training split
x_train[['Fp1', 'Fp2']].plot(figsize=(20,3), alpha=0.5)
plt.show()

In [None]:
# same two columns after preprocessing: take the last time step of every window
xtrain_plot = pd.DataFrame(xtrain_roll[:,-1,:], columns=x_train.columns)
print(xtrain_plot.shape)
xtrain_plot[['Fp1', 'Fp2']].plot(figsize=(20,3), alpha=0.5)
# plt.title('subj_id=1, series_id=1')
plt.show()

In [None]:
# first 1000 points of the HandStart target
y_train[['HandStart']].head(n=1000).plot(figsize=(20,3), alpha=0.5)
plt.show()

## avoid class bias

In [None]:
# calculate length of HandStart == 1
# a positive difference marks a point where HandStart rises, a negative one where it falls
y_temp = y_train['HandStart'].diff().fillna(value=0)
y_start = y_temp[y_temp > 0]
y_end   = y_temp[y_temp < 0]
y_start.head(), y_end.head() # length = 150, at downsample=10 this becomes 15

In [None]:
# test "shift" function
# pd.DataFrame({'A': range(5)}).shift(-2)

In [None]:
def stretch_mask(mask_in, n_stretch):
    """
    Widen every True region of a 1-D mask.

    The mask is convolved with a box of ``n_stretch`` ones, so a True sample at
    position ``p`` marks the positions ``p - (n_stretch - 1)//2`` up to
    ``p + n_stretch//2`` (clipped to the mask), e.g.
    n_stretch = 10 <=> stretch mask by roughly 5 from each side.

    Parameters
    ----------
    mask_in : 1-D array-like of booleans (or 0/1 values)
    n_stretch : width of the box in points, must be >= 1 (1 leaves the mask unchanged)

    Returns
    -------
    numpy boolean array with the same length as ``mask_in``

    Raises
    ------
    ValueError
        if ``n_stretch`` is smaller than 1
    """
    if n_stretch < 1:
        raise ValueError('n_stretch must be >= 1, got %r' % (n_stretch,))

    mask_arr = np.asarray(mask_in, dtype=float)
    if mask_arr.size == 0:
        # nothing to stretch; np.convolve would refuse an empty input
        return np.zeros(0, dtype=bool)

    box = np.ones(n_stretch)
    # mode='same' returns max(len(mask), n_stretch) samples, i.e. a result of the
    # wrong length whenever the mask is shorter than the box. Cutting the centred
    # window out of the 'full' convolution gives the same values as 'same' for
    # long masks and always keeps the length of the input.
    full = np.convolve(mask_arr, box, mode='full')
    start = (n_stretch - 1) // 2
    return full[start:start + mask_arr.size] > 0


# test stretching methodology
# mask of windows whose last time step has target column 0 equal to 1 (presumably HandStart)
i_one = (ytrain_roll[:,-1,0]==1)
n_show = 1000
# compare the original and the stretched mask over the first n_show windows for a few widths
for i_iter in [1, 20, 40]:
    print(i_iter)
    i_sig = stretch_mask(i_one[:n_show], i_iter)
    plt.plot(i_one[:n_show], label='ori')
    plt.plot(i_sig, label='stretched')
    plt.title(i_iter)
    plt.legend()
    plt.show()

In [None]:
# earlier approach (disabled): widen the mask by shifting it half a HandStart length in both directions
# handstart_len = 15 # from above
#
# i_one = (ytrain_roll[:,-1,0]==1)
# i_pree = pd.DataFrame(ytrain_roll[:,-1,:1]).shift(-1*handstart_len//2).fillna(value=0)==1
# i_post = pd.DataFrame(ytrain_roll[:,-1,:1]).shift(+1*handstart_len//2).fillna(value=0)==1
# mask = i_one | i_pree.values.squeeze() | i_post.values.squeeze()

# keep only the windows around target == 1, stretched with a box of 40 points
mask = (ytrain_roll[:,-1,0]==1)
mask = stretch_mask(mask, 40)

# apply the same boolean mask to the feature windows, target windows and the meta dataframe
xtrain_bal = xtrain_roll[mask]
ytrain_bal = ytrain_roll[mask]
ztrain_bal = ztrain_roll[mask]

xtrain_bal.shape, ytrain_bal.shape, ztrain_bal.shape

In [None]:
# plot the balanced subset: last time step of every kept window, features and HandStart
xtrain_plot = pd.DataFrame(xtrain_bal[:,-1,:], columns=x_train.columns)
ytrain_plot = pd.DataFrame(ytrain_bal[:,-1,:], columns=y_train.columns)
print(xtrain_plot.shape)
xtrain_plot[['Fp1', 'Fp2']].plot(figsize=(20,3), alpha=0.5)
ytrain_plot[['HandStart']].plot(figsize=(20,3), alpha=0.5)
# plt.title('subj_id=1, series_id=1')
plt.show()

## fit model: AE coupled with regression on target

In [None]:
def create_coupled():
    """
    Build the LSTM encoder / decoder model.

    Input  'raw_features':          windows shaped (lahead, number of feature columns)
    Output 'reconstructed_targets': one sigmoid value per time step of the window

    The number of feature columns is read from the notebook-level ``xtrain_roll``;
    ``lahead`` and ``batch_size`` are notebook-level settings as well.
    """
    n_units = 15
    n_features = xtrain_roll.shape[2]
    n_targets = 1

    # encoder: one n_units vector per input window
    raw_input = Input(shape=(lahead, n_features), name='raw_features')
    encoded = LSTM(
        n_units,
        batch_size=batch_size,
        return_sequences=False,
        activation='tanh',
        name='encoded_features',
    )(raw_input)

    # decoder: repeat the encoding for every time step and unroll it again
    repeated = RepeatVector(lahead, input_shape=(n_units, ))(encoded)
    decoded = LSTM(
        n_units,
        batch_size=batch_size,
        return_sequences=True,
        dropout=0.2,
        activation='tanh',
    )(repeated)

    # per-time-step sigmoid head on top of the decoder sequence
    per_step_head = TimeDistributed(
        Dense(n_targets, activation='sigmoid'),
        name='reconstructed_targets',
    )
    targets_out = per_step_head(decoded)

    return Model(inputs=[raw_input], outputs=[targets_out])

In [None]:
from keras import backend as K
from keras.losses import binary_crossentropy
def double_binary_crossentropy(y_true, y_pred):
    """Apply keras' binary_crossentropy, then average its result over the last axis."""
    per_step = binary_crossentropy(y_true, y_pred)
    return K.mean(per_step, axis=-1)


# build the model and compile it with the custom cross-entropy loss (alternatives kept for reference)
mod2 = create_coupled()
# mod2.compile(loss='mae', optimizer='adam')
mod2.compile(loss=double_binary_crossentropy, optimizer='adam')
# mod2.compile(loss=double_binary_crossentropy, optimizer='adam', sample_weight_mode="temporal")
mod2.summary()

In [None]:
def my_predict(model, np_in, index):
    """
    Predict with ``model``, plot actual vs predicted target and return the predictions.

    Parameters
    ----------
    model : fitted keras model
    np_in : dict with the windows under 'raw_features' and the actual targets
            under 'raw_targets' (the latter is only used for the plot)
    index : pandas index with one entry per window

    Returns
    -------
    dataframe indexed by 'id' with a single 'prediction' column holding the
    prediction for the last time step of every window.
    """
    predicted = model.predict(np_in, batch_size=batch_size)

    # actual vs predicted for the first target, last time step of every window
    target_col = 0
    actual_last = pd.Series(np_in['raw_targets'][:,-1,target_col], index=index).astype('int16')
    pred_last = pd.Series(predicted[:,-1,target_col], index=index)
    comparison = pd.DataFrame({'actual': actual_last, 'pred': pred_last})
    comparison.plot(figsize=(20,3), alpha=0.5)
    plt.title('target %i'%(target_col))
    plt.legend()
    plt.show()

    # output table
    out = pd.DataFrame({
        'prediction': predicted[:,-1,0].squeeze(),
        'id': index,
    })
    return out.set_index(['id'])

In [None]:
# iterate over wider masks of the HandStart = 1 and train
# the same mod2 instance is fitted in every iteration, so each wider mask continues from the previous weights
mask_init = (ytrain_roll[:,-1,0]==1)
# for n_stretch in [20, 40, 70, 110, 160, 220, 290, 370]:
# for n_stretch in [160, 220, 290, 370]:
for n_stretch in [290, 370]:
    print('status', n_stretch)
    mask_stretched = stretch_mask(mask_init, n_stretch)

    # balanced subset: only the windows inside the stretched mask
    xtrain_bal = xtrain_roll[mask_stretched]
    ytrain_bal = ytrain_roll[mask_stretched]
    ztrain_bal = ztrain_roll[mask_stretched]

    print('shape', xtrain_bal.shape, ytrain_bal.shape, ztrain_bal.shape)

    print(time.ctime(),'fit start')
    # only the first target column is used as training target
    history = mod2.fit(
             {   'raw_features': xtrain_bal,
             },
             {   'reconstructed_targets': ytrain_bal[:,:,:1],
             },
             batch_size=batch_size,
             epochs=250, # FIXME 400?
             # initial_epoch = 17,
             verbose=2,
             #validation_data=None,
             validation_split = 0.3,
             shuffle=False
        )
    print(time.ctime(),'fit end')
    
    # ignore first few points since large relative to others
    # plt.plot(history.history['loss'][5:], label='loss')
    plt.plot(history.history['loss'], label='loss') # [5:]
    plt.plot(history.history['val_loss'], label='val_loss')
    plt.legend()
    plt.title('training loss')
    plt.show()
    
    # predict on balanced series (plots implicitly actual vs predicted)
    n_show = 1000
    ytrain_pred = my_predict(
        mod2,
        {   'raw_features': xtrain_bal[:n_show],
            'raw_targets':  ytrain_bal[:n_show],
        },
        ztrain_bal.index[:n_show],
    )
    # ytrain_pred.shape
    
    # predict on non-balanced series
    n_show = 1000
    ytrain_pred = my_predict(
        mod2,
        {   'raw_features': xtrain_roll[:n_show],
            'raw_targets':  ytrain_roll[:n_show],
        },
        ztrain_roll.index[:n_show],
    )
    # ytrain_pred.shape

## save model

## plot trained result

In [None]:
# forward and backward in stretching

In [None]:
# till 200

In [None]:
# predict on non-balanced series
# NOTE(review): the +1.5 offset on the actual targets presumably separates the two curves in the plot;
# my_predict casts the actual values to int16 before plotting — confirm the offset behaves as intended
n_show = 1000*5
ytrain_pred = my_predict(
    mod2,
    {   'raw_features': xtrain_roll[:n_show],
        'raw_targets':  ytrain_roll[:n_show] + 1.5,
    },
    ztrain_roll.index[:n_show],
)
ytrain_pred.shape

In [None]:
# stop at 290

In [None]:
# till n_stretch = 200

In [None]:
# largest predicted value among the windows predicted above
ytrain_pred.max()

## predict on test data

In [None]:
# the rows after the first n_train form the test split
n_test = train_features.shape[0] - n_train
x_test = train_features.tail(n=n_test).copy()
y_test = train_targets.tail(n=n_test).copy()
print('x_test, y_test', x_test.shape, y_test.shape)

# same preprocessing as for training; the min/max scaling is refit on the test groups inside preprocess()
xtest_roll, ytest_roll, ztest_roll = preprocess(x_test, y_test)
xtest_roll.shape, ytest_roll.shape, ztest_roll.shape

In [None]:
# predict on the first n_show test windows
# NOTE(review): the +1.1 offset on the actual targets presumably separates the curves in the plot — confirm
n_show = 1000*5
ytest_pred = my_predict(
    mod2,
    {   'raw_features': xtest_roll[:n_show],
        'raw_targets':  ytest_roll[:n_show] + 1.1,
    },
    ztest_roll.index[:n_show],
)
ytest_pred.shape

## predict on new subject

In [None]:
# load series 1..8 of subject 2, which the model was not trained on
subj2_features, subj2_targets = my_load(subj_ids = [2], series_ids = [x+1 for x in range(8)])
subj2_features.shape, subj2_targets.shape

In [None]:
# use all rows of subject 2 (no train/test split)
x_subj2 = subj2_features.copy()
y_subj2 = subj2_targets.copy()
print('x_subj2, y_subj2', x_subj2.shape, y_subj2.shape)

# same preprocessing as above; fail early if the truncation left no windows
xsubj2_roll, ysubj2_roll, zsubj2_roll = preprocess(x_subj2, y_subj2)
assert xsubj2_roll.shape[0] > 0
xsubj2_roll.shape, ysubj2_roll.shape, zsubj2_roll.shape

In [None]:
# predict on the first n_show windows of subject 2
# NOTE(review): the +1.1 offset on the actual targets presumably separates the curves in the plot — confirm
n_show = 1000*5
ysubj2_pred = my_predict(
    mod2,
    {   'raw_features': xsubj2_roll[:n_show],
        'raw_targets':  ysubj2_roll[:n_show] + 1.1,
    },
    zsubj2_roll.index[:n_show],
)
ysubj2_pred.shape