This notebook shows an example training process for a bi-directional GRU network using augmented dataset (shifting time series data) 

In [1]:
import glob
import pandas as pd
from tensorflow import keras
import numpy as np
import os 
from sklearn.model_selection import StratifiedKFold
import matplotlib.pylab as plt
from tqdm import tqdm
import tensorflow as tf
import re 
import keras
from tensorflow.keras.layers import Input, TimeDistributed, GRU, Conv2D, Conv2DTranspose, MaxPooling2D, AveragePooling2D, BatchNormalization, concatenate, ConvLSTM2D, Reshape, Conv3D, Flatten, LSTM, Dense, Dropout, Add
from tensorflow.keras.layers import Embedding, GlobalAveragePooling1D, Bidirectional, Conv1D, MaxPooling1D, GlobalMaxPooling1D, GlobalMaxPool1D, Attention
from tensorflow.keras.models import Model, load_model
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau, LearningRateScheduler
from tensorflow.keras.models import Sequential, load_model, save_model
from sklearn.utils import shuffle
from tensorflow.keras.preprocessing.sequence import pad_sequences 
from tensorflow.keras.preprocessing.text import Tokenizer 
from tensorflow.keras import regularizers
from tensorflow.keras.optimizers import Adam
from sklearn.model_selection import train_test_split, KFold, StratifiedKFold
import random 
import time

In [2]:
train = pd.read_csv('motion_detect/train_features.csv') 
train_labels = pd.read_csv('motion_detect/train_labels.csv') 
test = pd.read_csv('motion_detect/test_features.csv') 
submission = pd.read_csv('motion_detect/sample_submission.csv')

In [3]:
X = tf.reshape(np.array(train.iloc[:,2:]), [-1,600,6]) 
X = np.asarray(X) 
X.shape

(3125, 600, 6)

In [4]:
y = train_labels['label'].values 
y.shape 

(3125,)

## Model Train and prediction

In [5]:
def build_model():  
    inputs = Input(shape = (600,6)) 
    bn = BatchNormalization()(inputs)
    bi_gru = Bidirectional(GRU(128, return_sequences = True))(bn) 
    dropout = Dropout(0.25)(bi_gru) 
    bi_gru = Bidirectional(GRU(128, return_sequences = False))(dropout) 
    dense = Dense(128, activation = 'relu')(bi_gru) 
    bn = BatchNormalization()(dense) 
    outputs = Dense(61, activation = 'softmax')(bn)   
    model = Model(inputs = inputs, outputs = outputs) 
    model.compile(optimizer = 'adam', loss = 'sparse_categorical_crossentropy', metrics = ['accuracy']) 
    return model 

In [6]:
kfold = StratifiedKFold(n_splits = 5, random_state = 960418, shuffle = True)
for idx,(train_idx, val_idx) in enumerate(kfold.split(X,y)):
    print("... Validating on fold {} ...".format(idx+1)) 
    X_train, X_val = X[train_idx], X[val_idx]
    y_train, y_val = y[train_idx], y[val_idx] 
    
    ##### augment data #####
    print("... augmenting data ...")
    X_augmented = [] 
    y_augmented = [] 
    for i in tqdm(range(X_train.shape[0])): 
        for j in range(10): 
            shifted = np.roll(X_train[i], int(random.random() * 600)) 
            X_augmented.append(shifted) 
            y_augmented.append(y_train[i]) 
    X_augmented = np.asarray(X_augmented) 
    y_augmented = np.asarray(y_augmented)
    X_train = np.concatenate([X_train, X_augmented]) 
    y_train = np.concatenate([y_train, y_augmented]) 
    
    ##### train model #####  
    print("... training ...") 
    model = build_model() 
    model_path = 'motion_detect/kfold' + str(idx+1) + '/epoch_{epoch:03d}_val_{val_loss:.3f}.h5'
    learning_rate_reduction = ReduceLROnPlateau(monitor = 'val_loss', patience = 2, verbose = 1, factor = 0.8)
    checkpoint = ModelCheckpoint(filepath = model_path, monitor = 'val_loss', verbose = 1, save_best_only = True)
    early_stopping = EarlyStopping(monitor = 'val_loss', patience = 8) 

    model.fit(X_train,
              y_train,
              epochs=200,
              batch_size = 128,
              validation_data = (X_val, y_val),
              callbacks = [learning_rate_reduction, checkpoint, early_stopping])

 11%|█         | 270/2500 [00:00<00:00, 2694.61it/s]

... Validating on fold 1 ...
... augmenting data ...


100%|██████████| 2500/2500 [00:00<00:00, 2623.48it/s]


... training ...
Epoch 1/200

Epoch 00001: val_loss improved from inf to 2.63664, saving model to motion_detect/kfold1/epoch_001_val_2.637.h5
Epoch 2/200

Epoch 00002: val_loss improved from 2.63664 to 2.24310, saving model to motion_detect/kfold1/epoch_002_val_2.243.h5
Epoch 3/200

Epoch 00003: val_loss improved from 2.24310 to 1.77998, saving model to motion_detect/kfold1/epoch_003_val_1.780.h5
Epoch 4/200

Epoch 00004: val_loss improved from 1.77998 to 1.46073, saving model to motion_detect/kfold1/epoch_004_val_1.461.h5
Epoch 5/200

Epoch 00005: val_loss improved from 1.46073 to 1.28788, saving model to motion_detect/kfold1/epoch_005_val_1.288.h5
Epoch 6/200

Epoch 00006: val_loss improved from 1.28788 to 1.22146, saving model to motion_detect/kfold1/epoch_006_val_1.221.h5
Epoch 7/200

Epoch 00007: val_loss did not improve from 1.22146
Epoch 8/200

Epoch 00008: val_loss improved from 1.22146 to 1.15637, saving model to motion_detect/kfold1/epoch_008_val_1.156.h5
Epoch 9/200

Epoch 0

  0%|          | 0/2500 [00:00<?, ?it/s]


Epoch 00019: ReduceLROnPlateau reducing learning rate to 0.00032768002711236477.

Epoch 00019: val_loss did not improve from 1.13280
... Validating on fold 2 ...
... augmenting data ...


100%|██████████| 2500/2500 [00:01<00:00, 2345.24it/s]


... training ...
Epoch 1/200

Epoch 00001: val_loss improved from inf to 2.73700, saving model to motion_detect/kfold2/epoch_001_val_2.737.h5
Epoch 2/200

Epoch 00002: val_loss improved from 2.73700 to 2.14274, saving model to motion_detect/kfold2/epoch_002_val_2.143.h5
Epoch 3/200

Epoch 00003: val_loss improved from 2.14274 to 1.61744, saving model to motion_detect/kfold2/epoch_003_val_1.617.h5
Epoch 4/200

Epoch 00004: val_loss improved from 1.61744 to 1.41084, saving model to motion_detect/kfold2/epoch_004_val_1.411.h5
Epoch 5/200

Epoch 00005: val_loss improved from 1.41084 to 1.25392, saving model to motion_detect/kfold2/epoch_005_val_1.254.h5
Epoch 6/200

Epoch 00006: val_loss improved from 1.25392 to 1.22073, saving model to motion_detect/kfold2/epoch_006_val_1.221.h5
Epoch 7/200

Epoch 00007: val_loss improved from 1.22073 to 1.12419, saving model to motion_detect/kfold2/epoch_007_val_1.124.h5
Epoch 8/200

Epoch 00008: val_loss did not improve from 1.12419
Epoch 9/200

Epoch 0

 13%|█▎        | 321/2500 [00:00<00:00, 3207.59it/s]


Epoch 00015: ReduceLROnPlateau reducing learning rate to 0.00040960004553198815.

Epoch 00015: val_loss did not improve from 1.12419
... Validating on fold 3 ...
... augmenting data ...


100%|██████████| 2500/2500 [00:00<00:00, 2987.88it/s]


... training ...
Epoch 1/200

Epoch 00001: val_loss improved from inf to 2.68465, saving model to motion_detect/kfold3/epoch_001_val_2.685.h5
Epoch 2/200

Epoch 00002: val_loss improved from 2.68465 to 2.33377, saving model to motion_detect/kfold3/epoch_002_val_2.334.h5
Epoch 3/200

Epoch 00003: val_loss improved from 2.33377 to 1.88069, saving model to motion_detect/kfold3/epoch_003_val_1.881.h5
Epoch 4/200

Epoch 00004: val_loss improved from 1.88069 to 1.66642, saving model to motion_detect/kfold3/epoch_004_val_1.666.h5
Epoch 5/200

Epoch 00005: val_loss improved from 1.66642 to 1.38394, saving model to motion_detect/kfold3/epoch_005_val_1.384.h5
Epoch 6/200

Epoch 00006: val_loss improved from 1.38394 to 1.27008, saving model to motion_detect/kfold3/epoch_006_val_1.270.h5
Epoch 7/200

Epoch 00007: val_loss improved from 1.27008 to 1.20145, saving model to motion_detect/kfold3/epoch_007_val_1.201.h5
Epoch 8/200

Epoch 00008: val_loss improved from 1.20145 to 1.19759, saving model to

 12%|█▏        | 299/2500 [00:00<00:00, 2984.92it/s]


Epoch 00016: ReduceLROnPlateau reducing learning rate to 0.00040960004553198815.

Epoch 00016: val_loss did not improve from 1.19759
... Validating on fold 4 ...
... augmenting data ...


100%|██████████| 2500/2500 [00:00<00:00, 3316.51it/s]


... training ...
Epoch 1/200

Epoch 00001: val_loss improved from inf to 2.77216, saving model to motion_detect/kfold4/epoch_001_val_2.772.h5
Epoch 2/200

Epoch 00002: val_loss improved from 2.77216 to 2.54284, saving model to motion_detect/kfold4/epoch_002_val_2.543.h5
Epoch 3/200

Epoch 00003: val_loss improved from 2.54284 to 1.85684, saving model to motion_detect/kfold4/epoch_003_val_1.857.h5
Epoch 4/200

Epoch 00004: val_loss improved from 1.85684 to 1.50082, saving model to motion_detect/kfold4/epoch_004_val_1.501.h5
Epoch 5/200

Epoch 00005: val_loss improved from 1.50082 to 1.29580, saving model to motion_detect/kfold4/epoch_005_val_1.296.h5
Epoch 6/200

Epoch 00006: val_loss improved from 1.29580 to 1.27927, saving model to motion_detect/kfold4/epoch_006_val_1.279.h5
Epoch 7/200

Epoch 00007: val_loss improved from 1.27927 to 1.16367, saving model to motion_detect/kfold4/epoch_007_val_1.164.h5
Epoch 8/200

Epoch 00008: val_loss did not improve from 1.16367
Epoch 9/200

Epoch 0

 12%|█▏        | 296/2500 [00:00<00:00, 2955.63it/s]


Epoch 00017: ReduceLROnPlateau reducing learning rate to 0.00040960004553198815.

Epoch 00017: val_loss did not improve from 1.13621
... Validating on fold 5 ...
... augmenting data ...


100%|██████████| 2500/2500 [00:00<00:00, 3366.10it/s]


... training ...
Epoch 1/200

Epoch 00001: val_loss improved from inf to 2.56215, saving model to motion_detect/kfold5/epoch_001_val_2.562.h5
Epoch 2/200

Epoch 00002: val_loss improved from 2.56215 to 2.45252, saving model to motion_detect/kfold5/epoch_002_val_2.453.h5
Epoch 3/200

Epoch 00003: val_loss improved from 2.45252 to 1.93683, saving model to motion_detect/kfold5/epoch_003_val_1.937.h5
Epoch 4/200

Epoch 00004: val_loss improved from 1.93683 to 1.71500, saving model to motion_detect/kfold5/epoch_004_val_1.715.h5
Epoch 5/200

Epoch 00005: val_loss improved from 1.71500 to 1.50300, saving model to motion_detect/kfold5/epoch_005_val_1.503.h5
Epoch 6/200

Epoch 00006: val_loss improved from 1.50300 to 1.44938, saving model to motion_detect/kfold5/epoch_006_val_1.449.h5
Epoch 7/200

Epoch 00007: val_loss improved from 1.44938 to 1.32710, saving model to motion_detect/kfold5/epoch_007_val_1.327.h5
Epoch 8/200

Epoch 00008: val_loss improved from 1.32710 to 1.30067, saving model to

## Create submission file 

In [8]:
gru1 = load_model('motion_detect/kfold1/epoch_011_val_1.133.h5')
gru2 = load_model('motion_detect/kfold2/epoch_007_val_1.124.h5') 
gru3 = load_model('motion_detect/kfold3/epoch_008_val_1.198.h5')  
gru4 = load_model('motion_detect/kfold4/epoch_009_val_1.136.h5') 
gru5 = load_model('motion_detect/kfold5/epoch_010_val_1.241.h5')

In [9]:
test_X = tf.reshape(np.array(test.iloc[:,2:]),[-1, 600, 6])
test_X = np.asarray(test_X)
test_X.shape

(782, 600, 6)

In [10]:
pred1 = gru1.predict(test_X) 
pred2 = gru2.predict(test_X)
pred3 = gru3.predict(test_X) 
pred4 = gru4.predict(test_X) 
pred5 = gru5.predict(test_X) 

In [12]:
pred_avg = (pred1 + pred2 + pred3 + pred4 + pred5)/5.0 

In [13]:
submission.iloc[:,1:] = pred_avg 

In [14]:
submission.to_csv('motion_detect/bi_gru_5_fold.csv',index=False)