In [0]:
# !pip install tensorflow==1.14

In [0]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from keras.models import Sequential, Model
from keras.layers import LSTM, Dense,BatchNormalization, Dropout, Input
from keras.optimizers import adam
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score
import matplotlib.pyplot as plt
from sklearn import metrics
import glob
from scipy import signal

Reading Train data

In [0]:
# Collect the per-recording CSV files for the training features and labels.
# glob.glob returns paths in arbitrary (filesystem-dependent) order, so both
# lists are sorted; otherwise the i-th feature file is not guaranteed to be
# concatenated at the same position as the i-th label file.
# NOTE(review): this assumes feature/label file names sort identically - confirm.
xpath = r'Dataset/train_set/features/'
xfiles = sorted(glob.glob(xpath + "/*.csv"))
ypath = r'Dataset/train_set/labels/'
yfiles = sorted(glob.glob(ypath + "/*.csv"))

# A differing file count means rows of x_data and y_data cannot line up.
if len(xfiles) != len(yfiles):
    raise ValueError('Found %d feature files but %d label files'
                     % (len(xfiles), len(yfiles)))

xli = []
yli = []

for f in xfiles:
    df = pd.read_csv(f, index_col=None, header=0)
    xli.append(df)

for f in yfiles:
    df = pd.read_csv(f, index_col=None, header=0)
    yli.append(df)

# Concatenate all per-file frames row-wise, then drop the first column
# (presumably a row id - verify against the CSV schema) and keep raw arrays.
x_df = pd.concat(xli, axis=0, ignore_index=True)
x_df = x_df.iloc[:,1:]
x_data=x_df.values

y_df = pd.concat(yli, axis=0, ignore_index=True)
y_df = y_df.iloc[:,1:]
y_data=y_df.values

Standardization

In [0]:
# Zero-mean / unit-variance scaling of every training feature column; the
# scaled features and the raw labels are wrapped in DataFrames.
Xsc = pd.DataFrame(StandardScaler().fit_transform(x_data))
y = pd.DataFrame(y_data)

Generator function

In [0]:
time_steps=1000  # number of raw rows spanned by one input window
subsample=50     # keep every 50th row of a window
def generator_seq(Xtr,batch_size,featr,labels=None):
    """Endlessly yield ``(inputs, targets)`` mini-batches of sub-sampled windows.

    Each batch starts at one random row ``r`` and contains ``batch_size``
    consecutive windows starting at rows ``r, r+1, ...``. A window covers
    ``time_steps`` rows of ``Xtr`` taken every ``subsample`` rows; its target
    is the label row located ``time_steps`` rows after the window start.

    Parameters
    ----------
    Xtr : array-like or DataFrame of shape (n_rows, featr)
        Feature matrix; converted to an ndarray once, up front.
    batch_size : int
        Number of consecutive windows per batch.
    featr : int
        Number of feature columns in ``Xtr``.
    labels : array-like, optional
        Label rows aligned with ``Xtr``. Defaults to the module-level
        ``y_data`` so existing three-argument calls keep working.

    Yields
    ------
    (ndarray, ndarray)
        Inputs of shape ``(batch_size, time_steps // subsample, featr)`` and
        the stacked label rows for the batch.

    Raises
    ------
    ValueError
        If ``Xtr`` has no more than ``2 * time_steps`` rows.
    """
    # Convert once: slicing a DataFrame for every sample is needlessly slow.
    features = np.asarray(Xtr)
    max_start = len(features) - 2 * time_steps
    if max_start <= 0:
        raise ValueError('Xtr needs more than %d rows, got %d'
                         % (2 * time_steps, len(features)))
    window_len = time_steps // subsample
    while True:
        # Resolved on every batch so the fallback tracks the current y_data.
        targets = np.asarray(y_data if labels is None else labels)
        x_time_data = np.zeros((batch_size, window_len, featr))
        random_index = np.random.randint(0, max_start)
        for offset in range(batch_size):
            first_row = random_index + offset
            x_time_data[offset] = features[first_row:first_row + time_steps:subsample]
        yy = np.asarray([targets[random_index + offset + time_steps]
                         for offset in range(batch_size)])
        yield x_time_data, yy

## Model without PCA and without filtering

In [0]:
# Baseline network on the 32 standardised features: one LSTM layer over the
# sub-sampled window, light dropout, a small dense layer, and six independent
# sigmoid outputs trained with binary cross-entropy.
model = Sequential([
    LSTM(128, input_shape = (time_steps//subsample, 32)),
    Dropout(0.05),
    Dense(32, activation = "relu"),
    Dense(6, activation = "sigmoid"),
])
ad = adam(lr = 0.001)
model.compile(optimizer=ad, loss='binary_crossentropy')
model.summary()

In [0]:
# Train the baseline model on random windows of the standardised features
# (batches of 32 windows, 32 feature columns).
train_batches = generator_seq(Xsc,32,32)
model.fit_generator(train_batches, steps_per_epoch=20000, epochs=21)

Reading Test data

In [0]:
# Collect the per-recording CSV files for the test features and labels.
# glob.glob returns paths in arbitrary order, so both lists are sorted to keep
# feature and label files concatenated in the same order.
# NOTE(review): this assumes feature/label file names sort identically - confirm.
xtestpath = r'Dataset/test_set/features/'
xtestfiles = sorted(glob.glob(xtestpath + "/*.csv"))
ytestpath = r'Dataset/test_set/labels/'
ytestfiles = sorted(glob.glob(ytestpath + "/*.csv"))

# A differing file count means rows of features and labels cannot line up.
if len(xtestfiles) != len(ytestfiles):
    raise ValueError('Found %d feature files but %d label files'
                     % (len(xtestfiles), len(ytestfiles)))

xtestli = []
ytestli = []

for f in xtestfiles:
    df = pd.read_csv(f, index_col=None, header=0)
    xtestli.append(df)

for f in ytestfiles:
    df = pd.read_csv(f, index_col=None, header=0)
    ytestli.append(df)

# Concatenate all per-file frames row-wise and drop the first column
# (presumably a row id - verify against the CSV schema).
x_ts_df = pd.concat(xtestli, axis=0, ignore_index=True)
x_ts_df = x_ts_df.iloc[:,1:]
x_val_data=x_ts_df.values
# Scale the test features with the mean/variance of the TRAINING features
# (x_data). Fitting a separate scaler on the test set would feed the model
# inputs on a different scale than it was trained on and leak test statistics.
x_val_data=StandardScaler().fit(x_data).transform(x_val_data)

y_ts_df = pd.concat(ytestli, axis=0, ignore_index=True)
y_ts_df = y_ts_df.iloc[:,1:]
y_val_data=y_ts_df.values

Generator for test data

In [0]:
def val_generator(x_val,featr,labels=None):
    """Endlessly yield single-sample ``(inputs, target)`` pairs for evaluation.

    Every item is one randomly positioned window covering ``time_steps`` rows
    of ``x_val`` taken every ``subsample`` rows, paired with the label row
    located ``time_steps`` rows after the window start.

    Parameters
    ----------
    x_val : array-like or DataFrame of shape (n_rows, featr)
        Feature matrix; converted to an ndarray once, up front.
    featr : int
        Number of feature columns in ``x_val``.
    labels : array-like, optional
        Label rows aligned with ``x_val``. Defaults to the module-level
        ``y_val_data`` so existing two-argument calls keep working.

    Yields
    ------
    (ndarray, ndarray)
        Inputs of shape ``(1, time_steps // subsample, featr)`` and the
        matching label row stacked into an array with a leading axis of 1.

    Raises
    ------
    ValueError
        If ``x_val`` has no more than ``2 * time_steps`` rows.
    """
    batch_size = 1
    features = np.asarray(x_val)
    max_start = len(features) - 2 * time_steps
    if max_start <= 0:
        raise ValueError('x_val needs more than %d rows, got %d'
                         % (2 * time_steps, len(features)))
    window_len = time_steps // subsample
    while True:
        # Resolved on every item so the fallback tracks the current y_val_data.
        targets = np.asarray(y_val_data if labels is None else labels)
        x_time_data = np.zeros((batch_size, window_len, featr))
        random_index = np.random.randint(0, max_start)
        for offset in range(batch_size):
            first_row = random_index + offset
            x_time_data[offset] = features[first_row:first_row + time_steps:subsample]
        yy = np.asarray([targets[random_index + offset + time_steps]
                         for offset in range(batch_size)])
        yield x_time_data, yy

In [0]:
# Draw num_test evaluation windows and record the baseline model's raw
# outputs next to the true label rows.
gen_data = val_generator(x_val_data,32)
scores = []
predictions = []
y_actual = []
num_test = 1000
for i in range(num_test):
  # Rejection-sample: keep drawing until the label row contains a 1,
  # i.e. only windows with at least one active event are evaluated.
  while True:
    x_test, y_test = next(gen_data)
    if 1 in y_test:
      break

  y_out = model.predict(x_test)
  predictions.append(y_out)
  y_actual.append(y_test)

In [0]:
# Stack the per-sample model outputs into an (n_samples, 6) matrix. The row
# count is taken from the collected list instead of a hard-coded 1000, so the
# cell keeps working when num_test is changed.
temp = np.array(predictions).reshape(len(predictions),6)
prediction_df = pd.DataFrame(temp)
# One-hot encode the arg-max output: exactly one predicted event per sample.
m = np.zeros_like(prediction_df.values)
m[np.arange(len(prediction_df)), prediction_df.values.argmax(1)] = 1
prediction_df = pd.DataFrame(m, columns = prediction_df.columns).astype(int)
# Matching matrix of true label rows.
temp = np.concatenate(y_actual).reshape(len(y_actual),6)
y_actual_df = pd.DataFrame(temp)

Calculating the performance metrics

In [0]:
# Per-event classification report: each of the six label columns is scored
# as an independent binary problem against the one-hot predictions.
events=['HandStart','FirstDigitTouch','BothStartLoadPhase','LiftOff','Replace','BothReleased']
for i, event_name in enumerate(events):
  y_true_col = y_actual_df.iloc[:,i].values
  y_pred_col = prediction_df.iloc[:,i].values
  # accuracy: (tp + tn) / (p + n)
  accuracy = accuracy_score(y_true_col, y_pred_col)
  print('Accuracy for %s: %f' %(event_name,accuracy))
  # precision: tp / (tp + fp)
  precision = precision_score(y_true_col, y_pred_col)
  print('precision for %s: %f' %(event_name,precision))
  # recall: tp / (tp + fn)
  recall = recall_score(y_true_col, y_pred_col)
  print('recall for %s: %f' %(event_name,recall))
  # f1: 2 tp / (2 tp + fp + fn)
  f1 = f1_score(y_true_col, y_pred_col)
  print('f1_score for %s: %f' %(event_name,f1))
  print('\n')


Plotting the ROC curves and AUC for each event

In [0]:
# ROC curve and AUC per event.
# roc_curve / roc_auc_score need continuous scores to sweep the decision
# threshold; the one-hot prediction_df collapses every curve to a single
# operating point. Use the raw sigmoid outputs collected in `predictions`.
plt.figure(0).clf()
y_score = np.concatenate(predictions, axis=0)  # (n_samples, 6) model outputs
for i in range(6):
  y_true_col = y_actual_df.iloc[:,i].values
  fpr, tpr, thresh = metrics.roc_curve(y_true_col, y_score[:,i])
  auc = metrics.roc_auc_score(y_true_col, y_score[:,i])
  plt.plot(fpr,tpr,label=str(events[i])+", auc="+str(round(auc,3)))
plt.xlabel('False positive rate')
plt.ylabel('True positive rate')
plt.legend(loc=0)

## PCA

In [0]:
from sklearn.decomposition import PCA
# Learn a 21-component PCA basis from the standardised training features and
# project the training data onto it.
pca = PCA(n_components = 21).fit(Xsc)
X_train_pca = pca.transform(Xsc)

In [0]:
# Same architecture as the baseline network, but fed the 21 PCA components
# instead of the 32 standardised features.
model_pca = Sequential([
    LSTM(128, input_shape = (time_steps//subsample, 21)),
    Dropout(0.05),
    Dense(32, activation = "relu"),
    Dense(6, activation = "sigmoid"),
])
ad = adam(lr = 0.001)
model_pca.compile(optimizer=ad, loss='binary_crossentropy')
model_pca.summary()

In [0]:
# Train the PCA model on random windows of the projected training data
# (batches of 32 windows, 21 components).
train_batches_pca = generator_seq(X_train_pca,32,21)
model_pca.fit_generator(train_batches_pca, steps_per_epoch=20000, epochs=21)

Epoch 1/1


<keras.callbacks.callbacks.History at 0x7f180c7f4908>

PCA projection of the test data

In [0]:
# Project the test features with the PCA that was fitted on the TRAINING data.
# Refitting PCA on the test set yields a different basis (component order,
# sign and direction), so the columns would no longer mean what model_pca was
# trained on, and test-set statistics would leak into preprocessing.
X_test_pca = pca.transform(x_val_data)

In [0]:
# Draw num_test evaluation windows from the PCA-projected test data and
# record the PCA model's raw outputs next to the true label rows.
gen_data = val_generator(X_test_pca,21)
scores = []
predictions = []
y_actual = []
num_test = 1000

for i in range(num_test):
  # Rejection-sample: keep drawing until the label row contains a 1,
  # i.e. only windows with at least one active event are evaluated.
  while True:
    x_test, y_test = next(gen_data)
    if 1 in y_test:
      break

  y_out = model_pca.predict(x_test)
  predictions.append(y_out)
  y_actual.append(y_test)

In [0]:
# Stack the per-sample model outputs into an (n_samples, 6) matrix. The row
# count is taken from the collected list instead of a hard-coded 1000, so the
# cell keeps working when num_test is changed.
temp = np.array(predictions).reshape(len(predictions),6)
prediction_df = pd.DataFrame(temp)
# One-hot encode the arg-max output: exactly one predicted event per sample.
m = np.zeros_like(prediction_df.values)
m[np.arange(len(prediction_df)), prediction_df.values.argmax(1)] = 1
prediction_df = pd.DataFrame(m, columns = prediction_df.columns).astype(int)
# Matching matrix of true label rows.
temp = np.concatenate(y_actual).reshape(len(y_actual),6)
y_actual_df = pd.DataFrame(temp)

In [0]:
# Per-event classification report for the PCA model: each of the six label
# columns is scored as an independent binary problem.
events=['HandStart','FirstDigitTouch','BothStartLoadPhase','LiftOff','Replace','BothReleased']
for i, event_name in enumerate(events):
  y_true_col = y_actual_df.iloc[:,i].values
  y_pred_col = prediction_df.iloc[:,i].values
  # accuracy: (tp + tn) / (p + n)
  accuracy = accuracy_score(y_true_col, y_pred_col)
  print('Accuracy for %s: %f' %(event_name,accuracy))
  # precision: tp / (tp + fp)
  precision = precision_score(y_true_col, y_pred_col)
  print('precision for %s: %f' %(event_name,precision))
  # recall: tp / (tp + fn)
  recall = recall_score(y_true_col, y_pred_col)
  print('recall for %s: %f' %(event_name,recall))
  # f1: 2 tp / (2 tp + fp + fn)
  f1 = f1_score(y_true_col, y_pred_col)
  print('f1_score for %s: %f' %(event_name,f1))
  print('\n')

In [0]:
# ROC curve and AUC per event for the PCA model.
# roc_curve / roc_auc_score need continuous scores to sweep the decision
# threshold; the one-hot prediction_df collapses every curve to a single
# operating point. Use the raw sigmoid outputs collected in `predictions`.
plt.figure(0).clf()
y_score = np.concatenate(predictions, axis=0)  # (n_samples, 6) model outputs
for i in range(6):
  y_true_col = y_actual_df.iloc[:,i].values
  fpr, tpr, thresh = metrics.roc_curve(y_true_col, y_score[:,i])
  auc = metrics.roc_auc_score(y_true_col, y_score[:,i])
  plt.plot(fpr,tpr,label=str(events[i])+", auc="+str(round(auc,3)))
plt.xlabel('False positive rate')
plt.ylabel('True positive rate')
plt.legend(loc=0)

## PCA and Filter

In [0]:
# Work on copies so the unfiltered train/test feature frames stay untouched.
x_data_df, x_test_df = x_df.copy(), x_ts_df.copy()

In [0]:
# NOTE(review): fs must equal the recordings' real sampling rate - confirm,
# because the normalised cut-off below depends on it.
fs = 1000  # Sampling frequency
fc = 30  # Cut-off frequency of the filter
w = fc / (fs / 2) # Normalize the frequency
# Design the 8th-order Butterworth low-pass once; it is identical for every
# column, so there is no reason to recompute it inside a loop.
# NOTE(review): SciPy recommends second-order sections (output='sos' with
# sosfiltfilt) for numerical stability at this order - consider switching the
# train and test filters together.
b, a = signal.butter(8, w, 'low')
# Zero-phase filter every column along the time axis. The result is built from
# the unfiltered x_df, so re-running this cell does not filter the data twice.
x_data_df = pd.DataFrame(signal.filtfilt(b, a, x_df.values, axis=0),
                         columns=x_df.columns, index=x_df.index)

In [0]:
# Standardise the low-pass-filtered training features.
# Note: this rebinds Xsc, which previously held the unfiltered scaled data.
Xsc = pd.DataFrame(StandardScaler().fit_transform(x_data_df.values))
y = pd.DataFrame(y_data)

Applying PCA to the filtered training data

In [0]:
# Learn a 21-component PCA basis from the filtered, standardised training
# features and project the training data onto it.
pca_filter = PCA(n_components = 21).fit(Xsc)
X_train_filter_pca = pca_filter.transform(Xsc)

In [0]:
# Same architecture again, trained on the 21 PCA components of the
# low-pass-filtered features.
model_pca_filter = Sequential([
    LSTM(128, input_shape = (time_steps//subsample, 21)),
    Dropout(0.05),
    Dense(32, activation = "relu"),
    Dense(6, activation = "sigmoid"),
])
ad = adam(lr = 0.001)
model_pca_filter.compile(optimizer=ad, loss='binary_crossentropy')
model_pca_filter.summary()

In [0]:
# Train the filter+PCA model on random windows of the projected training data
# (batches of 32 windows, 21 components).
train_batches_filter_pca = generator_seq(X_train_filter_pca,32,21)
model_pca_filter.fit_generator(train_batches_filter_pca, steps_per_epoch=20000, epochs=21)

In [0]:
# Design the low-pass filter once (same order and cut-off as for the training
# data) instead of recomputing it for every column.
b, a = signal.butter(8, w, 'low')
# Zero-phase filter every test column along the time axis. The result is built
# from the unfiltered x_ts_df, so re-running this cell does not filter twice.
x_test_df = pd.DataFrame(signal.filtfilt(b, a, x_ts_df.values, axis=0),
                         columns=x_ts_df.columns, index=x_ts_df.index)
# Scale with the statistics of the filtered TRAINING features (x_data_df).
# Fitting the scaler on the test set would put the model inputs on a different
# scale than during training and leak test statistics.
x_test_sc=StandardScaler().fit(x_data_df.values).transform(x_test_df.values)
x_test_sc=pd.DataFrame(x_test_sc)

In [0]:
# Project the filtered test features with the PCA fitted on the filtered
# TRAINING data. Refitting on the test set yields a different basis, so the
# columns would no longer mean what model_pca_filter was trained on.
X_test_filter_pca = pca_filter.transform(x_test_sc)

In [0]:
# Draw num_test evaluation windows from the filtered, PCA-projected test data
# and record the model's raw outputs next to the true label rows.
gen_data = val_generator(X_test_filter_pca,21)
scores = []
predictions = []
y_actual = []
num_test = 1000

for i in range(num_test):
  # Rejection-sample: keep drawing until the label row contains a 1,
  # i.e. only windows with at least one active event are evaluated.
  while True:
    x_test, y_test = next(gen_data)
    if 1 in y_test:
      break

  y_out = model_pca_filter.predict(x_test)
  predictions.append(y_out)
  y_actual.append(y_test)

In [0]:
# Stack the per-sample model outputs into an (n_samples, 6) matrix. The row
# count is taken from the collected list instead of a hard-coded 1000, so the
# cell keeps working when num_test is changed.
temp = np.array(predictions).reshape(len(predictions),6)
prediction_df = pd.DataFrame(temp)
# One-hot encode the arg-max output: exactly one predicted event per sample.
m = np.zeros_like(prediction_df.values)
m[np.arange(len(prediction_df)), prediction_df.values.argmax(1)] = 1
prediction_df = pd.DataFrame(m, columns = prediction_df.columns).astype(int)
# Matching matrix of true label rows.
temp = np.concatenate(y_actual).reshape(len(y_actual),6)
y_actual_df = pd.DataFrame(temp)

In [0]:
# Per-event classification report for the filter+PCA model: each of the six
# label columns is scored as an independent binary problem.
events=['HandStart','FirstDigitTouch','BothStartLoadPhase','LiftOff','Replace','BothReleased']
for i, event_name in enumerate(events):
  y_true_col = y_actual_df.iloc[:,i].values
  y_pred_col = prediction_df.iloc[:,i].values
  # accuracy: (tp + tn) / (p + n)
  accuracy = accuracy_score(y_true_col, y_pred_col)
  print('Accuracy for %s: %f' %(event_name,accuracy))
  # precision: tp / (tp + fp)
  precision = precision_score(y_true_col, y_pred_col)
  print('precision for %s: %f' %(event_name,precision))
  # recall: tp / (tp + fn)
  recall = recall_score(y_true_col, y_pred_col)
  print('recall for %s: %f' %(event_name,recall))
  # f1: 2 tp / (2 tp + fp + fn)
  f1 = f1_score(y_true_col, y_pred_col)
  print('f1_score for %s: %f' %(event_name,f1))
  print('\n')

In [0]:
# ROC curve and AUC per event for the filter+PCA model.
# roc_curve / roc_auc_score need continuous scores to sweep the decision
# threshold; the one-hot prediction_df collapses every curve to a single
# operating point. Use the raw sigmoid outputs collected in `predictions`.
plt.figure(0).clf()
y_score = np.concatenate(predictions, axis=0)  # (n_samples, 6) model outputs
for i in range(6):
  y_true_col = y_actual_df.iloc[:,i].values
  fpr, tpr, thresh = metrics.roc_curve(y_true_col, y_score[:,i])
  auc = metrics.roc_auc_score(y_true_col, y_score[:,i])
  plt.plot(fpr,tpr,label=str(events[i])+", auc="+str(round(auc,3)))
plt.xlabel('False positive rate')
plt.ylabel('True positive rate')
plt.legend(loc=0)