In [0]:
# !pip install tensorflow==1.14

In [0]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, precision_score, recall_score,f1_score,roc_auc_score
import matplotlib.pyplot as plt
from sklearn import metrics
import glob
from scipy import signal

Reading train data

In [0]:
# Collect the per-file CSVs that hold the training features and labels.
xpath = r'Dataset/train_set/features/' 
ypath = r'Dataset/train_set/labels/' 
# glob gives no ordering guarantee, and features/labels are matched purely by
# row position after concatenation, so both listings are sorted.
# NOTE(review): assumes feature and label files share a naming scheme so that
# sorted order pairs them up -- confirm against the dataset layout.
xfiles = sorted(glob.glob(xpath + "/*.csv"))
yfiles = sorted(glob.glob(ypath + "/*.csv"))
if not xfiles or len(xfiles) != len(yfiles):
    raise FileNotFoundError(
        'expected matching feature/label CSVs, found %d feature and %d label files'
        % (len(xfiles), len(yfiles)))

# One DataFrame per file; the first row of every CSV is read as the header.
xli = [pd.read_csv(f, index_col=None, header=0) for f in xfiles]
yli = [pd.read_csv(f, index_col=None, header=0) for f in yfiles]

# Concatenate all the per-file frames, then drop the first column
# (presumably a sample id -- TODO confirm) and keep the rest as a NumPy array.
x_df = pd.concat(xli, axis=0, ignore_index=True)
x_df = x_df.iloc[:,1:]
x_data=x_df.values

y_df = pd.concat(yli, axis=0, ignore_index=True)
y_df = y_df.iloc[:,1:]
y_data=y_df.values

# Every feature row needs exactly one label row.
if len(x_data) != len(y_data):
    raise ValueError('train features have %d rows but labels have %d'
                     % (len(x_data), len(y_data)))

Standardization

In [0]:
# Standardise every training feature column with statistics computed on the
# training matrix itself, and wrap both features and labels as DataFrames.
Xsc = pd.DataFrame(StandardScaler().fit(x_data).transform(x_data))
y = pd.DataFrame(y_data)

## Without PCA and without Filter

Reading Test data

In [0]:
# Collect the per-file CSVs that hold the test features and labels.
xtestpath = r'Dataset/test_set/features/' 
ytestpath = r'Dataset/test_set/labels/' 
# glob gives no ordering guarantee, and features/labels are matched purely by
# row position after concatenation, so both listings are sorted.
# NOTE(review): assumes feature and label files share a naming scheme so that
# sorted order pairs them up -- confirm against the dataset layout.
xtestfiles = sorted(glob.glob(xtestpath + "/*.csv"))
ytestfiles = sorted(glob.glob(ytestpath + "/*.csv"))
if not xtestfiles or len(xtestfiles) != len(ytestfiles):
    raise FileNotFoundError(
        'expected matching feature/label CSVs, found %d feature and %d label files'
        % (len(xtestfiles), len(ytestfiles)))

# One DataFrame per file; the first row of every CSV is read as the header.
xtestli = [pd.read_csv(f, index_col=None, header=0) for f in xtestfiles]
ytestli = [pd.read_csv(f, index_col=None, header=0) for f in ytestfiles]

# Concatenate all the per-file frames, then drop the first column
# (presumably a sample id -- TODO confirm).
x_ts_df = pd.concat(xtestli, axis=0, ignore_index=True)
x_ts_df = x_ts_df.iloc[:,1:]
# Standardise the test features with the mean/variance of the TRAINING matrix
# (x_data). Fitting a separate scaler on the test set would put train and test
# on different scales and leak test-set statistics into the evaluation.
x_val_data=StandardScaler().fit(x_data).transform(x_ts_df.values)

y_ts_df = pd.concat(ytestli, axis=0, ignore_index=True)
y_ts_df = y_ts_df.iloc[:,1:]
y_val_data=y_ts_df.values

# Every feature row needs exactly one label row.
if len(x_val_data) != len(y_val_data):
    raise ValueError('test features have %d rows but labels have %d'
                     % (len(x_val_data), len(y_val_data)))

In [0]:
# Train one independent binary logistic-regression model per label column and
# store, for every test sample, the probability of the model's second class
# (column 1 of predict_proba; presumably "event present" for 0/1 labels).
y_pred = np.empty((len(x_val_data),6))

for i in range(6):
    # Fit on the standardised training matrix (Xsc). The test matrix
    # x_val_data is standardised, so fitting on the raw x_data would feed the
    # model inputs on a different scale at prediction time.
    clf = LogisticRegression(random_state=0,max_iter=20000).fit(Xsc.values, y_data[:,i])
    y_pred[:,i] = clf.predict_proba(x_val_data)[:,1]

In [0]:
# Turn the probability matrix into hard 0/1 predictions: in each row only the
# column with the highest probability is set to 1, every other column to 0.
# NOTE(review): this always marks exactly one event per sample, even when no
# event or several events are active -- confirm that is the intended rule.
probabilities = np.array(y_pred).reshape(len(y_pred), 6)
best_event = probabilities.argmax(axis=1)
one_hot = np.zeros_like(probabilities)
one_hot[np.arange(len(probabilities)), best_event] = 1

prediction_df = pd.DataFrame(one_hot).astype(int)
# Ground-truth labels in the same tabular form, for column-wise comparison.
y_actual_df = pd.DataFrame(y_val_data)

calculating the performance metrics

In [0]:
events=['HandStart','FirstDigitTouch','BothStartLoadPhase','LiftOff','Replace','BothReleased']
# Print accuracy, precision, recall and F1 for each event column, comparing the
# true labels with the hard 0/1 predictions.
for idx, event in enumerate(events):
    truth = y_actual_df.iloc[:, idx].values
    guess = prediction_df.iloc[:, idx].values
    # accuracy: (tp + tn) / (p + n)
    print('Accuracy for %s: %f' % (event, accuracy_score(truth, guess)))
    # precision: tp / (tp + fp)
    print('precision for %s: %f' % (event, precision_score(truth, guess)))
    # recall: tp / (tp + fn)
    print('recall for %s: %f' % (event, recall_score(truth, guess)))
    # f1: 2 tp / (2 tp + fp + fn)
    print('f1_score for %s: %f' % (event, f1_score(truth, guess)))
    print('\n')


plotting the AUC

In [0]:
# Draw one ROC curve per event on a shared figure and show its AUC in the legend.
plt.figure(0).clf()
for i in range(6):
  y_true = y_actual_df.iloc[:,i].values
  # Score with the predicted probabilities (y_pred), not the thresholded 0/1
  # labels: hard labels collapse the ROC curve to a single operating point and
  # distort the AUC.
  y_score = y_pred[:,i]
  fpr, tpr, thresh = metrics.roc_curve(y_true, y_score)
  auc = metrics.roc_auc_score(y_true, y_score)
  plt.plot(fpr,tpr,label=str(events[i])+", auc="+str(round(auc,3)))
plt.xlabel('False positive rate')
plt.ylabel('True positive rate')
plt.legend(loc=0)

## PCA

In [0]:
from sklearn.decomposition import PCA

# Learn a 21-component PCA basis from the standardised training features, then
# express the training samples in that basis.
pca = PCA(n_components=21).fit(Xsc)
X_train_pca = pca.transform(Xsc)

Calculating PCA components 

In [0]:
# Project the test samples onto the principal axes already learned from the
# training data (`pca`, fitted on Xsc). Fitting a second PCA on the test set
# would yield a different basis, so its columns would not correspond to the
# features the classifier is trained on.
X_test_pca = pca.transform(x_val_data)

In [0]:
# Fit a separate binary logistic-regression classifier for each of the six
# label columns on the PCA-reduced training data, and keep column 1 of
# predict_proba (presumably the "event present" class) for every test sample.
y_pred = np.empty((len(x_val_data), 6))

for event_idx in range(6):
    model = LogisticRegression(random_state=0, max_iter=50000)
    model.fit(X_train_pca, y_data[:, event_idx])
    y_pred[:, event_idx] = model.predict_proba(X_test_pca)[:, 1]

In [0]:
# Turn the probability matrix into hard 0/1 predictions: in each row only the
# column with the highest probability is set to 1, every other column to 0.
# NOTE(review): this always marks exactly one event per sample, even when no
# event or several events are active -- confirm that is the intended rule.
probabilities = np.array(y_pred).reshape(len(y_pred), 6)
best_event = probabilities.argmax(axis=1)
one_hot = np.zeros_like(probabilities)
one_hot[np.arange(len(probabilities)), best_event] = 1

prediction_df = pd.DataFrame(one_hot).astype(int)
# Ground-truth labels in the same tabular form, for column-wise comparison.
y_actual_df = pd.DataFrame(y_val_data)

In [0]:
events=['HandStart','FirstDigitTouch','BothStartLoadPhase','LiftOff','Replace','BothReleased']
# Print accuracy, precision, recall and F1 for each event column, comparing the
# true labels with the hard 0/1 predictions.
for idx, event in enumerate(events):
    truth = y_actual_df.iloc[:, idx].values
    guess = prediction_df.iloc[:, idx].values
    # accuracy: (tp + tn) / (p + n)
    print('Accuracy for %s: %f' % (event, accuracy_score(truth, guess)))
    # precision: tp / (tp + fp)
    print('precision for %s: %f' % (event, precision_score(truth, guess)))
    # recall: tp / (tp + fn)
    print('recall for %s: %f' % (event, recall_score(truth, guess)))
    # f1: 2 tp / (2 tp + fp + fn)
    print('f1_score for %s: %f' % (event, f1_score(truth, guess)))
    print('\n')

In [0]:
# Draw one ROC curve per event on a shared figure and show its AUC in the legend.
plt.figure(0).clf()
for i in range(6):
  y_true = y_actual_df.iloc[:,i].values
  # Score with the predicted probabilities (y_pred), not the thresholded 0/1
  # labels: hard labels collapse the ROC curve to a single operating point and
  # distort the AUC.
  y_score = y_pred[:,i]
  fpr, tpr, thresh = metrics.roc_curve(y_true, y_score)
  auc = metrics.roc_auc_score(y_true, y_score)
  plt.plot(fpr,tpr,label=str(events[i])+", auc="+str(round(auc,3)))
plt.xlabel('False positive rate')
plt.ylabel('True positive rate')
plt.legend(loc=0)

## PCA and Filter

In [0]:
# Take independent (deep) copies of the train and test feature frames so that
# later edits to the copies leave x_df and x_ts_df untouched.
x_data_df = x_df.copy(deep=True)
x_test_df = x_ts_df.copy(deep=True)

In [0]:
# NOTE(review): fs is hard-coded; confirm it matches the actual recording rate
# of the dataset, because the normalised cut-off below depends on it.
fs = 1000  # Sampling frequency
fc = 30  # Cut-off frequency of the filter
w = fc / (fs / 2) # Normalize the frequency

# Design the 8th-order Butterworth low-pass once (it does not depend on the
# column) and in second-order-section form, which SciPy recommends over (b, a)
# coefficients for higher-order filters because of numerical sensitivity.
sos = signal.butter(8, w, 'low', output='sos')

# Zero-phase (forward-backward) filter every feature column along the time
# axis. Filtering always starts from the unfiltered x_df, so re-running this
# cell does not filter already-filtered data a second time.
x_data_df = pd.DataFrame(
    signal.sosfiltfilt(sos, x_df.values.astype(float), axis=0),
    columns=x_df.columns,
    index=x_df.index,
)

In [0]:
# Standardise the filtered training features with statistics computed on the
# filtered training matrix itself, and wrap features and labels as DataFrames.
Xsc = pd.DataFrame(StandardScaler().fit(x_data_df.values).transform(x_data_df.values))
y = pd.DataFrame(y_data)

Applying PCA to the filtered training data

In [0]:
# Learn a 21-component PCA basis from the standardised, filtered training
# features, then express the training samples in that basis.
pca_filter = PCA(n_components=21).fit(Xsc)
X_train_filter_pca = pca_filter.transform(Xsc)

In [0]:
# Low-pass filter the test features with the same 8th-order Butterworth design
# used for training (normalised cut-off `w`). The filter is designed once, in
# second-order-section form, which SciPy recommends over (b, a) coefficients
# for higher-order filters because of numerical sensitivity.
sos = signal.butter(8, w, 'low', output='sos')
# Filtering always starts from the unfiltered x_ts_df, so re-running this cell
# does not filter already-filtered data a second time.
x_test_df = pd.DataFrame(
    signal.sosfiltfilt(sos, x_ts_df.values.astype(float), axis=0),
    columns=x_ts_df.columns,
    index=x_ts_df.index,
)
# Standardise with the mean/variance of the filtered TRAINING data (x_data_df);
# fitting a separate scaler on the test set would put train and test on
# different scales and leak test-set statistics into the evaluation.
x_test_sc=StandardScaler().fit(x_data_df.values).transform(x_test_df.values)
x_test_sc=pd.DataFrame(x_test_sc)

In [0]:
# Project the filtered test samples onto the principal axes already learned
# from the filtered training data (`pca_filter`). Fitting a second PCA on the
# test set would yield a different basis, so its columns would not correspond
# to the features the classifier is trained on.
X_test_filter_pca = pca_filter.transform(x_test_sc)

In [0]:
# Fit a separate binary logistic-regression classifier for each of the six
# label columns on the filtered, PCA-reduced training data, and keep column 1
# of predict_proba (presumably the "event present" class) for every test sample.
y_pred = np.empty((len(x_val_data), 6))

for event_idx in range(6):
    model = LogisticRegression(random_state=0, max_iter=20000)
    model.fit(X_train_filter_pca, y_data[:, event_idx])
    y_pred[:, event_idx] = model.predict_proba(X_test_filter_pca)[:, 1]

In [0]:
# Turn the probability matrix into hard 0/1 predictions: in each row only the
# column with the highest probability is set to 1, every other column to 0.
# NOTE(review): this always marks exactly one event per sample, even when no
# event or several events are active -- confirm that is the intended rule.
probabilities = np.array(y_pred).reshape(len(y_pred), 6)
best_event = probabilities.argmax(axis=1)
one_hot = np.zeros_like(probabilities)
one_hot[np.arange(len(probabilities)), best_event] = 1

prediction_df = pd.DataFrame(one_hot).astype(int)
# Ground-truth labels in the same tabular form, for column-wise comparison.
y_actual_df = pd.DataFrame(y_val_data)

In [0]:
events=['HandStart','FirstDigitTouch','BothStartLoadPhase','LiftOff','Replace','BothReleased']
# Print accuracy, precision, recall and F1 for each event column, comparing the
# true labels with the hard 0/1 predictions.
for idx, event in enumerate(events):
    truth = y_actual_df.iloc[:, idx].values
    guess = prediction_df.iloc[:, idx].values
    # accuracy: (tp + tn) / (p + n)
    print('Accuracy for %s: %f' % (event, accuracy_score(truth, guess)))
    # precision: tp / (tp + fp)
    print('precision for %s: %f' % (event, precision_score(truth, guess)))
    # recall: tp / (tp + fn)
    print('recall for %s: %f' % (event, recall_score(truth, guess)))
    # f1: 2 tp / (2 tp + fp + fn)
    print('f1_score for %s: %f' % (event, f1_score(truth, guess)))
    print('\n')

In [0]:
# Draw one ROC curve per event on a shared figure and show its AUC in the legend.
plt.figure(0).clf()
for i in range(6):
  y_true = y_actual_df.iloc[:,i].values
  # Score with the predicted probabilities (y_pred), not the thresholded 0/1
  # labels: hard labels collapse the ROC curve to a single operating point and
  # distort the AUC.
  y_score = y_pred[:,i]
  fpr, tpr, thresh = metrics.roc_curve(y_true, y_score)
  auc = metrics.roc_auc_score(y_true, y_score)
  plt.plot(fpr,tpr,label=str(events[i])+", auc="+str(round(auc,3)))
plt.xlabel('False positive rate')
plt.ylabel('True positive rate')
plt.legend(loc=0)