# Hand movement detection

In [1]:
# ! unzip -q task4.zip

In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

import os
import re

In [4]:
# Get column names and clean them
col_names = [col.strip() for col in pd.read_csv('/content/HandRight.csv').columns]
# compile pattern to extract label
pat = re.compile(r'([A-Z][a-z]+)\.csv')

# Load the treaining data
training_data = pd.DataFrame([], columns=col_names)
for root, subdirs, files in os.walk('/content/Training/'):
  if root.split('/')[-1] == 'not relevant' or len(files) == 0:
    continue
  # print(root)
  classes = [re.findall(pat, f)[0] for f in files]
  for file in files:
    label = re.findall(pat, file)[0]
    skiprows = range(1, 7) if label == 'Alone' else range(1, 14)
    df = pd.read_csv(os.path.join(root, file), skiprows=skiprows)
    df.columns = col_names
    df['Label'] = label
    training_data = pd.concat([training_data, df], ignore_index=True)

In [5]:
training_data.head()

Unnamed: 0,Time,Frame ID,Hand Type,# hands,Position X,Position Y,Position Z,Velocity X,Velocity Y,Velocity Z,Pitch,Roll,Yaw,Wrist Pos X,Wrist Pos Y,Wrist Pos Z,Elbow pos X,Elbow Pos Y,Elbow Pos Z,Grab Strenth,Grab Angle,Pinch Strength,Label
0,375.7664,43877,right,2,109.8124,294.0425,23.82546,15.16876,187.2628,107.1075,0.939421,-0.733815,-0.414149,138.5343,242.4944,59.34635,292.7356,75.51405,147.6441,0.0,0.570134,0.0,Spontan
1,375.783,43879,left,2,-68.17884,184.6661,-33.41775,13.04194,178.0413,-123.5018,0.932778,0.32231,1.177865,-126.5636,154.7217,-15.63315,-262.3608,-43.82711,24.29723,0.0,0.837646,0.769844,Spontan
2,375.783,43879,right,2,110.5054,297.0931,25.75209,34.46735,159.7672,100.0027,0.966124,-0.74605,-0.425343,139.3157,244.7854,60.06323,297.6381,80.92066,146.8924,0.0,0.627651,0.0,Spontan
3,375.7995,43881,left,2,-68.41686,188.3904,-34.16297,16.83247,207.1948,-34.46944,0.875941,0.284841,1.119169,-125.537,158.2299,-12.981,-258.7485,-40.97414,32.06822,0.0,0.852355,0.837539,Spontan
4,375.7995,43881,right,2,111.6178,299.2377,27.55509,69.43216,120.5591,103.7337,0.970414,-0.725961,-0.448085,140.9998,246.8677,61.27683,295.9448,75.94328,140.2315,0.0,0.667204,0.0,Spontan


In [6]:
training_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 217568 entries, 0 to 217567
Data columns (total 23 columns):
 #   Column          Non-Null Count   Dtype  
---  ------          --------------   -----  
 0   Time            217568 non-null  float64
 1   Frame ID        217568 non-null  object 
 2   Hand Type       217568 non-null  object 
 3   # hands         217568 non-null  object 
 4   Position X      217568 non-null  float64
 5   Position Y      217568 non-null  float64
 6   Position Z      217568 non-null  float64
 7   Velocity X      217568 non-null  float64
 8   Velocity Y      217568 non-null  float64
 9   Velocity Z      217568 non-null  float64
 10  Pitch           217568 non-null  float64
 11  Roll            217568 non-null  float64
 12  Yaw             217568 non-null  float64
 13  Wrist Pos X     217568 non-null  float64
 14  Wrist Pos Y     217568 non-null  float64
 15  Wrist Pos Z     217568 non-null  float64
 16  Elbow pos X     217568 non-null  float64
 17  Elbow Pos 

## Data Preprocessing

In [7]:
from sklearn.preprocessing import MinMaxScaler, LabelEncoder
from sklearn.model_selection import train_test_split

In [8]:
# Encode features
training_data['One_Hand'] = (training_data['# hands'] == 1).astype(int)
training_data['Left_Hand'] = (training_data['Hand Type'] == 'left').astype(int)

In [34]:
le = LabelEncoder().fit(training_data['Label'])
X, y = training_data.iloc[:, 4:].drop(columns='Label'), le.transform(training_data['Label'])

In [10]:
X_norm = pd.DataFrame(MinMaxScaler().fit_transform(X), columns=X.columns)

In [11]:
X_train, X_test, y_train, y_test = train_test_split(X_norm, y, random_state=42)

## Train models

In [12]:
import tensorflow as tf

In [16]:
# Train a model
tf.random.set_seed(0)

# Build the model
model_1 = tf.keras.Sequential([
  tf.keras.layers.Dense(10, activation='relu'),
  tf.keras.layers.Dense(3, activation='softmax')
])

# Compile the model
model_1.compile(
    loss=tf.keras.losses.SparseCategoricalCrossentropy(),
    optimizer=tf.keras.optimizers.Adam(),
    metrics=['accuracy']
)

# Fit the model
model_1.fit(X_train, y_train, epochs=20)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<keras.callbacks.History at 0x7f62c3c3f590>

In [17]:
from sklearn.metrics import accuracy_score, f1_score

In [18]:
model_1_pred = model_1.predict(X_test)

In [26]:
accuracy_score(y_test, model_1_pred.argmax(axis=1))

0.8310229445506692

Let's try to improve the model by running for more epochs, and adding another layer.

In [27]:
# Train a model
tf.random.set_seed(0)

# Build the model
model_2 = tf.keras.Sequential([
  tf.keras.layers.Dense(10, activation='relu'),
  tf.keras.layers.Dense(10, activation='relu'),
  tf.keras.layers.Dense(3, activation='softmax')
])

# Compile the model
model_2.compile(
    loss=tf.keras.losses.SparseCategoricalCrossentropy(),
    optimizer=tf.keras.optimizers.Adam(),
    metrics=['accuracy']
)

# Fit the model
model_2.fit(X_train, y_train, epochs=50)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


<keras.callbacks.History at 0x7f62c6728310>

In [30]:
model_2_pred = model_2.predict(X_test)

In [31]:
accuracy_score(y_test, model_2_pred.argmax(axis=1))

0.8488196793646124

In [33]:
f1_score(y_test, model_2_pred.argmax(axis=1), average=None)

array([0.98951166, 0.80700014, 0.81166972])

In [35]:
le.classes_

array(['Alone', 'Spontan', 'Sync'], dtype=object)

Now let's test against the actual testing data

In [37]:
## Load the test data
testing_data = pd.DataFrame([], columns=col_names)
for root, subdirs, files in os.walk('/content/Validation/'):
  if root.split('/')[-1] == 'not relevant' or len(files) == 0:
    continue
  # print(root)
  classes = [re.findall(pat, f)[0] for f in files]
  for file in files:
    label = re.findall(pat, file)[0]
    skiprows = range(1, 7) if label == 'Alone' else range(1, 14)
    df = pd.read_csv(os.path.join(root, file), skiprows=skiprows)
    df.columns = col_names
    df['Label'] = label
    testing_data = pd.concat([testing_data, df], ignore_index=True)

We need to apply the same preprocessing steps on the test data

In [38]:
# Encode features
testing_data['One_Hand'] = (testing_data['# hands'] == 1).astype(int)
testing_data['Left_Hand'] = (testing_data['Hand Type'] == 'left').astype(int)

In [39]:
# le = LabelEncoder().fit(testing_data['Label'])
X_test_final, y_test_final = testing_data.iloc[:, 4:].drop(columns='Label'), le.transform(testing_data['Label'])

In [40]:
X_test_final_norm = pd.DataFrame(MinMaxScaler().fit_transform(X_test_final), columns=X_test_final.columns)

Calculate accuracy and F1-score for the two models

In [None]:
# model 1 predictions
model_1_final_pred = model_1.predict(X_test_final_norm)

In [None]:
accuracy_score(y_test_final, model_1_final_pred.argmax(axis=1))

0.6604307582769668

In [None]:
f1_score(y_test_final, model_1_final_pred.argmax(axis=1), average=None)

array([0.91821424, 0.44960403, 0.67397631])

In [41]:
# model 2 predictions
model_2_final_pred = model_2.predict(X_test_final_norm)

In [42]:
accuracy_score(y_test_final, model_2_final_pred.argmax(axis=1))

0.6247953008187968

In [43]:
f1_score(y_test_final, model_2_final_pred.argmax(axis=1), average=None)

array([0.96925139, 0.33478763, 0.64331353])

Model_2, surprisingly, gets a lower accuracy score on the test set, although it had better results on the validation set.  
However, the F1 score for the Alone class is better than model_1.  
Maybe it is because model_2 is overfitting on that specific class.

### Test the models against the HandRight.csv data

In [54]:
# Read the data
right_hand = pd.read_csv('/content/HandRight.csv', skiprows=range(1, 7))
right_hand.columns = col_names

We need to apply the same preprocessing steps on the test data

In [56]:
# Encode features
right_hand['One_Hand'] = (right_hand['# hands'] == 1).astype(int)
right_hand['Left_Hand'] = (right_hand['Hand Type'] == 'left').astype(int)
right_hand['Label'] = 'Alone'

In [59]:
# le = LabelEncoder().fit(testing_data['Label'])
X_test_right_hand, y_test_right_hand = right_hand.iloc[:, 4:].drop(columns='Label'), le.transform(right_hand['Label'])

In [60]:
X_test_right_hand_norm = pd.DataFrame(MinMaxScaler().fit_transform(X_test_right_hand), columns=X_test_right_hand.columns)

Calculate accuracy and F1-score for the two models

In [61]:
# model 1 predictions
model_1_right_hand_pred = model_1.predict(X_test_right_hand_norm)

In [62]:
accuracy_score(y_test_right_hand, model_1_right_hand_pred.argmax(axis=1))

0.9107365792759051

In [63]:
f1_score(y_test_right_hand, model_1_right_hand_pred.argmax(axis=1), average=None)

array([0.95328324, 0.        , 0.        ])

In [64]:
# model 2 predictions
model_2_right_hand_pred = model_2.predict(X_test_right_hand_norm)

In [67]:
accuracy_score(y_test_right_hand, model_2_right_hand_pred.argmax(axis=1))

0.9837702871410736

In [68]:
f1_score(y_test_right_hand, model_2_right_hand_pred.argmax(axis=1), average=None)

array([0.99181875, 0.        , 0.        ])

Now we can clearly see that model_2 outperforms model_1 on the Alone class