# Project 2 - ML
### Run NN : Impact of error

In [11]:
import pandas as pd 
from processing import *
import seaborn as sns
from error_impact_functions import neural_network_corr, neural_network_err
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
import tensorflow as tf
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
from tensorflow.keras.layers import Dropout
from sklearn.metrics import f1_score, accuracy_score
import numpy as np

In [2]:
%reload_ext autoreload
%autoreload 2
%matplotlib inline

We load the data

In [3]:
file = 'All_Relative_Results_Cleaned.parquet'
data = pd.read_parquet(file)

We clean our dataset

In [5]:
rows_with_nan = data[data.isnull().any(axis=1)]

def percentage_nan_by_row(df):
    return df.isnull().mean(axis=1)*100

percentage_nan_by_rows = percentage_nan_by_row(rows_with_nan.drop(['Participant', 'Set', 'Camera','Exercise', 'time(s)'], axis='columns'))
percentage_nan_by_rows[percentage_nan_by_rows==100].shape

(27187,)

This means that all rows with NaNs values have more than 99% of missing numerical values. Hence we can remove this rows from our data set.

In [None]:
data_cleaned = data.dropna().copy(deep=True)

# We create our Machine learning model

We want to see the influence of error on the collected data. To see if their impact is relevant, we base our training set only on correctly executed exercises and then test our prediction algorithm on the correct data set and then on the whole data set.

In [7]:
data_corr = data_cleaned[data_cleaned["Set"] == "Correct"]

Then, we reduce the dimension of the correct data set, to only focus on a few parts of detectors. We decide to keep : ankles, wrists, hips, knees, elbows, shoulders and nose as we consider them the most significant. This keeps 44 columns out of 104.

In [8]:
columns_of_interest = ['Participant','Exercise','Set','Camera','time(s)',
                     'left_ankle_x','left_ankle_y','left_ankle_z',
                     'right_ankle_x','right_ankle_y','right_ankle_z',
                     'left_wrist_x','left_wrist_y','left_wrist_z',
                     'right_wrist_x','right_wrist_y','right_wrist_z',
                     'left_hip_x','left_hip_y','left_hip_z',
                     'right_hip_x','right_hip_y','right_hip_z',
                     'left_knee_x','left_knee_y','left_knee_z',
                     'right_knee_x','right_knee_y','right_knee_z',
                     'left_elbow_x','left_elbow_y','left_elbow_z',
                     'right_elbow_x','right_elbow_y','right_elbow_z',
                     'left_shoulder_x','left_shoulder_y','left_shoulder_z',
                     'right_shoulder_x','right_shoulder_y','right_shoulder_z',
                     'nose_x', 'nose_y', 'nose_z'
                     ]

data_corr_reduc = data_corr[columns_of_interest]

We can now train our NN model on the correct dataset X_corr, Y_corr

In [13]:
# Define X_corr as coordinates of correct sets
X_corr = data_corr_reduc.drop(['Participant', 'Set', 'Camera', 'Exercise'], axis=1)

# Define Y_corr as exercise column of the correct sets
Y_corr = data_corr_reduc['Exercise']

# Normalize data
scaler = StandardScaler()
X_corr_norm = scaler.fit_transform(X_corr)

# Train-test split
X_train_corr, X_test_corr, y_train_corr, y_test_corr = train_test_split(X_corr_norm, Y_corr, train_size=0.5, test_size=0.5, random_state=42)

# Encode the target variable (assuming 'exercise' is your target variable)
label_encoder = LabelEncoder()
y_train_encoded_corr = label_encoder.fit_transform(y_train_corr)
y_test_encoded_corr = label_encoder.transform(y_test_corr)

# Convert to one-hot encoding
onehot_encoder = OneHotEncoder(sparse=False)
y_train_encoded_onehot_corr = onehot_encoder.fit_transform(y_train_encoded_corr.reshape(-1, 1))
y_test_encoded_onehot_corr = onehot_encoder.transform(y_test_encoded_corr.reshape(-1, 1))

unique_exercises = Y_corr.unique()

num_classes = len(unique_exercises)

# Neural network architecture
model = tf.keras.Sequential([
    tf.keras.layers.Dense(128, activation='relu', input_shape=(X_train_corr.shape[1],)),
    Dropout(0.5),  # Adding dropout for regularization
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dense(32, activation='relu'),
    tf.keras.layers.Dense(num_classes, activation='softmax')  
])

# Compile the model
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

# Train the model with one-hot encoded labels
model.fit(X_train_corr, y_train_encoded_corr, epochs=10, validation_data=(X_test_corr, y_test_encoded_corr))

# Get the predicted probabilities for each class
y_pred_probs_corr = model.predict(X_test_corr)

# Convert predicted probabilities to class labels
y_pred_corr = np.argmax(y_pred_probs_corr, axis=1)

# Assuming y_test_encoded_onehot is the true labels in one-hot encoded form
# Convert one-hot encoded labels to integers
y_test_corr = np.argmax(y_test_encoded_onehot_corr, axis=1)

# Evaluate the model on the test set with categorical_crossentropy
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Convert one-hot encoded labels to integers
y_true_corr = np.argmax(y_test_encoded_onehot_corr, axis=1)

accuracy = np.sum(y_true_corr == y_pred_corr) / len(y_true_corr)
f1_score_NN_macro = f1_score(y_true_corr, y_pred_corr, average='macro')
f1_score_NN_micro = f1_score(y_true_corr, y_pred_corr, average='micro')

print(f'Test Accuracy: {accuracy * 100:.2f}%')
print(f'F1 Score macro: {f1_score_NN_macro:.4f}')
print(f'F1 Score micro: {f1_score_NN_micro:.4f}')



Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Test Accuracy: 95.19%
F1 Score macro: 0.9549
F1 Score micro: 0.9519


We now define our full dataset as correct and incorrect sets and reduce the dimension as we did before :

In [14]:
set_with_err = data_cleaned[columns_of_interest]

We can now test the trained model on the complete dataset containing error and observe the impact on accuracy :

In [16]:
# Apply the function to each group
X_err = set_with_err.drop(['Participant', 'Set', 'Camera', 'Exercise'], axis=1)

# Apply the function to each group and concatenate the results
Y_err = set_with_err['Exercise']

# Normalize data
X_normalized_err = scaler.fit_transform(X_err)

# Train-test split
X_train_err, X_test_err, y_train_err, y_test_err = train_test_split(X_normalized_err, Y_err, test_size=0.2, random_state=42)

# Encode the target variable (assuming 'exercise' is your target variable)
y_test_encoded_err = label_encoder.transform(y_test_err)

# Convert to one-hot encoding
y_test_encoded_onehot_err = onehot_encoder.transform(y_test_encoded_err.reshape(-1, 1))

# Get the predicted probabilities for each row 
y_pred_probs_err = model.predict(X_test_err)

# Convert predicted probabilities to class labels
y_pred_err = np.argmax(y_pred_probs_err, axis=1)

# Assuming y_test_encoded_onehot is the true labels in one-hot encoded form
# Convert one-hot encoded labels to integers
y_test_err = np.argmax(y_test_encoded_onehot_err, axis=1)

# Evaluate the model on the test set with categorical_crossentropy
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
test_loss_err, test_accuracy_err = model.evaluate(X_test_err, y_test_encoded_onehot_err)

# Convert one-hot encoded labels to integers
y_true_err = np.argmax(y_test_encoded_onehot_err, axis=1)

accuracy_err = np.sum(y_true_err == y_pred_err) / len(y_true_err)
f1_score_NN_macro_err = f1_score(y_true_err, y_pred_err, average='macro')
f1_score_NN_micro_err = f1_score(y_true_err, y_pred_err, average='micro')

print(f'Test Accuracy: {accuracy_err * 100:.2f}%')
print(f'F1 Score macro: {f1_score_NN_macro_err:.4f}')
print(f'F1 Score micro: {f1_score_NN_micro_err:.4f}')

Test Accuracy: 93.48%
F1 Score macro: 0.9346
F1 Score micro: 0.9348


In conclusion for this error study, we notice that the accuracy over the correct dataset is 95.19%, as the accuracy over the full dataset using the same weights obtained after training only over correct sample gives a score of 93.48%. This might give an idea of how introducing a new error that was not taken into acount during training impact the prediction accuracy. This difference is quite small and we can rely on the robustness of Neural Networks to deal with new errors. 