In [90]:
import os

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

import keras_tuner as kt
import tensorflow as tf
from tensorflow import keras
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
from keras.layers import Dropout

from sklearn.metrics import classification_report, f1_score, precision_score, recall_score
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.model_selection import train_test_split

from tensorflow.keras.callbacks import EarlyStopping
from scipy.fft import fft



In [91]:
# Loading the datasets
# All four sensor exports live in the same folder, so the location is defined once;
# point DATA_DIR at another checkout to run the notebook elsewhere.
DATA_DIR = '/home/mzero/main/uni_repo/machine_learning_fqs_2024/ml4qs-gesture-recognition/data/preprocced_data'

accelerometer_data = pd.read_csv(f'{DATA_DIR}/Combined_accelerometer_80min.csv')
gyroscope_data = pd.read_csv(f'{DATA_DIR}/Combined_gyroscope_80min.csv')
linear_accelerometer_data = pd.read_csv(f'{DATA_DIR}/Combined_linear_accelerometer_80min.csv')
magnetometer_data = pd.read_csv(f'{DATA_DIR}/Combined_magnetometer_80min.csv')

### Transformation

In [92]:
# Join the four sensor tables on their shared time and label columns so that
# every row describes all sensors at the same time point.
merge_keys = ['time', 'label']
df = (
    accelerometer_data
    .merge(gyroscope_data, on=merge_keys)
    .merge(linear_accelerometer_data, on=merge_keys)
    .merge(magnetometer_data, on=merge_keys)
)

In [93]:
# Inspect the merged frame before any transformation is applied to it.
display(df)

Unnamed: 0,time,X (m/s^2)_x,Y (m/s^2)_x,Z (m/s^2)_x,label,X (rad/s),Y (rad/s),Z (rad/s),X (m/s^2)_y,Y (m/s^2)_y,Z (m/s^2)_y,X (µT),Y (µT),Z (µT)
0,0.0,9.986539,-0.004582,0.850397,handshake,-0.351920,-0.527611,0.563546,0.985623,-0.431386,1.030669,-38.050217,-30.089358,-12.836142
1,0.5,5.105301,-0.415617,0.850397,handshake,0.100001,-0.372286,-0.199083,-3.393721,-4.024062,-0.465686,-29.666510,-39.405896,-4.669382
2,1.0,8.871006,-2.852091,0.976863,handshake,0.392478,0.032676,-0.380796,0.328653,-6.355668,0.483817,-27.597006,-38.219096,-4.700792
3,1.5,8.871006,-5.486469,1.202958,handshake,0.049560,-0.169104,-0.362988,0.968687,-6.555062,-0.033927,-25.183688,-39.710181,-8.059941
4,2.0,9.990088,-5.486469,1.151986,handshake,0.057486,0.206438,-0.281776,1.509423,-7.636509,-0.098818,-30.950762,-32.967047,-9.577583
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9555,4778.5,-2.424369,5.592076,-4.316893,waving,-0.017034,0.178253,-0.059087,1.895874,-2.766447,-1.512145,19.415402,-37.545494,10.890142
9556,4779.0,-4.087585,4.795250,-3.674595,waving,0.153044,-0.171879,0.268310,1.172432,0.134634,-0.083616,18.625039,-38.041110,11.736007
9557,4779.5,-4.173743,4.213670,-4.552890,waving,0.007826,0.395523,-0.597318,2.564878,-1.729809,-0.560276,15.610884,-41.993545,9.435524
9558,4780.0,-2.880119,5.454926,-3.751154,waving,0.018189,0.051757,-0.648156,0.917136,-4.558381,-0.906490,21.293894,-36.012943,8.505168


##### Fourier Transformation --> Normalization Method: X

###### Fourier Transform

In [94]:

# Applying Fast Fourier Transformation - Discrete Fourier Transformation
def apply_dft(df):
    """
    Return a copy of ``df`` whose feature columns hold DFT magnitudes.

    Every column except 'time' and 'label' is treated as a feature. The FFT is
    taken with ``axis=1``, i.e. across the feature columns of each row (in the
    sorted order produced by ``Index.difference``), and only the magnitude of
    each complex coefficient is kept.

    NOTE(review): because the transform runs across columns, it mixes the
    sensors of one time point rather than analysing a signal over time, and it
    also includes any non-sensor column that is not 'time' or 'label' (for
    example a saved index column) - confirm this is intended.

    Args:
        df (pd.DataFrame): Input frame. It is not modified.

    Returns:
        pd.DataFrame: A new frame with the same columns; 'time' and 'label'
        are unchanged and every feature column is replaced by FFT magnitudes.
    """
    # Work on a copy so the caller's raw frame stays available and re-running
    # the calling cell does not transform already-transformed data.
    transformed = df.copy()
    features_X = transformed.columns.difference(['time', 'label'])

    # Nothing to transform; fft() raises on zero-length rows.
    if len(features_X) == 0:
        return transformed

    transformed[features_X] = np.abs(fft(transformed[features_X].to_numpy(), axis=1))
    return transformed

# Every later cell works on `data`, the frame holding the FFT magnitudes.
data = apply_dft(df)    

###### Normalization

In [95]:
# Normalize the feature columns
# Every column except 'time' and 'label' is standardised; `features_X` is reused
# later when the LSTM input windows are built.
features_X = data.columns.difference(['time', 'label'])
scaler = StandardScaler()
# NOTE(review): the scaler is fitted on the whole frame, before the train/test
# split further down, so test rows contribute to the scaling statistics -
# confirm this is intended.
data[features_X] = scaler.fit_transform(data[features_X])

# Encode labels
# The 'label' column is overwritten with integer codes; label_encoder.classes_
# keeps the code -> name mapping used by the reporting cells.
# NOTE(review): re-running this cell re-fits the encoder on the already-encoded
# column, so classes_ would then hold integers instead of gesture names.
label_encoder = LabelEncoder()
data['label'] = label_encoder.fit_transform(data['label'])

In [96]:
# Sanity check: show which gesture classes the encoder found and how many there are
class_names = label_encoder.classes_
print(f"Classes: {class_names}")
print(f"Number of classes: {len(class_names)}")

Classes: ['clapping' 'handshake' 'highfive' 'waving']
Number of classes: 4


##### Normalization -> Fourier Transformation Method: Y

###### Normalization

In [97]:
# # Normalize the feature columns
# features = data.columns.difference(['time', 'label'])
# scaler = StandardScaler()
# data[features] = scaler.fit_transform(data[features])

# # Encode labels
# label_encoder = LabelEncoder()
# data['label'] = label_encoder.fit_transform(data['label'])

###### Fourier Transform

In [98]:

# # Applying Fast Fourier Transformation - Discrete fourier Transformation
# def apply_dft(df):
#     features = df.columns.difference(['time', 'label'])
#     df[features] = np.abs(fft(df[features], axis=1))
#     return df 

# data = apply_dft(df)    

# print(data)

### Long Short-term Memory Model Implementation

###### Create a Sequence

In [99]:
# An LSTM learns temporal patterns from sequences, so the flat table is turned into
# sliding windows: each sample is the `time_steps` rows preceding row i, and its
# target is the label of row i.
time_steps = 50  # Number of past rows per window - chosen arbitrarily, no rationale yet

# Pull the needed columns out of the DataFrame once; slicing NumPy arrays inside the
# loop avoids a pandas row/column lookup on every iteration.
feature_values = data[features_X].to_numpy()
label_values = data['label'].to_numpy()  # keeps the integer dtype of the encoded labels

# NOTE(review): windows are cut from the full table, so a window can contain rows that
# carry a different label than its target row - confirm this is acceptable.
# Window i covers rows [i - time_steps, i) (past values only, no label column).
X = np.array([feature_values[i - time_steps:i] for i in range(time_steps, len(data))])
# The target of window i is the label of row i, i.e. every label from `time_steps` onwards.
y = label_values[time_steps:]

###### Data Splitting

In [100]:
# Split into training and testing sets.
# A fixed random_state makes the split (and every metric derived from it) reproducible.
# NOTE(review): consecutive windows share most of their rows, so a shuffled split may
# place near-identical windows in both sets - confirm this is acceptable.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

###### LSTM Model - TensorFlow & Keras

In [115]:
class LstmPipline():
    """
    Tune, train and evaluate a single-layer LSTM classifier.

    The pipeline (see ``run``) creates a Keras Tuner Hyperband search, looks
    for the best hyperparameters on the training data, prints them, and then
    retrains a model with those hyperparameters under several random seeds,
    collecting the weighted F1 score of each run on the test data.

    Attributes:
        X_train, X_test: Model inputs; ``X_train.shape[1:]`` is used as the
            LSTM input shape.
        y_train, y_test: Integer class labels matching the inputs.
        tuner: The Hyperband tuner, set by ``tuner_initiator``.
        best_configs: Best hyperparameters, set by ``parameter_search``.
        best_model: Most recently trained model using the best
            hyperparameters, or a model loaded by ``load_best_model``.
        model_dir: Directory from which ``load_best_model`` reads.
        tuner_dir: Directory in which the tuner stores its trials.
        num_classes: Number of units of the softmax output layer.
    """

    def __init__(self, X_train, X_test, y_train, y_test,
                 model_dir='/home/mzero/main/uni_repo/machine_learning_fqs_2024/ml4qs-gesture-recognition/models',
                 tuner_dir='/home/mzero/main/uni_repo/machine_learning_fqs_2024/ml4qs-gesture-recognition/results/tuner',
                 num_classes=4):
        """
        Store the data splits and make sure the model directory exists.

        Args:
            X_train, X_test: Training and test inputs.
            y_train, y_test: Training and test labels.
            model_dir (str): Directory holding ``best_model.h5``; created if missing.
            tuner_dir (str): Directory used by the Hyperband tuner.
            num_classes (int): Size of the output layer.
        """
        self.X_train = X_train
        self.X_test = X_test
        self.y_train = y_train
        self.y_test = y_test
        self.tuner = None
        self.best_model = None
        self.best_configs = None
        self.model_dir = model_dir
        self.tuner_dir = tuner_dir
        self.num_classes = num_classes
        os.makedirs(self.model_dir, exist_ok=True)

    def model_builder(self, hp):
        """
        Build and compile the LSTM model for one set of hyperparameters.

        Args:
            hp: Keras Tuner hyperparameter container. Defines 'units'
                (32-128, step 16), 'dropout' (0.0-0.5, step 0.1) and
                'learning_rate' (1e-4 to 1e-2, log sampling).

        Returns:
            The compiled Sequential model (LSTM -> Dropout -> softmax Dense).
        """
        model = Sequential()
        # The input shape comes from the instance's own training data rather
        # than from a notebook-level global.
        model.add(LSTM(units=hp.Int('units', min_value=32, max_value=128, step=16),
                       input_shape=(self.X_train.shape[1], self.X_train.shape[2])))
        model.add(Dropout(rate=hp.Float('dropout', min_value=0.0, max_value=0.5, step=0.1)))
        model.add(Dense(units=self.num_classes, activation='softmax'))  # Output layer
        model.compile(optimizer=keras.optimizers.Adam(learning_rate=hp.Float('learning_rate', min_value=1e-4, max_value=1e-2, sampling='log')),
                      loss=keras.losses.SparseCategoricalCrossentropy(),
                      metrics=['accuracy'])

        return model

    def tuner_initiator(self):
        """
        Create the Hyperband tuner and store it in ``self.tuner``.

        The tuner maximises validation accuracy and keeps its state under
        ``self.tuner_dir`` in the project 'ml4qs_gesture_recognition'.
        """
        self.tuner = kt.Hyperband(
            self.model_builder,
            objective='val_accuracy',
            max_epochs=50,
            factor=3,
            directory=self.tuner_dir,
            project_name='ml4qs_gesture_recognition'
        )

    def parameter_search(self):
        """
        Run the hyperparameter search on the training data.

        Uses 30% of the training data for validation and stops a trial early
        when the validation loss has not improved for 5 epochs.

        Returns:
            The best hyperparameters found; also stored in ``self.best_configs``.
        """
        self.tuner.search(self.X_train, self.y_train, epochs=50, validation_split=0.3,
                          callbacks=[EarlyStopping(monitor='val_loss', patience=5)])
        self.best_configs = self.tuner.get_best_hyperparameters(num_trials=1)[0]

        return self.best_configs

    def output_optimimal_configs(self, best_configs):
        """
        Print the tuned hyperparameters.

        Args:
            best_configs: Object with a ``get(name)`` method providing
                'units', 'dropout', 'learning_rate' and 'tuner/epochs'.
        """
        units = best_configs.get('units')
        dropout = best_configs.get('dropout')
        learning_rate = best_configs.get('learning_rate')
        optimal_epochs = best_configs.get('tuner/epochs')

        print(f"""
        Optimal hyperparameters:
        - Units in LSTM layer: {units}
        - Dropout rate: {dropout}
        - Learning rate: {learning_rate}
        - Optimal number of epochs: {optimal_epochs} """)

    def evaluation_initiator(self, best_configs=None, num_iterations=10):
        """
        Retrain the tuned model under several random seeds and score each run.

        Args:
            best_configs: Hyperparameters to evaluate. Defaults to
                ``self.best_configs`` (set by ``parameter_search``).
            num_iterations (int): Number of seeds / training runs.

        Returns:
            np.ndarray: Weighted F1 score of every run, in run order.

        Raises:
            ValueError: If no hyperparameters are available.
        """
        if best_configs is None:
            best_configs = self.best_configs
        if best_configs is None:
            raise ValueError(
                "No hyperparameters available: call parameter_search() first or pass best_configs.")

        random_seeds = np.random.randint(0, 1000, size=num_iterations)
        f1_scores = [self.train_evaluate_model(best_configs, int(seed)) for seed in random_seeds]

        return np.array(f1_scores)

    def train_evaluate_model(self, best_configs, random_seed, X=None, y=None):
        """
        Train one model with the given hyperparameters and score it.

        Args:
            best_configs: Hyperparameters used to build the model; must also
                provide 'tuner/epochs'.
            random_seed (int): Seed applied to NumPy and TensorFlow.
            X, y: Unused. Kept so existing calls that pass them still work;
                training and scoring always use the instance's own splits.

        Returns:
            float: Weighted F1 score on ``self.X_test`` / ``self.y_test``.
        """
        np.random.seed(random_seed)
        tf.random.set_seed(random_seed)
        optimal_epochs = best_configs.get('tuner/epochs')

        hypermodel = self.tuner.hypermodel.build(best_configs)
        hypermodel.fit(self.X_train, self.y_train, epochs=optimal_epochs, validation_split=0.3)
        y_pred = np.argmax(hypermodel.predict(self.X_test), axis=1)
        f1 = f1_score(self.y_test, y_pred, average='weighted')

        # Keep the trained model so predict() can be used without a saved file.
        self.best_model = hypermodel

        return f1

    def load_best_model(self):
        """
        Load ``best_model.h5`` from ``self.model_dir`` into ``self.best_model``.

        NOTE(review): nothing in this class writes that file; it has to be
        saved there by other code before this method can succeed.
        """
        self.best_model = tf.keras.models.load_model(os.path.join(self.model_dir, 'best_model.h5'))

    def predict(self, X_new):
        """
        Makes predictions on new data using the trained best model.

        If no model is held in memory, it is first loaded from disk via
        ``load_best_model``.

        Args:
            X_new (np.ndarray): New input data for prediction.

        Returns:
            np.ndarray: Predicted class probabilities.
        """
        if self.best_model is None:
            self.load_best_model()

        predictions = self.best_model.predict(X_new)

        return predictions

    def run(self):
        """
        Execute the full pipeline: tuner setup, search, report, evaluation.

        Returns:
            np.ndarray: Weighted F1 scores from ``evaluation_initiator``.
        """
        self.tuner_initiator()
        best_configs = self.parameter_search()
        self.output_optimimal_configs(best_configs)
        f1_scores = self.evaluation_initiator(best_configs)

        return f1_scores
        


    

In [116]:
tester = LstmPipline(X_train, X_test, y_train, y_test)
# Keep whatever run() returns under the name the Validation section reads,
# instead of only printing it.
f1_scores = tester.run()
print(f1_scores)



Reloading Tuner from /home/mzero/main/uni_repo/machine_learning_fqs_2024/ml4qs-gesture-recognition/results/tuner/ml4qs_gesture_recognition/tuner0.json

        Optimal hyperparameters:
        - Units in LSTM layer: 128
        - Dropout rate: 0.0
        - Learning rate: 0.006006969118465886
        - Optimal number of epochs: 50 
Epoch 1/50
[1m146/146[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 24ms/step - accuracy: 0.5338 - loss: 1.0275 - val_accuracy: 0.7237 - val_loss: 0.6801
Epoch 2/50
[1m146/146[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 22ms/step - accuracy: 0.7494 - loss: 0.6678 - val_accuracy: 0.7543 - val_loss: 0.6126
Epoch 3/50
[1m146/146[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 22ms/step - accuracy: 0.7655 - loss: 0.6079 - val_accuracy: 0.8358 - val_loss: 0.5152
Epoch 4/50
[1m146/146[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 22ms/step - accuracy: 0.8500 - loss: 0.4487 - val_accuracy: 0.8468 - val_loss: 0.4418
Epoch 5/50
[1

  super().__init__(**kwargs)


[1m146/146[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 23ms/step - accuracy: 0.5815 - loss: 0.9285 - val_accuracy: 0.7102 - val_loss: 0.6441
Epoch 2/50
[1m146/146[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 22ms/step - accuracy: 0.7654 - loss: 0.5932 - val_accuracy: 0.7162 - val_loss: 0.6262
Epoch 3/50
[1m146/146[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 22ms/step - accuracy: 0.7655 - loss: 0.5879 - val_accuracy: 0.7392 - val_loss: 0.6725
Epoch 4/50
[1m146/146[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 22ms/step - accuracy: 0.8202 - loss: 0.4870 - val_accuracy: 0.9139 - val_loss: 0.2957
Epoch 5/50
[1m146/146[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 22ms/step - accuracy: 0.9080 - loss: 0.2796 - val_accuracy: 0.9399 - val_loss: 0.1999
Epoch 6/50
[1m146/146[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 22ms/step - accuracy: 0.8782 - loss: 0.3432 - val_accuracy: 0.7933 - val_loss: 0.5520
Epoch 7/50
[1m146/146[0m [32m━

  super().__init__(**kwargs)


[1m146/146[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 23ms/step - accuracy: 0.5609 - loss: 1.0057 - val_accuracy: 0.7322 - val_loss: 0.8036
Epoch 2/50
[1m146/146[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 22ms/step - accuracy: 0.6873 - loss: 0.7855 - val_accuracy: 0.6722 - val_loss: 0.7875
Epoch 3/50
[1m146/146[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 23ms/step - accuracy: 0.7464 - loss: 0.6197 - val_accuracy: 0.8238 - val_loss: 0.4310
Epoch 4/50
[1m146/146[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 24ms/step - accuracy: 0.9042 - loss: 0.3064 - val_accuracy: 0.8744 - val_loss: 0.3450
Epoch 5/50
[1m146/146[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 24ms/step - accuracy: 0.9377 - loss: 0.2143 - val_accuracy: 0.9474 - val_loss: 0.1831
Epoch 6/50
[1m146/146[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 24ms/step - accuracy: 0.9706 - loss: 0.1063 - val_accuracy: 0.9670 - val_loss: 0.1300
Epoch 7/50
[1m146/146[0m [32m━

  super().__init__(**kwargs)


[1m146/146[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 23ms/step - accuracy: 0.5714 - loss: 0.9578 - val_accuracy: 0.5986 - val_loss: 0.8796
Epoch 2/50
[1m146/146[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 22ms/step - accuracy: 0.7595 - loss: 0.6079 - val_accuracy: 0.9114 - val_loss: 0.3463
Epoch 3/50
[1m146/146[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 23ms/step - accuracy: 0.8705 - loss: 0.3991 - val_accuracy: 0.8889 - val_loss: 0.3213
Epoch 4/50
[1m146/146[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 22ms/step - accuracy: 0.9235 - loss: 0.2536 - val_accuracy: 0.9209 - val_loss: 0.2494
Epoch 5/50
[1m146/146[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 22ms/step - accuracy: 0.9301 - loss: 0.2322 - val_accuracy: 0.9324 - val_loss: 0.1801
Epoch 6/50
[1m146/146[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 22ms/step - accuracy: 0.9841 - loss: 0.0699 - val_accuracy: 0.9680 - val_loss: 0.1100
Epoch 7/50
[1m146/146[0m [32m━

  super().__init__(**kwargs)


[1m146/146[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 23ms/step - accuracy: 0.4969 - loss: 1.0938 - val_accuracy: 0.7447 - val_loss: 0.6555
Epoch 2/50
[1m146/146[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 22ms/step - accuracy: 0.7150 - loss: 0.6807 - val_accuracy: 0.8338 - val_loss: 0.5208
Epoch 3/50
[1m146/146[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 22ms/step - accuracy: 0.7999 - loss: 0.5086 - val_accuracy: 0.9234 - val_loss: 0.2547
Epoch 4/50
[1m146/146[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 22ms/step - accuracy: 0.8978 - loss: 0.2911 - val_accuracy: 0.9760 - val_loss: 0.0984
Epoch 5/50
[1m146/146[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 22ms/step - accuracy: 0.9695 - loss: 0.1036 - val_accuracy: 0.9610 - val_loss: 0.1202
Epoch 6/50
[1m146/146[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 23ms/step - accuracy: 0.9804 - loss: 0.0629 - val_accuracy: 0.9685 - val_loss: 0.0853
Epoch 7/50
[1m146/146[0m [32m━

  super().__init__(**kwargs)


[1m146/146[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 24ms/step - accuracy: 0.5466 - loss: 1.0148 - val_accuracy: 0.7568 - val_loss: 0.6398
Epoch 2/50
[1m146/146[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 23ms/step - accuracy: 0.7357 - loss: 0.6528 - val_accuracy: 0.7487 - val_loss: 0.6532
Epoch 3/50
[1m146/146[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 22ms/step - accuracy: 0.7693 - loss: 0.5250 - val_accuracy: 0.7693 - val_loss: 0.4754
Epoch 4/50
[1m146/146[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 22ms/step - accuracy: 0.8592 - loss: 0.3474 - val_accuracy: 0.8714 - val_loss: 0.4054
Epoch 5/50
[1m146/146[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 23ms/step - accuracy: 0.9211 - loss: 0.2484 - val_accuracy: 0.9540 - val_loss: 0.1428
Epoch 6/50
[1m146/146[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 23ms/step - accuracy: 0.9508 - loss: 0.1564 - val_accuracy: 0.9825 - val_loss: 0.0853
Epoch 7/50
[1m146/146[0m [32m━

### Validation

In [None]:
import scipy.stats as st

# Summarise the F1 scores of the repeated runs: mean, standard error and a
# t-distribution based 95% confidence interval around the mean.
confidence_level = 0.95
mean_f1 = np.mean(f1_scores)
se_f1 = st.sem(f1_scores)
degrees_of_freedom = len(f1_scores) - 1

confidence_interval = st.t.interval(confidence_level, degrees_of_freedom, loc=mean_f1, scale=se_f1)
ci_lower, ci_upper = confidence_interval[0], confidence_interval[1]

print(f"Mean F1 Score: {mean_f1:.4f}")
print(f"95% Confidence Interval for F1 Score: ({ci_lower:.4f}, {ci_upper:.4f})")


######  F1 Score Metrics

In [None]:
# Assumes the data has been split into X_train, X_test, y_train, y_test and that
# `tester` has completed its hyperparameter search.

# Reuse the pipeline's model when it holds one; otherwise train a model with the
# best hyperparameters found by the tuner.
hypermodel = tester.best_model
if hypermodel is None:
    best_hp = tester.tuner.get_best_hyperparameters(num_trials=1)[0]
    hypermodel = tester.tuner.hypermodel.build(best_hp)
    hypermodel.fit(X_train, y_train, epochs=best_hp.get('tuner/epochs'), validation_split=0.3)

# Make predictions on the test set
y_pred = hypermodel.predict(X_test)

# Convert predictions to class labels
y_pred_classes = np.argmax(y_pred, axis=1)



: 

In [None]:

# Full per-class report with the original gesture names
print(classification_report(y_test, y_pred_classes, target_names=label_encoder.classes_))

# Per-class scores (one value per class), used by the bar chart below
precision_scores, recall_scores, f1_scores = (
    metric(y_test, y_pred_classes, average=None)
    for metric in (precision_score, recall_score, f1_score)
)

# Support-weighted averages across all classes
precision_weighted, recall_weighted, f1_weighted = (
    metric(y_test, y_pred_classes, average='weighted')
    for metric in (precision_score, recall_score, f1_score)
)

print(f'Weighted Precision: {precision_weighted:.4f}')
print(f'Weighted Recall: {recall_weighted:.4f}')
print(f'Weighted F1 Score: {f1_weighted:.5f}')


: 

In [None]:
# Grouped bar chart: precision, recall and F1 side by side for every class
classes = label_encoder.classes_
x = np.arange(len(classes))
bar_width = 0.2

fig, ax = plt.subplots(figsize=(10, 6))

# One bar group per metric: shifted left, centred, and shifted right of each class tick
metric_groups = (
    (x - bar_width, precision_scores, 'Precision'),
    (x, recall_scores, 'Recall'),
    (x + bar_width, f1_scores, 'F1 Score'),
)
bar_containers = [
    ax.bar(positions, values, bar_width, label=metric_name)
    for positions, values, metric_name in metric_groups
]

# Axis labels, title, class names on the ticks, and the legend
ax.set_xlabel('Classes')
ax.set_ylabel('Scores')
ax.set_title('Precision, Recall, and F1 Scores for Each Class')
ax.set_xticks(x)
ax.set_xticklabels(classes)
ax.legend()

# Write each bar's value just above it
for container in bar_containers:
    for rect in container:
        value = rect.get_height()
        ax.annotate(
            f'{value:.2f}',
            xy=(rect.get_x() + rect.get_width() / 2, value),
            xytext=(0, 3),  # 3 points vertical offset
            textcoords="offset points",
            ha='center',
            va='bottom',
        )

plt.show()

: 