In [1]:
import pandas as pd
import os
import numpy as np

# some actions do not require leg movement, resulting in zero variance. replace zero variance with small value.
def merge_raw_data(dir):
    def replace_zero_var(series):
        SMALL_NON_ZERO_VALUE = 1e-2
        var = series.var()
        if var == 0:
            series += np.random.normal(scale=SMALL_NON_ZERO_VALUE, size=len(series))
        return series

    dfs = []
    FEATURES = ["accX1", "accY1", "accZ1", "gyroX1", "gyroY1", "gyroZ1", "accX2", "accY2", "accZ2", "gyroX2", "gyroY2", "gyroZ2"]
    LEG_COLS = ["accX2", "accY2", "accZ2", "gyroX2", "gyroY2", "gyroZ2"]
    for f in os.scandir(dir):
        df = pd.read_csv(f.path, header=None, names=FEATURES)
        for col in LEG_COLS:
            df[col] = replace_zero_var(df[col])
        dfs.append(df)
    return pd.concat(dfs, ignore_index=True)

Pyarrow will become a required dependency of pandas in the next major release of pandas (pandas 3.0),
(to allow more performant data types, such as the Arrow string type, and better interoperability with other libraries)
but was not found to be installed on your system.
If this would cause problems for you,
please provide us feedback at https://github.com/pandas-dev/pandas/issues/54466
        
  import pandas as pd


In [2]:
from scipy.stats import iqr, skew, kurtosis

def generate_features(merged_df):
    def rms(x):
        return np.sqrt(np.mean(x**2))
    
    WINDOW_SIZE = 25
    transformed_df = merged_df.groupby(np.arange(len(merged_df)) // WINDOW_SIZE).agg({
        "accX1": ['median', iqr, 'var', skew, kurtosis, rms],
        "accY1": ['median', iqr, 'var', skew, kurtosis, rms],
        "accZ1": ['median', iqr, 'var', skew, kurtosis, rms],
        "gyroX1": ['median', iqr, 'var', skew, kurtosis, rms],
        "gyroY1": ['median', iqr, 'var', skew, kurtosis, rms],
        "gyroZ1": ['median', iqr, 'var', skew, kurtosis, rms],
        "accX2": ['median', iqr, 'var', skew, kurtosis, rms],
        "accY2": ['median', iqr, 'var', skew, kurtosis, rms],
        "accZ2": ['median', iqr, 'var', skew, kurtosis, rms],
        "gyroX2": ['median', iqr, 'var', skew, kurtosis, rms],
        "gyroY2": ['median', iqr, 'var', skew, kurtosis, rms],
        "gyroZ2": ['median', iqr, 'var', skew, kurtosis, rms]
    })
    transformed_df.columns = ['_'.join(col).strip() for col in transformed_df.columns.values]
    return transformed_df

In [3]:
import json

with open('./class_dict.json', 'r') as f:
    class_dict = json.load(f)

In [4]:
class_dict

{'captain': 0,
 'grenade': 1,
 'hulk': 2,
 'ironman': 3,
 'logout': 4,
 'random': 5,
 'reload': 6,
 'shangchi': 7,
 'shield': 8}

In [5]:
# RAW_DATA_DIR = './data/raw/'
# MERGED_DATA_DIR = './data/merged/'
AUGMENTED_DATA_DIR = './data/augmented/'

# os.makedirs(MERGED_DATA_DIR, exist_ok=True)
# os.makedirs(AUGMENTED_DATA_DIR, exist_ok=True)
# for dir in os.scandir(RAW_DATA_DIR):
#     merged_df = merge_raw_data(dir)
#     merged_data_file_path = os.path.join(MERGED_DATA_DIR, f"{dir.name}.csv")
#     merged_df.to_csv(merged_data_file_path, index=False)
#     augmented_df = generate_features(merged_df)
#     augmented_df['class'] = class_dict[dir.name]
#     augmented_data_file_path = os.path.join(AUGMENTED_DATA_DIR, f"{dir.name}.csv")
#     augmented_df.to_csv(augmented_data_file_path, index=False)
logout_folder = './data/raw/logout'
merged_logout_fp = './data/merged/logout.csv'
augmented_logout_fp = './data/augmented/logout.csv'
merged_logout_df = merge_raw_data(logout_folder)
merged_logout_df.to_csv(merged_logout_fp, index=False)
augmented_logout_df = generate_features(merged_logout_df)
augmented_logout_df['class'] = 4
augmented_logout_df.to_csv(augmented_logout_fp, index=False)

In [1]:
import os
import pandas as pd

AUGMENTED_DATA_DIR = './data/augmented/'
final_dfs = []
for f in os.scandir(AUGMENTED_DATA_DIR):
    df = pd.read_csv(f.path)
    final_dfs.append(df)
final_df = pd.concat(final_dfs, ignore_index=True)

Pyarrow will become a required dependency of pandas in the next major release of pandas (pandas 3.0),
(to allow more performant data types, such as the Arrow string type, and better interoperability with other libraries)
but was not found to be installed on your system.
If this would cause problems for you,
please provide us feedback at https://github.com/pandas-dev/pandas/issues/54466
        
  import pandas as pd


In [2]:
final_df

Unnamed: 0,accX1_median,accX1_iqr,accX1_var,accX1_skew,accX1_kurtosis,accX1_rms,accY1_median,accY1_iqr,accY1_var,accY1_skew,...,gyroY2_skew,gyroY2_kurtosis,gyroY2_rms,gyroZ2_median,gyroZ2_iqr,gyroZ2_var,gyroZ2_skew,gyroZ2_kurtosis,gyroZ2_rms,class
0,-0.871,3.979,139.607699,0.076064,-0.380109,11.651967,-17.024,9.955,25.759357,0.070716,...,-1.166787,5.047086,0.747525,0.000000,0.108000,1.067825,1.436342,3.576412,1.016470,0
1,-0.647,2.841,107.880509,-0.003239,0.186298,10.400923,-9.488,9.873,25.779247,-0.381849,...,-1.453018,1.711487,0.763077,0.000000,0.316000,1.106049,1.774639,4.261824,1.031954,0
2,-1.463,6.225,98.215398,0.253025,0.405055,10.059064,-9.705,9.988,24.852623,-0.399810,...,1.948419,5.495760,0.812205,0.000000,0.489000,1.358942,1.704121,4.134404,1.149971,0
3,0.000,2.009,106.646756,0.054148,0.068139,10.133569,-9.621,9.981,26.002545,-0.219957,...,-0.831403,2.470332,0.824894,0.000000,0.409000,1.528126,1.596651,3.924983,1.211259,0
4,0.000,2.320,135.752353,0.054907,-0.386807,11.416073,-9.614,9.957,26.740367,-0.102253,...,-2.710418,8.551469,1.038607,0.000000,0.417000,1.129418,1.728531,4.891945,1.041284,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3195,-2.727,4.607,9.867046,-0.489387,-0.726773,5.178281,-9.479,10.774,31.893133,0.609979,...,0.204610,-0.330883,0.011864,-0.000970,0.010130,0.000078,-0.096328,0.292246,0.008768,8
3196,-4.864,5.152,14.448643,-0.103638,-0.650035,6.060480,-8.902,10.755,32.303524,0.410515,...,0.255803,-0.850789,0.009004,-0.002193,0.020304,0.000148,-0.548789,-0.834185,0.012860,8
3197,-4.855,5.874,9.937312,-0.281087,-0.911004,5.802308,-8.104,10.854,34.659530,0.492632,...,-0.046485,-0.327823,0.011186,-0.000663,0.012438,0.000074,-0.275535,-0.759217,0.008629,8
3198,-4.180,5.126,8.316582,-0.482818,-1.005024,5.610120,-9.431,10.223,29.303554,0.379150,...,-0.282634,-0.488242,0.009743,0.003483,0.013030,0.000091,-0.004329,-0.862326,0.009540,8


In [8]:
# Should not print anything, indicating no missing values
missing_data_counts = final_df.isna().sum()
for column, count in missing_data_counts.items():
    if count > 0:
        print(f"{column}: {count}")

In [3]:
y = final_df['class'].to_numpy()
x = final_df.drop(['class'], axis=1).to_numpy()

In [4]:
from sklearn.model_selection import train_test_split

# Stratified sampling to get good ratio of classes
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=42, stratify=y)

In [5]:
unique, counts = np.unique(y_train, return_counts=True)
print(np.asarray((unique, counts)).T)

NameError: name 'np' is not defined

In [None]:
unique, counts = np.unique(y_test, return_counts=True)
print(np.asarray((unique, counts)).T)

[[ 0 75]
 [ 1 75]
 [ 2 75]
 [ 3 75]
 [ 4 40]
 [ 5 75]
 [ 6 75]
 [ 7 75]
 [ 8 75]]


In [6]:
import torch
from torch.utils.data import TensorDataset, DataLoader

x_train = torch.from_numpy(x_train).to(torch.float32)
x_test = torch.from_numpy(x_test).to(torch.float32)
y_train = torch.from_numpy(y_train).to(torch.long)
y_test = torch.from_numpy(y_test).to(torch.long)

train_dataset = TensorDataset(x_train, y_train)
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)

In [7]:
import torch
import torch.nn as nn

torch.manual_seed(42)
class Model(nn.Module):
    def __init__(self, input_size, hidden_size1, hidden_size2, output_size, dropout_prob=0.2):
        super(Model, self).__init__()
        self.layer1 = nn.Linear(input_size, hidden_size1)
        self.layer2 = nn.Linear(hidden_size1, hidden_size2)
        self.layer3 = nn.Linear(hidden_size2, output_size)
        self.dropout = nn.Dropout(dropout_prob)
        self.relu = nn.ReLU()


    def forward(self, x):
        x = self.relu(self.layer1(x))
        x = self.dropout(x)
        x = self.relu(self.layer2(x))
        x = self.dropout(x)
        x = self.layer3(x)
        return x

In [41]:
INPUT_SIZE = x_train.shape[1]
HIDDEN_SIZE1 = 64
HIDDEN_SIZE2 = 64
OUTPUT_SIZE = 9
DROPOUT_PROB = 0.2

model = Model(INPUT_SIZE, HIDDEN_SIZE1, HIDDEN_SIZE2, OUTPUT_SIZE, DROPOUT_PROB)

In [42]:
import torch.optim as optim
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
import time

NUM_EPOCHS = 20
LEARNING_RATE = 0.005

def train(model, train_loader, optimizer, criterion):
    model.train()
    running_loss = 0.0
    correct = 0
    total = 0

    for inputs, targets in train_loader:
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, targets)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        _, predicted = outputs.max(1)
        total += targets.size(0)
        correct += predicted.eq(targets).sum().item()

    train_loss = running_loss / len(train_loader)
    train_accuracy = 100. * correct / total

    return train_loss, train_accuracy

def evaluate(model, x_test, y_test):
    start_time = time.time()
    print(f'Evaluating model performance for test set of size {len(x_test)}')
    model.eval()
    with torch.no_grad():
        y_pred = model(x_test)
        _, y_pred = torch.max(y_pred, 1)
    end_time = time.time()
    execution_time = end_time - start_time
    print("Execution time:", execution_time, "seconds")
    print(f'Accuracy: {100. * accuracy_score(y_test, y_pred)}%')
    print(f'Confusion matrix:\n {confusion_matrix(y_test, y_pred)}')
    print(f'Classification report:\n {classification_report(y_test, y_pred)}')

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE)

for epoch in range(NUM_EPOCHS):
    train_loss, train_accuracy = train(model, train_loader, optimizer, criterion)
    print(f'Epoch {epoch+1}/{NUM_EPOCHS}, Train Loss: {train_loss:.4f}, Train Accuracy: {train_accuracy:.2f}%')

Epoch 1/20, Train Loss: 1.2775, Train Accuracy: 57.97%
Epoch 2/20, Train Loss: 0.4853, Train Accuracy: 85.12%
Epoch 3/20, Train Loss: 0.3069, Train Accuracy: 90.74%
Epoch 4/20, Train Loss: 0.2528, Train Accuracy: 92.19%
Epoch 5/20, Train Loss: 0.2131, Train Accuracy: 93.79%
Epoch 6/20, Train Loss: 0.2148, Train Accuracy: 93.67%
Epoch 7/20, Train Loss: 0.1883, Train Accuracy: 94.41%
Epoch 8/20, Train Loss: 0.1604, Train Accuracy: 94.73%
Epoch 9/20, Train Loss: 0.1800, Train Accuracy: 94.80%
Epoch 10/20, Train Loss: 0.1392, Train Accuracy: 95.78%
Epoch 11/20, Train Loss: 0.1439, Train Accuracy: 95.27%
Epoch 12/20, Train Loss: 0.1322, Train Accuracy: 95.74%
Epoch 13/20, Train Loss: 0.1237, Train Accuracy: 96.05%
Epoch 14/20, Train Loss: 0.0950, Train Accuracy: 96.64%
Epoch 15/20, Train Loss: 0.1087, Train Accuracy: 96.64%
Epoch 16/20, Train Loss: 0.1220, Train Accuracy: 96.41%
Epoch 17/20, Train Loss: 0.1075, Train Accuracy: 96.52%
Epoch 18/20, Train Loss: 0.1489, Train Accuracy: 95.47%
E

In [43]:
evaluate(model, x_test, y_test)

Evaluating model performance for test set of size 640
Execution time: 0.04552483558654785 seconds
Accuracy: 98.4375%
Confusion matrix:
 [[74  0  0  0  0  1  0  0  0]
 [ 1 73  0  0  0  1  0  0  0]
 [ 0  0 74  0  0  1  0  0  0]
 [ 0  1  0 74  0  0  0  0  0]
 [ 0  0  0  0 40  0  0  0  0]
 [ 1  0  0  0  0 71  1  0  2]
 [ 0  0  0  0  0  1 74  0  0]
 [ 0  0  0  0  0  0  0 75  0]
 [ 0  0  0  0  0  0  0  0 75]]
Classification report:
               precision    recall  f1-score   support

           0       0.97      0.99      0.98        75
           1       0.99      0.97      0.98        75
           2       1.00      0.99      0.99        75
           3       1.00      0.99      0.99        75
           4       1.00      1.00      1.00        40
           5       0.95      0.95      0.95        75
           6       0.99      0.99      0.99        75
           7       1.00      1.00      1.00        75
           8       0.97      1.00      0.99        75

    accuracy               

In [44]:
import os
from datetime import datetime

os.makedirs('models', exist_ok=True)
dt_string = datetime.now().strftime("%d_%m_%Y-%H_%M_%S")
torch.save(model.state_dict(), f'models/mlp_{dt_string}.pt')

In [45]:
model_params_filename = f'models/mlp_{dt_string}_params.txt'
with open(model_params_filename, 'w+') as file:
    for name, param in model.named_parameters():
        file.write(f'Layer: {name}\n')
        # Convert the parameter values to a NumPy array
        param_array = param.data.cpu().numpy()
        
        if "weight" in name:
            for row in param_array:
                file.write('{')
                for val in row:
                    file.write(f'%.6f, ' % val)
                file.write('}')
                file.write('\n')
        else: # bias
            file.write('{')
            for val in param_array:
                file.write(f'%.6f, ' % val)
            file.write('}')
            file.write('\n')
        file.write('\n')

In [45]:
for i in range(5):
    print(x_test[i])
    print(y_test[i])

tensor([-1.2110e+00,  9.4190e+00,  5.2363e+01, -3.1914e-01, -6.0998e-01,
         7.1646e+00, -1.1897e+01,  1.1820e+01,  6.1554e+01,  6.2736e-01,
        -8.5258e-01,  1.2890e+01, -9.4390e+00,  1.0195e+01,  6.0448e+01,
        -3.4948e-01, -6.6539e-01,  1.4150e+01,  0.0000e+00,  2.4680e+00,
         6.1111e+00, -1.7010e-02, -6.8972e-01,  2.4305e+00,  0.0000e+00,
         1.8700e+00,  2.8650e+00, -7.4931e-01,  2.2949e-01,  1.6584e+00,
         1.8000e-01,  4.0860e+00,  7.6898e+00, -1.2607e-01, -7.7237e-01,
         2.7700e+00,  1.2810e+00,  8.5400e-01,  2.9485e-01,  2.3891e-01,
        -1.1303e+00,  1.5809e+00, -9.2940e+00,  2.0700e-01,  2.8011e-02,
         5.4980e-01, -2.0721e-01,  9.2748e+00, -1.1501e+01,  7.7100e-01,
         2.5161e-01,  7.1682e-01, -6.2354e-01,  1.1290e+01,  0.0000e+00,
         0.0000e+00,  8.7176e-03,  5.2759e-01,  1.7135e-01,  9.4731e-02,
         0.0000e+00,  0.0000e+00,  4.2140e-03,  5.4133e-01,  8.0056e-01,
         6.8282e-02,  0.0000e+00,  0.0000e+00,  2.2

In [35]:
import json

x_test = x_test.tolist()
y_test = y_test.tolist()
labelled_test_data = [{"x": x, "y": y} for x, y in list(zip(x_test, y_test))]
with open('./data/test/test_data.json', 'w+') as test_data_json:
    json.dump(labelled_test_data, test_data_json, indent=4)