In [1]:
import pandas as pd
import os
import numpy as np

def merge_raw_data(dir):
    """Concatenate every raw recording in a directory into one frame.

    Each regular file directly inside ``dir`` is read as a header-less CSV
    with twelve columns (six ``*1`` columns followed by six ``*2`` columns).
    The six ``*2`` columns (named ``LEG_COLS`` here, presumably the leg-mounted
    sensor) are discarded and the remaining frames are stacked row-wise.

    Files are processed in sorted path order. ``os.scandir`` yields entries in
    arbitrary, platform-dependent order, and the row order matters because
    downstream feature windows are cut by row position.

    Args:
        dir: Directory to read, as a ``str`` or any path-like object
            (for example an ``os.DirEntry``).

    Returns:
        pandas.DataFrame with columns accX1, accY1, accZ1, gyroX1, gyroY1,
        gyroZ1 and a fresh 0..n-1 index.

    Raises:
        ValueError: If the directory contains no regular files.
    """
    FEATURES = ["accX1", "accY1", "accZ1", "gyroX1", "gyroY1", "gyroZ1", "accX2", "accY2", "accZ2", "gyroX2", "gyroY2", "gyroZ2"]
    LEG_COLS = ["accX2", "accY2", "accZ2", "gyroX2", "gyroY2", "gyroZ2"]

    # The context manager closes the directory handle promptly; sub-directories
    # are skipped because read_csv cannot open them.
    with os.scandir(dir) as entries:
        csv_paths = sorted(entry.path for entry in entries if entry.is_file())

    if not csv_paths:
        raise ValueError(f"No data files found in {os.fspath(dir)!r}")

    dfs = [
        pd.read_csv(path, header=None, names=FEATURES).drop(columns=LEG_COLS)
        for path in csv_paths
    ]
    return pd.concat(dfs, ignore_index=True)

Pyarrow will become a required dependency of pandas in the next major release of pandas (pandas 3.0),
(to allow more performant data types, such as the Arrow string type, and better interoperability with other libraries)
but was not found to be installed on your system.
If this would cause problems for you,
please provide us feedback at https://github.com/pandas-dev/pandas/issues/54466
        
  import pandas as pd


In [6]:
from scipy.stats import iqr, skew, kurtosis

def generate_features(merged_df, window_size=25):
    """Summarise consecutive fixed-length row windows into statistical features.

    Rows are grouped purely by position into non-overlapping windows of
    ``window_size`` rows (rows 0..window_size-1, then the next window_size
    rows, and so on). For each of the six ``*1`` sensor columns the median,
    inter-quartile range, variance, skewness, kurtosis and root-mean-square
    are computed per window.

    Notes:
        * Windows ignore any structure in the input other than row order, so
          a window can span rows that originated from different source files
          if the frame was built by concatenation.
        * When ``len(merged_df)`` is not a multiple of ``window_size`` the
          last window holds fewer rows; pandas' ``'var'`` (sample variance)
          is NaN for a single-row window.

    Args:
        merged_df: DataFrame containing at least the columns accX1, accY1,
            accZ1, gyroX1, gyroY1 and gyroZ1.
        window_size: Number of consecutive rows per window. Defaults to 25.

    Returns:
        pandas.DataFrame with one row per window and 36 columns named
        ``<sensor>_<stat>`` (e.g. ``accX1_median``, ``gyroZ1_rms``).

    Raises:
        ValueError: If ``window_size`` is smaller than 1.
    """
    if window_size < 1:
        raise ValueError(f"window_size must be a positive integer, got {window_size!r}")

    def rms(x):
        # Root mean square of the window's samples.
        return np.sqrt(np.mean(x**2))

    SENSOR_COLS = ["accX1", "accY1", "accZ1", "gyroX1", "gyroY1", "gyroZ1"]
    # Callables contribute their __name__ to the resulting column label.
    STATS = ['median', iqr, 'var', skew, kurtosis, rms]

    # Integer division of the row position yields the window id of each row.
    window_ids = np.arange(len(merged_df)) // window_size
    transformed_df = merged_df.groupby(window_ids).agg(
        {col: list(STATS) for col in SENSOR_COLS}
    )
    # Flatten the (sensor, stat) MultiIndex into "<sensor>_<stat>" names.
    transformed_df.columns = ['_'.join(col).strip() for col in transformed_df.columns.values]
    return transformed_df

In [3]:
import json

# Load the label-name -> integer class id mapping.
# The encoding is pinned because JSON is UTF-8 by specification, whereas
# open() otherwise falls back to the platform's locale encoding.
with open('./class_dict.json', 'r', encoding='utf-8') as f:
    class_dict = json.load(f)

In [4]:
# Show the label-name -> integer class id mapping read from class_dict.json.
class_dict

{'captain': 0,
 'grenade': 1,
 'hulk': 2,
 'ironman': 3,
 'logout': 4,
 'random': 5,
 'reload': 6,
 'shangchi': 7,
 'shield': 8}

In [7]:
# Build the per-class "merged" (raw rows) and "augmented" (windowed features)
# CSV files. Every sub-directory of RAW_DATA_DIR is treated as one class whose
# name must be a key of class_dict.
RAW_DATA_DIR = './data/raw/'
MERGED_DATA_DIR = './data/handonly/merged/'
AUGMENTED_DATA_DIR = './data/handonly/augmented/'

os.makedirs(MERGED_DATA_DIR, exist_ok=True)
os.makedirs(AUGMENTED_DATA_DIR, exist_ok=True)

# Sort for a reproducible processing order (os.scandir order is arbitrary) and
# keep only directories so stray files in the raw folder are ignored.
with os.scandir(RAW_DATA_DIR) as raw_entries:
    class_dirs = sorted(
        (entry for entry in raw_entries if entry.is_dir()),
        key=lambda entry: entry.name,
    )

# `class_dir` rather than `dir`, so the builtin dir() is not shadowed for the
# rest of the kernel session.
for class_dir in class_dirs:
    # Fail before writing anything for a folder that has no class id.
    if class_dir.name not in class_dict:
        raise KeyError(
            f"Directory {class_dir.name!r} in {RAW_DATA_DIR} has no entry in class_dict"
        )

    merged_df = merge_raw_data(class_dir)
    merged_data_file_path = os.path.join(MERGED_DATA_DIR, f"{class_dir.name}.csv")
    merged_df.to_csv(merged_data_file_path, index=False)

    augmented_df = generate_features(merged_df)
    # Every window of this folder gets the folder's integer class id.
    augmented_df['class'] = class_dict[class_dir.name]
    augmented_data_file_path = os.path.join(AUGMENTED_DATA_DIR, f"{class_dir.name}.csv")
    augmented_df.to_csv(augmented_data_file_path, index=False)

In [8]:
import os
import pandas as pd

# Read every per-class feature CSV back and stack them into one frame.
AUGMENTED_DATA_DIR = './data/handonly/augmented/'

# os.scandir returns entries in arbitrary order. Sorting fixes the row order
# of final_df, which the seeded train/test split below depends on for
# reproducibility across machines.
with os.scandir(AUGMENTED_DATA_DIR) as augmented_entries:
    augmented_paths = sorted(
        entry.path for entry in augmented_entries if entry.is_file()
    )

final_dfs = [pd.read_csv(path) for path in augmented_paths]
final_df = pd.concat(final_dfs, ignore_index=True)

In [9]:
# Show the combined feature table (pandas abbreviates rows and columns in the rendered output).
final_df

Unnamed: 0,accX1_median,accX1_iqr,accX1_var,accX1_skew,accX1_kurtosis,accX1_rms,accY1_median,accY1_iqr,accY1_var,accY1_skew,...,gyroY1_skew,gyroY1_kurtosis,gyroY1_rms,gyroZ1_median,gyroZ1_iqr,gyroZ1_var,gyroZ1_skew,gyroZ1_kurtosis,gyroZ1_rms,class
0,-0.871,3.979,139.607699,0.076064,-0.380109,11.651967,-17.024,9.955,25.759357,0.070716,...,-0.031852,-0.379493,2.506001,0.000,1.081,8.762225,-0.093991,-0.748402,2.908340,0
1,-0.647,2.841,107.880509,-0.003239,0.186298,10.400923,-9.488,9.873,25.779247,-0.381849,...,-0.120597,-0.570993,2.502437,0.000,1.069,6.360700,-0.119359,-0.024385,2.476042,0
2,-1.463,6.225,98.215398,0.253025,0.405055,10.059064,-9.705,9.988,24.852623,-0.399810,...,-0.168045,-0.487042,2.447198,0.156,0.858,6.853012,-0.111923,-0.199015,2.630570,0
3,0.000,2.009,106.646756,0.054148,0.068139,10.133569,-9.621,9.981,26.002545,-0.219957,...,-0.026332,-0.075297,2.401846,0.000,0.946,5.474714,-0.040530,0.247596,2.392161,0
4,0.000,2.320,135.752353,0.054907,-0.386807,11.416073,-9.614,9.957,26.740367,-0.102253,...,0.079260,-0.286870,2.565400,0.000,1.719,7.711332,-0.090902,-0.551575,2.727575,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3195,-2.727,4.607,9.867046,-0.489387,-0.726773,5.178281,-9.479,10.774,31.893133,0.609979,...,-0.686881,-0.801199,1.267435,0.000,1.629,2.102231,-0.472117,-0.180591,1.477227,8
3196,-4.864,5.152,14.448643,-0.103638,-0.650035,6.060480,-8.902,10.755,32.303524,0.410515,...,-0.419119,0.339695,1.199870,0.000,2.463,3.541341,0.075142,-0.665857,1.852816,8
3197,-4.855,5.874,9.937312,-0.281087,-0.911004,5.802308,-8.104,10.854,34.659530,0.492632,...,-0.140064,-0.513824,0.935372,0.000,2.718,4.213747,-0.491817,-0.367868,2.012744,8
3198,-4.180,5.126,8.316582,-0.482818,-1.005024,5.610120,-9.431,10.223,29.303554,0.379150,...,-0.607597,0.014847,1.442584,0.133,1.099,1.168481,-0.186935,-0.592447,1.126833,8


In [10]:
# Sanity check: silence means the feature table has no missing values.
# Any line that does appear names a column and how many NaNs it holds.
missing_data_counts = final_df.isna().sum()
for column, count in missing_data_counts[missing_data_counts > 0].items():
    print(f"{column}: {count}")

In [11]:
# Separate the table into the feature matrix (every column except 'class')
# and the integer label vector.
x = final_df.drop(columns=['class']).to_numpy()
y = final_df['class'].to_numpy()

In [12]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the windows for testing. Stratifying on y keeps each
# class's share the same in both splits; the fixed seed makes the split
# repeatable for a given row order.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=42,
    stratify=y,
)

In [13]:
# Per-class sample counts of the training split, printed as (class id, count) rows.
unique, counts = np.unique(y_train, return_counts=True)
print(np.column_stack((unique, counts)))

[[  0 300]
 [  1 300]
 [  2 300]
 [  3 300]
 [  4 160]
 [  5 300]
 [  6 300]
 [  7 300]
 [  8 300]]


In [14]:
# Per-class sample counts of the test split, printed as (class id, count) rows.
unique, counts = np.unique(y_test, return_counts=True)
print(np.column_stack((unique, counts)))

[[ 0 75]
 [ 1 75]
 [ 2 75]
 [ 3 75]
 [ 4 40]
 [ 5 75]
 [ 6 75]
 [ 7 75]
 [ 8 75]]


In [15]:
import torch
from torch.utils.data import TensorDataset, DataLoader

# Convert the splits to tensors: float32 features for the linear layers and
# int64 labels as required by nn.CrossEntropyLoss.
# torch.as_tensor accepts both NumPy arrays and existing tensors, so this cell
# can be re-run safely. torch.from_numpy raised a TypeError on the second run
# because these names had already been rebound to tensors.
x_train = torch.as_tensor(x_train, dtype=torch.float32)
x_test = torch.as_tensor(x_test, dtype=torch.float32)
y_train = torch.as_tensor(y_train, dtype=torch.long)
y_test = torch.as_tensor(y_test, dtype=torch.long)

# Mini-batches of 64, reshuffled every epoch.
train_dataset = TensorDataset(x_train, y_train)
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)

In [16]:
import torch
import torch.nn as nn

# Seed PyTorch's global random number generator.
# NOTE(review): the model is instantiated in a later cell, so this only makes
# the weight initialisation repeatable when the cells run in order from a
# fresh kernel.
torch.manual_seed(42)
class Model(nn.Module):
    """Fully connected classifier with two hidden layers.

    Architecture: Linear -> ReLU -> Dropout -> Linear -> ReLU -> Dropout ->
    Linear. The final layer's raw scores (logits) are returned; no softmax is
    applied.

    Args:
        input_size: Number of input features per sample.
        hidden_size1: Width of the first hidden layer.
        hidden_size2: Width of the second hidden layer.
        output_size: Number of output scores (one per class).
        dropout_prob: Dropout probability applied after each hidden layer.
    """

    def __init__(self, input_size, hidden_size1, hidden_size2, output_size, dropout_prob=0.2):
        super().__init__()
        # The three Linear layers are created in input -> output order.
        self.layer1 = nn.Linear(input_size, hidden_size1)
        self.layer2 = nn.Linear(hidden_size1, hidden_size2)
        self.layer3 = nn.Linear(hidden_size2, output_size)
        # One parameter-free Dropout and one ReLU module are shared by both
        # hidden layers.
        self.dropout = nn.Dropout(dropout_prob)
        self.relu = nn.ReLU()

    def forward(self, x):
        """Return the class logits for a batch of feature vectors."""
        hidden = self.dropout(self.relu(self.layer1(x)))
        hidden = self.dropout(self.relu(self.layer2(hidden)))
        return self.layer3(hidden)

In [530]:
# Network dimensions.
# One input per engineered feature column.
INPUT_SIZE = x_train.shape[1]
HIDDEN_SIZE1 = 128
HIDDEN_SIZE2 = 64
# One output per class id. This is derived from class_dict, which supplies the
# labels, instead of a hard-coded 9, so the output layer always covers every
# label value (0 .. max id).
OUTPUT_SIZE = max(class_dict.values()) + 1
DROPOUT_PROB = 0.2

model = Model(INPUT_SIZE, HIDDEN_SIZE1, HIDDEN_SIZE2, OUTPUT_SIZE, DROPOUT_PROB)

In [531]:
import torch.optim as optim
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
import time

# Training hyper-parameters.
# NUM_EPOCHS: how many times the training loop below iterates over train_loader.
NUM_EPOCHS = 30
# LEARNING_RATE: passed to optim.Adam as `lr`.
LEARNING_RATE = 0.005

def train(model, train_loader, optimizer, criterion):
    """Run one optimisation epoch and report loss and accuracy.

    The model is put into training mode and left there. Accuracy is measured
    on the outputs produced during the training forward passes, i.e. with
    dropout active and before each batch's weight update.

    The reported loss is the per-sample average over the epoch: each batch
    loss is weighted by its batch size, so a smaller final batch does not
    skew the result the way a plain mean of batch means does. This assumes
    ``criterion`` returns the mean loss of the batch, which is the default
    reduction of ``nn.CrossEntropyLoss``.

    Args:
        model: Module mapping a batch of inputs to class scores of shape
            (batch, classes).
        train_loader: Iterable yielding ``(inputs, targets)`` batches.
        optimizer: Optimiser bound to ``model``'s parameters.
        criterion: Loss callable taking ``(outputs, targets)``.

    Returns:
        Tuple ``(train_loss, train_accuracy)``: the average loss per sample
        and the accuracy in percent.

    Raises:
        ValueError: If ``train_loader`` yields no samples.
    """
    model.train()
    loss_sum = 0.0
    correct = 0
    total = 0

    for inputs, targets in train_loader:
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, targets)
        loss.backward()
        optimizer.step()

        batch_size = targets.size(0)
        # Undo the per-batch mean so that every sample carries equal weight.
        loss_sum += loss.item() * batch_size
        total += batch_size
        correct += outputs.argmax(dim=1).eq(targets).sum().item()

    if total == 0:
        raise ValueError("train_loader yielded no samples")

    train_loss = loss_sum / total
    train_accuracy = 100. * correct / total

    return train_loss, train_accuracy

def evaluate(model, x_test, y_test):
    """Print inference time, accuracy, confusion matrix and class report.

    The model is switched to evaluation mode (and left there), the whole test
    set is pushed through it in a single forward pass without gradient
    tracking, and the arg-max class of each row is compared with ``y_test``.

    Only the forward pass and arg-max are timed. ``time.perf_counter`` is
    used because it is the high-resolution clock intended for measuring short
    intervals; the wall-clock ``time.time`` can be too coarse for a
    millisecond-scale call.

    Args:
        model: Module mapping ``x_test`` to class scores of shape
            (samples, classes).
        x_test: Input tensor accepted by ``model``.
        y_test: True class ids, one per row of ``x_test``.

    Returns:
        None. All results are printed.
    """
    print(f'Evaluating model performance for test set of size {len(x_test)}')
    model.eval()
    start_time = time.perf_counter()
    with torch.no_grad():
        y_pred = model(x_test)
        _, y_pred = torch.max(y_pred, 1)
    execution_time = time.perf_counter() - start_time
    print("Execution time:", execution_time, "seconds")
    print(f'Accuracy: {100. * accuracy_score(y_test, y_pred)}%')
    print(f'Confusion matrix:\n {confusion_matrix(y_test, y_pred)}')
    print(f'Classification report:\n {classification_report(y_test, y_pred)}')

# Multi-class cross-entropy on the raw logits, optimised with Adam.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE)

# One call to train() per epoch; metrics are logged after each pass.
for epoch in range(NUM_EPOCHS):
    train_loss, train_accuracy = train(
        model,
        train_loader,
        optimizer,
        criterion,
    )
    print(
        f'Epoch {epoch+1}/{NUM_EPOCHS}, '
        f'Train Loss: {train_loss:.4f}, '
        f'Train Accuracy: {train_accuracy:.2f}%'
    )

Epoch 1/30, Train Loss: 1.3763, Train Accuracy: 52.11%
Epoch 2/30, Train Loss: 0.7275, Train Accuracy: 75.78%
Epoch 3/30, Train Loss: 0.5438, Train Accuracy: 82.77%
Epoch 4/30, Train Loss: 0.4259, Train Accuracy: 86.88%
Epoch 5/30, Train Loss: 0.3398, Train Accuracy: 89.80%
Epoch 6/30, Train Loss: 0.2809, Train Accuracy: 91.13%
Epoch 7/30, Train Loss: 0.2731, Train Accuracy: 91.64%
Epoch 8/30, Train Loss: 0.2450, Train Accuracy: 91.99%
Epoch 9/30, Train Loss: 0.2191, Train Accuracy: 93.12%
Epoch 10/30, Train Loss: 0.2118, Train Accuracy: 93.16%
Epoch 11/30, Train Loss: 0.1916, Train Accuracy: 93.98%
Epoch 12/30, Train Loss: 0.2221, Train Accuracy: 93.12%
Epoch 13/30, Train Loss: 0.1957, Train Accuracy: 93.83%
Epoch 14/30, Train Loss: 0.1738, Train Accuracy: 94.73%
Epoch 15/30, Train Loss: 0.1970, Train Accuracy: 93.63%
Epoch 16/30, Train Loss: 0.1555, Train Accuracy: 94.96%
Epoch 17/30, Train Loss: 0.1670, Train Accuracy: 94.18%
Epoch 18/30, Train Loss: 0.1661, Train Accuracy: 94.30%
E

In [535]:
# Print timing, accuracy, confusion matrix and per-class report for the held-out split.
evaluate(model, x_test, y_test)


Evaluating model performance for test set of size 640
Execution time: 0.0010006427764892578 seconds
Accuracy: 97.1875%
Confusion matrix:
 [[72  0  0  0  0  2  0  0  1]
 [ 0 74  0  0  0  1  0  0  0]
 [ 1  0 74  0  0  0  0  0  0]
 [ 0  0  0 75  0  0  0  0  0]
 [ 0  0  0  0 40  0  0  0  0]
 [ 1  0  0  1  0 66  3  0  4]
 [ 1  0  0  1  0  0 73  0  0]
 [ 0  0  0  0  0  0  0 75  0]
 [ 0  1  0  0  0  1  0  0 73]]
Classification report:
               precision    recall  f1-score   support

           0       0.96      0.96      0.96        75
           1       0.99      0.99      0.99        75
           2       1.00      0.99      0.99        75
           3       0.97      1.00      0.99        75
           4       1.00      1.00      1.00        40
           5       0.94      0.88      0.91        75
           6       0.96      0.97      0.97        75
           7       1.00      1.00      1.00        75
           8       0.94      0.97      0.95        75
...
    accuracy         

In [507]:
import os
from datetime import datetime

# Timestamp (day_month_year-hour_minute_second) embedded in the file name so
# checkpoints saved at different times get distinct names. The parameter dump
# in the following cell reuses dt_string.
dt_string = datetime.now().strftime("%d_%m_%Y-%H_%M_%S")

# Save only the learned parameters (state_dict), not the whole module object.
os.makedirs('models', exist_ok=True)
torch.save(model.state_dict(), f'models/mlp_handonly_{dt_string}.pt')

In [508]:
# Dump every parameter as brace-delimited text: a "Layer: <name>" header,
# then one "{v, v, ...}," line per weight-matrix row (or a single such line
# for a bias vector), followed by a blank line.
model_params_filename = f'models/mlp_handonly_{dt_string}_params.txt'
with open(model_params_filename, 'w+') as file:
    for name, param in model.named_parameters():
        file.write(f'Layer: {name}\n')
        # Convert the parameter values to a NumPy array
        param_array = param.data.cpu().numpy()

        # Weights are iterated row by row; a bias is treated as one row.
        rows = param_array if "weight" in name else [param_array]
        for row in rows:
            formatted_values = ''.join('%.6f, ' % val for val in row)
            file.write('{' + formatted_values + '},' + '\n')
        file.write('\n')

In [533]:
# Peek at the first five test samples: feature vector, then its label.
for i in range(5):
    print(x_test[i], y_test[i], sep='\n')

tensor([-1.6100e+00,  1.2140e+01,  1.1812e+02,  3.5350e-01, -1.5457e-01,
         1.1328e+01, -1.0072e+01,  1.0187e+01,  2.7264e+01, -2.0658e-01,
        -1.6793e+00,  1.4727e+01, -9.3350e+00,  7.9940e+00,  3.1209e+01,
        -4.9545e-01, -9.3515e-01,  1.3203e+01,  0.0000e+00,  1.1290e+00,
         2.6564e+00,  1.5855e-01,  1.0658e+00,  1.6473e+00,  0.0000e+00,
         1.3580e+00,  4.1928e+00,  5.1308e-01,  6.8582e-02,  2.0064e+00,
         0.0000e+00,  3.1750e+00,  9.1346e+00,  2.3218e-01, -9.3537e-01,
         2.9838e+00])
tensor(8)
tensor([-0.1950,  4.5950, 46.6532,  0.1750,  1.0789,  6.6939, -9.2700, 12.0760,
        57.9847,  0.2038, -1.3027,  8.5476, -8.2240,  6.3540, 71.4197, -0.7057,
         1.4705, 10.4061,  0.0000,  1.6010,  4.9837,  0.3139, -0.2602,  2.2711,
         0.0000,  0.3460,  2.7714, -0.9184,  0.8045,  1.6932,  0.0000,  0.9150,
         1.8829,  0.8149,  1.5251,  1.4587])
tensor(3)
tensor([-1.5980,  2.4720, 13.6490, -0.9622,  1.8812,  4.2227, -9.8340,  7.4380,
  

In [534]:
import json

# Export the held-out split as a JSON list of {"x": [features], "y": label}.
# The plain-Python copies get their own names instead of rebinding x_test and
# y_test: evaluate() and the other cells need those to stay tensors, and
# overwriting them made the notebook's behaviour depend on cell execution order.
x_test_list = x_test.tolist()
y_test_list = y_test.tolist()
labelled_test_data = [
    {"x": features, "y": label}
    for features, label in zip(x_test_list, y_test_list)
]
# Plain write mode is enough; the file is never read back through this handle.
with open('./data/handonly/test_data.json', 'w') as test_data_json:
    json.dump(labelled_test_data, test_data_json, indent=4)