In [2]:
import pandas as pd
import numpy as np
import librosa
import sklearn
import sklearn.model_selection
import torch
from torch import nn
from datetime import datetime
import random
import time
import os
import torchmetrics as metrics

In [34]:
def seed_everything(seed_value=4052):
    os.environ['PYTHONHASHSEED'] = str(seed_value)
    random.seed(seed_value)
    np.random.seed(seed_value)
    torch.manual_seed(seed_value)
    torch.cuda.manual_seed_all(seed_value)

def feature_extraction(data, sr=100, window_length = 256, hop_length = 64):
    
    extracted_features_frame = pd.DataFrame()
    for col in data.columns:
        zcr = librosa.feature.zero_crossing_rate(y=np.array(data[col], dtype=np.float64),
                                                                        frame_length=window_length, 
                                                                        hop_length=hop_length)
    
        rms = librosa.feature.rms(y=np.array(data[col], dtype=np.float64),
                                                        frame_length=window_length, 
                                                        hop_length=hop_length)
        
        extracted_features_frame[col + " RMS"] = rms.squeeze()
        #extracted_features_frame[col + " ZCR"] = zcr.squeeze()
    
    return extracted_features_frame

def save_metrics(metrics_path:str,date_and_time,model_name:str,time,
                 rows:int,seed:int, metrics_dict:dict):
    # Info common to all metrics
    meta_data = {
    'Date Time': date_and_time,
    'Name': model_name, 
    'Training Time': total_training_time,
    'Rows': rows,
    'Seed': seed
    }
    
    # Merge MetaModel-specific metrics
    metrics_dict = meta_data | metrics_dict
    
    # Convert to data frame
    metrics_frame = pd.DataFrame(columns=metrics_dict.keys())

    # Check if metrics csv exists
    metrics_frame = pd.DataFrame(metrics_dict, index=[0])

    if (os.path.exists(metrics_path)):
        df = pd.read_csv(metrics_path, index_col=0)
        df = pd.concat([df, metrics_frame], ignore_index=True)
        df.to_csv(metrics_path)

    else:
        metrics_frame.to_csv(metrics_path)

In [78]:
data_path = "./data/"

# PRE PROCESSING

# These are irrelevant for the current task due to being idle. We remove them to speed up the training process.
# Unity's Barracuda can only take tensors of size 8 or below, so more filtering must be done.
cols_to_ignore = ["Timestamp", "D-Pad", "Touch", "L3", "R3", "L1", "R1", "L2",
       'Button North', 'Button East', 'Button South', 'Button West', 'R2']

# Filter out slow start and finish, ensure same size.
start_slice = 500
end_slice = 5500

labels_dict = {
    0 : "idle",
    1 : "low_activity",
    2 : "medium_activity",
    3 : "high_activity"
}

# Raw Data
idle_frame_raw = pd.read_csv(data_path + "idle_exaggerated.csv").iloc[start_slice:end_slice]
idle_labels_raw = np.zeros(idle_frame_raw.shape[0])

low_activity_frame_raw = pd.read_csv(data_path + "low_activity_exaggerated.csv").iloc[start_slice:end_slice]
low_activity_labels_raw = np.zeros(low_activity_frame_raw.shape[0]) + 1

medium_activity_frame_raw = pd.read_csv(data_path + "medium_activity_exaggerated.csv").iloc[start_slice:end_slice]
medium_activity_labels_raw = np.zeros(medium_activity_frame_raw.shape[0]) + 2

high_activity_frame_raw = pd.read_csv(data_path + "high_activity_exaggerated.csv").iloc[start_slice:end_slice]
high_activity_labels_raw = np.zeros(high_activity_frame_raw.shape[0]) + 3

# Feature Extract
idle_frame_features = feature_extraction(idle_frame_raw)
idle_labels_features = np.zeros(idle_frame_features.shape[0])

low_activity_frame_features = feature_extraction(low_activity_frame_raw)
low_activity_labels_features = np.zeros(low_activity_frame_features.shape[0]) + 1

medium_activity_frame_features = feature_extraction(medium_activity_frame_raw)
medium_activity_labels_features = np.zeros(medium_activity_frame_features.shape[0]) + 2

high_activity_frame_features = feature_extraction(high_activity_frame_raw)
high_activity_labels_features = np.zeros(high_activity_frame_features.shape[0]) + 3

# Concatenate Raw Dataset
labels_raw = np.concatenate((idle_labels_raw, low_activity_labels_raw, medium_activity_labels_raw, high_activity_labels_raw))
data_raw = pd.concat((idle_frame_raw, low_activity_frame_raw, medium_activity_frame_raw, high_activity_frame_raw))
data_raw.reset_index(drop=True, inplace=True)
# Concatenate Extracted Features Dataset
labels_features = np.concatenate((idle_labels_features, low_activity_labels_features, medium_activity_labels_features, high_activity_labels_features))
data_features = pd.concat((idle_frame_features, low_activity_frame_features, medium_activity_frame_features, high_activity_frame_features))
data_features.reset_index(drop=True, inplace=True)

# Remove unwanted features
for col in data_raw.columns:
    for header in cols_to_ignore:
        if header in col:
            data_raw = data_raw.drop(col, axis=1)
            
for col in data_features.columns:
    for header in cols_to_ignore:
        if header in col:
            data_features = data_features.drop(col, axis=1)
            
            
# There is a leading whitespace in each header. This removes it.
for col in data_raw.columns:
    if (col[0] == " "):
        data_raw.rename(columns={col: col.strip()}, inplace=True)
        data_features.rename(columns={col: col.strip()}, inplace=True)
        
for col in data_features.columns:
    if (col[0] == " "):
        data_features.rename(columns={col: col.strip()}, inplace=True)
        
        
data_features.to_csv("./data/data_features.csv")
data_raw.to_csv("./data/data_raw.csv")
pd.Series(labels_features).to_csv("./data/labels_features.csv")
pd.Series(labels_raw).to_csv("./data/labels_raw.csv")
          

In [70]:
data_features = data_features.iloc[300]

In [71]:
labels_features = labels_features[300]

In [72]:
feat_data_train = torch.tensor(data_features)
feat_data_test = torch.tensor(feat_data_train)
feat_lab_train = torch.tensor(labels_features).double()
feat_lab_test = feat_lab_train

  feat_data_test = torch.tensor(feat_data_train)


In [79]:
feat_data_train

tensor([0.6910, 0.8116, 0.6109, 0.6469, 0.7039, 0.6057, 0.7894, 0.3411, 1.0490,
        0.3531])

In [80]:
seed = 4052
training_size = 0.9
seed_everything(seed_value=seed)
# Convert to tensor
data_raw_tensor = torch.tensor([data_raw[col].astype('float64') for col in data_raw.columns]).T.double()
lab_raw_tensor = torch.tensor(labels_raw).double()

data_features_tensor = torch.tensor([data_features[col].astype('float64') for col in data_features.columns]).T.double()
lab_features_tensor = torch.tensor(labels_features).double()

# For training using features
feat_data_train, feat_data_test, feat_lab_train, feat_lab_test = sklearn.model_selection.train_test_split(data_features_tensor,
                                                                                                        lab_features_tensor,
                                                                                                        train_size=training_size, 
                                                                                                        random_state=seed,
                                                                                                        stratify=labels_features)  

# For training using raw data
raw_data_train, raw_data_test, raw_lab_train, raw_lab_test = sklearn.model_selection.train_test_split(data_raw_tensor,
                                                                        lab_raw_tensor,
                                                                        train_size=training_size, 
                                                                        random_state=seed, 
                                                                        stratify=labels_raw)

In [87]:
feat_data_train[100]

tensor([0.7072, 0.8299, 0.6593, 0.6632, 0.9304, 0.4565, 0.6171, 0.3028, 1.0345,
        0.3524], dtype=torch.float64)

In [146]:
# Using extracted features
model_name = "dsc_torch_nn_rms_overfit_test"
from torchmetrics.classification import MulticlassAccuracy

def train(model, x, y, optimizer, scheduler):
    model.train()
    for feature_vector, label_true in zip(x, y):
        optimizer.zero_grad()
        label_pred = model(feature_vector)
        loss = nn.functional.cross_entropy(label_pred, label_true.long())
        #loss_fn = nn.MSELoss(reduction='mean')
        #loss = loss_fn(label_pred, label_true)
        
        loss.backward()
        optimizer.step()
        scheduler.step()
    
    return loss

@torch.no_grad()
def evaluate(model, x, y):
    model.eval()
    labels_true, predictions = [], []
    for feature_vector, label_true in zip(x, y):
        output = model(feature_vector)
        predictions.append(output.argmax().tolist())
        labels_true.append(label_true.tolist())

    #mca = MulticlassAccuracy(num_classes=4, average='macro')
    #return mca(torch.FloatTensor(predictions), torch.IntTensor(labels_true))
    return (torch.tensor(predictions) == torch.tensor(labels_true)).float().mean() * 100.0
    
class DSC_Classifier(nn.Module):
    def __init__(self, n_feats, n_labels, n_hidden_layers, hidden_dims, dropout):
        super().__init__()
        self.input = nn.Linear(n_feats, hidden_dims, bias=True)
        self.hidden_layers = nn.ModuleList([
            nn.Linear(hidden_dims, hidden_dims) 
            for _ in range(n_hidden_layers)
        ])
        self.output = nn.Sequential(
            nn.Dropout(dropout),
            nn.Linear(hidden_dims, n_labels)
        )
        
    def forward(self, x):
        x = self.input(x).relu()
        
        for layer in self.hidden_layers:
            x = x + layer(x).relu()
            
        logits = self.output(x)
        return logits

epochs = 10
hidden_layers = 4
hidden_dims = 7
dropout = 0.2
learning_rate = 0.1
gamma=0.9
use_raw = False;

if (use_raw):
    model = DSC_Classifier(len(raw_data_train[1]), len(labels_dict), hidden_layers, hidden_dims, dropout) # 16000, 8, 4
else:
    model = DSC_Classifier(len(feat_data_train[1]), len(labels_dict), hidden_layers, hidden_dims, dropout) # 128, 8, 4

optimizer = torch.optim.AdamW(model.parameters(), lr=learning_rate, weight_decay=0.0)
scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer, gamma=gamma)

start_time_training = time.time()
for epoch in range(epochs):
    if (use_raw):
        train(model, raw_data_train.float(), raw_lab_train.float(), optimizer, scheduler)
        train_accuracy = evaluate(model, raw_data_train.float(), raw_lab_train.float())
        val_accuracy = evaluate(model, raw_data_test.float(), raw_lab_test.float())
    else:
        loss = train(model, feat_data_train.float(), feat_lab_train.float(), optimizer, scheduler)
        train_accuracy = evaluate(model, feat_data_train.float(), feat_lab_train.float())
        val_accuracy = evaluate(model, feat_data_test.float(), feat_lab_test.float())

    print(f"epoch: {epoch}\tLoss: {loss}  \tTraining accuracy: {train_accuracy:.1f}%\tValidation accuracy: {val_accuracy:.1f}%")
    
    
total_training_time = time.time() - start_time_training

# Save Metrics
now = datetime.now()

# dd/mm/YY H:M:S
date_and_time = now.strftime("%d/%m/%Y %H:%M:%S")

save_metrics("./models/nn_metrics.csv", date_and_time, model_name, total_training_time,
             raw_lab_test.size(), seed, {"Training Accuracy":train_accuracy.item()/100, 
                                         "Validation Accuracy":val_accuracy.item()/100,
                                         "Epochs":epochs, "Hidden Dimensions":hidden_dims, 
                                         "Hidden Layers":hidden_layers,"Dropout":dropout,
                                         "Learning Rate":learning_rate, "Gamma":gamma})


epoch: 0	Loss: 0.04458364099264145  	Training accuracy: 25.0%	Validation accuracy: 25.0%
epoch: 1	Loss: 0.05303912237286568  	Training accuracy: 25.0%	Validation accuracy: 25.0%
epoch: 2	Loss: 0.7344424724578857  	Training accuracy: 25.0%	Validation accuracy: 25.0%
epoch: 3	Loss: 0.04458364099264145  	Training accuracy: 25.0%	Validation accuracy: 25.0%
epoch: 4	Loss: 0.20156964659690857  	Training accuracy: 25.0%	Validation accuracy: 25.0%
epoch: 5	Loss: 0.32706353068351746  	Training accuracy: 25.0%	Validation accuracy: 25.0%
epoch: 6	Loss: 0.04458364099264145  	Training accuracy: 25.0%	Validation accuracy: 25.0%
epoch: 7	Loss: 0.5882832407951355  	Training accuracy: 25.0%	Validation accuracy: 25.0%
epoch: 8	Loss: 0.030160773545503616  	Training accuracy: 25.0%	Validation accuracy: 25.0%
epoch: 9	Loss: 0.03630359098315239  	Training accuracy: 25.0%	Validation accuracy: 25.0%


In [None]:
dummy_input = torch.randn(1, 8)

In [None]:
torch.onnx.export(model,
                  dummy_input,               # Desired inference input shape
                  model_name + ".onnx",      # Model export path + name
                  export_params=True,        # Store weights with the model
                  opset_version=9,           # Unity requires onnx 9
                  do_constant_folding=True,  # whether to execute constant folding for optimization
                  input_names = ['Input'],   # Used to identify layers during debugging
                  output_names = ['Logits']  # Used to identify layers during debugging
                  )


In [None]:
# Raw Data Model
model_name = "dsc_sklearn_svm_raw"
svm_kernel = "rbf"
svm_gamma = 0.15
c_value = 8

scaler = sklearn.preprocessing.StandardScaler()

scaler.fit(raw_data_train)

raw_train = scaler.transform(raw_data_train)
raw_test = scaler.transform(raw_data_test)

model_SVM_raw = SVC(kernel=svm_kernel, C=c_value, gamma=svm_gamma)

start_time_training = time.time()
model_SVM_raw.fit(raw_train, raw_lab_train)
total_training_time = time.time() - start_time_training

lab_predict =  model_SVM_raw.predict(raw_test)

accuracy_score = sklearn.metrics.accuracy_score(raw_lab_test, lab_predict)
#print the number of misclassified samples, accuracy and complete report (using scikit learn metric tools) 
print('Number of mislabeled samples %d out of %d' % ((raw_lab_test != lab_predict).sum(),raw_lab_test.size))
print('Accuracy:',accuracy_score)

# Save Metrics
now = datetime.now()

# dd/mm/YY H:M:S
date_and_time = now.strftime("%d/%m/%Y %H:%M:%S")

save_metrics("./models/metrics.csv", date_and_time, model_name, accuracy_score,c_value,svm_gamma,svm_kernel,raw_lab_test.size, total_training_time, seed)

onnx_model_svm_raw = skl2onnx.convert_sklearn(model_SVM_raw, 
                                            initial_types=initial_type,
                                            name=model_name, 
                                            target_opset=9,
                                            verbose=0)

saved_model = skl2onnx.helpers.onnx_helper.save_onnx_model(onnx_model_svm_raw, "./models/" + model_name + ".onnx")