# Demonstration Notebook

In [None]:
import torch
import numpy as np
import matplotlib.pyplot as plt

In [None]:
import preprocess
import data
import model
import train
import loss

## Preprocessing

In [None]:
# Name of the NinaPro database handed to the preprocessor.
DB = "DB7"
# Root folder of the data; it is expected to contain train/, val/ and test/ sub-folders.
DATA_FOLDER = "/path/to/NinaProDB"

In [None]:
# One-off preprocessing of every split: switch the guard to True for the first run only.

if False:
    for split in ("train", "val", "test"):
        preprocess.NinaProPreprocessor(f"{DATA_FOLDER}/{split}", DB).preprocess()

In [None]:
# Build one dataset object per use case.

train_dir, val_dir, test_dir = (
    f"{DATA_FOLDER}/{split}" for split in ("train", "val", "test")
)

# Training samples: sequences of 400 time steps (1 s).
train_dataset = data.SequenceDataset(train_dir, seq_len=400)

# Evaluation samples: no seq_len is given, so the entire sequences are used as-is.
train_testing_dataset = data.SequenceDataset(train_dir)
val_dataset = data.SequenceDataset(val_dir)
test_dataset = data.SequenceDataset(test_dir)

In [None]:
# Compute the per-channel mean and standard deviation of the EMG and glove
# signals over the whole training set (all users pooled together).
#
# Two passes are used: the first one computes the global mean, the second one
# sums the squared deviations from that *global* mean. Averaging the per-user
# variances instead would ignore the spread between the users' own means and
# under-estimate the pooled standard deviation.

# Pass 1: global mean. Sums are accumulated in float64 to limit rounding error.
size = 0
sum_emg, sum_glove = 0, 0
for user in train_dataset.data:
    # The number of EMG rows is used as the sample count for both signals.
    size += user["emg"].shape[0]
    sum_emg = sum_emg + user["emg"].sum(axis=0, dtype=np.float64)
    sum_glove = sum_glove + user["glove"].sum(axis=0, dtype=np.float64)

mean_emg = sum_emg / size
mean_glove = sum_glove / size

# Pass 2: population variance (divide by N) around the global mean.
var_emg, var_glove = 0, 0
for user in train_dataset.data:
    var_emg = var_emg + ((user["emg"] - mean_emg) ** 2).sum(axis=0)
    var_glove = var_glove + ((user["glove"] - mean_glove) ** 2).sum(axis=0)

var_emg = var_emg / size
var_glove = var_glove / size

# Cast to float32 only once everything has been computed in double precision.
mean_emg = mean_emg.astype(np.float32)
std_emg = np.sqrt(var_emg).astype(np.float32)
mean_glove = mean_glove.astype(np.float32)
std_glove = np.sqrt(var_glove).astype(np.float32)

mean_emg, std_emg, mean_glove, std_glove

In [None]:
# Standardize the input data: every dataset gets the same normalizer, built from
# the training-set EMG statistics. Only the "emg" entry is given a transform.

emg_normalizer = data.Normalizer(mean_emg, std_emg)
for dataset in (train_dataset, train_testing_dataset, val_dataset, test_dataset):
    dataset.transform({"emg": emg_normalizer})

In [None]:
# Compute mean baseline on all the datasets

def baseline(mean, test_set):
    """Return the mean absolute error of always predicting `mean`.

    Args:
        mean: Constant prediction, broadcastable against every glove array
            (in this notebook: the per-channel mean of the training glove data).
        test_set: Iterable of 3-item samples whose last item is the glove
            array; the first two items are ignored.

    Returns:
        The summed absolute error divided by the total number of rows (first
        axis) and by the size of the last axis of the glove arrays.

    Raises:
        ValueError: If `test_set` contains no time step at all.
    """
    # Deliberately not called `loss`: that name is an imported module in this file.
    abs_error = 0
    n_rows = 0
    n_channels = None
    for _, _, glove in test_set:
        n_rows += glove.shape[0]
        n_channels = glove.shape[-1]
        abs_error += np.abs(glove - mean).sum()

    if n_rows == 0:
        # Fail with an explicit message instead of an obscure division by zero.
        raise ValueError("Cannot compute a baseline on an empty dataset")

    return abs_error / n_rows / n_channels

# Score the constant "training glove mean" prediction on each split.
train_baseline, val_baseline, test_baseline = (
    baseline(mean_glove, split)
    for split in (train_testing_dataset, val_dataset, test_dataset)
)

train_baseline, val_baseline, test_baseline

## Training 

In [None]:
# Data loaders

# Training: shuffled batches; the last incomplete batch is dropped.
batch = 256
train_loader = torch.utils.data.DataLoader(
    train_dataset,
    batch_size=batch,
    shuffle=True,
    drop_last=True,
)

# Evaluation: one full exercise sequence per batch, in order. The sequences do
# not all have the same length, hence the batch size of 1.
eval_loader_options = {"shuffle": False, "batch_size": 1}
val_loader = torch.utils.data.DataLoader(val_dataset, **eval_loader_options)
test_loader = torch.utils.data.DataLoader(test_dataset, **eval_loader_options)
train_testing_loader = torch.utils.data.DataLoader(train_testing_dataset, **eval_loader_options)

In [None]:
# Define the model

# Run on the GPU when one is available, otherwise on the CPU.
use_cuda = torch.cuda.is_available()
device = torch.device("cuda") if use_cuda else torch.device("cpu")
print(device)

# Positional hyper-parameters of the project's LSTMModel (see model.py for their meaning).
m = model.LSTMModel(12, 128, 18, 1, 0.5)
m.to(device)

In [None]:
# Training loop

train_criterion = loss.MovementLoss(2, decay_power=1)
val_criterion = torch.nn.L1Loss(reduction="sum")
optimizer = torch.optim.Adam(m.parameters(), lr=0.001)

# Validation loss recorded after each epoch (plotted further down).
losses = []

for e in range(10):
    print(f"Epoch {e}:")
    train.train(m, train_loader, optimizer, train_criterion, e, device, logging_freq=100)
    epoch_val_loss = train.validation(m, val_loader, val_criterion, device)
    losses.append(epoch_val_loss)
    # One checkpoint per epoch, so that any epoch can be reloaded afterwards.
    torch.save(m.state_dict(), f"model_{e}.pt")

In [None]:
# Load a specific epoch [optional]
# e = XXX
# map_location remaps the stored tensors onto `device`, so a checkpoint written
# from a GPU run can still be loaded on a CPU-only machine.
m.load_state_dict(torch.load(f"model_{e}.pt", map_location=device))

## Evaluation

In [None]:
# Score the model on the held-out test sequences.
test_loss = train.validation(m, test_loader, val_criterion, device)
test_loss

In [None]:
# Score the model on the full training sequences: a sanity check that the
# mean baseline is at least beaten on the data the model was trained on.
train_loss = train.validation(m, train_testing_loader, val_criterion, device)
train_loss

## Plots

In [None]:
# Validation loss versus mean baseline

# The x-axis follows the number of recorded epochs instead of a hard-coded 10,
# so the plot stays valid when the training loop length changes.
epochs = range(1, len(losses) + 1)

plt.title("Evolution of validation loss (MAE) during training")

plt.xlabel("Epoch")
plt.ylabel("MAE")
plt.ylim(8, 22)
# Each curve gets a label: plt.legend() has nothing to show otherwise.
plt.plot(epochs, losses, label="Validation loss")
plt.plot(epochs, [val_baseline] * len(losses), label="Mean baseline")
plt.legend()
plt.show()

In [None]:
# Check predictions

# To inspect a training sequence instead: emg, stimulus, glove = train_testing_dataset[0]
emg, stimulus, glove = val_dataset[0]
# Add a leading batch dimension of size 1 before feeding the model.
input_ = torch.FloatTensor(emg[None, ...]).to(device)

# Inference only: eval() disables dropout so the prediction is deterministic,
# and no_grad() avoids building an autograd graph over the whole sequence.
m.eval()
with torch.no_grad():
    # First element of the model output, then the single item of the batch.
    glove_p = m(input_)[0][0].cpu().numpy()

In [None]:
# Plot the first five glove angles over an 80 second window starting at t = 120 s.
# Indices are converted to seconds by dividing by 400.

start = 120 * 400
delta = 80 * 400

window = slice(start, start + delta)
time_axis = np.arange(start, start + delta) / 400
plt.plot(time_axis, glove[window][:, :5])
plt.show()

In [None]:
# Same window and same five angles, this time for the model prediction.

predicted_window = glove_p[start:(start + delta)]
time_axis = np.arange(start, start + delta) / 400
plt.plot(time_axis, predicted_window[:, :5])
plt.show()

In [None]:
# Focus on a single glove angle: ground truth first, prediction second, on the same axes.

angle = 10
time_axis = np.arange(start, start + delta) / 400
for signal in (glove, glove_p):
    plt.plot(time_axis, signal[start:(start + delta), angle])
plt.show()