In [1]:
import sys
sys.path.insert(0, "../..")
from src.data import data_tools
from pathlib import Path
import gin
import numpy as np
import torch
from typing import List
from torch.nn.utils.rnn import pad_sequence
from src.models import rnn_models, train_model
from torch import optim


# 1 Iterators
We will be using an interesting dataset. [link](https://tev.fbk.eu/resources/smartwatch)

From the site:
> The SmartWatch Gestures Dataset has been collected to evaluate several gesture recognition algorithms for interacting with mobile applications using arm gestures. Eight different users performed twenty repetitions of twenty different gestures, for a total of 3200 sequences. Each sequence contains acceleration data from the 3-axis accelerometer of a first generation Sony SmartWatch™, as well as timestamps from the different clock sources available on an Android device. The smartwatch was worn on the user's right wrist. 


In [2]:
from src.settings import gesturesdatasetsettings
from src.data.make_dataset import DatasetFactoryProvider, DatasetType

  warn(


In [3]:
# Ask DatasetFactoryProvider for the factory that corresponds to DatasetType.GESTURES.
gesturesdatasetfactory = DatasetFactoryProvider.get_factory(DatasetType.GESTURES)

In [4]:
# Create the datastreamers with a batch size of 32 and pick out both splits.
streamers = gesturesdatasetfactory.create_datastreamer(batchsize=32)
train, valid = streamers["train"], streamers["valid"]

2023-06-02 15:18:32.740 | INFO     | src.data.make_dataset:download_data:99 - Dataset already exists at /workspaces/ML22/data/raw/gestures
100%|[38;2;30;71;6m██████████[0m| 2600/2600 [00:06<00:00, 409.10it/s]
100%|[38;2;30;71;6m██████████[0m| 651/651 [00:01<00:00, 393.04it/s]


In [5]:
# Length of both streamers; the same values are passed on as train_steps / valid_steps
# in the trainer settings (presumably the number of batches per epoch — confirm).
len(train), len(valid)

(81, 20)

In [6]:
# Start streaming from both splits; these objects are handed to the Trainer later on.
trainstreamer, validstreamer = train.stream(), valid.stream()

# Draw a single batch from the training stream to inspect features and labels.
x, y = next(iter(trainstreamer))
x.shape, y

(torch.Size([32, 28, 3]),
 tensor([ 4,  0, 17, 10,  5,  8,  2,  8,  7, 19,  0,  0,  3, 19,  5,  9,  0, 13,
         11, 12, 11, 17,  3,  4,  8,  6, 19, 17,  0,  5,  9,  1]))

Can you make sense of the shape?
What does it mean that the shapes are sometimes (32, 27, 3), but a second time might look like (32, 30, 3)? In other words, the second (or first, if you insist on starting at 0) dimension changes. Why is that? How does the model handle this? Do you think this is already padded, or still has to be padded?


# 2 Exercises
Let's test a base model, and try to improve upon that.

Fill the gestures.gin file with relevant settings for `input_size`, `hidden_size`, `num_layers` and `horizon` (which, in our case, will be the number of classes...)

As a rule of thumb: start lower than you expect to need!

In [10]:
from src.settings import TrainerSettings, ReportTypes
from src.models.metrics import Accuracy

# Metric that is evaluated during training and reused below on a single batch.
accuracy = Accuracy()

# Destinations the trainer should report to.
report_types = [ReportTypes.GIN, ReportTypes.TENSORBOARD, ReportTypes.MLFLOW]

# One epoch covers len(train) training steps and len(valid) validation steps.
# Early stopping is switched off by passing None.
settings = TrainerSettings(
    epochs=10,
    train_steps=len(train),
    valid_steps=len(valid),
    metrics=[accuracy],
    logdir=Path("gestures"),
    reporttypes=report_types,
    scheduler_kwargs={"factor": 0.5, "patience": 5},
    earlystop_kwargs=None,
)
settings

epochs: 10
metrics: [Accuracy]
logdir: gestures
train_steps: 81
valid_steps: 20
reporttypes: [<ReportTypes.GIN: 1>, <ReportTypes.TENSORBOARD: 2>, <ReportTypes.MLFLOW: 3>]
optimizer_kwargs: {'lr': 0.001, 'weight_decay': 1e-05}
scheduler_kwargs: {'factor': 0.5, 'patience': 5}
earlystop_kwargs: None

In [11]:
# Parse the gin file before building the model: BaseRNN() is called without
# arguments, so its settings presumably come from the bindings in gestures.gin.
gin.parse_config_file("gestures.gin")
model = rnn_models.BaseRNN()

In [12]:
# Show the bindings gin currently holds for BaseRNN.
gin.get_bindings("BaseRNN")

{'input_size': 3, 'hidden_size': 128, 'num_layers': 3, 'horizon': 20}

Test the model. What is the output shape you need? Remember, we are doing classification!

In [13]:
# Run the batch drawn earlier through the model and display the output shape.
yhat = model(x)
yhat.shape

torch.Size([32, 20])

Test the accuracy

In [14]:
# Score the model output on this single batch; labels go first, predictions second.
accuracy(y, yhat)

tensor(0.1250)

What do you think of the accuracy? What would you expect from blind guessing?

Check shape of `y` and `yhat`

In [15]:
# Put the shape of the model output next to the shape of the labels.
yhat.shape, y.shape

(torch.Size([32, 20]), torch.Size([32]))

And look at the output of yhat

In [16]:
# Model output for the first item of the batch.
yhat[0]

tensor([ 0.0349,  0.1734, -0.0380, -0.3100, -0.1482,  0.2014, -0.0265,  0.0360,
        -0.1577, -0.0623, -0.0639, -0.1680,  0.1687, -0.0143,  0.0157, -0.0162,
        -0.1668,  0.0869,  0.0069, -0.2994], grad_fn=<SelectBackward0>)

Does this make sense to you? If it is unclear, go back to the MNIST classification problem, where we had 10 classes.

We have a classification problem, so we need Cross Entropy Loss.
Remember, [this has a softmax built in](https://pytorch.org/docs/stable/generated/torch.nn.CrossEntropyLoss.html) 

In [17]:
# Cross entropy is applied directly to the model output and the integer labels;
# loss_fn is kept around because the Trainer receives it later.
loss_fn = torch.nn.CrossEntropyLoss()
loss = loss_fn(input=yhat, target=y)
loss

tensor(2.9984, grad_fn=<NllLossBackward0>)

In [21]:
import mlflow
from datetime import datetime

# Track runs in a local SQLite database, grouped under the "gestures" experiment.
mlflow.set_tracking_uri("sqlite:///mlflow.db")
mlflow.set_experiment("gestures")

# torch.save does not create missing directories, so make sure the target exists.
modeldir = Path("../../models/gestures/").resolve()
modeldir.mkdir(parents=True, exist_ok=True)

gin.parse_config_file("gestures.gin")

with mlflow.start_run():
    # Build the model first so the run is tagged with the class that is actually
    # trained, instead of a hard-coded name that can drift from the code below.
    model = rnn_models.BaseRNN()
    mlflow.set_tag("model", type(model).__name__)
    mlflow.set_tag("dev", "raoul")

    # The BaseRNN bindings may be nested under a single "config" key or be flat
    # (input_size, hidden_size, ...); log whichever form the gin file provides.
    model_bindings = gin.get_bindings("BaseRNN")
    mlflow.log_params(model_bindings.get("config", model_bindings))
    mlflow.log_params(gin.get_bindings("trainloop"))

    # Optimizer and scheduler are passed as classes, not as instances.
    trainer = train_model.Trainer(
        model=model,
        settings=settings,
        loss_fn=loss_fn,
        optimizer=optim.Adam,
        traindataloader=trainstreamer,
        validdataloader=validstreamer,
        scheduler=optim.lr_scheduler.ReduceLROnPlateau,
    )
    trainer.loop()

    # Store the trained model under a timestamped filename.
    tag = datetime.now().strftime("%Y%m%d-%H%M")
    modelpath = modeldir / (tag + "model.pt")
    torch.save(model, modelpath)

2023-06-02 15:19:55.950 | INFO     | src.data.data_tools:dir_add_timestamp:146 - Logging to gestures/20230602-1519
100%|[38;2;30;71;6m██████████[0m| 81/81 [00:06<00:00, 13.31it/s]
2023-06-02 15:20:02.734 | INFO     | src.models.train_model:report:210 - Epoch 0 train 2.6137 test 2.3226 metric ['0.1625']
100%|[38;2;30;71;6m██████████[0m| 81/81 [00:05<00:00, 13.72it/s]
2023-06-02 15:20:09.331 | INFO     | src.models.train_model:report:210 - Epoch 1 train 2.1959 test 2.1054 metric ['0.2609']
100%|[38;2;30;71;6m██████████[0m| 81/81 [00:05<00:00, 13.56it/s]
2023-06-02 15:20:16.040 | INFO     | src.models.train_model:report:210 - Epoch 2 train 1.8563 test 1.7374 metric ['0.3406']
100%|[38;2;30;71;6m██████████[0m| 81/81 [00:05<00:00, 14.85it/s]
2023-06-02 15:20:22.205 | INFO     | src.models.train_model:report:210 - Epoch 3 train 1.4457 test 1.2887 metric ['0.5016']
100%|[38;2;30;71;6m██████████[0m| 81/81 [00:05<00:00, 13.88it/s]
2023-06-02 15:20:28.673 | INFO     | src.models.train_

In [None]:
# Switch to the GRU configuration: parse gestures_gru.gin first, then build
# GRUmodel without arguments so it presumably picks up those bindings.
gin.parse_config_file("gestures_gru.gin")
model = rnn_models.GRUmodel()

In [23]:
# Close any MLflow run that is still active.
mlflow.end_run()

Exercises:

- improve the RNN model
- test different things. What works? What does not?
- experiment with either GRU or LSTM layers, create your own models + ginfiles. 
- experiment with adding Conv1D layers.

You should be able to get above 90% accuracy with the dataset.