In [1]:
import sys
sys.path.insert(0, "../..")
import torch
import gin
from pathlib import Path
from src.data import data_tools 
from src.models import metrics, train_model, rnn_models

  warn(


Let's see how attention makes a difference on the gestures dataset.
First, get the data.

In [2]:
# Root folder of the gestures dataset, resolved to an absolute path.
data_dir = Path("../../data/external/gestures-dataset/").resolve()

# get all paths with the .txt extension
formats = [".txt"]
# Sort the paths so the contiguous split below selects the same files on every
# run and machine.
# NOTE(review): assumes walk_dir does not already guarantee an order - confirm.
paths = sorted(
    path for path in data_tools.walk_dir(data_dir) if path.suffix in formats
)
# Fail early with a clear message instead of silently building empty datasets.
if not paths:
    raise FileNotFoundError(f"No {formats} files found under {data_dir}")

# make a train-test split: the first 80% of the paths train, the rest test
# NOTE(review): the split is contiguous, not shuffled - verify that both parts
# contain every gesture class.
split = 0.8
idx = int(len(paths) * split)
trainpaths = paths[:idx]
testpaths = paths[idx:]

# Wrap each list of file paths in a time-series dataset.
traindataset = data_tools.TSDataset(trainpaths)
testdataset = data_tools.TSDataset(testpaths)

100%|██████████| 2600/2600 [00:06<00:00, 389.80it/s]
100%|██████████| 651/651 [00:01<00:00, 387.08it/s]


In [3]:
# Build one padded batch generator per dataset, both with batches of 32 items.
trainloader, testloader = (
    data_tools.PaddedDatagenerator(dataset, batchsize=32)
    for dataset in (traindataset, testdataset)
)

Set up the loss function, the accuracy metric and the log directory

In [4]:
# Directory that is handed to the training loop as `log_dir` (absolute path).
log_dir = Path("../../models/attention/").resolve()
# Cross-entropy loss for the classification task.
loss_fn = torch.nn.CrossEntropyLoss()
# Accuracy metric that is passed to the training loop via `metrics`.
accuracy = metrics.Accuracy()

Set up configuration

In [5]:
# Load the gin bindings stored in gestures.gin.
gin.parse_config_file("gestures.gin")

# Model hyperparameters; this dict is passed to the model constructors.
config = dict(
    input_size=3,
    hidden_size=100,
    dropout=0.05,
    num_layers=3,
    output_size=20,
)

First, train a GRU model like we have seen before.

In [6]:
# Seed torch's global RNG so this cell starts from the same random state each
# time it is executed, independent of which cells ran before it.
# NOTE(review): randomness drawn outside torch (e.g. batch shuffling inside the
# data generator) would not be covered by this seed - confirm.
torch.manual_seed(42)

# Baseline: a GRU model built from the hyperparameters in `config`.
model = rnn_models.GRUmodel(config)

# Train for 10 epochs. train_steps / eval_steps are set to the number of
# batches in each loader; the value returned by trainloop is rebound to `model`.
model = train_model.trainloop(
    epochs=10,
    model=model,
    metrics=[accuracy],
    train_dataloader=trainloader,
    test_dataloader=testloader,
    log_dir=log_dir,
    train_steps=len(trainloader),
    eval_steps=len(testloader),
    tunewriter=["tensorboard"]
)

2023-05-23 08:21:24.373 | INFO     | src.data.data_tools:dir_add_timestamp:137 - Logging to /workspaces/ML22/models/attention/20230523-0821
100%|[38;2;30;71;6m██████████[0m| 81/81 [00:07<00:00, 10.81it/s]
2023-05-23 08:21:32.255 | INFO     | src.models.train_model:trainloop:189 - Epoch 0 train 2.4658 test 2.2234 metric ['0.1938']
100%|[38;2;30;71;6m██████████[0m| 81/81 [00:05<00:00, 13.64it/s]
2023-05-23 08:21:38.582 | INFO     | src.models.train_model:trainloop:189 - Epoch 1 train 2.0046 test 2.0028 metric ['0.2859']
100%|[38;2;30;71;6m██████████[0m| 81/81 [00:05<00:00, 14.70it/s]
2023-05-23 08:21:44.469 | INFO     | src.models.train_model:trainloop:189 - Epoch 2 train 1.5381 test 1.5064 metric ['0.4547']
100%|[38;2;30;71;6m██████████[0m| 81/81 [00:05<00:00, 14.90it/s]
2023-05-23 08:21:50.253 | INFO     | src.models.train_model:trainloop:189 - Epoch 3 train 1.1076 test 1.4330 metric ['0.4219']
100%|[38;2;30;71;6m██████████[0m| 81/81 [00:06<00:00, 12.05it/s]
2023-05-23 08:21:

Then compare its performance with that of a model that has an attention layer added.

In [16]:
# Seed torch's global RNG with the same value as the baseline GRU run, so both
# models start from an identical random state and the comparison is repeatable.
# NOTE(review): randomness drawn outside torch (e.g. batch shuffling inside the
# data generator) would not be covered by this seed - confirm.
torch.manual_seed(42)

# GRU with an attention layer, built from the same `config` as the baseline.
# Note: this rebinds `model`, so the previously trained model is no longer
# reachable under that name.
model = rnn_models.AttentionGRU(config)

# Same training setup as the baseline: 10 epochs, with train_steps / eval_steps
# set to the number of batches in each loader.
model = train_model.trainloop(
    epochs=10,
    model=model,
    metrics=[accuracy],
    train_dataloader=trainloader,
    test_dataloader=testloader,
    log_dir=log_dir,
    train_steps=len(trainloader),
    eval_steps=len(testloader),
    tunewriter=["tensorboard"]
)

2022-12-19 22:52:33.854 | INFO     | src.data.data_tools:dir_add_timestamp:114 - Logging to ../../models/attention/20221219-2252
100%|██████████| 81/81 [00:02<00:00, 39.51it/s]
2022-12-19 22:52:36.389 | INFO     | src.models.train_model:trainloop:171 - Epoch 0 train 2.2744 test 2.1660 metric ['0.2687']
100%|██████████| 81/81 [00:02<00:00, 39.08it/s]
2022-12-19 22:52:38.725 | INFO     | src.models.train_model:trainloop:171 - Epoch 1 train 1.4446 test 1.5761 metric ['0.3422']
100%|██████████| 81/81 [00:02<00:00, 38.46it/s]
2022-12-19 22:52:41.094 | INFO     | src.models.train_model:trainloop:171 - Epoch 2 train 1.0930 test 1.0965 metric ['0.5906']
100%|██████████| 81/81 [00:01<00:00, 40.56it/s]
2022-12-19 22:52:43.335 | INFO     | src.models.train_model:trainloop:171 - Epoch 3 train 0.6677 test 0.9031 metric ['0.6172']
100%|██████████| 81/81 [00:02<00:00, 39.99it/s]
2022-12-19 22:52:45.598 | INFO     | src.models.train_model:trainloop:171 - Epoch 4 train 0.4870 test 0.5145 metric ['0.846

So, this is very nice. In TensorBoard, comparing the attention model with the plain GRU, I see:
- the loss is lower on both the train and the test set
- the accuracy is higher
- the model converges faster