In [1]:
import sys
from pathlib import Path

import gin
import torch
from torch import optim

# The project root must be on sys.path before any `src.*` import below.
sys.path.insert(0, "../..")

from src.data import data_tools
from src.models import metrics, train_model, rnn_models
from src.settings import TrainerSettings

  warn(


Let's see how attention makes a difference on the gestures dataset.
First, get the data.

In [2]:
# Absolute location of the gestures dataset, relative to this notebook.
data_dir = Path("../../data/external/gestures-dataset/").resolve()

# Keep only the files whose suffix is listed in `formats`.
formats = [".txt"]
paths = [
    candidate
    for candidate in data_tools.walk_dir(data_dir)
    if candidate.suffix in formats
]

# 80/20 train-test split, taken in the order `walk_dir` yielded the paths.
# NOTE(review): there is no shuffle here, so the split depends on that
# ordering -- confirm this is intended.
split = 0.8
idx = int(len(paths) * split)
trainpaths, testpaths = paths[:idx], paths[idx:]

traindataset = data_tools.TSDataset(trainpaths)
testdataset = data_tools.TSDataset(testpaths)

100%|██████████| 2600/2600 [00:06<00:00, 398.20it/s]
100%|██████████| 651/651 [00:01<00:00, 360.35it/s]


In [3]:
# Build the train and test loaders (in that order) with the same batch size.
trainloader, testloader = (
    data_tools.PaddedDatagenerator(dataset, batchsize=32)
    for dataset in (traindataset, testdataset)
)

Set up loss and accuracy

In [4]:
# Directory handed to the trainer settings as `logdir`, resolved to an absolute path.
log_dir = Path("../../models/attention/").resolve()

# Cross-entropy as the training loss, accuracy as the reported metric.
loss_fn = torch.nn.CrossEntropyLoss()
accuracy = metrics.Accuracy()

Set up configuration

In [10]:
# `gin` and `TrainerSettings` are imported in the import cell at the top of
# the notebook; this cell only parses the config and builds the settings.
gin.parse_config_file("gestures.gin")

# epochs is not passed here: it comes from the gin file.
settings = TrainerSettings(
    metrics=[accuracy],
    logdir=log_dir,
    # step counts are taken from the loader lengths
    train_steps=len(trainloader),
    valid_steps=len(testloader),
    tunewriter=["tensorboard", "gin"],
    scheduler_kwargs={"factor": 0.5, "patience": 5},
    earlystop_kwargs=None,
)
grumodel = rnn_models.GRUmodel()  # config comes from the gin file
settings

epochs: 10
metrics: [Accuracy]
logdir: /workspaces/ML22/models/attention
train_steps: 81
valid_steps: 20
tunewriter: ['tensorboard']
optimizer_kwargs: {'lr': 0.001, 'weight_decay': 1e-05}
scheduler_kwargs: {'factor': 0.5, 'patience': 5}
earlystop_kwargs: None

In [6]:
# Trainer for the plain GRU model. The optimizer and scheduler are passed as
# classes, not instances (presumably the Trainer instantiates them using the
# kwargs held in `settings` -- confirm in train_model.Trainer).
grutrainer = train_model.Trainer(
    settings=settings,
    model=grumodel,
    traindataloader=trainloader,
    validdataloader=testloader,
    loss_fn=loss_fn,
    optimizer=optim.Adam,
    scheduler=optim.lr_scheduler.ReduceLROnPlateau,
)

2023-05-25 16:25:44.814 | INFO     | src.data.data_tools:dir_add_timestamp:137 - Logging to /workspaces/ML22/models/attention/20230525-1625
2023-05-25 16:25:44.828 | INFO     | src.models.train_model:__init__:109 - Found earlystop_kwargs in TrainerSettings. Set to None if you dont want earlystopping.


In [7]:
# Train the GRU model; each epoch logs the train loss, test loss and metric.
grutrainer.loop()

100%|[38;2;30;71;6m██████████[0m| 81/81 [00:10<00:00,  8.00it/s]
2023-05-25 16:25:55.604 | INFO     | src.models.train_model:report:207 - Epoch 0 train 2.4844 test 2.4284 metric ['0.1500']
100%|[38;2;30;71;6m██████████[0m| 81/81 [00:09<00:00,  8.42it/s]
2023-05-25 16:26:06.147 | INFO     | src.models.train_model:report:207 - Epoch 1 train 1.8175 test 1.7138 metric ['0.2719']
100%|[38;2;30;71;6m██████████[0m| 81/81 [00:08<00:00,  9.53it/s]
2023-05-25 16:26:15.272 | INFO     | src.models.train_model:report:207 - Epoch 2 train 1.3656 test 1.3470 metric ['0.4422']
100%|[38;2;30;71;6m██████████[0m| 81/81 [00:10<00:00,  7.79it/s]
2023-05-25 16:26:26.285 | INFO     | src.models.train_model:report:207 - Epoch 3 train 0.9602 test 1.2433 metric ['0.4766']
100%|[38;2;30;71;6m██████████[0m| 81/81 [00:07<00:00, 10.22it/s]
2023-05-25 16:26:34.690 | INFO     | src.models.train_model:report:207 - Epoch 4 train 0.6499 test 1.1694 metric ['0.6266']
100%|[38;2;30;71;6m██████████[0m| 81/81 [00

This is the GRU model, like we have seen before.

Now compare its performance with a model that has an attention layer added.

In [8]:
# Same settings, loss, loaders, optimizer and scheduler as the GRU run above;
# only the model differs, so the two runs can be compared directly.
attentionmodel = rnn_models.AttentionGRU()

attentiontrainer = train_model.Trainer(
    settings=settings,
    model=attentionmodel,
    traindataloader=trainloader,
    validdataloader=testloader,
    loss_fn=loss_fn,
    optimizer=optim.Adam,
    scheduler=optim.lr_scheduler.ReduceLROnPlateau,
)

# Train the attention model; each epoch logs the train loss, test loss and metric.
attentiontrainer.loop()

2023-05-25 16:30:11.587 | INFO     | src.data.data_tools:dir_add_timestamp:137 - Logging to /workspaces/ML22/models/attention/20230525-1630
2023-05-25 16:30:11.598 | INFO     | src.models.train_model:__init__:109 - Found earlystop_kwargs in TrainerSettings. Set to None if you dont want earlystopping.
100%|[38;2;30;71;6m██████████[0m| 81/81 [00:10<00:00,  7.89it/s]
2023-05-25 16:30:22.510 | INFO     | src.models.train_model:report:207 - Epoch 0 train 2.1805 test 1.6557 metric ['0.3469']
100%|[38;2;30;71;6m██████████[0m| 81/81 [00:08<00:00,  9.27it/s]
2023-05-25 16:30:31.968 | INFO     | src.models.train_model:report:207 - Epoch 1 train 1.4393 test 1.3366 metric ['0.4891']
100%|[38;2;30;71;6m██████████[0m| 81/81 [00:09<00:00,  8.32it/s]
2023-05-25 16:30:42.464 | INFO     | src.models.train_model:report:207 - Epoch 2 train 1.0654 test 1.3068 metric ['0.5703']
100%|[38;2;30;71;6m██████████[0m| 81/81 [00:09<00:00,  8.84it/s]
2023-05-25 16:30:52.192 | INFO     | src.models.train_mode

So, this is very nice. Compared to the plain GRU, in my TensorBoard I see that with attention:
- the loss is better on both train and test
- the accuracy is better
- the model converges faster