In [54]:
import os
from pathlib import Path
import gin
import numpy as np
import torch
from typing import List
from torch.nn.utils.rnn import pad_sequence
from mltrainer import rnn_models, Trainer
from torch import optim

from mads_datasets import datatools

# 1 Iterators
We will be using an interesting dataset. [link](https://tev.fbk.eu/resources/smartwatch)

From the site:
> The SmartWatch Gestures Dataset has been collected to evaluate several gesture recognition algorithms for interacting with mobile applications using arm gestures. Eight different users performed twenty repetitions of twenty different gestures, for a total of 3200 sequences. Each sequence contains acceleration data from the 3-axis accelerometer of a first generation Sony SmartWatch™, as well as timestamps from the different clock sources available on an Android device. The smartwatch was worn on the user's right wrist. 


In [55]:
from mads_datasets import DatasetFactoryProvider, DatasetType
from mltrainer.preprocessors import PaddedPreprocessor
# Preprocessor handed to the streamers (presumably pads each batch to a common length).
preprocessor = PaddedPreprocessor()

# Build the gestures dataset factory and request batched train/valid streamers from it.
gesturesdatasetfactory = DatasetFactoryProvider.create_factory(DatasetType.GESTURES)
streamers = gesturesdatasetfactory.create_datastreamer(
    batchsize=32,
    preprocessor=preprocessor,
)
train, valid = streamers["train"], streamers["valid"]

[32m2025-03-09 19:19:05.104[0m | [1mINFO    [0m | [36mmads_datasets.base[0m:[36mdownload_data[0m:[36m121[0m - [1mFolder already exists at /Users/rimansingh/.cache/mads_datasets/gestures[0m
100%|[38;2;30;71;6m██████████[0m| 2600/2600 [00:07<00:00, 368.01it/s]
100%|[38;2;30;71;6m██████████[0m| 651/651 [00:01<00:00, 390.54it/s]


In [56]:
# Size of each streamer (presumably the number of batches per epoch);
# the same values are reused below as train_steps / valid_steps.
len(train), len(valid)

(81, 20)

In [57]:
# Create the batch streams that are later handed to the Trainer.
trainstreamer = train.stream()
validstreamer = valid.stream()
# Pull one batch for inspection; x and y are reused by the sanity-check cells below.
x, y = next(iter(trainstreamer))
# Display the input shape and the integer class labels of this batch.
x.shape, y

(torch.Size([32, 25, 3]),
 tensor([12, 12,  2, 16, 12, 18,  9,  6,  4,  2, 12, 10,  2, 14,  5,  9, 19,  4,
         11,  8,  8, 18,  7, 10, 12, 18,  4, 15,  0,  3,  9, 13]))

Can you make sense of the shape?
What does it mean that the shapes are sometimes (32, 27, 3), but a second time might look like (32, 30, 3)? In other words, the second (or first, if you insist on starting at 0) dimension changes. Why is that? How does the model handle this? Do you think this is already padded, or still has to be padded?


# 2 Exercises
Let's test a base model and try to improve upon it.

Fill the gestures.gin file with relevant settings for `input_size`, `hidden_size`, `num_layers` and `horizon` (which, in our case, will be the number of classes...)

As a rule of thumb: start lower than you expect to need!

In [58]:
from mltrainer import TrainerSettings, ReportTypes
from mltrainer.metrics import Accuracy

# Metric that the training logs report next to the train/test loss after every epoch.
accuracy = Accuracy()

# Training configuration handed to the Trainer further down.
settings = TrainerSettings(
    epochs=50,
    metrics=[accuracy],
    logdir=Path("gestures"),
    # step counts are taken from the streamer sizes shown above
    train_steps=len(train),
    valid_steps=len(valid),
    reporttypes=[ReportTypes.GIN, ReportTypes.TENSORBOARD, ReportTypes.MLFLOW],
    # keyword arguments for the scheduler class that is passed to the Trainer
    scheduler_kwargs={"factor": 0.5, "patience": 5},
    # None disables early stopping
    earlystop_kwargs=None
)
# Last expression: display the resolved settings, including defaults not set here.
settings

epochs: 50
metrics: [Accuracy]
logdir: gestures
train_steps: 81
valid_steps: 20
reporttypes: [<ReportTypes.GIN: 1>, <ReportTypes.TENSORBOARD: 2>, <ReportTypes.MLFLOW: 3>]
optimizer_kwargs: {'lr': 0.001, 'weight_decay': 1e-05}
scheduler_kwargs: {'factor': 0.5, 'patience': 5}
earlystop_kwargs: None

In [59]:
# Load the bindings from gestures.gin; BaseRNN() is then built without explicit
# arguments, so its hyperparameters are expected to come from those bindings.
gin.parse_config_file("gestures.gin")
model = rnn_models.BaseRNN()

In [60]:
# Show the parameter values gin currently has bound to BaseRNN.
gin.get_bindings("BaseRNN")

{'input_size': 3, 'hidden_size': 32, 'num_layers': 2, 'horizon': 20}

Test the model. What is the output shape you need? Remember, we are doing classification!

In [61]:
# Forward pass of the untrained model on the inspection batch.
yhat = model(x)
# For classification we want one score per class for every item in the batch.
yhat.shape

torch.Size([32, 20])

Test the accuracy

In [62]:
# Accuracy of the untrained model on a single batch; note the argument order (y, yhat).
# With 20 classes, blind guessing would land around 1/20 = 0.05.
accuracy = Accuracy()
acc_value = accuracy(y, yhat)
print(f"Initial Accuracy: {acc_value}")

Initial Accuracy: 0.03125


What do you think of the accuracy? What would you expect from blind guessing?

Check shape of `y` and `yhat`

In [63]:
# yhat holds one row of class scores per sample, y holds one integer label per sample.
yhat.shape, y.shape

(torch.Size([32, 20]), torch.Size([32]))

And look at the output of yhat

In [64]:
# Raw (unnormalised) class scores for the first sample in the batch.
print(yhat[0])

tensor([-0.3014,  0.0815,  0.2321, -0.1648, -0.2833, -0.2129, -0.1003, -0.2096,
         0.0835,  0.2973, -0.0101,  0.0977, -0.0847,  0.2912,  0.2653, -0.1393,
         0.0459, -0.1831,  0.0101, -0.2581], grad_fn=<SelectBackward0>)


Does this make sense to you? If you are unclear, go back to the classification problem with the MNIST, where we had 10 classes.

We have a classification problem, so we need Cross Entropy Loss.
Remember, [this has a softmax built in](https://pytorch.org/docs/stable/generated/torch.nn.CrossEntropyLoss.html) 

In [65]:
# CrossEntropyLoss is applied to the raw scores (batch, classes) and the integer labels (batch,).
# For reference: a uniform guess over 20 classes gives a loss of ln(20) ~= 3.0.
loss_fn = torch.nn.CrossEntropyLoss()
loss = loss_fn(yhat, y)
print(f"Initial Loss: {loss.item()}")

Initial Loss: 3.006039619445801


In [66]:
# Re-check the BaseRNN bindings before training (same call as earlier in the notebook).
gin.get_bindings("BaseRNN")

{'input_size': 3, 'hidden_size': 32, 'num_layers': 2, 'horizon': 20}

In [67]:
import torch
# Probe which backend this machine offers. The result is informational only,
# because training is pinned to the CPU below.
if torch.backends.mps.is_available() and torch.backends.mps.is_built():
    available_device = "mps"
elif torch.cuda.is_available():
    available_device = "cuda:0"
else:
    available_device = "cpu"

# on my mac, at least for the BaseRNN model, mps does not speed up training
# probably because the overhead of copying the data to the GPU is too high
# however, it might speed up training for larger models, with more parameters
device = "cpu"

# Report the device that is actually used, so the message cannot contradict the override.
if device != available_device:
    print(f"note: {available_device} is available but not used")
print(f"using {device}")

using cpu


In [68]:
import mlflow
from datetime import datetime

# Log to a local SQLite-backed MLflow store, under the "gestures" experiment.
mlflow.set_tracking_uri("sqlite:///mlflow.db")
mlflow.set_experiment("gestures")

# Directory for the saved weights. exist_ok=True replaces the separate exists() check
# and matches how the later cells create their model directory.
modeldir = Path("./models/gestures/").resolve()
modeldir.mkdir(parents=True, exist_ok=True)

gin.parse_config_file("gestures.gin")

with mlflow.start_run():
    # Tags make it possible to tell the runs of different models apart in MLflow.
    mlflow.set_tag("model", "BaseRNN")
    mlflow.set_tag("dev", "rimandeep")
    mlflow.log_params(gin.get_bindings("BaseRNN"))

    # Fresh model, so training does not continue from the sanity-check instance above.
    model = rnn_models.BaseRNN()

    # Optimizer and scheduler are passed as classes; their keyword arguments
    # come from `settings` (optimizer_kwargs / scheduler_kwargs).
    trainer = Trainer(
        model=model,
        settings=settings,
        loss_fn=loss_fn,
        optimizer=optim.Adam,
        traindataloader=trainstreamer,
        validdataloader=validstreamer,
        scheduler=optim.lr_scheduler.ReduceLROnPlateau,
        device=device,
    )
    trainer.loop()

    # Save the weights under a timestamped name and attach the file to the MLflow run.
    timestamp = datetime.now().strftime("%Y%m%d-%H%M")
    model_path = modeldir / f"{timestamp}_BaseRNN_model.pt"
    torch.save(model.state_dict(), model_path)
    mlflow.log_artifact(str(model_path))

[32m2025-03-09 19:19:16.073[0m | [1mINFO    [0m | [36mmltrainer.trainer[0m:[36mdir_add_timestamp[0m:[36m29[0m - [1mLogging to gestures/20250309-191916[0m
100%|[38;2;30;71;6m██████████[0m| 81/81 [00:01<00:00, 46.11it/s]
[32m2025-03-09 19:19:18.075[0m | [1mINFO    [0m | [36mmltrainer.trainer[0m:[36mreport[0m:[36m191[0m - [1mEpoch 0 train 2.9736 test 2.8433 metric ['0.0875'][0m
100%|[38;2;30;71;6m██████████[0m| 81/81 [00:01<00:00, 78.65it/s]
[32m2025-03-09 19:19:19.277[0m | [1mINFO    [0m | [36mmltrainer.trainer[0m:[36mreport[0m:[36m191[0m - [1mEpoch 1 train 2.5808 test 2.4544 metric ['0.0891'][0m
100%|[38;2;30;71;6m██████████[0m| 81/81 [00:01<00:00, 66.67it/s]
[32m2025-03-09 19:19:20.660[0m | [1mINFO    [0m | [36mmltrainer.trainer[0m:[36mreport[0m:[36m191[0m - [1mEpoch 2 train 2.4731 test 2.4710 metric ['0.0969'][0m
100%|[38;2;30;71;6m██████████[0m| 81/81 [00:01<00:00, 73.92it/s]
[32m2025-03-09 19:19:21.934[0m | [1mINFO    [0m |

In [69]:
from mltrainer.metrics import Accuracy

accuracy = Accuracy()

# Inference only: switch to eval mode and skip autograd bookkeeping.
model.eval()
with torch.no_grad():
    yhat = model(x)  # Get predictions from your trained model

# NOTE: x, y are the single batch drawn from the training stream earlier,
# so this is a quick sanity check rather than a validation score.
acc_value = accuracy(y, yhat)
print(f"Model Accuracy: {acc_value:.2f}")


Model Accuracy: 0.16


Try to update the code above with the following two commands.
    
```python
gin.parse_config_file('gestures_gru.gin')
model = rnn_models.GRUmodel()
```

To discern between the changes, also modify the tag mlflow.set_tag("model", "new-tag-here") where you add
a new tag of your choice. This way you can keep the models apart.

# Implement the GRU Model

In [70]:
import mlflow
from datetime import datetime
from pathlib import Path
import gin
import torch
import torch.optim as optim
from mltrainer import Trainer, TrainerSettings, ReportTypes
from mltrainer.metrics import Accuracy
from mltrainer import rnn_models

# Same MLflow store and experiment as the BaseRNN run, so both models can be compared there.
mlflow.set_tracking_uri("sqlite:///mlflow.db")
mlflow.set_experiment("gestures")

# Set modeldir to the 'models/gestures' folder in the current directory
modeldir = Path.cwd() / "models" / "gestures"
modeldir.mkdir(parents=True, exist_ok=True)

# Load the GRU bindings; GRUmodel() is built without explicit arguments,
# so its hyperparameters are expected to come from this file.
gin.parse_config_file('gestures_gru.gin') 

model = rnn_models.GRUmodel()

loss_fn = torch.nn.CrossEntropyLoss()

accuracy = Accuracy()

# Same settings as the BaseRNN run. `train` and `valid` are the streamers
# created in the data-loading cell near the top of the notebook.
settings = TrainerSettings(
    epochs=50,
    metrics=[accuracy],
    logdir=Path("gestures"),
    train_steps=len(train),
    valid_steps=len(valid),
    reporttypes=[ReportTypes.GIN, ReportTypes.TENSORBOARD, ReportTypes.MLFLOW],
    scheduler_kwargs={"factor": 0.5, "patience": 5},
    earlystop_kwargs=None
)

# NOTE(review): only CUDA is probed here, unlike the earlier device cell which also checks MPS.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

# Start an MLflow run
with mlflow.start_run():
    # Set a unique tag for this run
    mlflow.set_tag("model", "GRUmodel") 
    mlflow.set_tag("dev", "rimandeep")
    
    mlflow.log_params(gin.get_bindings("GRUmodel"))

    # Optimizer and scheduler are passed as classes; `trainstreamer` / `validstreamer`
    # are the streams created earlier in the notebook.
    trainer = Trainer(
        model=model,
        settings=settings,
        loss_fn=loss_fn,
        optimizer=optim.Adam,
        traindataloader=trainstreamer,
        validdataloader=validstreamer,
        scheduler=optim.lr_scheduler.ReduceLROnPlateau,
        device=device,
    )
    
    trainer.loop()

    # Save the weights under a timestamped name and attach the file to the MLflow run.
    timestamp = datetime.now().strftime("%Y%m%d-%H%M")
    model_path = modeldir / f"{timestamp}_GRU_model.pt"
    torch.save(model.state_dict(), model_path)
    mlflow.log_artifact(str(model_path))


[32m2025-03-09 19:20:27.172[0m | [1mINFO    [0m | [36mmltrainer.trainer[0m:[36mdir_add_timestamp[0m:[36m29[0m - [1mLogging to gestures/20250309-192027[0m


Using device: cpu


100%|[38;2;30;71;6m██████████[0m| 81/81 [00:02<00:00, 32.62it/s]
[32m2025-03-09 19:20:29.924[0m | [1mINFO    [0m | [36mmltrainer.trainer[0m:[36mreport[0m:[36m191[0m - [1mEpoch 0 train 2.9973 test 2.9683 metric ['0.0891'][0m
100%|[38;2;30;71;6m██████████[0m| 81/81 [00:02<00:00, 31.11it/s]
[32m2025-03-09 19:20:32.828[0m | [1mINFO    [0m | [36mmltrainer.trainer[0m:[36mreport[0m:[36m191[0m - [1mEpoch 1 train 2.8989 test 2.7396 metric ['0.0984'][0m
100%|[38;2;30;71;6m██████████[0m| 81/81 [00:02<00:00, 32.58it/s]
[32m2025-03-09 19:20:35.611[0m | [1mINFO    [0m | [36mmltrainer.trainer[0m:[36mreport[0m:[36m191[0m - [1mEpoch 2 train 2.5496 test 2.4512 metric ['0.1109'][0m
100%|[38;2;30;71;6m██████████[0m| 81/81 [00:02<00:00, 32.42it/s]
[32m2025-03-09 19:20:38.375[0m | [1mINFO    [0m | [36mmltrainer.trainer[0m:[36mreport[0m:[36m191[0m - [1mEpoch 3 train 2.4101 test 2.3674 metric ['0.1766'][0m
100%|[38;2;30;71;6m██████████[0m| 81/81 [00:02

In [71]:
# Close any MLflow run that is still active.
# NOTE(review): the runs above are opened with `with mlflow.start_run():`, which presumably
# ends them on exit already — confirm this call is only a safety net.
mlflow.end_run()

Exercises:

- improve the RNN model
- test different things. What works? What does not?
- experiment with either GRU or LSTM layers, create your own models + ginfiles. 
- experiment with adding Conv1D layers.

You should be able to get above 90% accuracy with the dataset.

# Implement and Train the LSTM Model

In [72]:
import gin
import os
import torch
import torch.nn as nn
import torch.optim as optim
from datetime import datetime
from pathlib import Path
import mlflow
from mltrainer import Trainer, TrainerSettings, ReportTypes, metrics
from mltrainer.preprocessors import PaddedPreprocessor
from mads_datasets import DatasetFactoryProvider, DatasetType
from torchinfo import summary

# Define LSTM Model
class LSTMModel(nn.Module):
    """Stacked LSTM followed by a linear layer that produces one score per class."""

    def __init__(self, input_size, hidden_size, num_layers, num_classes, dropout):
        """Build the recurrent stack and the classification head.

        Args:
            input_size: number of features per time step.
            hidden_size: size of the LSTM hidden state.
            num_layers: number of stacked LSTM layers.
            num_classes: number of output scores.
            dropout: dropout value forwarded to ``nn.LSTM``.
        """
        super().__init__()
        self.lstm = nn.LSTM(
            input_size=input_size,
            hidden_size=hidden_size,
            num_layers=num_layers,
            batch_first=True,
            dropout=dropout,
        )
        self.fc = nn.Linear(in_features=hidden_size, out_features=num_classes)

    def forward(self, x):
        """Map a batch-first sequence tensor to class scores of shape (batch, num_classes)."""
        # nn.LSTM returns (outputs, state); only the per-step outputs are needed.
        sequence_output = self.lstm(x)[0]
        # Classify from the output at the final time step.
        final_step = sequence_output[:, -1, :]
        logits = self.fc(final_step)
        return logits

# Register LSTMModel with gin so that bindings such as `LSTMModel.hidden_size`
# can be set in a .gin file and queried later in this notebook.
gin.external_configurable(LSTMModel)

__main__.LSTMModel

In [73]:
# Load Dataset
preprocessor = PaddedPreprocessor()
dataset_factory = DatasetFactoryProvider.create_factory(DatasetType.GESTURES)
streamers = dataset_factory.create_datastreamer(batchsize=32, preprocessor=preprocessor)
train_streamer, valid_streamer = streamers["train"].stream(), streamers["valid"].stream()

# Set Device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

# Load Config
gin.clear_config()
gin.enter_interactive_mode()
config_file = "gestures_lstm.gin"

try:
    gin.parse_config_file(config_file)
    print("Configuration loaded successfully.")
except Exception as e:
    print(f"Error loading config: {e}")

# Print Config Parameters
print("Gin Configuration Parameters:")
for param in gin.config._CONFIG:
    print(param)

[32m2025-03-09 19:22:51.190[0m | [1mINFO    [0m | [36mmads_datasets.base[0m:[36mdownload_data[0m:[36m121[0m - [1mFolder already exists at /Users/rimansingh/.cache/mads_datasets/gestures[0m
100%|[38;2;30;71;6m██████████[0m| 2600/2600 [00:06<00:00, 425.24it/s]
100%|[38;2;30;71;6m██████████[0m| 651/651 [00:01<00:00, 456.04it/s]


Using device: cpu
Configuration loaded successfully.
Gin Configuration Parameters:
('', '__main__.LSTMModel')


In [74]:
# Fetch Model Parameters from Config
input_size = gin.query_parameter("LSTMModel.input_size")
hidden_size = gin.query_parameter("LSTMModel.hidden_size")
num_layers = gin.query_parameter("LSTMModel.num_layers")
dropout = gin.query_parameter("LSTMModel.dropout")
num_classes = gin.query_parameter("LSTMModel.num_classes")

# Initialize Model
model = LSTMModel(input_size, hidden_size, num_layers, num_classes, dropout).to(device)
print(summary(model, input_size=(32, 100, input_size)))  # Display Model Summary

# Define Optimizer & Loss
optimizer = optim.Adam(model.parameters(), lr=0.001)
loss_fn = nn.CrossEntropyLoss()
scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, factor=0.5, patience=5)

# Trainer Settings
settings = TrainerSettings(
    epochs=30,
    metrics=[metrics.Accuracy()],
    logdir=Path("gestures"),
    train_steps=len(train),
    valid_steps=len(valid),
    reporttypes=[ReportTypes.GIN, ReportTypes.TENSORBOARD, ReportTypes.MLFLOW],
    scheduler_kwargs={"factor": 0.5, "patience": 5},
    earlystop_kwargs={"patience": 5},
    optimizer_kwargs={"lr": 0.001}
)

Layer (type:depth-idx)                   Output Shape              Param #
LSTMModel                                [32, 20]                  --
├─LSTM: 1-1                              [32, 100, 64]             50,944
├─Linear: 1-2                            [32, 20]                  1,300
Total params: 52,244
Trainable params: 52,244
Non-trainable params: 0
Total mult-adds (Units.MEGABYTES): 163.06
Input size (MB): 0.04
Forward/backward pass size (MB): 1.64
Params size (MB): 0.21
Estimated Total Size (MB): 1.89


In [75]:
# Same local MLflow store as before, but a separate experiment for the LSTM runs.
mlflow.set_tracking_uri("sqlite:///mlflow.db")
mlflow.set_experiment("Gesture_LSTM_Experiments")

# Training Loop
with mlflow.start_run():
    mlflow.set_tag("model", "LSTM")

    # Log Hyperparameters
    mlflow.log_params({
        "input_size": input_size,
        "hidden_size": hidden_size,
        "num_layers": num_layers,
        "dropout": dropout,
        "num_classes": num_classes
    })

    # Optimizer and scheduler are passed as classes; their keyword arguments
    # come from `settings` (optimizer_kwargs / scheduler_kwargs).
    trainer = Trainer(
        model=model,
        settings=settings,
        loss_fn=loss_fn,
        optimizer=optim.Adam,
        traindataloader=train_streamer,
        validdataloader=valid_streamer,
        scheduler=optim.lr_scheduler.ReduceLROnPlateau,
        device=device
    )

    trainer.loop()

    # Save Model
    # NOTE(review): this writes to ./models, whereas the BaseRNN and GRU cells use ./models/gestures.
    modeldir = Path("models")
    modeldir.mkdir(parents=True, exist_ok=True)
    timestamp = datetime.now().strftime("%Y%m%d-%H%M")
    model_path = modeldir / f"{timestamp}_LSTM_model.pt"
    torch.save(model.state_dict(), model_path)

    # Log Model in MLflow
    # The model is logged twice: once via log_model and once as the saved state_dict file.
    mlflow.pytorch.log_model(model, "LSTM")
    mlflow.log_artifact(model_path)

# NOTE(review): the `with` block above presumably ends the run already — confirm this is only a safety net.
mlflow.end_run()
print("\n LSTM Model Training Completed! Check MLflow for results.")


[32m2025-03-09 19:23:00.319[0m | [1mINFO    [0m | [36mmltrainer.trainer[0m:[36mdir_add_timestamp[0m:[36m29[0m - [1mLogging to gestures/20250309-192300[0m
[32m2025-03-09 19:23:00.323[0m | [1mINFO    [0m | [36mmltrainer.trainer[0m:[36m__init__[0m:[36m72[0m - [1mFound earlystop_kwargs in settings.Set to None if you dont want earlystopping.[0m
100%|[38;2;30;71;6m██████████[0m| 81/81 [00:03<00:00, 25.95it/s]
[32m2025-03-09 19:23:03.820[0m | [1mINFO    [0m | [36mmltrainer.trainer[0m:[36mreport[0m:[36m191[0m - [1mEpoch 0 train 2.7834 test 2.3210 metric ['0.1984'][0m
100%|[38;2;30;71;6m██████████[0m| 81/81 [00:02<00:00, 32.24it/s]
[32m2025-03-09 19:23:06.683[0m | [1mINFO    [0m | [36mmltrainer.trainer[0m:[36mreport[0m:[36m191[0m - [1mEpoch 1 train 2.1895 test 2.0525 metric ['0.2781'][0m
100%|[38;2;30;71;6m██████████[0m| 81/81 [00:03<00:00, 24.67it/s]
[32m2025-03-09 19:23:11.127[0m | [1mINFO    [0m | [36mmltrainer.trainer[0m:[36mrepor


 LSTM Model Training Completed! Check MLflow for results.


# Experiment with adding Conv1D layers

In [76]:
import gin
import os
from mads_datasets import DatasetFactoryProvider, DatasetType
import torch
import torch.nn as nn
import torch.optim as optim
from mltrainer import Trainer, TrainerSettings, ReportTypes, metrics
from mltrainer.preprocessors import PaddedPreprocessor
from datetime import datetime
from pathlib import Path
import mlflow

# Define the ConvLSTMModel with Conv1D + LSTM
class ConvLSTMModel(nn.Module):
    """Conv1D feature extractor followed by an LSTM and a linear classification head.

    Args:
        input_size: number of channels per time step of the input.
        hidden_size: size of the LSTM hidden state.
        num_layers: number of stacked LSTM layers.
        num_classes: number of output scores.
        dropout: dropout value forwarded to ``nn.LSTM``.
        conv_filters: number of Conv1D output channels (the LSTM input size).
        kernel_size: width of the Conv1D kernel. Odd values keep the sequence
            length unchanged; even values lengthen it by one step.

    ``forward`` takes a tensor of shape (batch, seq_len, input_size) and returns
    scores of shape (batch, num_classes).
    """

    def __init__(self, input_size, hidden_size, num_layers, num_classes, dropout, conv_filters, kernel_size):
        super().__init__()
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.num_classes = num_classes
        self.dropout = dropout
        self.conv_filters = conv_filters
        self.kernel_size = kernel_size

        # Conv1D layer before LSTM.
        # The padding follows the kernel width, so the time axis is no longer shortened
        # when kernel_size is larger than 3; kernel_size=3 still yields a padding of 1.
        self.conv1d = nn.Conv1d(
            in_channels=input_size,
            out_channels=conv_filters,
            kernel_size=kernel_size,
            padding=kernel_size // 2,
        )
        self.relu = nn.ReLU()
        self.lstm = nn.LSTM(conv_filters, hidden_size, num_layers, dropout=dropout, batch_first=True)
        self.fc = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        """Return class scores of shape (batch, num_classes) for a batch-first sequence tensor."""
        # Rearrange dimensions for Conv1D: (batch, seq_len, input_size) -> (batch, input_size, seq_len)
        x = x.permute(0, 2, 1)
        x = self.conv1d(x)
        x = self.relu(x)
        # Rearrange back for the LSTM: (batch, conv_filters, seq_len) -> (batch, seq_len, conv_filters)
        x = x.permute(0, 2, 1)
        out, _ = self.lstm(x)
        # Use last time-step.
        # NOTE(review): with padded batches the last step may be padding — confirm this is intended.
        out = out[:, -1, :]
        out = self.fc(out)
        return out

# Register the model as configurable with gin
gin.external_configurable(ConvLSTMModel)

# Load the gin configuration file
# Start from a clean gin state so bindings from the earlier models do not leak in.
gin.clear_config()
gin.enter_interactive_mode()
try:
    gin.parse_config_file("gestures_1d.gin")
except Exception as e:
    # Report the problem, then re-raise: the hyperparameters queried further down
    # come from this file, so continuing without it only fails later.
    print(f"Error parsing configuration file: {e}")
    raise
else:
    print("Configuration file loaded successfully.")

# Build the preprocessor, the gestures dataset factory and the batched streamers
preprocessor = PaddedPreprocessor()
gesturesdatasetfactory = DatasetFactoryProvider.create_factory(DatasetType.GESTURES)
streamers = gesturesdatasetfactory.create_datastreamer(
    batchsize=32,
    preprocessor=preprocessor,
)
train, valid = streamers["train"], streamers["valid"]
trainstreamer = train.stream()
validstreamer = valid.stream()

# Pick CUDA when it is available, otherwise fall back to the CPU
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print(f"Using device: {device}")

# Read every ConvLSTMModel hyperparameter from the parsed gin configuration
input_size = gin.query_parameter("ConvLSTMModel.input_size")
hidden_size = gin.query_parameter("ConvLSTMModel.hidden_size")
num_layers = gin.query_parameter("ConvLSTMModel.num_layers")
dropout = gin.query_parameter("ConvLSTMModel.dropout")
num_classes = gin.query_parameter("ConvLSTMModel.num_classes")
conv_filters = gin.query_parameter("ConvLSTMModel.conv_filters")
kernel_size = gin.query_parameter("ConvLSTMModel.kernel_size")

# Build the model with explicit keyword arguments and move it to the selected device
model = ConvLSTMModel(
    input_size=input_size,
    hidden_size=hidden_size,
    num_layers=num_layers,
    num_classes=num_classes,
    dropout=dropout,
    conv_filters=conv_filters,
    kernel_size=kernel_size,
)
model = model.to(device)

# Loss for multi-class classification
loss_fn = nn.CrossEntropyLoss()

# The scheduler is handed over as a class, not an instance;
# its keyword arguments are given in scheduler_kwargs below
scheduler = optim.lr_scheduler.ReduceLROnPlateau

# Trainer configuration: early stopping only sets a patience,
# the scheduler monitors a quantity that should decrease ("min")
settings = TrainerSettings(
    epochs=30,
    metrics=[metrics.Accuracy()],
    logdir=Path("gestures"),
    train_steps=len(train),
    valid_steps=len(valid),
    reporttypes=[ReportTypes.GIN, ReportTypes.TENSORBOARD, ReportTypes.MLFLOW],
    scheduler_kwargs={"mode": "min", "factor": 0.1, "patience": 5},
    earlystop_kwargs={"patience": 10},
    optimizer_kwargs={"lr": 0.001},
)


# Set MLflow tracking
# Same local MLflow store as before, but a separate experiment for the ConvLSTM runs.
mlflow.set_tracking_uri("sqlite:///mlflow.db")
mlflow.set_experiment("Gesture_ConvLSTM_Experiments")

with mlflow.start_run():
    mlflow.set_tag("model", "ConvLSTM")
    # Record the hyperparameters that were read from the gin configuration.
    mlflow.log_params({
        "input_size": input_size,
        "hidden_size": hidden_size,
        "num_layers": num_layers,
        "dropout": dropout,
        "num_classes": num_classes,
        "conv_filters": conv_filters,
        "kernel_size": kernel_size
    })

    # Pass the optimizer as the class (optim.Adam) so Trainer can instantiate it internally
    trainer = Trainer(
        model=model,
        settings=settings,
        loss_fn=loss_fn,
        optimizer=optim.Adam,
        traindataloader=trainstreamer,
        validdataloader=validstreamer,
        scheduler=scheduler,
        device=device
    )
    
    trainer.loop()

    # Save trained model
    # The weights go to ./models under a timestamped name; the full model is also logged to MLflow.
    timestamp = datetime.now().strftime("%Y%m%d-%H%M")
    model_path = Path("models") / f"{timestamp}_ConvLSTM_model.pt"
    os.makedirs(model_path.parent, exist_ok=True)
    torch.save(model.state_dict(), model_path)
    mlflow.pytorch.log_model(model, "ConvLSTM")

# NOTE(review): the `with` block above presumably ends the run already — confirm this is only a safety net.
mlflow.end_run()

print("\nConv1D Model Training Completed! Check MLflow for results.")


[32m2025-03-09 19:24:58.477[0m | [1mINFO    [0m | [36mmads_datasets.base[0m:[36mdownload_data[0m:[36m121[0m - [1mFolder already exists at /Users/rimansingh/.cache/mads_datasets/gestures[0m


Configuration file loaded successfully.


100%|[38;2;30;71;6m██████████[0m| 2600/2600 [00:08<00:00, 302.30it/s]
100%|[38;2;30;71;6m██████████[0m| 651/651 [00:02<00:00, 272.11it/s]
[32m2025-03-09 19:25:10.594[0m | [1mINFO    [0m | [36mmltrainer.trainer[0m:[36mdir_add_timestamp[0m:[36m29[0m - [1mLogging to gestures/20250309-192510[0m
[32m2025-03-09 19:25:10.599[0m | [1mINFO    [0m | [36mmltrainer.trainer[0m:[36m__init__[0m:[36m72[0m - [1mFound earlystop_kwargs in settings.Set to None if you dont want earlystopping.[0m


Using device: cpu


100%|[38;2;30;71;6m██████████[0m| 81/81 [00:04<00:00, 17.62it/s]
[32m2025-03-09 19:25:15.547[0m | [1mINFO    [0m | [36mmltrainer.trainer[0m:[36mreport[0m:[36m191[0m - [1mEpoch 0 train 2.8627 test 2.6219 metric ['0.1625'][0m
100%|[38;2;30;71;6m██████████[0m| 81/81 [00:02<00:00, 28.89it/s]
[32m2025-03-09 19:25:18.645[0m | [1mINFO    [0m | [36mmltrainer.trainer[0m:[36mreport[0m:[36m191[0m - [1mEpoch 1 train 2.3005 test 2.0786 metric ['0.2891'][0m
100%|[38;2;30;71;6m██████████[0m| 81/81 [00:02<00:00, 30.78it/s]
[32m2025-03-09 19:25:21.624[0m | [1mINFO    [0m | [36mmltrainer.trainer[0m:[36mreport[0m:[36m191[0m - [1mEpoch 2 train 1.9988 test 1.6899 metric ['0.3937'][0m
100%|[38;2;30;71;6m██████████[0m| 81/81 [00:02<00:00, 27.55it/s]
[32m2025-03-09 19:25:24.888[0m | [1mINFO    [0m | [36mmltrainer.trainer[0m:[36mreport[0m:[36m191[0m - [1mEpoch 3 train 1.6554 test 1.3935 metric ['0.4375'][0m
100%|[38;2;30;71;6m██████████[0m| 81/81 [00:03


Conv1D Model Training Completed! Check MLflow for results.
