In [None]:
from torch.utils.data import DataLoader
import torch

from Classes.SignDataLoader import SignDataLoader
from Classes.Imitator import Imitator
from Classes.KeypointDataset import KeypointDataset
from Classes.Tools import Tools

import os

In [None]:
# Notebook-wide logging switch, off by default.
# NOTE(review): no other cell visible in this notebook reads LOG — confirm
# whether it is still needed or should be passed to the training helper.
LOG = False

# Training

In [None]:
# Project helper object. In this notebook it supplies the LLM components
# (getLLM), the batch collation function (collate_fn) and the training
# entry point (train).
tools = Tools()

# Fetch the LLM embedding layer and its tokenizer from the project helper.
embedding_layer, tokenizer = tools.getLLM()
# The embedding weight is unpacked as (vocab_size, d_model); d_model is later
# handed to the Imitator constructor.
vocab_size, d_model = embedding_layer.weight.size()

print(f"Vocab size: {vocab_size}, d_model: {d_model}")

# All paths are built from the current working directory, so they depend on
# where the kernel was started: data is read from ../data/dataset2 and the
# model directory passed to training is ./model.
DataPath = os.path.join(os.getcwd(), os.pardir, "data", "dataset2")
ModelPath = os.path.join(os.getcwd(), "model")
h5File = os.path.join(DataPath, "keypoints.h5")
csvFile = os.path.join(DataPath, "meta.csv")

# Run configuration: every tunable value read by the cells below lives here.
modelParameters = {
    # Passed unchanged to tools.train as `modelVersions`.
    "model": {
        "version": 1,
        "checkpoint": 1
    },
    # NOTE(review): presumably 543 keypoints x 2 coordinates per frame — confirm.
    "input_size": 543*2,
    # NOTE(review): hard-coded; presumably has to agree with d_model printed
    # above — confirm against the Imitator definition.
    "output_size": 3072,
    "learning_rate": 2e-4,
    # Use the GPU when torch reports one, otherwise fall back to the CPU.
    "device": "cuda" if torch.cuda.is_available() else "cpu",
    "epochs": 1,
    # Passed to tools.train as `log_interval`.
    "logIntervals": 10,
    # NOTE(review): not read by any cell visible in this notebook — confirm
    # whether it should be forwarded to tools.train.
    "checkpointIntervals": 5,
    "batchSize": 32,
    # Passed to KeypointDataset as `max_seq_len`.
    # NOTE(review): presumably frames-per-second x seconds — confirm.
    "frameClips": 15 * 35,
}

In [None]:
# Data pipeline: keypoint reader -> project dataset wrapper -> batched loader.

# Reads keypoints from the HDF5 file and labels from the CSV; the configured
# frameClips value is supplied as the reader's max_seq_len.
keypointReader = KeypointDataset(
    h5Path=h5File,
    labelsCSV=csvFile,
    max_seq_len=modelParameters["frameClips"],
)

# Project dataset that combines the keypoint reader with the LLM tokenizer and
# embedding layer, and is given the configured device.
dataset = SignDataLoader(
    tokenizer,
    embedding_layer,
    keypointReader,
    modelParameters["device"],
)

# Shuffled mini-batches, assembled by the project's collate function.
dataloader = DataLoader(
    dataset,
    batch_size=modelParameters["batchSize"],
    shuffle=True,
    collate_fn=tools.collate_fn,
)

In [None]:
# Build the Imitator network from the configured sizes and the embedding
# width, then place it on the configured device.
model = Imitator(
    input_size=modelParameters["input_size"],
    output_size=modelParameters["output_size"],
    d_model=d_model,
)
model = model.to(modelParameters["device"])

# Show the model for a quick visual check.
print(model)

In [None]:
# NOTE(review): sort_by_keyword is not read by any cell visible in this
# notebook; it looks like a leftover profiler sort key — confirm before removing.
sort_by_keyword = 'cuda_time_total'

# Start training through the project helper, forwarding the run configuration
# and the model directory.
tools.train(
    model,
    dataloader,
    epochs=modelParameters["epochs"],
    log_interval=modelParameters["logIntervals"],
    learning_rate=modelParameters["learning_rate"],
    modelVersions=modelParameters["model"],
    modelDir=ModelPath,
)