In [1]:
%matplotlib notebook
%load_ext autoreload
%autoreload 2

In [15]:
import os, sys

module_path = os.path.abspath(os.path.join("../.."))
sys.path.append(module_path)

## Dataset and hyperparameters loading

In [16]:
from torchvision.transforms.v2 import Compose
from hyperparameters import load_hyperparameters_from_json

from SLTDataset import SLTDataset
from posecraft.Pose import Pose


DATASET = "GSL"
dataset_path = f"/mnt/disk3Tb/slt-datasets/{DATASET}"
hp = load_hyperparameters_from_json("config/gsl.json")

landmarks_mask = Pose.get_components_mask(hp["LANDMARKS_USED"])
transforms: Compose = Compose(hp["TRANSFORMS"])

train_dataset = SLTDataset(
    data_dir=dataset_path,
    split="train",
    input_mode=hp["INPUT_MODE"],
    output_mode=hp["OUTPUT_MODE"],
    transforms=transforms,
    max_tokens=hp["MAX_TOKENS"],
)
val_dataset = SLTDataset(
    data_dir=dataset_path,
    split="val",
    input_mode=hp["INPUT_MODE"],
    output_mode=hp["OUTPUT_MODE"],
    transforms=transforms,
    max_tokens=hp["MAX_TOKENS"],
)
test_dataset = SLTDataset(
    data_dir=dataset_path,
    split="test",
    input_mode=hp["INPUT_MODE"],
    output_mode=hp["OUTPUT_MODE"],
    transforms=transforms,
    max_tokens=hp["MAX_TOKENS"],
)

Loaded metadata for dataset: The Greek Sign Language (GSL) Dataset
Loaded train annotations at /mnt/disk3Tb/slt-datasets/GSL/annotations.csv


Validating files: 100%|██████████| 8821/8821 [00:00<00:00, 260734.43it/s]


Dataset loaded correctly

Loaded metadata for dataset: The Greek Sign Language (GSL) Dataset
Loaded val annotations at /mnt/disk3Tb/slt-datasets/GSL/annotations.csv


Validating files: 100%|██████████| 588/588 [00:00<00:00, 188883.42it/s]


Dataset loaded correctly

Loaded metadata for dataset: The Greek Sign Language (GSL) Dataset
Loaded test annotations at /mnt/disk3Tb/slt-datasets/GSL/annotations.csv


Validating files: 100%|██████████| 881/881 [00:00<00:00, 219246.58it/s]

Dataset loaded correctly






### Display sample

In [17]:
from IPython.display import HTML

idx = 5023
# avoid using the last transform as it flattens the keypoints
visual_transforms: Compose = Compose(hp["TRANSFORMS"][:-1])
anim = train_dataset.visualize_pose(idx, transforms=visual_transforms)
HTML(anim.to_jshtml())

<IPython.core.display.Javascript object>

### Text tokenization

In [18]:
from WordLevelTokenizer import WordLevelTokenizer
from sklearn.utils.class_weight import compute_class_weight
import numpy as np
import torch


class_weights_complete = None

if hp["USE_CLASS_WEIGHTS"]:
    texts = train_dataset.annotations[hp["OUTPUT_MODE"]].tolist()
    tokenizer = WordLevelTokenizer()
    tokenized_sequences = tokenizer(texts, padding="max_length", max_length=25)
    flattened_tgts: list[list[int]] = [
        item for sublist in tokenized_sequences for item in sublist
    ]  # type: ignore
    token_ids = sorted(list(set(flattened_tgts)))
    class_weights = compute_class_weight(
        "balanced", classes=np.array(token_ids), y=flattened_tgts
    )
    class_weights_complete = torch.ones(tokenizer.vocab_size)
    class_weights_complete[token_ids] = torch.from_numpy(class_weights).float()

### Dataloader generation

In [19]:
import torch
from torch.utils.data import DataLoader


NUM_WORKERS = 8

train_loader = DataLoader(
    train_dataset,
    batch_size=hp["BATCH_SIZE"],
    num_workers=NUM_WORKERS,
    shuffle=True,
)
val_loader = DataLoader(
    val_dataset,
    batch_size=hp["BATCH_SIZE"],
    num_workers=NUM_WORKERS,
    shuffle=True,
)
test_loader = DataLoader(
    test_dataset,
    batch_size=hp["BATCH_SIZE"],
    num_workers=NUM_WORKERS,
    shuffle=True,
)



In [20]:
for src, tgt in train_loader:
    print(f"Source shape (Batch, Frames, Keypoints): {src.shape}")
    print(f"Target shape (Batch, Tokens): {tgt.shape}")
    break

Source shape (Batch, Frames, Keypoints): torch.Size([64, 220, 150])
Target shape (Batch, Tokens): torch.Size([64, 20])


## Model

### Definition

In [21]:
from KeypointsTransformer import KeypointsTransformer


num_keypoints = landmarks_mask.sum().item()
in_features = int(num_keypoints * (3 if hp["USE_3D"] else 2))

model = KeypointsTransformer(
    src_len=hp["MAX_FRAMES"],
    tgt_len=hp["MAX_TOKENS"],
    in_features=in_features,
    tgt_vocab_size=train_dataset.tokenizer.vocab_size,
    d_model=hp["D_MODEL"],
    num_encoder_layers=hp["NUM_ENCODER_LAYERS"],
    num_decoder_layers=hp["NUM_DECODER_LAYERS"],
    dropout=hp["DROPOUT"],
    interp=True,
)



### Training

In [22]:
import lightning.pytorch.utilities.model_summary.model_summary as model_summary

from Translator import Translator
from LightningKeypointsTransformer import LKeypointsTransformer
from helpers import create_src_mask, create_target_mask


device = torch.device(
    "mps"
    if torch.backends.mps.is_available()
    else ("cuda" if torch.cuda.is_available() else "cpu")
)

BATCH_SIZE_TEST = 1
sample_src = torch.randn(BATCH_SIZE_TEST, hp["MAX_FRAMES"], in_features)
sample_tgt = torch.randint(
    0, train_dataset.tokenizer.vocab_size, (BATCH_SIZE_TEST, hp["MAX_TOKENS"])
)
sample_src_mask, sample_src_padding_mask = create_src_mask(sample_src, device)
sample_tgt_mask, sample_tgt_padding_mask = create_target_mask(
    sample_tgt, train_dataset.tokenizer.pad_token_id, device
)
example_input_array = (
    sample_src,
    sample_tgt,
    sample_src_mask,
    sample_src_padding_mask,
    sample_tgt_mask,
    sample_tgt_padding_mask,
)
translator = Translator(device, hp["MAX_TOKENS"])
l_model = LKeypointsTransformer(
    model,
    device,
    train_dataset.tokenizer,
    translator,
    hp["LR"],
    example_input_array,
    class_weights_complete,
)
model_summary.summarize(l_model, max_depth=10)

/home/pdalbianco/anaconda3/envs/slt_datasets/lib/python3.11/site-packages/lightning/pytorch/utilities/parsing.py:199: Attribute 'model' is an instance of `nn.Module` and is already saved during checkpointing. It is recommended to ignore them using `self.save_hyperparameters(ignore=['model'])`.


   | Name                                                       | Type                            | Params | In sizes                                                                     | Out sizes                   
------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
0  | model                                                      | KeypointsTransformer            | 3.4 M  | [[1, 220, 150], [1, 20], [220, 220], [1, 220], [20, 20], [1, 20]]            | [1, 20, 402]                
1  | model.src_keyp_emb                                         | Conv1DEmbedder                  | 35.8 K | [1, 220, 150]                                                                | [1, 220, 128]               
2  | model.src_keyp_emb.conv1d_1                                | Conv1d                          | 19.3 K | [1, 150, 220]    

In [11]:
import json

import lightning.pytorch as L
from lightning.pytorch.callbacks.early_stopping import EarlyStopping
from lightning.pytorch.callbacks import ModelCheckpoint
from lightning.pytorch.loggers import WandbLogger


wandb_logger = WandbLogger(project=DATASET)  # , log_model="all")
wandb_logger.experiment.config.update(hp)
results_path = f"results/{DATASET}/{wandb_logger.experiment.name}"
os.makedirs(results_path, exist_ok=True)

with open(f"{results_path}/hp.json", "w") as f:
    json.dump(hp, f, default=str, indent=4)

checkpoint_callback = ModelCheckpoint(
    monitor="val_loss",
    dirpath=results_path,
    filename=f"best-{{epoch:02d}}-{{step:02d}}-{{val_loss:.2f}}",
    mode="min",
)

trainer = L.Trainer(
    logger=wandb_logger,
    callbacks=[
        EarlyStopping(monitor="val_accuracy", mode="max", patience=30),
        checkpoint_callback,
    ],
)

Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33mpedroodb[0m ([33mlidiaa[0m). Use [1m`wandb login --relogin`[0m to force relogin


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs


In [13]:
trainer.fit(
    model=l_model,
    train_dataloaders=train_loader,
    val_dataloaders=val_loader,
)

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name     | Type                 | Params | In sizes                                                          | Out sizes   
-------------------------------------------------------------------------------------------------------------------------------------
0 | model    | KeypointsTransformer | 3.4 M  | [[1, 220, 150], [1, 20], [220, 220], [1, 220], [20, 20], [1, 20]] | [1, 20, 402]
1 | accuracy | MulticlassAccuracy   | 0      | ?                                                                 | ?           
-------------------------------------------------------------------------------------------------------------------------------------
3.4 M     Trainable params
0         Non-trainable params
3.4 M     Total params
13.480    Total estimated model params size (MB)


Epoch 99: 100%|██████████| 138/138 [00:09<00:00, 14.87it/s, v_num=nwda]


In [27]:
import glob


checkpoint = glob.glob(f"{results_path}/best*")[0]

trainer.test(
    model=l_model,
    dataloaders=test_loader,
    ckpt_path=checkpoint,
)

if l_model.translation_results_df is not None:
    l_model.translation_results_df.to_csv(
        f"{results_path}/translations.csv", index=False
    )

Restoring states from the checkpoint path at results/GSL/firm-frog-32/best-epoch=69-step=9660-val_loss=0.24.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from the checkpoint at results/GSL/firm-frog-32/best-epoch=69-step=9660-val_loss=0.24.ckpt
/home/pdalbianco/anaconda3/envs/slt_datasets/lib/python3.11/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:492: Your `test_dataloader`'s sampler has shuffling enabled, it is strongly recommended that you turn shuffling off for val/test dataloaders.


Testing DataLoader 0:   0%|          | 0/14 [00:00<?, ?it/s]



Testing DataLoader 0: 100%|██████████| 14/14 [00:56<00:00,  0.25it/s]
