In [1]:
%matplotlib notebook
%load_ext autoreload
%autoreload 2

In [2]:
import os, sys

module_path = os.path.abspath(os.path.join("../.."))
sys.path.append(module_path)

## Dataset and hyperparameters loading

In [3]:
from torchvision.transforms.v2 import Compose
from hyperparameters import HP_DICT

from SLTDataset import SLTDataset
from posecraft.Pose import Pose


DATASET = "RWTH_PHOENIX_2014T"
dataset_path = f"/mnt/disk3Tb/slt-datasets/{DATASET}"
hp = HP_DICT[DATASET]

landmarks_mask = Pose.get_components_mask(hp["LANDMARKS_USED"])
transforms: Compose = Compose(hp["TRANSFORMS"])

train_dataset = SLTDataset(
    data_dir=dataset_path,
    split="train",
    input_mode=hp["INPUT_MODE"],
    output_mode=hp["OUTPUT_MODE"],
    transforms=transforms,
    max_tokens=hp["MAX_TOKENS"],
)
val_dataset = SLTDataset(
    data_dir=dataset_path,
    split="val",
    input_mode=hp["INPUT_MODE"],
    output_mode=hp["OUTPUT_MODE"],
    transforms=transforms,
    max_tokens=hp["MAX_TOKENS"],
)
test_dataset = SLTDataset(
    data_dir=dataset_path,
    split="test",
    input_mode=hp["INPUT_MODE"],
    output_mode=hp["OUTPUT_MODE"],
    transforms=transforms,
    max_tokens=hp["MAX_TOKENS"],
)

Loaded metadata for dataset: RWTH-PHOENIX-Weather 2014 T: Parallel Corpus of Sign Language Video, Gloss and Translation
Loaded train annotations at /mnt/disk3Tb/slt-datasets/RWTH_PHOENIX_2014T/annotations.csv


Validating files: 100%|██████████| 7096/7096 [00:00<00:00, 270352.00it/s]


Dataset loaded correctly

Loaded metadata for dataset: RWTH-PHOENIX-Weather 2014 T: Parallel Corpus of Sign Language Video, Gloss and Translation
Loaded val annotations at /mnt/disk3Tb/slt-datasets/RWTH_PHOENIX_2014T/annotations.csv


Validating files: 100%|██████████| 519/519 [00:00<00:00, 229697.56it/s]

Dataset loaded correctly

Loaded metadata for dataset: RWTH-PHOENIX-Weather 2014 T: Parallel Corpus of Sign Language Video, Gloss and Translation





Loaded test annotations at /mnt/disk3Tb/slt-datasets/RWTH_PHOENIX_2014T/annotations.csv


Validating files: 100%|██████████| 642/642 [00:00<00:00, 234804.95it/s]

Dataset loaded correctly






### Display sample

In [4]:
from IPython.display import HTML

from posecraft.transforms import (
    CenterToKeypoint,
    FillMissing,
    FilterLandmarks,
    ReplaceNansWithZeros,
    InterpolateFrames,
)

visual_transforms = Compose(
    [
        FilterLandmarks(landmarks_mask),
        # CenterToKeypoint(center_keypoint=0),
        # NormalizeDistances(indices=(11, 12), distance_factor=0.2),
        FillMissing(),
        # InterpolateFrames(30),
        ReplaceNansWithZeros(),
    ]
)

anim = train_dataset.visualize_pose(35, transforms=visual_transforms)
HTML(anim.to_jshtml())

<IPython.core.display.Javascript object>

### Text tokenization

In [4]:
from WordLevelTokenizer import WordLevelTokenizer
from sklearn.utils.class_weight import compute_class_weight
import numpy as np
import torch


class_weights_complete = None

if hp["USE_CLASS_WEIGHTS"]:
    texts = train_dataset.annotations[hp["OUTPUT_MODE"]].tolist()
    tokenizer = WordLevelTokenizer()
    tokenized_sequences = tokenizer(texts, padding="max_length", max_length=25)
    flattened_tgts: list[list[int]] = [
        item for sublist in tokenized_sequences for item in sublist
    ]  # type: ignore
    token_ids = sorted(list(set(flattened_tgts)))
    class_weights = compute_class_weight(
        "balanced", classes=np.array(token_ids), y=flattened_tgts
    )
    class_weights_complete = torch.ones(tokenizer.vocab_size)
    class_weights_complete[token_ids] = torch.from_numpy(class_weights).float()

### Dataloader generation

In [5]:
import torch
from torch.utils.data import DataLoader


NUM_WORKERS = 4

train_loader = DataLoader(
    train_dataset,
    batch_size=hp["BATCH_SIZE"],
    num_workers=NUM_WORKERS,
    shuffle=True,
)
val_loader = DataLoader(
    val_dataset,
    batch_size=hp["BATCH_SIZE"],
    num_workers=NUM_WORKERS,
    shuffle=True,
)
test_loader = DataLoader(
    test_dataset,
    batch_size=hp["BATCH_SIZE"],
    num_workers=NUM_WORKERS,
    shuffle=True,
)

In [6]:
for src, tgt in train_loader:
    print(src.shape)
    print(tgt.shape)
    break

torch.Size([64, 200, 150])
torch.Size([64, 50])


## Model

### Definition

In [7]:
from KeypointsTransformer import KeypointsTransformer


num_keypoints = landmarks_mask.sum().item()
in_features = int(num_keypoints * (3 if hp["USE_3D"] else 2))

model = KeypointsTransformer(
    src_len=hp["MAX_FRAMES"],
    tgt_len=hp["MAX_TOKENS"],
    in_features=in_features,
    tgt_vocab_size=train_dataset.tokenizer.vocab_size,
    d_model=hp["D_MODEL"],
    num_encoder_layers=hp["NUM_ENCODER_LAYERS"],
    num_decoder_layers=hp["NUM_DECODER_LAYERS"],
    dropout=hp["DROPOUT"],
)

### Training

In [8]:
import lightning.pytorch.utilities.model_summary.model_summary as model_summary

from Translator import Translator
from LightningKeypointsTransformer import LKeypointsTransformer


device = torch.device(
    "mps"
    if torch.backends.mps.is_available()
    else ("cuda" if torch.cuda.is_available() else "cpu")
)

BATCH_SIZE_TEST = 1
example_input_array = (
    torch.randn(BATCH_SIZE_TEST, hp["MAX_FRAMES"], in_features),
    torch.randint(
        0, train_dataset.tokenizer.vocab_size, (BATCH_SIZE_TEST, hp["MAX_TOKENS"])
    ),
    torch.zeros(hp["MAX_TOKENS"], hp["MAX_TOKENS"]),
    torch.randint(0, 2, (BATCH_SIZE_TEST, hp["MAX_TOKENS"])).bool(),
)
translator = Translator(device, hp["MAX_TOKENS"])
l_model = LKeypointsTransformer(
    model,
    device,
    train_dataset.tokenizer,
    translator,
    hp["LR"],
    example_input_array,
    class_weights_complete,
)
model_summary.summarize(l_model, max_depth=10)

  from .autonotebook import tqdm as notebook_tqdm


   | Name                                                       | Type                            | Params | In sizes                                                                | Out sizes          
----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
0  | model                                                      | KeypointsTransformer            | 19.0 M | [[1, 200, 150], [1, 50], [50, 50], [1, 50]]                             | [1, 50, 20483]     
1  | model.src_keyp_emb                                         | Conv1DEmbedder                  | 52.4 K | [1, 200, 150]                                                           | [1, 200, 256]      
2  | model.src_keyp_emb.conv1d_1                                | Conv1d                          | 19.3 K | [1, 150, 200]                                                           |

In [9]:
import lightning.pytorch as L
from lightning.pytorch.callbacks.early_stopping import EarlyStopping
from lightning.pytorch.callbacks import ModelCheckpoint
from lightning.pytorch.loggers import WandbLogger


wandb_logger = WandbLogger(project=DATASET)  # , log_model="all")
wandb_logger.experiment.config.update(hp)

checkpoint_callback = ModelCheckpoint(
    monitor="val_loss",
    dirpath="checkpoint/",
    filename=f"{DATASET}-{wandb_logger.experiment.name}-best-{{epoch:02d}}-{{step:02d}}-{{val_loss:.2f}}",
    mode="min",
    save_last=True,
)
checkpoint_callback.CHECKPOINT_NAME_LAST = f"{DATASET}-{wandb_logger.experiment.name}-last"  # type: ignore

trainer = L.Trainer(
    logger=wandb_logger,
    default_root_dir="./checkpoint",
    callbacks=[
        EarlyStopping(monitor="val_accuracy", mode="max", patience=30),
        checkpoint_callback,
    ],
)

Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33mpedroodb[0m ([33mlidiaa[0m). Use [1m`wandb login --relogin`[0m to force relogin


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs


In [10]:
trainer.fit(
    model=l_model,
    train_dataloaders=train_loader,
    val_dataloaders=val_loader,
)

/home/pdalbianco/anaconda3/envs/slt_datasets/lib/python3.11/site-packages/lightning/pytorch/loops/utilities.py:73: `max_epochs` was not set. Setting it to 1000 epochs. To train without an epoch limit, set `max_epochs=-1`.
/home/pdalbianco/anaconda3/envs/slt_datasets/lib/python3.11/site-packages/lightning/pytorch/callbacks/model_checkpoint.py:652: Checkpoint directory /home/pdalbianco/Github/slt_models_tryout/src/checkpoint exists and is not empty.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]



  | Name     | Type                 | Params | In sizes                                    | Out sizes     
-----------------------------------------------------------------------------------------------------------------
0 | model    | KeypointsTransformer | 19.0 M | [[1, 200, 150], [1, 50], [50, 50], [1, 50]] | [1, 50, 20483]
1 | accuracy | MulticlassAccuracy   | 0      | ?                                           | ?             
-----------------------------------------------------------------------------------------------------------------
19.0 M    Trainable params
0         Non-trainable params
19.0 M    Total params
75.916    Total estimated model params size (MB)


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

/home/pdalbianco/anaconda3/envs/slt_datasets/lib/python3.11/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:492: Your `val_dataloader`'s sampler has shuffling enabled, it is strongly recommended that you turn shuffling off for val/test dataloaders.


Epoch 116: 100%|██████████| 85/85 [00:20<00:00,  4.18it/s, v_num=z076]     


In [11]:
import glob


checkpoint = glob.glob(f"checkpoint/{DATASET}-{wandb_logger.experiment.name}-best*")[0]

trainer.test(
    model=l_model,
    dataloaders=test_loader,
    ckpt_path=checkpoint,
)

Restoring states from the checkpoint path at checkpoint/LSAT-glorious-elevator-15-best-epoch=29-step=2550-val_loss=6.34.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from the checkpoint at checkpoint/LSAT-glorious-elevator-15-best-epoch=29-step=2550-val_loss=6.34.ckpt
/home/pdalbianco/anaconda3/envs/slt_datasets/lib/python3.11/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:492: Your `test_dataloader`'s sampler has shuffling enabled, it is strongly recommended that you turn shuffling off for val/test dataloaders.


Testing DataLoader 0: 100%|██████████| 27/27 [14:39<00:00,  0.03it/s]


[{'test_loss': 6.3660888671875,
  'test_accuracy': 0.1577364206314087,
  'bleu_1_greedy': 0.04246973618865013,
  'bleu_2_greedy': 0.007209444418549538,
  'bleu_3_greedy': 0.0008738819160498679,
  'bleu_4_greedy': 0.00021833329810760915,
  'bleu_1_beam': 0.012728000059723854,
  'bleu_2_beam': 0.0031248002778738737,
  'bleu_3_beam': 0.0023892580065876245,
  'bleu_4_beam': 0.0021557637955993414}]

In [12]:
# import glob


# CHKP = glob.glob(f"checkpoints/rwth-{wandb_logger.experiment.name}-best*")[0]
# l_model = LKeypointsTransformer.load_from_checkpoint(
#     CHKP, model=model, num_classes=tokenizer.vocab_size
# )

# debug_loader = DataLoader(
#     [test_dataset[i] for i in range(1)], batch_size=BATCH_SIZE
# )

In [13]:
# trainer.test(
#     model=l_model,
#     dataloaders=debug_loader,
#     ckpt_path=CHKP,
# )