In [1]:
import importlib
import os
import sys

# Put the sibling `src` directory at the front of the import path; the
# project-local imports in later cells (`config`, `data`, `dl_utils`) are
# presumably resolved from there.
# The path is made absolute here so that a later change of working directory
# cannot silently break imports, and the membership check stops re-runs of this
# cell from stacking duplicate entries onto `sys.path`.
SRC_DIR = os.path.abspath(os.path.join(os.pardir, 'src'))
if SRC_DIR not in sys.path:
    sys.path.insert(0, SRC_DIR)

In [2]:
import torch
import torch.nn as nn
import os
import json
import pandas as pd
from torch.utils.data import Dataset, DataLoader
import pyarrow.parquet as pq
import numpy as np
import torch.nn.functional as F
from tqdm import tqdm
import pytorch_lightning as pl
import sys
# from sklearn import *
from torchmetrics.classification import accuracy

In [3]:
from pytorch_lightning.callbacks import LearningRateMonitor, ModelCheckpoint, DeviceStatsMonitor, TQDMProgressBar,EarlyStopping
from pytorch_lightning.loggers import TensorBoardLogger
from pytorch_lightning.tuner import Tuner
from config import *
from data import data_utils
from data.dataset import ASL_DATASET
from dl_utils import get_dataloader

In [4]:
from src.models.models import LSTM_BASELINE_Model, LSTM_Predictor, TransformerPredictor

In [5]:
# Display the compute device string. `DEVICE` is not defined in this notebook,
# so it presumably comes from the `from config import *` star-import — verify.
DEVICE

'cpu'

In [6]:
class MyProgressBar(TQDMProgressBar):
    """TQDM progress bar whose secondary bars are silenced outside a terminal.

    The validation, prediction and test bars are still created by the parent
    class, but each one is switched off when ``sys.stdout`` is not a TTY.
    The training bar is not overridden and therefore behaves as in the parent.
    """

    @staticmethod
    def _disable_unless_tty(bar):
        """Set ``bar.disable`` when stdout is not a terminal and return ``bar``."""
        if not sys.stdout.isatty():
            bar.disable = True
        return bar

    def init_validation_tqdm(self):
        """Return the parent's validation bar, disabled when stdout is not a TTY."""
        return self._disable_unless_tty(super().init_validation_tqdm())

    def init_predict_tqdm(self):
        """Return the parent's prediction bar, disabled when stdout is not a TTY."""
        return self._disable_unless_tty(super().init_predict_tqdm())

    def init_test_tqdm(self):
        """Return the parent's test bar, disabled when stdout is not a TTY."""
        return self._disable_unless_tty(super().init_test_tqdm())

In [7]:
# MAX_SEQUENCES = 150
BATCH_SIZE = 256  # 2**8; per the author's note, the largest batch that fits on their GPU
num_workers = 4  # forwarded to create_data_loaders; alternatives: os.cpu_count() // 2, or 0
mod_name = "FIRST_TRANSFORMER_MODEL_2"  # run name: checkpoint filename prefix and logger name
DL_FRAMEWORK = "PYTORCH"  # forwarded to create_data_loaders as `dl_framework`

In [8]:
# Build the dataset object with augmentation switched on.
# NOTE(review): the meaning of `augmentation_threshold=0.3` is defined in
# data/dataset.py, not here — presumably a per-sample probability; confirm.
# NOTE(review): this one augmented dataset feeds the train/val/test split in the
# next cell — confirm augmentation is not applied to validation/test samples.
asl_dataset = ASL_DATASET(augment=True, augmentation_threshold=0.3)

In [9]:
# Obtain the three objects used later as the train / validation / test
# dataloaders. Despite the `_ds` suffix they are passed to the trainer as
# dataloaders in the fit and test cells.
loader_options = dict(
    batch_size=BATCH_SIZE,
    dl_framework=DL_FRAMEWORK,
    num_workers=num_workers,
)
train_ds, val_ds, test_ds = data_utils.create_data_loaders(asl_dataset, **loader_options)

In [10]:
# Report len() of the train, validation and test objects, in that order.
# NOTE(review): these are used as dataloaders, so the numbers are presumably
# batch counts rather than sample counts — confirm.
n_train, n_val, n_test = (len(split) for split in (train_ds, val_ds, test_ds))
print(f'Got the lengths for Train-Dataset: {n_train}, {n_val}, {n_test}')

Got the lengths for Train-Dataset: 468, 28, 56


In [12]:
# Pull a single item from the training loader and keep its first element
# (presumably the input features — confirm); then show that element's dtype.
first_item = next(iter(train_ds))
batch = first_item[0]
batch.dtype

torch.float32

In [13]:
# Hyper-parameters for the transformer classifier, gathered in one place.
transformer_kwargs = dict(
    d_model=192,
    n_head=8,
    dim_feedforward=512,
    dropout=0.3,
    layer_norm_eps=1e-6,
    norm_first=False,
    batch_first=True,
    num_layers=3,
    num_classes=250,
    learning_rate=LEARNING_RATE,
)
model = TransformerPredictor(**transformer_kwargs)

# Run one forward pass on the sample batch; the result is discarded, so this
# only checks that the model accepts the batch without raising.
model(batch)

# Cast to float32 and move to the configured device, then show the architecture.
model = model.float()
model = model.to(DEVICE)
print(model)

TransformerPredictor(
  (model): TransformerSequenceClassifier(
    (transformer): TransformerEncoder(
      (layers): ModuleList(
        (0-2): 3 x TransformerEncoderLayer(
          (self_attn): MultiheadAttention(
            (out_proj): NonDynamicallyQuantizableLinear(in_features=192, out_features=192, bias=True)
          )
          (linear1): Linear(in_features=192, out_features=512, bias=True)
          (dropout): Dropout(p=0.3, inplace=False)
          (linear2): Linear(in_features=512, out_features=192, bias=True)
          (norm1): LayerNorm((192,), eps=1e-06, elementwise_affine=True)
          (norm2): LayerNorm((192,), eps=1e-06, elementwise_affine=True)
          (dropout1): Dropout(p=0.3, inplace=False)
          (dropout2): Dropout(p=0.3, inplace=False)
        )
      )
      (norm): LayerNorm((192,), eps=1e-05, elementwise_affine=True)
    )
    (output_layer): Linear(in_features=192, out_features=250, bias=True)
  )
  (criterion): CrossEntropyLoss()
  (accuracy): Mu

In [18]:
# Element count of every parameter tensor, in registration order.
param_counts = [weights.numel() for weights in model.parameters()]
print(param_counts)

[110592, 576, 36864, 192, 98304, 512, 98304, 192, 192, 192, 192, 192, 110592, 576, 36864, 192, 98304, 512, 98304, 192, 192, 192, 192, 192, 110592, 576, 36864, 192, 98304, 512, 98304, 192, 192, 192, 192, 192, 192, 192, 48000, 250]


In [24]:
# Pair each parameter's qualified name with its element count.
[(param_name, tensor.numel()) for param_name, tensor in model.named_parameters()]

[('model.transformer.layers.0.self_attn.in_proj_weight', 110592),
 ('model.transformer.layers.0.self_attn.in_proj_bias', 576),
 ('model.transformer.layers.0.self_attn.out_proj.weight', 36864),
 ('model.transformer.layers.0.self_attn.out_proj.bias', 192),
 ('model.transformer.layers.0.linear1.weight', 98304),
 ('model.transformer.layers.0.linear1.bias', 512),
 ('model.transformer.layers.0.linear2.weight', 98304),
 ('model.transformer.layers.0.linear2.bias', 192),
 ('model.transformer.layers.0.norm1.weight', 192),
 ('model.transformer.layers.0.norm1.bias', 192),
 ('model.transformer.layers.0.norm2.weight', 192),
 ('model.transformer.layers.0.norm2.bias', 192),
 ('model.transformer.layers.1.self_attn.in_proj_weight', 110592),
 ('model.transformer.layers.1.self_attn.in_proj_bias', 576),
 ('model.transformer.layers.1.self_attn.out_proj.weight', 36864),
 ('model.transformer.layers.1.self_attn.out_proj.bias', 192),
 ('model.transformer.layers.1.linear1.weight', 98304),
 ('model.transformer.la

In [None]:
# Keep only the single best checkpoint, ranked by the highest "val_accuracy".
# The filename template is filled in by Lightning with the epoch and metric value.
checkpoint_filename = mod_name + "-{epoch:02d}-{val_accuracy:.2f}"
checkpoint_callback = ModelCheckpoint(
    monitor="val_accuracy",
    mode="max",
    save_top_k=1,
    filename=checkpoint_filename,
    verbose=True,
)

In [None]:
# TensorBoard logs go under <ROOT_PATH>/lightning_logs/<mod_name>/.
log_root = os.path.join(ROOT_PATH, "lightning_logs")
tb_logger = TensorBoardLogger(
    name=mod_name,
    save_dir=log_root,
    # version=mod_name
)

In [None]:
# Stop training once "val_accuracy" has failed to improve by at least 0.005
# for 6 consecutive checks (higher is better).
early_stop_callback = EarlyStopping(
    monitor='val_accuracy',
    mode='max',
    patience=6,
    min_delta=0.005,
    verbose=True,
)

In [None]:
# Callback that records the optimizer's learning rate to the logger;
# `logging_interval='step'` requests per-step (rather than per-epoch) logging.
lr_monitor = LearningRateMonitor(logging_interval='step')

In [None]:
# Trainer shared by the fit and test cells below.
# `accelerator="auto"` lets Lightning pick whichever backend is actually
# available. A hard-coded "gpu" makes Trainer construction fail on machines
# without a supported GPU backend, and the config cell above reports
# DEVICE == 'cpu'.
trainer = pl.Trainer(
    enable_progress_bar=True,
    accelerator="auto",
    logger=tb_logger,
    callbacks=[
        DeviceStatsMonitor(),
        early_stop_callback,
        checkpoint_callback,
        MyProgressBar(),
        lr_monitor,
    ],
    max_epochs=100,
    # limit_train_batches=10,
    # limit_val_batches=0,
    num_sanity_val_steps=0,  # skip the validation sanity check before training
    profiler=None,  # profiling disabled
)

In [None]:
# Train the model, validating on `val_ds`; early stopping, checkpointing and
# logging are driven by the callbacks registered on the trainer.
trainer.fit(model, train_dataloaders=train_ds, val_dataloaders=val_ds)


In [29]:
# Evaluate on the test loader using the best checkpoint tracked during fitting
# (`ckpt_path="best"`), and display the returned list of metric dicts.
test_results = trainer.test(dataloaders=test_ds, ckpt_path="best")
test_results

Restoring states from the checkpoint path at /Users/tgdimas1/git/CAS-AML-FINAL-PROJECT/notebooks/../src/../lightning_logs/FIRST_TRANSFORMER_MODEL_2/version_8/checkpoints/FIRST_TRANSFORMER_MODEL_2-epoch=28-val_accuracy=0.77.ckpt
Loaded model weights from the checkpoint at /Users/tgdimas1/git/CAS-AML-FINAL-PROJECT/notebooks/../src/../lightning_logs/FIRST_TRANSFORMER_MODEL_2/version_8/checkpoints/FIRST_TRANSFORMER_MODEL_2-epoch=28-val_accuracy=0.77.ckpt


Testing: 0it [00:00, ?it/s]

[{'test_loss': 0.9785623550415039, 'test_accuracy': 0.7791028022766113}]