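Change to the project root directory if necessary (i.e. when the kernel was started from a subdirectory). Run this cell only once per kernel session: every run moves the working directory up one more level.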

In [1]:
# Move the kernel's working directory up one level so that the relative paths used
# later in this notebook ("src", "configs") are resolved from the new directory.
# NOTE: not idempotent — each re-run without a kernel restart moves up one more level.
%cd ..

/home/pablo/nlp-course/assignment


In [2]:
import torch
import sys
import json
import os
import pytorch_lightning as pl

from pprint import pprint
from transformers import AutoTokenizer

# Make the project's `src` directory importable so the local packages imported
# below (models, heads, trainers, data_loaders, utils) can be resolved.
src_path = os.path.join(os.getcwd(), "src")
# Guard the append so that re-running this cell does not pile up duplicate
# entries in sys.path.
if src_path not in sys.path:
    sys.path.append(src_path)

from models.conv_transformer_model import ConvTransformer
from heads.classification_head import ModelWithClassificationHead
from trainers.classification_trainer import ClassificationModule
from data_loaders.pan23 import PAN23Dataset, PAN23CollatorFn, PAN23DataModule
from utils.freeze_layers import freeze_layers

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
# Load the experiment configuration and pull out the two sections used by this
# notebook: the model hyper-parameters and the PAN training settings.
config_path = os.path.join(os.getcwd(), "configs", "base-config.json")
# Be explicit about the encoding so decoding the JSON file does not depend on
# the platform's locale default.
with open(config_path, encoding="utf-8") as f:
    config = json.load(f)
model_params = config["model_params"]
pan_train_params = config["pan_train_params"]

# Print both sections so the settings used for this run are recorded in the
# notebook output.
pprint(model_params)
print("------------------")
pprint(pan_train_params)

{'classification_head_params': {'dropout_p': 0.1, 'ff_dim': 256},
 'conv_layers_params': [{'conv_params': {'in_channels': 32,
                                         'kernel_size': 5,
                                         'out_channels': 128},
                         'dim_feedforward': 128,
                         'dropout_params': {'p': 0.1}},
                        {'conv_params': {'in_channels': 128,
                                         'kernel_size': 5,
                                         'out_channels': 128},
                         'dim_feedforward': 128,
                         'dropout_params': {'p': 0.1}}],
 'projection_head_params': {'dropout_p': 0.1, 'ff_dim': 256, 'output_dim': 128},
 'transformer_model': 'roberta-base'}
------------------
{'data_module_params': {'batch_size': 16,
                        'data_path': 'data/pan23/transformed',
                        'max_len': 512,
                        'tokenizer': 'roberta-base'},
 'optimizer_params': 

In [4]:
# Seed the Python, NumPy and PyTorch RNGs before any weights are created so
# that newly initialised parameters (and later stochastic steps such as
# dropout) are reproducible across kernel restarts.
SEED = 42
pl.seed_everything(SEED, workers=True)

# Build the backbone from the conv-layer specification and the transformer
# checkpoint name given in the config.
model = ConvTransformer(
    model_params["conv_layers_params"],
    model_params["transformer_model"],
)

Some weights of RobertaModel were not initialized from the model checkpoint at roberta-base and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [5]:
# Wrap the backbone with a classification head. The head's input width is read
# from the backbone's `output_embedding_dim`; the remaining keyword arguments
# come from the `classification_head_params` section of the config.
head_params = model_params["classification_head_params"]
model_with_classification_head = ModelWithClassificationHead(
    model=model,
    input_dim=model.output_embedding_dim,
    **head_params,
)

In [6]:
# Point the data module at the task-1 sub-directory of the configured data path.
# The config dict is updated in place because the data-module cell reads
# `pan_train_params["data_module_params"]` directly.
TASK_SUBDIR = "task1"
data_module_params = pan_train_params["data_module_params"]
configured_path = os.path.normpath(data_module_params["data_path"])

if os.path.basename(configured_path) == TASK_SUBDIR:
    # This cell has already been run: the stored path already ends in "task1".
    # Recover the base path instead of appending the sub-directory a second
    # time (which would yield ".../task1/task1").
    data_path = os.path.dirname(configured_path)
    task1_data_path = data_module_params["data_path"]
else:
    data_path = data_module_params["data_path"]
    task1_data_path = os.path.join(data_path, TASK_SUBDIR)

data_module_params["data_path"] = task1_data_path

In [7]:
# Build the PAN23 data module from the data-module section of the training config.
data_module_config = pan_train_params["data_module_params"]
data_module = PAN23DataModule.from_joint_config(data_module_config)


In [8]:
# Apply the layer-freezing policy to the transformer inside the backbone, driven
# by the `unfrozen_layers` setting of the training config.
# NOTE(review): presumably only the listed layers remain trainable — confirm in
# utils.freeze_layers.
freeze_layers(
    model.transformer_model,
    pan_train_params["unfrozen_layers"],
)

In [9]:
# Wrap the model-with-head in the training module. The optimizer settings come
# from the config and the positive-class ratio is queried from the data module.
positive_ratio = data_module.get_positive_ratio()
classification_module = ClassificationModule(
    model=model_with_classification_head,
    optimizer_config=pan_train_params["optimizer_params"],
    positive_ratio=positive_ratio,
)

In [10]:
# Create the Lightning trainer; every Trainer option is taken from the
# `trainer_params` section of the training config.
trainer_params = pan_train_params["trainer_params"]
trainer = pl.Trainer(**trainer_params)

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
/home/pablo/.micromamba/envs/master-nlp/lib/python3.10/site-packages/pytorch_lightning/trainer/connectors/logger_connector/logger_connector.py:67: Starting from v1.9.0, `tensorboardX` has been removed as a dependency of the `pytorch_lightning` package, due to potential conflicts with other packages in the ML ecosystem. For this reason, `logger=True` will use `CSVLogger` as the default logger, unless the `tensorboard` or `tensorboardX` packages are found. Please `pip install lightning[extra]` or one of them to enable TensorBoard support by default


In [11]:
# Run training: the trainer drives the classification module using the
# dataloaders provided by the data module.
trainer.fit(model=classification_module, datamodule=data_module)

Initializing distributed: GLOBAL_RANK: 0, MEMBER: 1/2
Initializing distributed: GLOBAL_RANK: 1, MEMBER: 2/2
----------------------------------------------------------------------------------------------------
distributed_backend=nccl
All distributed processes registered. Starting with 2 processes
----------------------------------------------------------------------------------------------------

You are using a CUDA device ('NVIDIA GeForce RTX 3090') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Missing logger folder: out/conv_transformer_base/finetuned/lightning_logs
Missing logger folder: out/conv_transformer_base/finetuned/lightning_logs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2]
LOCAL_RANK: 1 - CUDA_VISIBLE_DEVICES: [0,1

Sanity Checking DataLoader 0:   0%|          | 0/2 [00:00<?, ?it/s]

/home/pablo/.micromamba/envs/master-nlp/lib/python3.10/site-packages/pytorch_lightning/utilities/data.py:77: Trying to infer the `batch_size` from an ambiguous collection. The batch size we found is 32. To avoid any miscalculations, use `self.log(..., batch_size=batch_size)`.


Sanity Checking DataLoader 0: 100%|██████████| 2/2 [00:01<00:00,  1.34it/s]

/home/pablo/.micromamba/envs/master-nlp/lib/python3.10/site-packages/pytorch_lightning/trainer/connectors/logger_connector/result.py:429: It is recommended to use `self.log('val_loss', ..., sync_dist=True)` when logging on epoch level in distributed setting to accumulate the metric across devices.
/home/pablo/.micromamba/envs/master-nlp/lib/python3.10/site-packages/pytorch_lightning/trainer/connectors/logger_connector/result.py:429: It is recommended to use `self.log('val_f1_score', ..., sync_dist=True)` when logging on epoch level in distributed setting to accumulate the metric across devices.


Epoch 0: 100%|█████████▉| 403/404 [01:54<00:00,  3.51it/s, v_num=0]        

/home/pablo/.micromamba/envs/master-nlp/lib/python3.10/site-packages/pytorch_lightning/utilities/data.py:77: Trying to infer the `batch_size` from an ambiguous collection. The batch size we found is 8. To avoid any miscalculations, use `self.log(..., batch_size=batch_size)`.


Epoch 0: 100%|██████████| 404/404 [01:54<00:00,  3.52it/s, v_num=0]

/home/pablo/.micromamba/envs/master-nlp/lib/python3.10/site-packages/pytorch_lightning/utilities/data.py:77: Trying to infer the `batch_size` from an ambiguous collection. The batch size we found is 12. To avoid any miscalculations, use `self.log(..., batch_size=batch_size)`.


Epoch 0: 100%|██████████| 404/404 [02:15<00:00,  2.99it/s, v_num=0, val_loss=0.409, val_f1_score=0.928]

/home/pablo/.micromamba/envs/master-nlp/lib/python3.10/site-packages/pytorch_lightning/trainer/connectors/logger_connector/result.py:429: It is recommended to use `self.log('train_loss', ..., sync_dist=True)` when logging on epoch level in distributed setting to accumulate the metric across devices.
/home/pablo/.micromamba/envs/master-nlp/lib/python3.10/site-packages/pytorch_lightning/trainer/connectors/logger_connector/result.py:429: It is recommended to use `self.log('train_f1_score', ..., sync_dist=True)` when logging on epoch level in distributed setting to accumulate the metric across devices.


Epoch 4: 100%|██████████| 404/404 [01:56<00:00,  3.47it/s, v_num=0, val_loss=0.409, val_f1_score=0.928, train_loss=0.399, train_f1_score=0.935]

/home/pablo/.micromamba/envs/master-nlp/lib/python3.10/site-packages/pytorch_lightning/trainer/call.py:54: Detected KeyboardInterrupt, attempting graceful shutdown...


Epoch 5:  59%|█████▉    | 239/404 [01:09<00:47,  3.46it/s, v_num=0, val_loss=0.409, val_f1_score=0.928, train_loss=0.399, train_f1_score=0.935]