## Prepare the data

### Set up the tokenizer

In [1]:
from transformers import AutoTokenizer

# Name of the pretrained checkpoint; used here for the tokenizer and passed
# again later as the first argument of the classifier.
MPATH = "roberta-base"

# Load the tokenizer that matches the pretrained checkpoint.
tokenizer = AutoTokenizer.from_pretrained(MPATH)

### Set up the data module

In [2]:
from datautils import PersuationSchemeDataModule, PersuationSchemeBatcher


# Corpus location, relative to this notebook. Forward slashes are used so the
# path resolves on Windows, Linux and macOS alike; a backslash-separated path
# only works on Windows.
DPATH = "../data/persuasionforgood_corpus"

# The batcher receives the tokenizer and is handed to the data module.
batcher = PersuationSchemeBatcher(tokenizer)
dm = PersuationSchemeDataModule(
    DPATH,
    batcher=batcher,
    batch_size=64
)

# To harvest the id2label dict: set up the 'test' stage so that
# `dm.test_dataset` exists, then read the label mapping from it.
dm.setup('test')
id2label = dm.test_dataset.id2label

Building the Utterance Labels: 100%|██████████| 1534/1534 [00:00<00:00, 66691.50it/s]


## Prepare the model

In [3]:
from modelling import PersuasionSchemeClassifier


# Directory that receives the tokenizer files and the training checkpoints.
SAVE_PATH = "../models/persuasion-schemes-classifier"
# Left empty here; this cell always builds a fresh model.
CKPT = ""

# Fresh classifier built from the pretrained checkpoint name and the label
# mapping harvested from the test dataset.
model = PersuasionSchemeClassifier(MPATH, id2label=id2label)
print("Loaded fresh model")

Some weights of the model checkpoint at roberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.bias', 'roberta.pooler.dense.bias', 'lm_head.layer_norm.weight', 'lm_head.decoder.weight', 'lm_head.dense.bias', 'lm_head.dense.weight', 'roberta.pooler.dense.weight', 'lm_head.layer_norm.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.

Loaded fresh model


In [4]:
# Write the tokenizer files into SAVE_PATH; the bare call as the last
# expression of the cell displays the paths of the files that were written.
tokenizer.save_pretrained(SAVE_PATH)

('../models/persuasion-schemes-classifier\\tokenizer_config.json',
 '../models/persuasion-schemes-classifier\\special_tokens_map.json',
 '../models/persuasion-schemes-classifier\\vocab.json',
 '../models/persuasion-schemes-classifier\\merges.txt',
 '../models/persuasion-schemes-classifier\\added_tokens.json',
 '../models/persuasion-schemes-classifier\\tokenizer.json')

### Set up callbacks

In [5]:
from pytorch_lightning.callbacks import(
    EarlyStopping,
    ModelCheckpoint,
    RichModelSummary
)

# Checkpoint on the best validation F1 (higher is better), storing weights only.
# The template below is a plain string: Lightning fills in `{epoch}` and
# `{val_loss:.2f}` itself, producing file names such as
# `epoch-epoch=18-val_loss=0.56.ckpt`.
checkpoint_callback = ModelCheckpoint(
    dirpath=SAVE_PATH,
    filename='epoch-{epoch}-{val_loss:.2f}',
    monitor='val_overall_f1_score',
    mode='max',
    save_weights_only=True,
)

# Stop training once the validation F1 has failed to improve by at least
# 1e-4 for 8 consecutive validation checks.
early_stop_callback = EarlyStopping(
    monitor="val_overall_f1_score",
    mode="max",
    min_delta=1e-4,
    patience=8,
    verbose=False,
)

## Train the model

In [6]:
from pytorch_lightning import Trainer, seed_everything


# Seed the random number generators (dataloader workers included) so that
# runs are repeatable.
seed_everything(42, workers=True)

trainer = Trainer(
    accelerator='gpu',
    max_epochs=-1,  # no epoch limit; EarlyStopping decides when to stop
    accumulate_grad_batches=2,  # optimizer step every 2 batches of 64
    deterministic=True,
    log_every_n_steps=16,
    callbacks=[checkpoint_callback, early_stop_callback, RichModelSummary()],
)

# Train and validate using the dataloaders provided by the data module.
trainer.fit(model, datamodule=dm)

Global seed set to 42
Trainer already configured with model summary callbacks: [<class 'pytorch_lightning.callbacks.rich_model_summary.RichModelSummary'>]. Skipping setting a default `ModelSummary` callback.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
Building the Utterance Labels: 100%|██████████| 3451/3451 [00:00<00:00, 67668.09it/s]
Building the Utterance Labels: 100%|██████████| 1151/1151 [00:00<00:00, 67696.55it/s]
  rank_zero_warn(f"Checkpoint directory {dirpath} exists and is not empty.")
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(
  rank_zero_warn(


Training: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

### Evaluate the model

In [11]:
import os

# Load the best checkpoint without hard-coding a machine-specific absolute path.
# Prefer the path that the ModelCheckpoint callback recorded during
# `trainer.fit`. If it is empty (for example, training was not run in this
# session), fall back to the checkpoint file name from the original run,
# looked up under SAVE_PATH.
# NOTE(review): the fallback assumes SAVE_PATH resolves to the directory that
# held the checkpoint in the original run; confirm for your checkout.
best_ckpt_path = checkpoint_callback.best_model_path or os.path.join(
    SAVE_PATH, 'epoch-epoch=18-val_loss=0.56.ckpt'
)
model = PersuasionSchemeClassifier.load_from_checkpoint(best_ckpt_path)

Some weights of the model checkpoint at roberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.bias', 'roberta.pooler.dense.bias', 'lm_head.layer_norm.weight', 'lm_head.decoder.weight', 'lm_head.dense.bias', 'lm_head.dense.weight', 'roberta.pooler.dense.weight', 'lm_head.layer_norm.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.

In [12]:
import torch
# Run the test loop on the reloaded model with gradient tracking disabled.
with torch.no_grad():
    trainer.test(model, datamodule=dm)

Building the Utterance Labels: 100%|██████████| 1534/1534 [00:00<00:00, 61369.72it/s]
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: 0it [00:00, ?it/s]

### Push to hub

In [13]:
# Left commented out so that re-running the notebook does not publish again.
# Uncomment to upload the wrapped sequence classifier to the Hub.
# model.seq_classifier.push_to_hub('nasheed/rl-grp-prj-per-cls')

pytorch_model.bin:   0%|          | 0.00/499M [00:00<?, ?B/s]

Upload 1 LFS files:   0%|          | 0/1 [00:00<?, ?it/s]

CommitInfo(commit_url='https://huggingface.co/nasheed/rl-grp-prj-per-cls/commit/83617b5381eef42d71b06a2706f1ea3e900f3f19', commit_message='Upload RobertaForSequenceClassification', commit_description='', oid='83617b5381eef42d71b06a2706f1ea3e900f3f19', pr_url=None, pr_revision=None, pr_num=None)

In [None]:
# Left commented out so that re-running the notebook does not publish again.
# Uncomment to upload the tokenizer to the same Hub repository as the model.
# tokenizer.push_to_hub('nasheed/rl-grp-prj-per-cls')