In [1]:
# Switch the kernel's working directory to the parent folder; presumably this is
# what lets the project-local `dataset` and `configs` imports below resolve.
%cd ..

/media/Z/NDT/PytorchLightning/4. NLP


In [2]:
import typing
import torch
import datasets
import pytorch_lightning as pl
import pdb
from transformers import (
    AdamW,
    AutoConfig,
    AutoModelForTokenClassification,
    AutoTokenizer,
    get_linear_schedule_with_warmup,
)
import os
from torch.utils.data import DataLoader
from dataset.ner_dataloader import NerDataset
from configs import *
from pytorch_lightning import seed_everything, Trainer

In [3]:

class NERModelModule(pl.LightningModule):
    """LightningModule that fine-tunes a Hugging Face token-classification model.

    On construction it loads the model config, model and fast tokenizer for
    ``model_name_or_path`` and builds the training ``NerDataset`` from
    ``PATH_DATASET/version_2/train_data.txt``. The ``batch_size`` attribute is
    read by ``train_dataloader``.
    """

    def __init__(
        self,
        model_name_or_path: str,
        num_labels: int,
        tags_list: typing.List[str],
        precision: int = 32,
        learning_rate: float = 2e-5,
        adam_epsilon: float = 1e-8,
        warmup_steps: int = 0,
        weight_decay: float = 0.0,
        train_batch_size: int = 32,
        eval_batch_size: int = 32,
        batch_size: int = 16,
        **kwargs,
    ):
        """Load the pretrained model/tokenizer and prepare the training dataset.

        Args:
            model_name_or_path: Identifier or path passed to the ``Auto*``
                ``from_pretrained`` loaders and to ``NerDataset``.
            num_labels: Number of labels passed to ``AutoConfig.from_pretrained``.
            tags_list: Tag names forwarded to ``NerDataset``.
            learning_rate: Learning rate used by ``configure_optimizers``.
            adam_epsilon: Epsilon used by ``configure_optimizers``.
            weight_decay: Weight decay for the parameters that are not
                biases or LayerNorm weights.
            batch_size: Batch size used by ``train_dataloader``.
            precision, warmup_steps, train_batch_size, eval_batch_size, **kwargs:
                Accepted and captured by ``save_hyperparameters()``, but not
                otherwise read by any method defined in this class.
        """
        super().__init__()
        self.tags_list = tags_list
        self.save_hyperparameters()
        self.config = AutoConfig.from_pretrained(model_name_or_path, num_labels=num_labels)
        self.model = AutoModelForTokenClassification.from_pretrained(model_name_or_path, config=self.config)
        self.tokenizer = AutoTokenizer.from_pretrained(model_name_or_path, use_fast=True)
        # Kept as a plain attribute (in addition to hparams) because
        # train_dataloader reads it directly.
        self.batch_size = batch_size
        # Dummy batch describing the forward() inputs. torch.randint already
        # yields int64, so dtype is stated directly instead of converting afterwards.
        self.example_input_array = {
            'input_ids': torch.randint(3, 2000, (16, 128), dtype=torch.long),
            'attention_mask': torch.ones(16, 128),
            'labels': torch.ones(16, 128, dtype=torch.long),
        }
        # Loaded here but not used by any method defined in this class.
        self.metrics = datasets.load_metric('seqeval')
        self.train_data = NerDataset(
            dataset_path=os.path.join(PATH_DATASET, 'version_2', 'train_data.txt'),
            model_name_or_path=model_name_or_path,
            tags_list=self.tags_list,
            max_seq_length=152,
            label_all_tokens=False,
        )

    def forward(self, **inputs):
        """Forward all keyword inputs to the wrapped model and return its output."""
        return self.model(**inputs)

    def training_step(self, batch, batch_idx):
        """Run one training batch, log ``train_loss`` and return the loss."""
        outputs = self(**batch)
        # The first element of the model output is used as the loss.
        loss = outputs[0]
        self.log('train_loss', loss, prog_bar=True)
        return loss

    def train_dataloader(self):
        """Return a DataLoader over the training dataset using ``self.batch_size``."""
        # DataLoader comes from the notebook's import cell; the redundant
        # function-local re-import was removed.
        return DataLoader(self.train_data, batch_size=self.batch_size)

    def configure_optimizers(self):
        """Build the AdamW optimizer with two parameter groups.

        Parameters whose name contains ``"bias"`` or ``"LayerNorm.weight"`` get
        no weight decay; all others use ``hparams.weight_decay``.

        Note: no learning-rate scheduler is created, so ``warmup_steps`` has no
        effect on training here.
        """
        no_decay = ("bias", "LayerNorm.weight")
        decay_params = []
        no_decay_params = []
        # Single pass over the parameters; order within each group is preserved.
        for name, param in self.model.named_parameters():
            if any(marker in name for marker in no_decay):
                no_decay_params.append(param)
            else:
                decay_params.append(param)
        optimizer_grouped_parameters = [
            {"params": decay_params, "weight_decay": self.hparams.weight_decay},
            {"params": no_decay_params, "weight_decay": 0.0},
        ]
        optimizer = AdamW(
            optimizer_grouped_parameters,
            lr=self.hparams.learning_rate,
            eps=self.hparams.adam_epsilon,
        )
        return [optimizer]

In [4]:
# Instantiate the module; batch_size=32 is only the initial value and may be
# overwritten when the trainer's tuner runs.
model_module = NERModelModule(
    model_name_or_path=BASE_MODEL_NAME,
    num_labels=len(TAGS),
    tags_list=TAGS,
    batch_size=32,
)

Some weights of the model checkpoint at xlm-roberta-base were not used when initializing XLMRobertaForTokenClassification: ['lm_head.layer_norm.weight', 'lm_head.dense.bias', 'lm_head.dense.weight', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight', 'lm_head.bias']
- This IS expected if you are initializing XLMRobertaForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLMRobertaForTokenClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of XLMRobertaForTokenClassification were not initialized from the model checkpoint at xlm-roberta-base and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-st

In [5]:
# GPU trainer with automatic batch-size scaling in "binsearch" mode.
trainer = Trainer(
    accelerator="gpu",
    auto_scale_batch_size="binsearch",
)

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs


In [6]:
# Run the tuner on the module. With auto_scale_batch_size enabled on the trainer,
# it tries increasing batch sizes and returns a dict with the selected value,
# which is displayed as this cell's output.
trainer.tune(model_module)

  rank_zero_warn(
  rank_zero_warn(
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [1]
  rank_zero_warn(
  rank_zero_warn(
`Trainer.fit` stopped: `max_steps=3` reached.
Batch size 2 succeeded, trying batch size 4
`Trainer.fit` stopped: `max_steps=3` reached.
Batch size 4 succeeded, trying batch size 8
`Trainer.fit` stopped: `max_steps=3` reached.
Batch size 8 succeeded, trying batch size 16
`Trainer.fit` stopped: `max_steps=3` reached.
Batch size 16 succeeded, trying batch size 32
`Trainer.fit` stopped: `max_steps=3` reached.
Batch size 32 succeeded, trying batch size 64
  rank_zero_warn(
`Trainer.fit` stopped: `max_steps=3` reached.
Batch size 64 succeeded, trying batch size 128
  rank_zero_warn(
`Trainer.fit` stopped: `max_steps=3` reached.
Batch size 128 succeeded, trying batch size 256
  rank_zero_warn(
Batch size 256 failed, trying batch size 192
  rank_zero_warn(
Batch size 192 failed, trying batch size 160
  rank_zero_warn(
`Trainer.fit` stopped: `max_steps=3` reached.
Batch size 160 suc

{'scale_batch_size': 184}

In [7]:
# Check that the tuner wrote the selected batch size back onto the module
# (it was constructed with batch_size=32).
print(model_module.batch_size)

184


In [8]:
# => 184: the batch size selected by trainer.tune above for this model on this GPU