### Install Dependencies and Import Libraries

In [None]:
!pip install transformers --quiet
!pip install pytorch-lightning --quiet
!pip install -qqq wandb pytorch-lightning torchmetrics --quiet

[K     |████████████████████████████████| 1.7 MB 5.4 MB/s 
[K     |████████████████████████████████| 180 kB 45.6 MB/s 
[K     |████████████████████████████████| 140 kB 46.8 MB/s 
[K     |████████████████████████████████| 97 kB 5.7 MB/s 
[K     |████████████████████████████████| 63 kB 1.6 MB/s 
[?25h  Building wheel for subprocess32 (setup.py) ... [?25l[?25hdone
  Building wheel for pathtools (setup.py) ... [?25l[?25hdone


In [None]:
import torchmetrics

# 🏋️‍♀️ Weights & Biases
import wandb

# ⚡ 🤝 🏋️‍♀️
from pytorch_lightning.loggers import WandbLogger

wandb.login()

[34m[1mwandb[0m: Currently logged in as: [33mspranjal25[0m (use `wandb login --relogin` to force relogin)


True

In [None]:
import os

import torch
from torch.nn import functional as F
from torch.utils.data import DataLoader, Dataset, RandomSampler, random_split
from torchvision import transforms
import pytorch_lightning as pl
from pytorch_lightning import loggers as pl_loggers
from pytorch_lightning import Trainer, seed_everything
from pytorch_lightning.callbacks import ModelCheckpoint, EarlyStopping, LearningRateMonitor
import transformers

import numpy as np
import pandas as pd
from sklearn.metrics import accuracy_score, precision_recall_fscore_support
from sklearn.model_selection import train_test_split
from argparse import ArgumentParser

### Download the Data

In [None]:
# download the datasets - gdown links

# Training split; the recorded run saved it as /content/Enternet_ABSA_train.csv
!gdown 'https://drive.google.com/uc?export=download&id=1HLIBHxwrrNmdXy4I9dzWzSSYguKG424a'

# Test split; the recorded run saved it as /content/Enternet_ABSA_test.csv
!gdown 'https://drive.google.com/uc?export=download&id=1MFZxfYC7Wox5vsZAgTLRP0vCsvhLuqLK'

Downloading...
From: https://drive.google.com/uc?export=download&id=1HLIBHxwrrNmdXy4I9dzWzSSYguKG424a
To: /content/Enternet_ABSA_train.csv
100% 360k/360k [00:00<00:00, 56.6MB/s]
Downloading...
From: https://drive.google.com/uc?export=download&id=1MFZxfYC7Wox5vsZAgTLRP0vCsvhLuqLK
To: /content/Enternet_ABSA_test.csv
100% 88.3k/88.3k [00:00<00:00, 26.9MB/s]


### Setup JSON

In [None]:
# Central configuration dictionary read by the cells below.
setup = {
    
    # Random Seed
    'random_state' : 44,  # passed to seed_all()

    # File Paths
    'file_path_test' : '/content/Enternet_ABSA_test.csv',  # read by load_data()
    'file_path_train' : '/content/Enternet_ABSA_train.csv',  # read by load_data()
    
    # Model Params
    'model_name' : 'bert-base-uncased',  # checkpoint used for the tokenizer and the BERT backbone

    # Training params
    'epochs' : 10,  # default for the Trainer's max_epochs
    'lr' : 1e-5,  # default learning_rate of the Lightning module
    'max_sen_length' : 312,  # padding / truncation length of each encoded pair
    'num_labels' : 3,  # output size of the classification head
    'train_batch_size' : 16,  # default batch_size of ABSADataModule
    'test_batch_size' : 32,  # batch size of the stand-alone inference DataLoader

    # Callbacks
    'min_delta' : 1e-3,  # EarlyStopping min_delta
    'patience' : 3,  # EarlyStopping patience

    # For parallel loading
    'num_workers' : 2  # default DataLoader worker count in ABSADataModule
}

#### Set the seed

In [None]:
import random

def seed_all(seed_val:int)->None:
  """Seed every random number generator used in this notebook with one value.

  Covers Python's `random`, NumPy's global generator, and PyTorch on CPU and on
  all CUDA devices, then prints a confirmation line.

  Args:
    seed_val: integer seed handed to each generator.
  """
  seeders = (
      random.seed,
      np.random.seed,
      torch.manual_seed,
      torch.cuda.manual_seed_all,
  )
  for apply_seed in seeders:
    apply_seed(seed_val)
  print(f'Seeded! [seed : {seed_val}]')


seed_all(setup['random_state'])

Seeded! [seed : 44]


### Loading the datasets - Train & Test

In [None]:
def load_data():
  """Read the training and test CSV files whose paths are stored in `setup`.

  Returns:
    A `(train, test)` tuple of DataFrames, read from
    `setup['file_path_train']` and `setup['file_path_test']` in that order.
  """
  path_keys = ('file_path_train', 'file_path_test')
  return tuple(pd.read_csv(setup[key]) for key in path_keys)

def Get_Class_weights(data):
  """Compute inverse-frequency weights for the values of the `label` column.

  Args:
    data: DataFrame with a `label` column.

  Returns:
    NumPy array with one weight per distinct label, ordered by sorted label
    value; each weight is `total_rows / rows_with_that_label`.
  """
  per_class_counts = data['label'].value_counts().sort_index().to_numpy()
  return per_class_counts.sum() / per_class_counts

### Defining the pretrained model

In [None]:
model_ckpt = setup['model_name']

transformer_tokenizer = transformers.AutoTokenizer.from_pretrained(model_ckpt)

In [None]:
train, test = load_data()

In [None]:
train.columns

Index(['text', 'aspect', 'label'], dtype='object')

### Defining the ABSA Dataset class

In [None]:
class ABSADataset(Dataset):
  """Dataset of (aspect, text) pairs for aspect-based sentiment classification.

  Every item is the tokenizer's pair encoding of `aspect` (first segment) and
  `text` (second segment), padded / truncated to `max_len`.

  Args:
    df: DataFrame with `text` and `aspect` columns and, optionally, `label`.
    tokenizer: Hugging Face tokenizer used to encode each pair.
    max_len: padded / truncated sequence length of every encoded pair.
  """

  def __init__(self, df, tokenizer, max_len=setup['max_sen_length']):
    self.texts = df['text']
    self.aspects = df['aspect']
    # Labels are optional so an unlabeled frame can be wrapped as well.
    self.targets = df['label'] if 'label' in df.columns else None

    self.tokenizer = tokenizer
    self.max_len = max_len

  def __len__(self):
    return len(self.aspects)

  def __getitem__(self, idx):
    """Return the encoded pair (and the label, when available) at position `idx`.

    Returns:
      dict with `input_ids` and `attention_mask` tensors of shape (1, max_len)
      and, only when the frame has a `label` column, `label`.
    """
    # convert indexes, tensor->list
    if torch.is_tensor(idx):
      idx = idx.tolist()

    # Positional access: the DataLoader hands out positions 0..len-1, which
    # only coincide with index labels when the frame has a fresh RangeIndex.
    text = str(self.texts.iloc[idx])
    aspect = str(self.aspects.iloc[idx])

    # Pass the two segments explicitly as text / text_pair instead of a single
    # two-element list, and honour the max_len given to the constructor
    # (previously the global setup value was always used).
    encoded = self.tokenizer(text=aspect,
                             text_pair=text,
                             add_special_tokens=True,
                             padding='max_length',
                             max_length=self.max_len,
                             return_attention_mask=True,
                             return_tensors='pt',
                             truncation=True)

    item = {}
    if self.targets is not None:
      # Previously an unlabeled frame failed here with a TypeError on None.
      item['label'] = self.targets.iloc[idx]
    item['input_ids'] = encoded['input_ids']
    item['attention_mask'] = encoded['attention_mask']
    return item

# |¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯|
# | TestDataset for inferencing, this does not load labels   |
# |__________________________________________________________|

class ABSATest_Dataset(Dataset):
  """Label-free dataset of (aspect, text) pairs used for inference.

  Args:
    df: DataFrame with `text` and `aspect` columns.
    tokenizer: Hugging Face tokenizer used to encode each pair.
    max_len: padded / truncated sequence length of every encoded pair.
  """

  def __init__(self, df, tokenizer, max_len=setup['max_sen_length']):
    self.texts = df['text']
    self.aspects = df['aspect']
    self.tokenizer = tokenizer
    self.max_len = max_len

  def __len__(self):
    return len(self.aspects)

  def __getitem__(self, idx):
    """Return the encoded pair at position `idx`.

    Returns:
      dict with `input_ids` and `attention_mask` tensors of shape (1, max_len).
    """
    # convert indexes, tensor->list
    if torch.is_tensor(idx):
      idx = idx.tolist()

    # Positional access, independent of the frame's index labels.
    text = str(self.texts.iloc[idx])
    aspect = str(self.aspects.iloc[idx])

    # The training dataset (ABSADataset) encodes the aspect as the first
    # segment and the text as the second. Inference must use the same order;
    # this class previously encoded [text, aspect].
    # max_length now honours the constructor's max_len.
    encoded = self.tokenizer(text=aspect,
                             text_pair=text,
                             add_special_tokens=True,
                             padding='max_length',
                             max_length=self.max_len,
                             return_attention_mask=True,
                             return_tensors='pt',
                             truncation=True)

    return {
        'input_ids' : encoded['input_ids'],
        'attention_mask' : encoded['attention_mask']
    }

### Defining the ABSA Datamodule Class

In [None]:
class ABSADataModule(pl.LightningDataModule):
  """Lightning data module that loads, splits and serves the ABSA data.

  Args:
    batch_size: batch size used by every DataLoader this module returns.
    num_workers: number of DataLoader worker processes.
    transformer_tokenizer: tokenizer handed to every ABSADataset built here.
  """

  def __init__(self, batch_size: int = setup['train_batch_size'], num_workers: int = setup['num_workers'], transformer_tokenizer=transformer_tokenizer):
    super().__init__()

    self.batch_size = batch_size
    self.num_workers = num_workers
    self.transformer_tokenizer = transformer_tokenizer

  # prepare the dataset and split
  def prepare_data(self):
    """Load the CSV files and create a stratified 75/25 train/validation split."""
    df_train, df_test = load_data()

    # Add any preprocessing (if required) here:

    # A fixed random_state makes the split reproducible regardless of how much
    # of NumPy's global random state earlier cells have consumed.
    train_part, valid_part = train_test_split(
        df_train,
        test_size=0.25,
        stratify=df_train['label'],
        random_state=setup['random_state'],
    )

    # Fresh 0..n-1 indexes; new frames are assigned instead of resetting the
    # split results in place.
    self.df_train = train_part.reset_index(drop=True)
    self.df_valid = valid_part.reset_index(drop=True)
    self.df_test = df_test

    self.class_weights = Get_Class_weights(self.df_train)
    # Historical misspelling kept as an alias for existing callers.
    self.class_weghts = self.class_weights

  def _make_dataset(self, df):
    """Wrap a frame in an ABSADataset that uses this module's own tokenizer."""
    # Previously the notebook-global tokenizer was used here, so the
    # transformer_tokenizer constructor argument had no effect.
    return ABSADataset(df, tokenizer=self.transformer_tokenizer)

  def _make_loader(self, dataset, shuffle):
    """Build a DataLoader with this module's batch size and worker count."""
    return DataLoader(dataset, shuffle=shuffle, batch_size=self.batch_size, num_workers=self.num_workers)

  # get the dataset objects
  def setup(self, stage=None):
    """Create the train / validation / test dataset objects."""
    # Frames are created in prepare_data(); build them here too if that hook
    # has not run in this process.
    if not hasattr(self, 'df_train'):
      self.prepare_data()

    self.train_dataset = self._make_dataset(self.df_train)
    self.val_dataset = self._make_dataset(self.df_valid)
    self.test_dataset = self._make_dataset(self.df_test)

  # get the dataloader objects - PyTorch
  def train_dataloader(self):
    self.train_dataset = self._make_dataset(self.df_train)
    return self._make_loader(self.train_dataset, shuffle=True)

  def val_dataloader(self):
    # Evaluation loaders keep a fixed order (Lightning warns about shuffle=True here).
    self.val_dataset = self._make_dataset(self.df_valid)
    return self._make_loader(self.val_dataset, shuffle=False)

  def test_dataloader(self):
    # Fixed order so collected predictions line up with the rows of df_test.
    self.test_dataset = self._make_dataset(self.df_test)
    return self._make_loader(self.test_dataset, shuffle=False)


### Defining the Model

In [None]:
class ABSAModel_Bert(torch.nn.Module):
  """Pretrained transformer encoder with a two-layer classification head.

  The hidden state at the first ([CLS]) position is passed through
  Linear -> ReLU -> Dropout -> Linear to produce `num_labels` logits.

  Args:
    num_labels: number of output classes.
    config: dictionary providing `model_name`, the checkpoint to load.
    **kwargs: accepted and ignored.
  """

  def __init__(self, num_labels=setup['num_labels'], config = setup, **kwargs):
    super(ABSAModel_Bert, self).__init__()

    self.num_labels = num_labels
    self.bert = transformers.AutoModel.from_pretrained(config['model_name'])
    # Reuse the configuration already attached to the loaded model instead of
    # fetching it a second time.
    self.bert_config = self.bert.config

    self.pre_classifier = torch.nn.Linear(self.bert_config.hidden_size, self.bert_config.hidden_size)

    self.classifier = torch.nn.Linear(self.bert_config.hidden_size, self.num_labels)

    self.dropout = torch.nn.Dropout(self.bert_config.hidden_dropout_prob)

    self.relu = torch.nn.ReLU()

    # To freeze the encoder so that only the head is trained, uncomment:
    # for param in self.bert.parameters():
    #   param.requires_grad = False

  @staticmethod
  def _drop_singleton_dim(tensor):
    """Turn a (batch, 1, seq_len) tensor into (batch, seq_len); pass 2-D input through."""
    return tensor.squeeze(1) if tensor.dim() == 3 else tensor

  def forward(self, batch):
    """Compute class logits for a batch.

    Args:
      batch: mapping with `input_ids` and `attention_mask` tensors, each either
        (batch, seq_len) or (batch, 1, seq_len).

    Returns:
      Tensor of logits with shape (batch, num_labels).
    """
    # The datasets in this notebook return tensors with a leading singleton
    # dimension, which the DataLoader stacks to (batch, 1, seq_len). Both
    # inputs are normalised here; previously only input_ids was squeezed.
    input_ids = self._drop_singleton_dim(batch['input_ids'])
    attention_mask = self._drop_singleton_dim(batch['attention_mask'])

    outputs = self.bert(input_ids=input_ids,
                        attention_mask=attention_mask)

    # output from last hidden layer
    hidden_state = outputs[0]  # (batch_size, seq_len, dim)

    # Representation at the [CLS] position, used as the sequence summary.
    pooled_output = hidden_state[:, 0]  # (batch_size, dim)

    # Classification head: Linear -> ReLU -> Dropout -> Linear
    pooled_output = self.pre_classifier(pooled_output)  # (batch_size, dim)
    pooled_output = self.relu(pooled_output)  # (batch_size, dim)
    pooled_output = self.dropout(pooled_output)  # (batch_size, dim)
    logits = self.classifier(pooled_output)  # (batch_size, num_labels)

    return logits

  def get_outputs(self, input_ids, attention_mask):
    """Return the raw encoder outputs for already-prepared inputs."""
    outputs = self.bert(input_ids=input_ids,
                        attention_mask=attention_mask)
    # The result was previously computed and then dropped (implicit None).
    return outputs
    

### Lightning Module


In [None]:
def get_model(name:str):
  """Build the backbone model registered under `name`.

  Args:
    name: 'BERT' or 'DeBERTa'.

  Returns:
    A freshly constructed ABSAModel_Bert or ABSAModel_deBERTa.

  Raises:
    ValueError: if `name` is not a known backbone. Previously an unknown name
      silently returned None, which only failed later at the first use.
  """
  if name == 'BERT':
    return ABSAModel_Bert()
  if name == 'DeBERTa':
    return ABSAModel_deBERTa()
  raise ValueError(f"Unknown model name {name!r}; expected 'BERT' or 'DeBERTa'")

In [None]:
# |¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯|
# | Define the Pytorch Lightning Module Classifier Class     |
# |__________________________________________________________|

class ABSASentimentClassifier(pl.LightningModule):
  """Lightning module that trains and evaluates an ABSA sentiment classifier.

  Args:
    learning_rate: learning rate handed to the Adam optimizer.
    weights: optional per-class weights for the cross-entropy loss
      (e.g. the output of Get_Class_weights); None means unweighted.
    max_epochs: number of epochs the OneCycleLR schedule is laid out for.
    backbone: name understood by get_model ('BERT' or 'DeBERTa').
    **kwargs: any further values (for example the full Trainer argument
      namespace) are accepted and ignored.
  """

  def __init__(self, learning_rate = setup['lr'], weights=None, max_epochs = setup['epochs'], backbone = 'BERT', **kwargs):
    super().__init__()

    # max_epochs is an explicit argument with a default, so the module can be
    # built without passing the whole argparse namespace; it used to be
    # expected inside **kwargs.
    self.save_hyperparameters('learning_rate', 'max_epochs', 'backbone')
    self.model = get_model(backbone)
    self.weights = weights
    self.preds = []

    self.train_acc = torchmetrics.Accuracy()
    self.valid_acc = torchmetrics.Accuracy()
    self.test_acc = torchmetrics.Accuracy()

  def forward(self, x):
    y_hat = self.model(x)

    return y_hat

  def _loss(self, logits, labels):
    """Cross-entropy between logits and integer labels, class-weighted when weights are set."""
    class_weight = None
    if self.weights is not None:
      class_weight = torch.as_tensor(self.weights, dtype=torch.float, device=logits.device)
    loss_fct = torch.nn.CrossEntropyLoss(weight=class_weight)
    return loss_fct(logits.view(-1, self.model.num_labels), labels.view(-1))

  def training_step(self, batch, batch_nb):
    """Run one training batch; log loss and accuracy; return the loss."""
    # Forward
    y_hat = self.model(batch)
    labels = batch['label'].view(-1)

    # Loss
    loss = self._loss(y_hat, labels)

    # Update the metric before logging it; it was previously logged without
    # ever receiving predictions. Class probabilities are passed so the metric
    # always sees one score per class.
    self.train_acc(torch.softmax(y_hat.detach(), dim=-1), labels)

    # Logs
    self.log('train/acc', self.train_acc, on_epoch=True)
    self.log('train/loss', loss, on_epoch=True)
    self.log_dict({'training_loss':loss}, prog_bar=True)

    return loss

  def validation_step(self, batch, batch_nb):
    """Run one validation batch; log `val_loss` and `val_acc`; return the loss."""
    # Forward
    y_hat = self.model(batch)
    labels = batch['label'].view(-1)

    # Loss
    loss = self._loss(y_hat, labels)

    # Acc: fraction of rows whose arg-max class equals the label, computed on
    # the tensors' own device.
    predicted = torch.argmax(y_hat, dim=1)
    val_acc = (predicted == labels).float().mean()

    # Logs
    self.log_dict({'val_loss':loss,'val_acc':val_acc}, prog_bar=True)

    return loss

  def test_step(self, batch, batch_nb):
    """Run one test batch and append its logits (as nested lists) to `self.preds`."""
    # Forward
    yhat = self.model(batch)

    # list.extend() mutates in place and returns None; assigning its result
    # back to self.preds used to replace the list with None after one batch.
    self.preds.extend(yhat.detach().cpu().numpy().tolist())
    return

  def predict_step(self, batch, batch_idx: int , dataloader_idx: int = None):
    """Return the logits for one batch."""
    return self.model(batch)

  # |¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯|
  # | Training Setup  |
  # |_________________|
  def configure_optimizers(self):
    """Create the Adam optimizer and a per-step OneCycleLR schedule.

    Returns:
      `([optimizer], [scheduler_config])` as expected by Lightning.
    """
    optimizer = torch.optim.Adam([p for p in self.parameters() if p.requires_grad], lr=self.hparams.learning_rate, eps=1e-08)

    # NOTE(review): OneCycleLR lays out its schedule from max_lr, so the
    # learning_rate hyperparameter may have little effect once the scheduler
    # is attached -- confirm that the fixed 5e-5 peak is intended.
    # steps_per_epoch is taken from the attached data module, so this method
    # requires the Trainer to have been given one.
    scheduler = {
      'scheduler': torch.optim.lr_scheduler.OneCycleLR(optimizer, max_lr=5e-5,
                                                       steps_per_epoch=len(self.trainer.datamodule.train_dataloader()),
                                                       epochs=self.hparams.max_epochs),

      'interval': 'step'  # called after each training step
    }

    return [optimizer], [scheduler]

  @staticmethod
  def add_model_specific_args(parent_parser, root_dir):  # pragma: no-cover
    """Extend `parent_parser` with the arguments that only apply to this model.

    Args:
      parent_parser: parser whose arguments are inherited.
      root_dir: directory under which the default `--data_root` is placed.

    Returns:
      A new ArgumentParser with `--data_root` and `--learning_rate` added.
    """
    parser = ArgumentParser(parents=[parent_parser])

    # data
    parser.add_argument('--data_root', default=os.path.join(root_dir, 'train_val_data'), type=str)

    # training params (opt)
    parser.add_argument('--learning_rate', default=setup['lr'], type=float, help = "type (default: %(default)f)")
    return parser

### Defining Training Parameters


In [None]:
'''
|¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯|
| TRAINING ARGUMENTS                |
| these are project-wide arguments  |
|___________________________________|
'''
# Build one argparse parser that carries both the Trainer's arguments and the
# model-specific ones; root_dir is only used for the default of --data_root.
root_dir = os.getcwd()
parent_parser = ArgumentParser(add_help=False)
parent_parser = pl.Trainer.add_argparse_args(parent_parser)

# each LightningModule defines arguments relevant to it
parser = ABSASentimentClassifier.add_model_specific_args(parent_parser,root_dir)

# Fail early when no GPU is visible, because the defaults below request one.
assert torch.cuda.is_available(), "GPU(s) not present, to train on CPU, comment this line and set GPU=0 in parser.set_defualts"

# Defaults for this notebook. `model_load` and `model_name` are not declared
# with add_argument; set_defaults() places them on the parsed namespace and
# they are read a few lines below.
# NOTE(review): `model_name` here is a checkpoint path and is unrelated to
# setup['model_name'] (the pretrained backbone name).
parser.set_defaults(
        #profiler='simple',
        deterministic=True,
        max_epochs=setup['epochs'],
        gpus=1,
        # weights=class_weights 
        limit_train_batches=1.0,
        limit_val_batches=1.0,
        limit_test_batches=1.0,
        distributed_backend=None,
        fast_dev_run=False,
        model_load=False,
        model_name='lightning_logs/version_0/checkpoints/epoch=1-step=2343.ckpt'
    )

# parse_known_args() returns unrecognised command-line arguments separately
# (in `extra`) instead of raising on them.
args, extra = parser.parse_known_args()

''' 
Main training routine specific for this project. 
 |¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯|
 | 1 INIT LIGHTNING MODEL    |
 |___________________________|
'''
# Either restore the module from the checkpoint path above or build a fresh one
# from the full argument namespace (the constructor ignores keys it does not name).
if (vars(args)['model_load']):
  model = ABSASentimentClassifier.load_from_checkpoint(vars(args)['model_name'])
else:  
  model = ABSASentimentClassifier(**vars(args))


'''
 |¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯|
 | 2 CALLBACKS OF THE MODEL  |
 |___________________________|
'''
# Stop when 'val_loss' has not improved by at least min_delta for `patience`
# consecutive checks.
early_stop = EarlyStopping(
    monitor='val_loss',
    min_delta=setup['min_delta'],
    patience=setup['patience'],
    verbose=True,
    mode='min',
    strict=True,
)

# Record the learning rate at every optimizer step.
lr_monitor = LearningRateMonitor(logging_interval='step')

# Save checkpoints ranked by 'val_loss'; the file name embeds the epoch and the
# validation loss. No dirpath is given, so the Trainer's default location is used.
checkpoint_callback = ModelCheckpoint(
     monitor='val_loss',
     #dirpath='my/path/',
     filename='absa-classfiy-epoch{epoch:02d}-val_loss{val_loss:.2f}',
     auto_insert_metric_name=False
)

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


In [None]:
!pip install wandb



In [None]:
# !pip install wandb --quiet
wandb.init(project="ABSA-wandb", entity="spranjal25")

wandb_logger = WandbLogger(project='ABSA-wandb')

VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

ModuleNotFoundError: ignored

In [None]:
'''
 |¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯|
 | 3 INIT Trainer   |
 |__________________|
'''
# Build the Trainer from the parsed arguments plus the callbacks defined above.
# The WandbLogger created in the previous cell is now passed in as well; it was
# built but never given to the Trainer, so metrics logged with self.log(...)
# did not reach the Weights & Biases run.
trainer = Trainer.from_argparse_args(
    args,
    callbacks=[early_stop, lr_monitor, checkpoint_callback],
    logger=wandb_logger,
)

# Data module with the notebook's default batch size, worker count and tokenizer.
absa_dm = ABSADataModule()

GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs


### Train the model

In [None]:
'''
 |¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯|
 | 4 START Training   |
 |____________________|
'''
trainer.fit(model,absa_dm)
#trainer.validate()
# trainer.test()

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name      | Type           | Params
---------------------------------------------
0 | model     | ABSAModel_Bert | 110 M 
1 | train_acc | Accuracy       | 0     
2 | valid_acc | Accuracy       | 0     
3 | test_acc  | Accuracy       | 0     
---------------------------------------------
110 M     Trainable params
0         Non-trainable params
110 M     Total params
440.301   Total estimated model params size (MB)


Validation sanity check: 0it [00:00, ?it/s]

  f"Your `{mode.dataloader_prefix}_dataloader` has `shuffle=True`,"


Training: 0it [00:00, ?it/s]

  rank_zero_warn("Detected KeyboardInterrupt, attempting graceful shutdown...")


In [None]:
# trainer.test(model, absa_dm)

In [None]:
from google.colab import drive
drive.mount('/content/drive')

KeyboardInterrupt: ignored

In [None]:
!cp -r /content/lightning_logs/version_2 /content/drive/MyDrive/Internship\ Assignments/Enterpret/Checkpoints

In [None]:
_, test = load_data()

testset = ABSATest_Dataset(test, tokenizer=transformer_tokenizer)

testLoader = DataLoader(testset, batch_size=setup['test_batch_size'])

# model.eval()
# model.freeze()

In [None]:
model_infer = ABSASentimentClassifier.load_from_checkpoint('/content/drive/MyDrive/Internship Assignments/Enterpret/Checkpoints/version_2/checkpoints/absa-classfiy-epoch03-val_loss0.64.ckpt')

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


In [None]:
# Predict with the module restored from the saved checkpoint (`model_infer`).
# This call previously used `model`, the in-memory module from the training
# cell, so the checkpoint loaded just above was never used.
preds = trainer.predict(model_infer, testLoader)

  f"DataModule.{name} has already been called, so it will not be called again. "
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Predicting: 188it [00:00, ?it/s]

  f"DataModule.{name} has already been called, so it will not be called again. "


In [None]:
# Flatten the per-batch logits into one predicted class id per test row:
# arg-max over each row's logits, converted to a plain Python int.
preds_final = [
    torch.argmax(row_logits).cpu().detach().numpy().tolist()
    for batch_logits in preds
    for row_logits in batch_logits
]

In [None]:
test['classified'] = preds_final

In [None]:
test.to_csv('/content/results.csv')

## DeBERTa

In [None]:
from transformers import DebertaTokenizer, DebertaForSequenceClassification

In [None]:
tokenizer = DebertaTokenizer.from_pretrained('microsoft/deberta-base')


In [None]:
# Sanity check: push a handful of training rows through an off-the-shelf
# DeBERTa sequence classifier to confirm the pair encoding and label format.
train_2, test_2 = load_data()

# A few rows are enough for a smoke test; encoding and running the whole
# training set in a single forward pass is not practical.
SANITY_ROWS = 8
sample = train_2.head(SANITY_ROWS)

# One string per row. str() on a whole Series returns its printed
# representation, which the old zip() then walked character by character.
texts = sample['text'].astype(str).tolist()
aspects = sample['aspect'].astype(str).tolist()

# Same segment order as ABSADataset: aspect first, text second.
# truncation=True keeps every row at max_length so the batch can be stacked
# into one tensor.
encoded = tokenizer(text=aspects,
                    text_pair=texts,
                    padding='max_length',
                    max_length=128,
                    truncation=True,
                    return_attention_mask=True,
                    return_tensors='pt')

# Classification labels must be integer class indices (long), not floats.
# Assumes the `label` column already holds ids in [0, num_labels) -- TODO confirm.
labels = torch.tensor(sample['label'].to_numpy(), dtype=torch.long)

# The earlier cell only imported DebertaForSequenceClassification; the old code
# called the BERT Lightning module `model`, which does not accept these
# keyword arguments.
deberta_clf = DebertaForSequenceClassification.from_pretrained(
    'microsoft/deberta-base', num_labels=setup['num_labels'])
deberta_clf.eval()

with torch.no_grad():
  outputs = deberta_clf(**encoded, labels=labels)

# `outputs` (not the undefined name `output`) carries the loss and the logits.
loss = outputs.loss
logits = outputs.logits

### Define the DeBERTa Model class

In [None]:
class ABSAModel_deBERTa(torch.nn.Module):
  """DeBERTa encoder with a small classification head.

  The hidden state at the first position is passed through
  Dropout -> Linear(hidden, 64) -> LayerNorm -> ReLU -> Dropout -> Linear(64, num_labels).

  Args:
    num_labels: number of output classes.
    config: accepted for signature parity with ABSAModel_Bert; not read here.
    **kwargs: accepted and ignored.
  """

  def __init__(self, num_labels=setup['num_labels'], config = setup, **kwargs):
    super(ABSAModel_deBERTa, self).__init__()

    self.num_labels = num_labels
    self.model = transformers.AutoModel.from_pretrained('microsoft/deberta-base')

    # Take the head's input width from the loaded encoder instead of a literal 768.
    hidden_size = self.model.config.hidden_size
    self.l1 = torch.nn.Linear(hidden_size, 64)
    self.d1 = torch.nn.Dropout(0.2)
    self.bn1 = torch.nn.LayerNorm(64)  # a LayerNorm despite the name; attribute name kept
    self.d2 = torch.nn.Dropout(0.2)
    self.l2 = torch.nn.Linear(64, num_labels)
    self.relu = torch.nn.ReLU()

    # To freeze the encoder so that only the head is trained, uncomment:
    # for param in self.model.parameters():
    #   param.requires_grad = False

  @staticmethod
  def _drop_singleton_dim(tensor):
    """Turn a (batch, 1, seq_len) tensor into (batch, seq_len); pass 2-D input through."""
    return tensor.squeeze(1) if tensor.dim() == 3 else tensor

  def forward(self, batch):
    """Compute class logits for a batch.

    Args:
      batch: mapping with `input_ids` and `attention_mask` tensors, each either
        (batch, seq_len) or (batch, 1, seq_len).

    Returns:
      Tensor of logits with shape (batch, num_labels).
    """
    # The datasets in this notebook return tensors with a leading singleton
    # dimension, which the DataLoader stacks to (batch, 1, seq_len).
    input_ids = self._drop_singleton_dim(batch['input_ids'])
    attention_mask = self._drop_singleton_dim(batch['attention_mask'])

    outputs = self.model(input_ids=input_ids, attention_mask=attention_mask)

    # The encoder returns an output object, not a tensor. Take the last hidden
    # state at the first position as the sequence summary; the output object
    # itself used to be fed straight into dropout.
    x = outputs[0][:, 0]  # (batch_size, hidden_size)

    x = self.d1(x)
    x = self.l1(x)
    x = self.bn1(x)
    x = self.relu(x)
    x = self.d2(x)
    x = self.l2(x)  # (batch_size, num_labels)

    return x

  def get_outputs(self, inputs):
    """Return the raw encoder outputs for a mapping of keyword inputs."""
    outputs = self.model(**inputs)
    return outputs
    

In [None]:
absa_deberta = ABSADataModule(transformer_tokenizer=tokenizer)

In [None]:
'''
 |¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯|
 | 3 INIT Trainer   |
 |__________________|
'''
# Fresh Trainer for the DeBERTa run, built from the same parsed arguments.
# The WandbLogger is passed in so metrics logged with self.log(...) reach the
# Weights & Biases run; it was previously created but never given to a Trainer.
# NOTE(review): the callback objects are the same instances used by the earlier
# Trainer -- confirm that carrying their state into this run is intended.
trainer = Trainer.from_argparse_args(
    args,
    callbacks=[early_stop, lr_monitor, checkpoint_callback],
    logger=wandb_logger,
)


GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs


In [None]:
# NOTE(review): `model` is still the ABSASentimentClassifier instance created in
# the training-arguments cell; nothing in this section builds a new module for
# the DeBERTa run. Confirm that the intended DeBERTa-backed module is the one
# being fitted here.
trainer.fit(model, absa_deberta)

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name  | Type              | Params
--------------------------------------------
0 | model | ABSAModel_deBERTa | 138 M 
--------------------------------------------
138 M     Trainable params
0         Non-trainable params
138 M     Total params
554.605   Total estimated model params size (MB)


Validation sanity check: 0it [00:00, ?it/s]

  f"Your `{mode.dataloader_prefix}_dataloader` has `shuffle=True`,"


RuntimeError: ignored