In [1]:
# !pip install transformers
# !pip install pytorch_lightning

# Imports

In [1]:
import transformers
from torch.utils.data import DataLoader, TensorDataset, random_split, RandomSampler, Dataset
import pandas as pd
import numpy as np

import torch.nn.functional as F
import pytorch_lightning as pl
import torch
import torch.nn as nn
from pytorch_lightning.callbacks import ModelCheckpoint
from pytorch_lightning import loggers as pl_loggers

import math
import random
import re
import argparse
import nltk
import time
from tqdm import tqdm
import os
import pickle
import copy

from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, models
from PIL import Image

In [2]:
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# Report whether CUDA was detected.
print(torch.cuda.is_available())

True


In [3]:
# Directory holding the images; MSEDataset loads '<pid>.jpg' from here.
path_to_images = '/kaggle/input/exmoreimages/images'

# Tab-separated train / validation / test splits (read with sep='\t').
path_to_train = '/kaggle/input/filedataset/train_df.tsv'

path_to_val = '/kaggle/input/filedataset/val_df.tsv'

path_to_test = '/kaggle/input/filedataset/test_df.tsv'

# Base directory for checkpoints and logs; with the empty string,
# os.path.join(path_to_save_model, 'model_dir') is relative to the working directory.
path_to_save_model = ''

## Data Loading

In [4]:
class MSEDataset(Dataset):
    """Dataset pairing a source text with its image and a target explanation.

    Each row of the header-less, tab-separated file is read as
    ``(pid, text, explanation)``. The image of a row is looked up as
    ``<path_to_images>/<pid>.jpg``.

    Args:
        path_to_data_df: Path to the TSV file.
        path_to_images: Directory containing the ``<pid>.jpg`` images.
        tokenizer: Callable used to tokenize both the source text and the
            target explanation. It is called with ``max_length``,
            ``padding``, ``truncation``, ``return_tensors`` and
            ``add_prefix_space`` keyword arguments and must return a mapping
            with ``'input_ids'`` and ``'attention_mask'``.
        image_transform: Callable applied to the RGB image array.
        max_length: Length every token sequence is padded/truncated to
            (defaults to 256, the value that used to be hard-coded).
    """

    def __init__(self, path_to_data_df, path_to_images, tokenizer, image_transform, max_length=256):
        self.data = pd.read_csv(path_to_data_df, sep='\t', names=['pid', 'text', 'explanation'])
        self.path_to_images = path_to_images
        self.tokenizer = tokenizer
        self.image_transform = image_transform
        self.max_length = max_length

    def _encode(self, text):
        """Tokenize ``text`` with the dataset's own tokenizer, padded to ``max_length``."""
        # Use self.tokenizer (not a notebook-level global) so the dataset works
        # with whatever tokenizer it was constructed with.
        return self.tokenizer(
            text,
            max_length=self.max_length,
            padding="max_length",
            truncation=True,
            return_tensors='pt',
            add_prefix_space=True
        )

    def __getitem__(self, idx):
        """Return the sample at ``idx`` as a dict of model inputs.

        Returns:
            dict with keys ``input_ids`` and ``attention_mask`` (first row of
            the tokenized source text), ``input_image`` (transformed image)
            and ``target_ids`` (first row of the tokenized explanation).
        """
        row = self.data.iloc[idx]

        encoded_src = self._encode(row['text'])
        # The tokenizer returns a batch of one; keep only that single row.
        src_ids = encoded_src['input_ids'][0]
        src_mask = encoded_src['attention_mask'][0]

        # str() keeps the lookup working when pandas parsed the pid column as numbers.
        image_path = os.path.join(self.path_to_images, str(row['pid']) + '.jpg')
        # The context manager closes the underlying file once the pixels are copied.
        with Image.open(image_path) as image_file:
            img = np.array(image_file.convert('RGB'))
        img_inp = self.image_transform(img)

        encoded_tgt = self._encode(row['explanation'])
        target_ids = encoded_tgt['input_ids'][0]

        return {
            "input_ids": src_ids,
            "attention_mask": src_mask,
            "input_image": img_inp,
            "target_ids": target_ids,
        }

    def __len__(self):
        """Number of rows in the underlying table."""
        return self.data.shape[0]

In [5]:
class MSEDataModule(pl.LightningDataModule):
    """Lightning data module wrapping the train / validation / test MSEDataset splits.

    Args:
        path_to_train_df, path_to_val_df, path_to_test_df: TSV files of the splits.
        path_to_images: Image directory shared by all splits.
        tokenizer: Tokenizer handed to every MSEDataset.
        image_transform: Image transform handed to every MSEDataset.
        batch_size: Batch size of the train and validation loaders.
    """

    def __init__(self, path_to_train_df, path_to_val_df, path_to_test_df, path_to_images, tokenizer, image_transform, batch_size=16):
        super().__init__()
        self.path_to_train_df = path_to_train_df
        self.path_to_val_df = path_to_val_df
        self.path_to_test_df = path_to_test_df
        self.path_to_images = path_to_images
        self.batch_size = batch_size
        self.tokenizer = tokenizer
        self.image_transform = image_transform

    def _build_split(self, path_to_df):
        """Create the MSEDataset of one split from its TSV path."""
        return MSEDataset(path_to_df, self.path_to_images, self.tokenizer, self.image_transform)

    def setup(self, stage=None):
        """Instantiate all three datasets (``stage`` is not consulted)."""
        self.train_dataset = self._build_split(self.path_to_train_df)
        self.val_dataset = self._build_split(self.path_to_val_df)
        self.test_dataset = self._build_split(self.path_to_test_df)

    def train_dataloader(self):
        """Training loader drawing samples through a RandomSampler."""
        train_set = self.train_dataset
        return DataLoader(train_set, batch_size=self.batch_size, sampler=RandomSampler(train_set))

    def val_dataloader(self):
        """Validation loader, iterated in dataset order."""
        return DataLoader(self.val_dataset, batch_size=self.batch_size)

    def test_dataloader(self):
        """Test loader yielding one sample per batch."""
        return DataLoader(self.test_dataset, batch_size=1)

## Model for Multimodal Sarcasm Detection Pre-training

In [6]:
from transformers import BartTokenizer, BartForConditionalGeneration, BartModel, AdamW, BartConfig, BartPretrainedModel, PreTrainedModel

from dataclasses import dataclass
from typing import Optional, Tuple, List
from transformers.file_utils import ModelOutput

from typing import Any, Callable, Dict, Iterable, List, Optional, Tuple

import torch
from torch.nn import functional as F

from transformers.file_utils import ModelOutput
from transformers.generation.beam_search import BeamScorer, BeamSearchScorer
from transformers.generation.logits_process import (
    HammingDiversityLogitsProcessor,
    LogitsProcessorList,
    MinLengthLogitsProcessor,
    NoBadWordsLogitsProcessor,
    NoRepeatNGramLogitsProcessor,
    PrefixConstrainedLogitsProcessor,
    RepetitionPenaltyLogitsProcessor,
    TemperatureLogitsWarper,
    TopKLogitsWarper,
    TopPLogitsWarper,
)

from transformers.utils import logging


# Module-level logger obtained through transformers' logging utilities.
logger = logging.get_logger(__name__)

In [7]:
@dataclass
class SequenceClassifierOutput(ModelOutput):
    """Output container returned by BartForMultimodalSarcasmDetection.forward."""
    # Optional loss; the detection model in this file always passes None.
    loss: Optional[torch.FloatTensor] = None
    # Scores produced by the classification head.
    logits: torch.FloatTensor = None
    # The detection model stores the encoder's last_hidden_state here.
    hidden_states: Optional[Tuple[torch.FloatTensor]] = None
    # Attentions forwarded from the encoder outputs.
    attentions: Optional[Tuple[torch.FloatTensor]] = None

def getClones(module, N):
    """Return an ``nn.ModuleList`` holding ``N`` independent deep copies of ``module``."""
    copies = []
    for _ in range(N):
        copies.append(copy.deepcopy(module))
    return nn.ModuleList(copies)

def attention(q, k, v, d_k, mask=None, dropout=None):
    """Scaled dot-product attention.

    Args:
        q, k, v: Query, key and value tensors; the last two axes of ``q`` and
            ``k`` are multiplied as ``q @ k^T``.
        d_k: Scores are divided by ``sqrt(d_k)``.
        mask: Optional tensor; after an ``unsqueeze(1)``, positions equal to 0
            have their score replaced by ``-1e9`` before the softmax.
        dropout: Optional callable applied to the attention weights.

    Returns:
        The attention weights multiplied with ``v``.
    """
    logits = torch.matmul(q, k.transpose(-2, -1)) / math.sqrt(d_k)

    if mask is not None:
        # Insert an axis at position 1 so the mask broadcasts over it.
        logits = logits.masked_fill(mask.unsqueeze(1) == 0, -1e9)

    weights = F.softmax(logits, dim=-1)
    if dropout is not None:
        weights = dropout(weights)

    return torch.matmul(weights, v)

class CrossmodalMultiHeadAttention(nn.Module):
    """Multi-head attention whose queries and keys/values have different widths.

    Queries are projected from ``d_model`` features, keys and values from
    ``img_model`` features; all three are projected to ``d_model`` and split
    into ``heads`` heads of size ``d_model // heads``.
    """

    def __init__(self, heads, d_model, img_model=512, dropout = 0.1):
        super().__init__()
        self.d_model = d_model
        self.d_k = d_model // heads
        self.h = heads

        # Creation order is kept (q, v, k, dropout, out) so seeded
        # initialisation and parameter ordering stay the same.
        self.q_linear = nn.Linear(d_model, d_model)
        self.v_linear = nn.Linear(img_model, d_model)
        self.k_linear = nn.Linear(img_model, d_model)
        self.dropout = nn.Dropout(dropout)
        self.out = nn.Linear(d_model, d_model)

    def forward(self, q, k, v, mask=None):
        """Project q/k/v, attend per head, merge the heads and apply the output projection."""
        batch = q.size(0)

        def split_heads(projected):
            # (batch, seq, d_model) -> (batch, heads, seq, d_k)
            return projected.view(batch, -1, self.h, self.d_k).transpose(1, 2)

        keys = split_heads(self.k_linear(k))
        queries = split_heads(self.q_linear(q))
        values = split_heads(self.v_linear(v))

        attended = attention(queries, keys, values, self.d_k, mask, self.dropout)

        # Bring the head axis back next to d_k and flatten both into d_model.
        merged = attended.transpose(1, 2).contiguous().view(batch, -1, self.d_model)
        return self.out(merged)

class FeedForward(nn.Module):
    """Two-layer feed-forward block: Linear -> ReLU -> Dropout -> Linear.

    Args:
        d_model: Input and output feature size.
        d_ff: Hidden feature size (2048 by default).
        dropout: Dropout probability applied after the ReLU.
    """

    def __init__(self, d_model, d_ff=2048, dropout = 0.1):
        super().__init__()
        self.linear_1 = nn.Linear(d_model, d_ff)
        self.dropout = nn.Dropout(dropout)
        self.linear_2 = nn.Linear(d_ff, d_model)

    def forward(self, x):
        """Apply the block to ``x`` and return a tensor with ``d_model`` features."""
        hidden = F.relu(self.linear_1(x))
        return self.linear_2(self.dropout(hidden))

class Norm(nn.Module):
    """Normalisation over the last axis with a learnable scale and shift.

    Computes ``alpha * (x - mean) / (std + eps) + bias`` where ``mean`` and
    ``std`` are taken over the last dimension of ``x``.
    """

    def __init__(self, d_model, eps = 1e-6):
        super().__init__()
        self.size = d_model
        self.alpha = nn.Parameter(torch.ones(self.size))
        self.bias = nn.Parameter(torch.zeros(self.size))
        self.eps = eps

    def forward(self, x):
        """Return ``x`` normalised along its last dimension."""
        centered = x - x.mean(dim=-1, keepdim=True)
        # eps is added to the standard deviation to avoid division by zero.
        spread = x.std(dim=-1, keepdim=True) + self.eps
        return self.alpha * centered / spread + self.bias

class CrossmodalEncoderLayer(nn.Module):
    """Pre-norm layer: text attends over image features, then a feed-forward block.

    Both sub-blocks are wrapped in a residual connection with dropout.
    """

    def __init__(self, d_model, heads, img_model=512, dropout = 0.1):
        super().__init__()
        self.norm_1 = Norm(d_model)
        self.norm_2 = Norm(d_model)
        # The attention and feed-forward blocks keep their own default dropout;
        # `dropout` only configures the two residual dropouts below.
        self.attn = CrossmodalMultiHeadAttention(heads, d_model, img_model=img_model)
        self.ff = FeedForward(d_model)
        self.dropout_1 = nn.Dropout(dropout)
        self.dropout_2 = nn.Dropout(dropout)

    def forward(self, text_feats, img_feats, mask):
        """Fuse ``img_feats`` into ``text_feats``; ``mask`` is accepted but not used."""
        # Text queries attend over the image features (used as keys and values).
        attended = self.attn(self.norm_1(text_feats), img_feats, img_feats)
        hidden = text_feats + self.dropout_1(attended)

        transformed = self.ff(self.norm_2(hidden))
        return hidden + self.dropout_2(transformed)

class CrossmodalEncoder(nn.Module):
    """Stack of ``N`` CrossmodalEncoderLayer clones followed by a final Norm."""

    def __init__(self, d_model, img_model=512, heads=4, N=1, dropout=0.1):
        super().__init__()
        self.N = N
        prototype = CrossmodalEncoderLayer(d_model, heads, img_model=img_model, dropout=dropout)
        self.cme_layers = getClones(prototype, N)
        self.norm = Norm(d_model)

    def forward(self, text_feats, img_feats, mask):
        """Run the text features through every layer, then normalise the result."""
        hidden = text_feats
        for depth in range(self.N):
            layer = self.cme_layers[depth]
            hidden = layer(hidden, img_feats, mask)
        return self.norm(hidden)

class MultimodalBartEncoder(PreTrainedModel):
    """Text encoder plus image encoder fused by a cross-modal encoder.

    The output's ``last_hidden_state`` is the text encoder's hidden states
    concatenated (along the sequence axis) with the cross-modal encoder's
    output, so it is twice as long as the text sequence.
    """

    def __init__(self, bart_encoder, bart_config, image_encoder, img_model=512, N=1, heads=4, dropout=0.1):
        super().__init__(bart_config)
        self.config = bart_config
        self.bart_encoder = bart_encoder
        self.image_encoder = image_encoder
        self.N = N
        self.img_model = img_model
        self.cross_modal_encoder = CrossmodalEncoder(
            self.config.d_model, img_model=img_model, heads=heads, N=N, dropout=dropout
        )

    def forward(
        self,
        input_ids=None,
        attention_mask=None,
        inputs_embeds=None,
        image_features=None,
        output_attentions=None,
        output_hidden_states=None,
        return_dict=None,
    ):
        """Encode text and image, returning the text encoder's output object.

        ``image_features`` is the input handed to the image encoder; the other
        arguments are forwarded to the text encoder.
        """
        if return_dict is None:
            return_dict = self.config.use_return_dict

        # Move the channel axis last, then flatten the two spatial axes into
        # one axis of image regions with `img_model` features each.
        feature_map = self.image_encoder(image_features).permute(0, 2, 3, 1)
        num_regions = feature_map.size(1) * feature_map.size(2)
        region_feats = feature_map.view(-1, num_regions, self.img_model)

        text_outputs = self.bart_encoder(
            input_ids=input_ids,
            attention_mask=attention_mask,
            inputs_embeds=inputs_embeds,
            output_attentions=output_attentions,
            output_hidden_states=output_hidden_states,
            return_dict=return_dict,
        )

        fused = self.cross_modal_encoder(
            text_outputs.last_hidden_state,
            region_feats,
            attention_mask,
        )

        # Append the fused states after the text states along the sequence axis.
        text_outputs.last_hidden_state = torch.cat(
            (text_outputs.last_hidden_state, fused), dim=-2
        )
        return text_outputs

class BartClassificationHead(nn.Module):
    """Head for sentence-level classification tasks.

    Applies dropout -> dense -> tanh -> dropout -> output projection.
    """

    def __init__(
        self,
        input_dim: int,
        inner_dim: int,
        num_classes: int,
        pooler_dropout: float,
    ):
        super().__init__()
        self.dense = nn.Linear(input_dim, inner_dim)
        self.dropout = nn.Dropout(p=pooler_dropout)
        self.out_proj = nn.Linear(inner_dim, num_classes)

    def forward(self, hidden_states: torch.Tensor):
        """Map ``hidden_states`` to ``num_classes`` scores."""
        projected = self.dense(self.dropout(hidden_states))
        activated = self.dropout(torch.tanh(projected))
        return self.out_proj(activated)

class BartForMultimodalSarcasmDetection(BartPretrainedModel):
    """Multimodal encoder followed by a classification head.

    The encoder's (text + cross-modal) hidden states are mean-pooled over the
    sequence axis and fed to a BartClassificationHead.
    """

    def __init__(self, bart_model_encoder, bart_config, image_encoder, num_labels=2, dropout_rate=0.1, img_model=512, N=1, heads=4):
        super().__init__(bart_config)
        self.config = bart_config
        self.encoder = MultimodalBartEncoder(
            bart_model_encoder,
            bart_config,
            image_encoder,
            img_model=img_model,
            N=N,
            heads=heads,
            dropout=dropout_rate,
        )
        hidden_size = self.config.d_model
        self.classification_head = BartClassificationHead(
            hidden_size,
            hidden_size,
            num_labels,
            dropout_rate,
        )
        # Initialise the two linear layers of the head, dense first.
        for head_layer in (self.classification_head.dense, self.classification_head.out_proj):
            self._init_weights(head_layer)

    def get_encoder(self):
        """Return the multimodal encoder."""
        return self.encoder

    def forward(
        self,
        input_ids=None,
        attention_mask=None,
        past_key_values=None,
        inputs_embeds=None,
        image_features = None,
        use_cache=None,
        output_attentions=None,
        output_hidden_states=None,
        return_dict=None
    ):
        """Classify a (text, image) pair.

        ``past_key_values`` and ``use_cache`` are accepted but not used.
        The returned ``loss`` is always ``None``.
        """
        if return_dict is None:
            return_dict = self.config.use_return_dict

        encoded = self.encoder(
            input_ids=input_ids,
            attention_mask=attention_mask,
            inputs_embeds=inputs_embeds,
            image_features=image_features,
            output_attentions=output_attentions,
            output_hidden_states=output_hidden_states,
            return_dict=return_dict,
        )

        # Mean-pool over the sequence axis to get one vector per example.
        pooled = encoded.last_hidden_state.mean(dim=-2)

        return SequenceClassifierOutput(
            loss=None,
            logits=self.classification_head(pooled),
            hidden_states=encoded.last_hidden_state,
            attentions=encoded.attentions,
        )






## PyTorch Lightning model for Multimodal Sarcasm Detection Pre-training

In [8]:
class PyLitModel(pl.LightningModule):
    """Lightning wrapper for training/evaluating a sarcasm-detection model.

    Args:
        model: Classification model; its forward must accept ``input_ids``,
            ``attention_mask``, ``image_features`` and ``use_cache`` and return
            an object with a ``logits`` attribute.
        hparams: Dict of hyper-parameters. This class reads ``freeze_encoder``,
            ``freeze_embeds`` and ``lr``.
    """

    def __init__(self, model, hparams):
        super().__init__()
        self.model = model
        self.hparams.update(hparams)

        if self.hparams['freeze_encoder']:
            freeze_params(self.model.encoder.bart_encoder)

        if self.hparams['freeze_embeds']:
            self.freeze_embeds()

    def freeze_embeds(self):
        ''' freeze the positional embedding parameters of the model; adapted from finetune.py '''
        # The wrapped model may lack a shared embedding table and/or a decoder
        # (the detection model in this file has neither), so only freeze the
        # parts that actually exist instead of raising AttributeError.
        shared = getattr(self.model, 'bart_model_shared', None)
        if shared is not None:
            freeze_params(shared)
        for stack in (self.model.encoder.bart_encoder, getattr(self.model, 'decoder', None)):
            if stack is None:
                continue
            freeze_params(stack.embed_positions)
            freeze_params(stack.embed_tokens)

    def forward(self, input_ids, **kwargs):
        """Delegate to the wrapped model."""
        return self.model(input_ids, **kwargs)

    def configure_optimizers(self):
        """AdamW over the cross-modal encoder and the classification head only."""
        optimizer = torch.optim.AdamW(
            [
                {"params": self.model.encoder.cross_modal_encoder.parameters(), "lr": self.hparams['lr']},
                {"params": self.model.classification_head.parameters(), "lr": self.hparams['lr']},
            ],
        )
        return optimizer

    @staticmethod
    def _micro_f1(class_probs, labels, num_classes=2):
        """Micro-averaged F1 of the argmax predictions against ``labels``.

        Stands in for the ``f1`` helper the original code called without ever
        importing it. NOTE(review): micro averaging is presumed to match that
        helper's default -- confirm against the metric originally intended.
        """
        preds = torch.argmax(class_probs, dim=1)
        targets = labels.view(-1)
        true_pos = torch.zeros((), dtype=torch.long, device=preds.device)
        false_pos = torch.zeros_like(true_pos)
        false_neg = torch.zeros_like(true_pos)
        for cls in range(num_classes):
            predicted = preds == cls
            actual = targets == cls
            true_pos = true_pos + (predicted & actual).sum()
            false_pos = false_pos + (predicted & ~actual).sum()
            false_neg = false_neg + (~predicted & actual).sum()
        denominator = (2 * true_pos + false_pos + false_neg).clamp(min=1)
        return (2 * true_pos).float() / denominator.float()

    def _classification_step(self, batch):
        """Run the model on ``batch`` and return ``(logits, labels, cross-entropy loss)``."""
        # self.device is the device this module lives on, so inputs always end
        # up next to the model's weights.
        src_ids = batch['input_ids'].to(self.device)
        src_mask = batch['attention_mask'].to(self.device)
        image_features = batch['input_image'].to(self.device)
        labels = batch['target_ids'].to(self.device)

        outputs = self(src_ids, attention_mask=src_mask, image_features=image_features, use_cache=False)
        classification_logits = outputs.logits

        ce_loss = torch.nn.CrossEntropyLoss()
        loss = ce_loss(classification_logits.view(-1, classification_logits.shape[-1]), labels.view(-1))
        return classification_logits, labels, loss

    def training_step(self, batch, batch_idx):
        """Compute and log the training cross-entropy loss."""
        _, _, loss = self._classification_step(batch)
        self.log('train_cross_entropy_loss', loss, on_step=True, on_epoch=True, prog_bar=True, logger=True)
        return {'loss': loss}

    def validation_step(self, batch, batch_idx):
        """Compute and log the validation cross-entropy loss and F1 score."""
        classification_logits, labels, val_loss = self._classification_step(batch)

        self.log('val_cross_entropy_loss', val_loss, on_step=True, on_epoch=True, prog_bar=True, logger=True)
        f1_score = self._micro_f1(F.softmax(classification_logits, dim=1), labels, num_classes=2)
        self.log('val_f1_score', f1_score, on_step=True, on_epoch=True, prog_bar=True, logger=True)
        return {'loss': val_loss}

    def predict(self, src_ids, src_mask, input_images):
        """Return the index of the most probable class for each example."""
        src_ids = src_ids.to(self.device)
        src_mask = src_mask.to(self.device)
        input_images = input_images.to(self.device)

        # The wrapped model's forward takes the images as `image_features`.
        outputs = self(src_ids, attention_mask=src_mask, image_features=input_images, use_cache=False)
        classification_logits = outputs.logits
        class_probs = F.softmax(classification_logits, dim=1)
        return torch.argmax(class_probs, dim=1)

## Main Model - ExMore

In [9]:
def shift_tokens_right(input_ids: torch.Tensor, pad_token_id: int):
    """
    Shift input ids one token to the right, and wrap the last non pad token (usually <eos>).

    ``-100`` entries are first replaced by ``pad_token_id``. The input tensor
    is not modified; a shifted copy is returned.
    """
    assert pad_token_id is not None, "self.model.config.pad_token_id has to be defined."

    shifted = input_ids.clone()
    # replace possible -100 values in labels by `pad_token_id`
    shifted[shifted == -100] = pad_token_id

    # Index of the last non-pad token in every row.
    last_token_idx = shifted.ne(pad_token_id).sum(dim=1, keepdim=True) - 1
    wrapped_tokens = shifted.gather(1, last_token_idx).squeeze()

    shifted[:, 1:] = shifted[:, :-1].clone()
    shifted[:, 0] = wrapped_tokens
    return shifted

@dataclass
class Seq2SeqLMOutput(ModelOutput):
    """Output container returned by BartForMultimodalSarcasmExplanation.forward."""
    # Optional loss; the explanation model in this file always passes None.
    loss: Optional[torch.FloatTensor] = None
    # Scores produced by the language-model head.
    logits: torch.FloatTensor = None
    # Optional cached key/value states for decoding.
    past_key_values: Optional[List[torch.FloatTensor]] = None
    # Hidden states / attentions / cross-attentions taken from the decoder outputs.
    decoder_hidden_states: Optional[Tuple[torch.FloatTensor]] = None
    decoder_attentions: Optional[Tuple[torch.FloatTensor]] = None
    cross_attentions: Optional[Tuple[torch.FloatTensor]] = None
    # Encoder last hidden state (text states concatenated with cross-modal states).
    encoder_last_hidden_state: Optional[torch.FloatTensor] = None
    # Hidden states / attentions taken from the encoder outputs.
    encoder_hidden_states: Optional[Tuple[torch.FloatTensor]] = None
    encoder_attentions: Optional[Tuple[torch.FloatTensor]] = None
    

class BartForMultimodalSarcasmExplanation(BartPretrainedModel):
    """Encoder-decoder model generating an explanation from text and image.

    Args:
        multimodal_bart_encoder_TL: Multimodal encoder; its output's
            ``last_hidden_state`` holds the text states followed by the
            cross-modal states.
        bart_decoder: Decoder that cross-attends over the encoder states.
        bart_config: Configuration object (``d_model``, ``pad_token_id``, ...).
        bart_model_num_embs: Output size of the language-model head.
        img_model, N, heads: Accepted for interface compatibility; not used here.
    """

    def __init__(self, multimodal_bart_encoder_TL, bart_decoder, bart_config, bart_model_num_embs, img_model=512, N=1, heads=4):
        super(BartForMultimodalSarcasmExplanation, self).__init__(bart_config)
        self.config = bart_config
        self.encoder = multimodal_bart_encoder_TL
        self.decoder = bart_decoder
        self.lm_head = nn.Linear(self.config.d_model, bart_model_num_embs) #, bias=False)

        self._init_weights(self.lm_head)

    def get_encoder(self):
        """Return the multimodal encoder."""
        return self.encoder

    def get_decoder(self):
        """Return the decoder."""
        return self.decoder

    def prepare_inputs_for_generation(
        self,
        decoder_input_ids, past=None,
        attention_mask=None,
        use_cache=None,
        encoder_outputs=None,
        image_features=None,
        past_key_values=None,
        **kwargs
    ):
        """Build the forward() keyword arguments for one generation step.

        The cache may arrive under either name: ``past`` or ``past_key_values``.
        """
        if past is None:
            past = past_key_values

        # cut decoder_input_ids if past is used
        if past is not None:
            decoder_input_ids = decoder_input_ids[:, -1:]

        return {
            "input_ids": None,  # encoder_outputs is defined. input_ids not needed
            "encoder_outputs": encoder_outputs,
            "image_features": image_features,
            "past_key_values": past,
            "decoder_input_ids": decoder_input_ids,
            "attention_mask": attention_mask,
            "use_cache": use_cache,  # change this to avoid caching (presumably for debugging)
        }

    #def adjust_logits_during_generation(self, logits, cur_len, max_length):
    #    if cur_len == 1 and self.config.force_bos_token_to_be_generated:
    #        self._force_token_id_to_be_generated(logits, self.config.bos_token_id)
    #    elif cur_len == max_length - 1 and self.config.eos_token_id is not None:
    #        self._force_token_id_to_be_generated(logits, self.config.eos_token_id)
    #    return logits

    @staticmethod
    def _force_token_id_to_be_generated(scores, token_id) -> None:
        """force one of token_ids to be generated by setting prob of all other tokens to 0 (logprob=-float("inf"))"""
        scores[:, [x for x in range(scores.shape[1]) if x != token_id]] = -float("inf")

    @staticmethod
    def _reorder_cache(past, beam_idx):
        """Re-index every cached state along axis 0 with ``beam_idx``."""
        reordered_past = ()
        for layer_past in past:
            reordered_past += (tuple(past_state.index_select(0, beam_idx) for past_state in layer_past),)
        return reordered_past

    def forward(
        self,
        input_ids=None,
        attention_mask=None,
        decoder_input_ids=None,
        decoder_attention_mask=None,
        encoder_outputs=None,
        past_key_values=None,
        inputs_embeds=None,
        decoder_inputs_embeds=None,

        image_features = None,

        use_cache=None,
        output_attentions=None,
        output_hidden_states=None,
        return_dict=None
    ):
        """Encode (unless ``encoder_outputs`` is given), decode and project to logits.

        Returns:
            Seq2SeqLMOutput whose ``loss`` is always ``None``; ``logits`` come
            from the language-model head and ``past_key_values`` is the cache
            produced by the decoder.
        """
        use_cache = use_cache if use_cache is not None else self.config.use_cache
        return_dict = return_dict if return_dict is not None else self.config.use_return_dict
        if decoder_input_ids is None and decoder_inputs_embeds is None:
            decoder_input_ids = shift_tokens_right(input_ids, self.config.pad_token_id)

        if encoder_outputs is None:
            encoder_outputs = self.encoder(
                input_ids=input_ids,
                attention_mask=attention_mask,
                inputs_embeds=inputs_embeds,
                image_features=image_features,
                output_attentions=output_attentions,
                output_hidden_states=output_hidden_states,
                return_dict=return_dict,
            )

        # The encoder output is [text states ; cross-modal states], so the text
        # mask is repeated to cover both halves. Without a mask there is
        # nothing to repeat, and the decoder receives no encoder mask.
        if attention_mask is not None:
            enc_attn_mask = torch.cat((attention_mask, attention_mask), dim=-1)
        else:
            enc_attn_mask = None

        decoder_outputs = self.decoder(
            input_ids=decoder_input_ids,
            attention_mask=decoder_attention_mask,
            encoder_hidden_states=encoder_outputs.last_hidden_state,
            encoder_attention_mask=enc_attn_mask,
            past_key_values=past_key_values,
            inputs_embeds=decoder_inputs_embeds,
            use_cache=use_cache,
            output_attentions=output_attentions,
            output_hidden_states=output_hidden_states,
            return_dict=return_dict
        )

        lm_logits = self.lm_head(decoder_outputs.last_hidden_state)

        masked_lm_loss = None
        return Seq2SeqLMOutput(
            loss=masked_lm_loss,
            logits=lm_logits,
            # Return the decoder's updated cache, not the incoming one;
            # otherwise use_cache=True has no effect on later decoding steps.
            past_key_values=decoder_outputs.past_key_values,
            decoder_hidden_states=decoder_outputs.hidden_states,
            decoder_attentions=decoder_outputs.attentions,
            cross_attentions=decoder_outputs.cross_attentions,
            encoder_last_hidden_state=encoder_outputs.last_hidden_state, # also carries crossmodal_encoder_last_hidden_state concatenated.
            encoder_hidden_states=encoder_outputs.hidden_states,
            encoder_attentions=encoder_outputs.attentions,
        )




## PyTorch Lightning - Main Model - ExMore

In [10]:
class PyLitBartForMultimodalSarcasmExplanation(pl.LightningModule):
    """Lightning wrapper for training and running the explanation model.

    Args:
        model: Sequence-to-sequence model; its forward must accept
            ``input_ids``, ``attention_mask``, ``image_features``,
            ``decoder_input_ids`` and ``use_cache`` and return an object with
            a ``logits`` attribute.
        tokenizer: Tokenizer providing ``pad_token_id`` and ``decode``.
        hparams: Dict of hyper-parameters. This class reads
            ``freeze_image_encoder``, ``freeze_encoder``, ``freeze_embeds``,
            ``lr_finetune_cm`` and ``lr``.
    """

    def __init__(self, model, tokenizer, hparams):
        super().__init__()
        self.tokenizer = tokenizer
        self.model = model
        self.hparams.update(hparams)

        if self.hparams['freeze_image_encoder']:
            freeze_params(self.model.encoder.image_encoder)

        if self.hparams['freeze_encoder']:
            freeze_params(self.model.encoder.bart_encoder)

        if self.hparams['freeze_embeds']:
            self.freeze_embeds()

    def freeze_embeds(self):
        ''' freeze the positional embedding parameters of the model; adapted from finetune.py '''
        # The wrapped model may not expose a separate shared embedding table
        # (the explanation model in this file does not), so only freeze it
        # when present instead of raising AttributeError.
        shared = getattr(self.model, 'bart_model_shared', None)
        if shared is not None:
            freeze_params(shared)
        for d in [self.model.encoder.bart_encoder, self.model.decoder]:
            freeze_params(d.embed_positions)
            freeze_params(d.embed_tokens)

    def forward(self, input_ids, **kwargs):
        """Delegate to the wrapped model."""
        return self.model(input_ids, **kwargs)

    def configure_optimizers(self):
        """AdamW over the cross-modal encoder and the LM head, each with its own learning rate."""
        optimizer = torch.optim.AdamW(
          [
              {"params": self.model.encoder.cross_modal_encoder.parameters(), "lr": self.hparams['lr_finetune_cm']},
              {"params": self.model.lm_head.parameters(), "lr": self.hparams['lr']},
          ],
        )
        return optimizer

    def _seq2seq_loss(self, batch):
        """Run the model on ``batch`` and return the token-level cross-entropy loss."""
        # self.device is the device this module lives on, so inputs always end
        # up next to the model's weights.
        src_ids = batch['input_ids'].to(self.device)
        src_mask = batch['attention_mask'].to(self.device)
        image_features = batch['input_image'].to(self.device)
        tgt_ids = batch['target_ids'].to(self.device)

        # Use the tokenizer given to this module, not a notebook-level global.
        pad_token_id = self.tokenizer.pad_token_id

        # Shift the decoder tokens right (but NOT the tgt_ids)
        decoder_input_ids = shift_tokens_right(tgt_ids, pad_token_id)

        # Run the model and get the logits
        outputs = self(src_ids, attention_mask=src_mask, image_features=image_features, decoder_input_ids=decoder_input_ids, use_cache=False)
        lm_logits = outputs.logits

        # Padding positions of the target do not contribute to the loss.
        ce_loss_fct = torch.nn.CrossEntropyLoss(ignore_index=pad_token_id)

        # Calculate the loss on the un-shifted tokens
        return ce_loss_fct(lm_logits.view(-1, lm_logits.shape[-1]), tgt_ids.view(-1))

    def training_step(self, batch, batch_idx):
        """Compute and log the training loss."""
        loss = self._seq2seq_loss(batch)
        self.log('train_loss', loss, on_step=True, on_epoch=True, prog_bar=True, logger=True)
        return {'loss': loss}

    def validation_step(self, batch, batch_idx):
        """Compute and log the validation loss."""
        val_loss = self._seq2seq_loss(batch)
        self.log('val_loss', val_loss, on_step=True, on_epoch=True, prog_bar=True, logger=True)
        return {'loss': val_loss}

    # This method generates text through the wrapped model's generate() method
    def generate_text(self, text, eval_beams, image_features=None, early_stopping = True, max_len = 40):
        ''' Function to generate text

        ``text`` must provide ``"input_ids"`` and ``"attention_mask"``. Inputs
        are passed on as given (no device transfer happens here). Returns a
        list with one decoded string per generated sequence.
        '''

        model_kwargs = {
            "image_features": image_features
        }
        generated_ids = self.model.generate(
            text["input_ids"],
            attention_mask=text["attention_mask"],
            use_cache=True,
            decoder_start_token_id = self.tokenizer.pad_token_id,
            num_beams= eval_beams,
            max_length = max_len,
            early_stopping = early_stopping,
            **model_kwargs,
        )
        return [self.tokenizer.decode(w, skip_special_tokens=True, clean_up_tokenization_spaces=True) for w in generated_ids]

def freeze_params(model):
    ''' This function takes a model or its subset as input and freezes the layers for faster training
      adapted from finetune.py

      Every parameter yielded by ``model.parameters()`` gets
      ``requires_grad = False`` so that no gradient is computed for it.
    '''
    for param in model.parameters():
        # The attribute is `requires_grad`; a misspelt name only creates an
        # unused attribute and leaves the parameter trainable.
        param.requires_grad = False

## Load Model

In [11]:
def load_image_encoder():
    """Return the first child module of a pretrained torchvision VGG-19."""
    pretrained_vgg = models.vgg19(pretrained=True)
    # children() yields the sub-modules in registration order; take the first.
    return next(iter(pretrained_vgg.children()))

# Preprocessing applied to every image array in MSEDataset.__getitem__:
# convert to a tensor, resize to 256, centre-crop to 224, then normalise each
# channel with the mean/std below (presumably the ImageNet statistics the
# pretrained VGG-19 expects -- confirm).
image_transform = transforms.Compose([
    transforms.ToTensor(),                               
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
  ])

In [12]:
# Tokenizer for 'facebook/bart-base'.
# NOTE(review): `attn_implementation` looks like a model option; confirm it has any effect on a tokenizer.
tokenizer = BartTokenizer.from_pretrained('facebook/bart-base', add_prefix_space=True, attn_implementation="eager")

# Pretrained BART model; its encoder, decoder and shared embeddings are reused further down.
bart_model = BartModel.from_pretrained('facebook/bart-base', attn_implementation="eager")

# Matching configuration, passed to the custom model classes.
bart_config = BartConfig.from_pretrained("facebook/bart-base", return_dict=True, attn_implementation="eager")


vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

config.json:   0%|          | 0.00/1.72k [00:00<?, ?B/s]



model.safetensors:   0%|          | 0.00/558M [00:00<?, ?B/s]

In [13]:
# Image encoder taken from the pretrained VGG-19 (weights are downloaded on first use).
image_encoder = load_image_encoder()

Downloading: "https://download.pytorch.org/models/vgg19-dcbb9e9d.pth" to /root/.cache/torch/hub/checkpoints/vgg19-dcbb9e9d.pth
100%|██████████| 548M/548M [00:02<00:00, 212MB/s]  


In [17]:
# Hyper-parameters shared by both Lightning wrappers (each reads the keys it needs).
hparams = {
    'freeze_encoder': False,  # when True, freeze_params is applied to the text encoder
    'freeze_embeds': False,  # when True, the wrappers call freeze_embeds()
    'freeze_image_encoder': True,  # when True, freeze_params is applied to the image encoder
    'eval_beams': 4,
    'lr_finetune_cm':1e-5, #for crossmodal encoder
    'lr': 3e-4, #for lm_head
}

In [18]:
# Sarcasm-detection model built on the pretrained BART encoder and the image encoder.
bart_model_for_msd = BartForMultimodalSarcasmDetection(
    bart_model.get_encoder(), 
    bart_config, 
    image_encoder, 
    num_labels=2,
    dropout_rate=0.1,
    img_model=512,
    N=1,
    heads=4,
)


In [20]:
# Checkpoint of the sarcasm-detection pre-training stage.
msd_checkpoint_path = '/kaggle/input/checkpooijtttt/MSD_pretrained_model.ckpt'
# Restore the Lightning wrapper; `model` and `hparams` are handed to PyLitModel's constructor.
pylit_bart_model_for_msd = PyLitModel.load_from_checkpoint(checkpoint_path=msd_checkpoint_path, 
                                      model = bart_model_for_msd, 
                                      hparams = hparams)

In [22]:
# Reuse the multimodal encoder of the restored detection model.
multimodal_bart_encoder_TL = pylit_bart_model_for_msd.model.get_encoder()
# Decoder of the pretrained BART model.
bart_decoder = bart_model.get_decoder()

# Number of rows of BART's shared embedding table; used as the LM head's output size.
bart_model_num_embs = bart_model.shared.num_embeddings

In [23]:
# Explanation model: restored multimodal encoder + BART decoder + a new LM head.
bart_for_mse = BartForMultimodalSarcasmExplanation(multimodal_bart_encoder_TL, 
                                            bart_decoder, bart_config, 
                                            bart_model_num_embs, img_model=512, N=1, heads=4)

In [24]:
# Build the data module that serves the train / validation / test splits

mse_data = MSEDataModule(path_to_train, path_to_val, 
                         path_to_test, path_to_images, 
                         tokenizer, image_transform, batch_size=16)

In [25]:
# Wrap the explanation model for training from its current weights.
# To resume from a saved checkpoint instead, use the commented-out call below.

main_model = PyLitBartForMultimodalSarcasmExplanation(tokenizer = tokenizer, model = bart_for_mse, hparams = hparams)

# model = PyLitBartForMultimodalSarcasmExplanation.load_from_checkpoint(checkpoint_path="ckpt path",
                                    #   tokenizer = tokenizer, model = bart_for_mse, hparams = hparams)

# Training the model with PyTorch Lightning

In [26]:
import os
import pytorch_lightning as pl
from pytorch_lightning.callbacks import ModelCheckpoint
from pytorch_lightning.loggers import TensorBoardLogger  # Correct import

# Checkpoints are written to <path_to_save_model>/model_dir; the 15 with the
# lowest `val_loss` are kept.
ckpt_dir = os.path.join(path_to_save_model, 'model_dir')
checkpoint = ModelCheckpoint(
    filename='{epoch}-{val_loss:.3f}',
    monitor='val_loss',
    mode='min',
    save_top_k=15,
    dirpath=ckpt_dir,
)

# TensorBoard logs are written to <ckpt_dir>/logs/.
tb_logger = TensorBoardLogger(save_dir=os.path.join(ckpt_dir, 'logs/'))

# The checkpoint callback goes in through `callbacks`. The former `gpus`,
# `auto_lr_find` and `progress_bar_refresh_rate` arguments stay disabled.
trainer = pl.Trainer(
    callbacks=[checkpoint],
    logger=tb_logger,
    min_epochs=5,
    max_epochs=125,
)


In [54]:
# import os
# import shutil

# # Path to the model directory
# model_dir = "/kaggle/working/model_dir"

# # Loop through the files in the directory
# for file_name in os.listdir(model_dir):
#     if file_name.endswith(".ckpt"):  # Check if the file is a checkpoint file
#         file_path = os.path.join(model_dir, file_name)
#         os.remove(file_path)  # Remove the checkpoint file

# print("Checkpoint files removed. Logs are intact.")


Checkpoint files removed. Logs are intact.


In [28]:
# Fit the instantiated model (`main_model`) to the data module (`mse_data`).
trainer.fit(main_model, mse_data)


In [24]:
# Manually save a final checkpoint to <ckpt_dir>/last_epoch_125.ckpt. This is in
# addition to the checkpoints the ModelCheckpoint callback writes during training.
trainer.save_checkpoint(os.path.join(ckpt_dir,"last_epoch_125.ckpt"))

## Predict

In [29]:
# Replace `main_model` with the weights stored in the final checkpoint.
# strict=False is passed through to load_from_checkpoint unchanged.
ckpt_path = '/kaggle/input/finalcheckpoint/ExMore_model.ckpt'
main_model = PyLitBartForMultimodalSarcasmExplanation.load_from_checkpoint(
    checkpoint_path=ckpt_path,
    strict=False,
    hparams=hparams,
    model=bart_for_mse,
    tokenizer=tokenizer,
)

/opt/conda/lib/python3.10/site-packages/pytorch_lightning/utilities/migration/migration.py:208: You have multiple `ModelCheckpoint` callback states in this checkpoint, but we found state keys that would end up colliding with each other after an upgrade, which means we can't differentiate which of your checkpoint callbacks needs which states. At least one of your `ModelCheckpoint` callbacks will not be able to reload the state.


In [30]:
# Read the header-less test TSV and split it into parallel Python lists.
# (Point read_csv at `path_to_train` instead to run the same cells on the train split.)
test = pd.read_csv(path_to_test, header=None, sep='\t')
test.columns = ['pid', 'source', 'target']

pids, source, target = (test[column].tolist() for column in ('pid', 'source', 'target'))

In [31]:
# Move the model to the selected device and switch it to evaluation mode.
main_model.to(device)
main_model.eval()
# Ends the cell with a print so that the return value of `eval()` is not the
# cell's last expression (presumably to avoid echoing the model repr).
print("")




In [34]:
from tqdm import tqdm

eval_beams = 4
pred = []

# Inference only: disable autograd so no computation graph is kept alive while
# encoding and generating.
with torch.no_grad():
    # Wrap zip(pids, source) with tqdm for a progress bar
    for pid_i, src in tqdm(zip(pids, source), total=len(pids), desc="Processing", leave=False):
        # Tokenize the source text the same way as the dataset class does
        # (fixed length 256, padded / truncated).
        encoded_dict = tokenizer(
            src,
            max_length=256,
            padding="max_length",
            truncation=True,
            return_tensors='pt',
            add_prefix_space=True
        )
        encoded_dict['input_ids'] = encoded_dict['input_ids'].to(device)
        encoded_dict['attention_mask'] = encoded_dict['attention_mask'].to(device)

        # Image files are named "<pid>.jpg"; pids may have been parsed as integers.
        image_path = os.path.join(path_to_images, str(pid_i) + '.jpg')

        # Open the image in a context manager so the file handle is released
        # as soon as the pixels have been copied into the array.
        with Image.open(image_path) as image_file:
            img = np.array(image_file.convert('RGB'))
        img_feats = image_transform(img).unsqueeze(0)  # add a leading batch axis

        gen = main_model.generate_text(
            encoded_dict,
            eval_beams,
            image_features=img_feats.to(device),
            early_stopping=True,
            max_len=256
        )

        # Only the first generated sequence is kept per example.
        pred.append(gen[0])



Processing:   0%|          | 0/352 [00:00<?, ?it/s][A
Processing:   0%|          | 1/352 [00:00<02:05,  2.79it/s][A
Processing:   1%|          | 2/352 [00:00<01:45,  3.31it/s][A
Processing:   1%|          | 3/352 [00:00<01:51,  3.13it/s][A
Processing:   1%|          | 4/352 [00:01<02:15,  2.58it/s][A
Processing:   1%|▏         | 5/352 [00:01<02:05,  2.77it/s][A
Processing:   2%|▏         | 6/352 [00:02<01:52,  3.06it/s][A
Processing:   2%|▏         | 7/352 [00:02<02:09,  2.67it/s][A
Processing:   2%|▏         | 8/352 [00:02<01:55,  2.98it/s][A
Processing:   3%|▎         | 9/352 [00:03<01:54,  3.00it/s][A
Processing:   3%|▎         | 10/352 [00:03<02:09,  2.65it/s][A
Processing:   3%|▎         | 11/352 [00:03<01:56,  2.94it/s][A
Processing:   3%|▎         | 12/352 [00:04<01:58,  2.86it/s][A
Processing:   4%|▎         | 13/352 [00:04<01:52,  3.01it/s][A
Processing:   4%|▍         | 14/352 [00:04<01:45,  3.22it/s][A
Processing:   4%|▍         | 15/352 [00:05<01:46,  3.16it

## Evaluate

In [36]:
# Full table (pid, source, target, prediction) under integer column labels 0..3;
# this is the frame that gets written to disk.
predictions_1 = pd.DataFrame(dict(enumerate((pids, source, target, pred))))

# Two-column view (target, prediction) under labels 0 and 1, shown as the cell output.
predictions = pd.DataFrame(dict(enumerate((target, pred))))
predictions

Unnamed: 0,0,1
0,the author hates the design of this convention...,the author is pissed at <user> for having a g...
1,the author hates working late from home.,the author hates having to work late from home.
2,"your anxiety is not cured when someone says ""d...",the author is pissed at <user> for not fixing...
3,the author is pissed to watch a full train lea...,the author is pissed at <user> for having to ...
4,the author doesn't find such notifications fro...,the author is pissed at <user> for not gettin...
...,...,...
347,it isn't a cool week if it's 100 degrees.,it's very annoying when you're being sarcastic.
348,"she's exactly like her dad, both are making si...",the author had fun with his mom.
349,the author is disappointed with this eclipse s...,this eclipse isn't even a good idea.
350,<user> app radar isn't right on target.,the author's disappointed with the app.


In [38]:
# Write the full prediction table as a tab-separated file with no header row
# and no index column.
path_to_predictions = 'final_outputs'
predictions_1.to_csv(
    path_to_predictions,
    sep='\t',
    header=False,
    index=False,
)

In [43]:
# !pip install rouge-score
# !pip install bert-score
!pip install sentence-transformers

Collecting sentence-transformers
  Downloading sentence_transformers-3.1.1-py3-none-any.whl.metadata (10 kB)
Downloading sentence_transformers-3.1.1-py3-none-any.whl (245 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m245.3/245.3 kB[0m [31m5.1 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
[?25hInstalling collected packages: sentence-transformers
Successfully installed sentence-transformers-3.1.1


In [44]:
import nltk
from nltk.translate.bleu_score import sentence_bleu
from rouge_score import rouge_scorer
import pandas as pd
from bert_score import score
import torch
from sentence_transformers import SentenceTransformer
from scipy import spatial

nltk.download('wordnet')

[nltk_data] Downloading package wordnet to /usr/share/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


True

In [45]:
# Rebinds the global `device` to the CPU for the cells that follow.
# NOTE(review): re-running the inference cells after this one will also use the CPU.
device = torch.device('cpu')

In [46]:
# Load the 'bert-base-nli-mean-tokens' SentenceTransformer and move it to `device`.
# NOTE(review): no later cell visible in this part of the notebook references
# `sentence_transformer_model` — confirm it is still needed.
sentence_transformer_model = SentenceTransformer('bert-base-nli-mean-tokens')
sentence_transformer_model.to(device)

modules.json:   0%|          | 0.00/229 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/122 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/3.99k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/625 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/438M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/399 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

added_tokens.json:   0%|          | 0.00/2.00 [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]



1_Pooling/config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

SentenceTransformer(
  (0): Transformer({'max_seq_length': 128, 'do_lower_case': False}) with Transformer model: BertModel 
  (1): Pooling({'word_embedding_dimension': 768, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
)

In [None]:
# Optional: evaluate a prediction file saved by an earlier run. The file is
# expected to be a header-less TSV with the columns pid, source, reference,
# hypothesis. Leave `path_to_preds` empty to skip this cell; previously the
# empty path was passed straight to read_csv, which raises.
path_to_preds = ""
if path_to_preds:
    predictions = pd.read_csv(path_to_preds, sep="\t", header=None)
    predictions.columns = ['pid', 'source', 'reference', 'hypothesis']
    y_true = predictions.reference.tolist()
    y_pred = predictions.hypothesis.tolist()

In [48]:
test_ocr_df

Unnamed: 0,0,1,2,3
0,685491413409112065,'nothing better than # design of convention ce...,the author hates the design of this convention...,the author is pissed at <user> for having a g...
1,700183392969756672,'oh i so love working late from home # work #...,the author hates working late from home.,the author hates having to work late from home.
2,928753954745475072,'yeaah ! buddy o miracle worker # infj emoji_...,"your anxiety is not cured when someone says ""d...",the author is pissed at <user> for not fixing...
3,935133439011049473,'rt <user> : something different ..... a delay...,the author is pissed to watch a full train lea...,the author is pissed at <user> for having to ...
4,933466049697198080,'oh really linkedin ? thanks for the super use...,the author doesn't find such notifications fro...,the author is pissed at <user> for not gettin...
...,...,...,...,...
347,1011850445043480900_1580447253,âï¸âï¸âï¸âï¸âï¸âï¸âï¸âï...,it isn't a cool week if it's 100 degrees.,it's very annoying when you're being sarcastic.
348,636237012294327831_256939246,She's nothing like her dad... ;) #lovethem #fa...,"she's exactly like her dad, both are making si...",the author had fun with his mom.
349,899685897251069952,'this eclipse is even cooler than i thought it...,the author is disappointed with this eclipse s...,this eclipse isn't even a good idea.
350,878368201221914624,'<user> app radar is definitely right on targe...,<user> app radar isn't right on target.,the author's disappointed with the app.


In [52]:
predictions

Unnamed: 0,0,1
0,the author hates the design of this convention...,the author is pissed at <user> for having a g...
1,the author hates working late from home.,the author hates having to work late from home.
2,"your anxiety is not cured when someone says ""d...",the author is pissed at <user> for not fixing...
3,the author is pissed to watch a full train lea...,the author is pissed at <user> for having to ...
4,the author doesn't find such notifications fro...,the author is pissed at <user> for not gettin...
...,...,...
347,it isn't a cool week if it's 100 degrees.,it's very annoying when you're being sarcastic.
348,"she's exactly like her dad, both are making si...",the author had fun with his mom.
349,the author is disappointed with this eclipse s...,this eclipse isn't even a good idea.
350,<user> app radar isn't right on target.,the author's disappointed with the app.


In [79]:
# Reload the prediction table saved earlier in this notebook. That file is
# written without a header, in the column order pid, source, target
# (reference), prediction (hypothesis), so the labels below follow that order.
# NOTE(review): assumes '/kaggle/working' is the notebook's working directory,
# i.e. that this is the same 'final_outputs' file written above — confirm.
path_to_test_ocr_df = '/kaggle/working/final_outputs'
test_ocr_df = pd.read_csv(
    path_to_test_ocr_df,
    sep="\t",
    header=None,
    dtype=str,              # keep pids and texts as plain strings
    keep_default_na=False,  # an empty or "NA"-like text must stay a string, not NaN
)
test_ocr_df.columns = ['pid', 'source', 'reference', 'hypothesis']

# predictions_ocr = predictions[predictions['pid'].isin(test_ocr_df['pid'])]

# Build the (reference, hypothesis) table from the file that was just read
# rather than relabelling whatever `predictions` happens to hold in the kernel.
predictions = test_ocr_df[['reference', 'hypothesis']].copy()

# Convert the 'reference' and 'hypothesis' columns to lists directly
y_true = predictions['reference'].tolist()
y_pred = predictions['hypothesis'].tolist()

In [None]:
#predictions_ocr.to_csv("path to save predictions_test_ocr.tsv",
#                      sep='\t', index=False, header=False)

In [78]:
# path_to_test_non_ocr_df = 'Enter path to the test_set_non_ocr.tsv file to compute evaluation scores'
# test_non_ocr_df = pd.read_csv(path_to_test_non_ocr_df, sep="\t", header=None)
# test_non_ocr_df.columns = ['pid', 'source', 'reference']

# predictions_non_ocr = predictions[predictions['pid'].isin(test_non_ocr_df['pid'])]
# y_true = predictions_non_ocr.reference.tolist()
# y_pred = predictions_non_ocr.hypothesis.tolist()

In [72]:
# y_pred

In [None]:
#predictions_non_ocr.to_csv("path to save predictions_test_non_ocr.tsv",
#                          sep='\t', index=False, header=False)

In [81]:
# import nltk
# from nltk.translate.bleu_score import sentence_bleu
# from rouge import Rouge
# from nltk.translate.meteor_score import meteor_score
# from bert_score import score
# from nltk.tokenize import word_tokenize

# # Ensure you have the necessary NLTK data
# nltk.download('punkt')
# nltk.download('wordnet')

# def preprocess(text):
#     return word_tokenize(text.lower())

# def calculate_metrics(y_true, y_pred):
#     bleu_scores = []
#     rouge_scores = []
#     meteor_scores = []
#     bert_scores = {'precision': [], 'recall': [], 'f1': []}
    
#     rouge = Rouge()
    
#     for true, pred in zip(y_true, y_pred):
#         # Preprocess the texts
#         true_tokens = preprocess(true)
#         pred_tokens = preprocess(pred)
        
#         # BLEU
#         bleu_scores.append(sentence_bleu([true_tokens], pred_tokens))
        
#         # ROUGE
#         rouge_score = rouge.get_scores(pred, true)[0]
#         rouge_scores.append(rouge_score['rouge-l']['f'])
        
#         # METEOR
#         meteor_scores.append(meteor_score([true_tokens], pred_tokens))
        
#         # BERTScore
#         p, r, f1 = score([pred], [true], lang="en", verbose=False)
#         bert_scores['precision'].append(p.item())
#         bert_scores['recall'].append(r.item())
#         bert_scores['f1'].append(f1.item())
    
#     return {
#         'BLEU': sum(bleu_scores) / len(bleu_scores),
#         'ROUGE-L': sum(rouge_scores) / len(rouge_scores),
#         'METEOR': sum(meteor_scores) / len(meteor_scores),
#         'BERTScore': {
#             'Precision': sum(bert_scores['precision']) / len(bert_scores['precision']),
#             'Recall': sum(bert_scores['recall']) / len(bert_scores['recall']),
#             'F1': sum(bert_scores['f1']) / len(bert_scores['f1'])
#         }
#     }



# results = calculate_metrics(y_true, y_pred)
# print(results)

In [85]:
# !pip install sacrebleu
# !!pip install -U nltk
# !pip install bert-score
# !pip install rouge
# !pip install evaluate

In [87]:
import evaluate
from nltk.tokenize import sent_tokenize

In [89]:
# Load the ROUGE metric through `evaluate`; `compute_rouge_score` below reads
# this module-level object.
rouge_score = evaluate.load("rouge")

Downloading builder script:   0%|          | 0.00/6.27k [00:00<?, ?B/s]

In [100]:
from tqdm import tqdm
from statistics import mean

def compute_rouge_score(generated, reference):
    """Compute ROUGE for parallel lists of generated and reference texts.

    Each text is stripped, split into sentences with `sent_tokenize`, and
    re-joined with one sentence per line before being passed to the
    module-level `rouge_score` metric (stemming enabled).

    Args:
        generated: iterable of generated strings.
        reference: iterable of reference strings, aligned with `generated`.

    Returns:
        Whatever `rouge_score.compute` returns for these inputs.
    """
    def _one_sentence_per_line(texts):
        return ["\n".join(sent_tokenize(text.strip())) for text in texts]

    newline_split_generated = _one_sentence_per_line(generated)
    newline_split_reference = _one_sentence_per_line(reference)

    return rouge_score.compute(
        predictions=newline_split_generated,
        references=newline_split_reference,
        use_stemmer=True,
    )

# Per-example ROUGE scores, averaged at the end.
rouge_1_scores = []
rouge_2_scores = []
rouge_l_scores = []

total = len(y_pred)  # Total number of predictions

# Hypotheses and references are paired by position, so the lists must line up.
assert len(y_true) == total, "y_true and y_pred must have the same length"

for hypothesis_text, reference_text in tqdm(
    zip(y_pred, y_true),
    total=total,
    position=0,
    leave=True,
    desc="Computing ROUGE scores",
):
    # Named `pair_rouge` rather than `score`, so the `score` function imported
    # from bert_score earlier in the notebook is not overwritten.
    pair_rouge = compute_rouge_score([hypothesis_text], [reference_text])

    # Each entry is expected to be a single float for this one pair.
    rouge_1_scores.append(pair_rouge['rouge1'])
    rouge_2_scores.append(pair_rouge['rouge2'])
    rouge_l_scores.append(pair_rouge['rougeL'])

# Compute the average of each ROUGE score
avg_rouge_1 = mean(rouge_1_scores)
avg_rouge_2 = mean(rouge_2_scores)
avg_rouge_l = mean(rouge_l_scores)

# Print the final average ROUGE scores
print(f"Average ROUGE-1 F1 Score: {avg_rouge_1:.4f}")
print(f"Average ROUGE-2 F1 Score: {avg_rouge_2:.4f}")
print(f"Average ROUGE-L F1 Score: {avg_rouge_l:.4f}")


Computing ROUGE scores: 100%|██████████| 352/352 [01:08<00:00,  5.11it/s]

Average ROUGE-1 F1 Score: 0.2717
Average ROUGE-2 F1 Score: 0.1216
Average ROUGE-L F1 Score: 0.2466





In [101]:
import evaluate
from tqdm import tqdm
from statistics import mean

# METEOR via the `evaluate` library.
meteor = evaluate.load('meteor')

# From here on `predictions` / `references` are plain lists of strings.
predictions = y_pred
references = y_true

total = len(predictions)  # number of (prediction, reference) pairs

# Score every pair on its own and collect the per-example METEOR values.
meteor_scores = [
    meteor.compute(predictions=[predictions[idx]], references=[references[idx]])['meteor']
    for idx in tqdm(range(total), total=total, position=0, leave=True, desc="Computing METEOR scores")
]

# Report the unweighted mean over all examples.
avg_meteor = mean(meteor_scores)
print(f"Average METEOR Score: {avg_meteor:.4f}")


Downloading builder script:   0%|          | 0.00/7.02k [00:00<?, ?B/s]

[nltk_data] Downloading package wordnet to /usr/share/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package punkt_tab to /usr/share/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt_tab.zip.
[nltk_data] Downloading package omw-1.4 to /usr/share/nltk_data...
[nltk_data]   Package omw-1.4 is already up-to-date!


Computing METEOR scores: 100%|██████████| 352/352 [00:04<00:00, 75.32it/s] 

Average METEOR Score: 0.2685





In [102]:
import evaluate
from tqdm import tqdm
from statistics import mean

# BLEU via the `evaluate` library.
bleu = evaluate.load("bleu")

total = len(predictions)  # number of (prediction, reference) pairs

# Score every pair on its own and collect the per-example BLEU values.
# NOTE(review): this averages sentence-level BLEU computed with the metric's
# default settings; a corpus-level or smoothed variant may be what is
# intended — confirm before reporting.
bleu_scores = [
    bleu.compute(predictions=[predictions[idx]], references=[references[idx]])['bleu']
    for idx in tqdm(range(total), total=total, position=0, leave=True, desc="Computing BLEU scores")
]

# Report the unweighted mean over all examples.
avg_bleu = mean(bleu_scores)
print(f"Average BLEU Score: {avg_bleu:.4f}")


Downloading builder script:   0%|          | 0.00/5.94k [00:00<?, ?B/s]

Downloading extra modules:   0%|          | 0.00/1.55k [00:00<?, ?B/s]

Downloading extra modules:   0%|          | 0.00/3.34k [00:00<?, ?B/s]

Computing BLEU scores: 100%|██████████| 352/352 [00:01<00:00, 202.14it/s]

Average BLEU Score: 0.0541





In [107]:
from evaluate import load
from tqdm import tqdm
from statistics import mean

# BERTScore via the `evaluate` library.
bertscore = load("bertscore")

total = len(predictions)  # number of (prediction, reference) pairs

# Per-example BERTScore F1, each pair scored on its own with
# distilbert-base-uncased.
f1_scores = []
for idx in tqdm(range(total), total=total, position=0, leave=True, desc="Computing BERTScores"):
    pair_result = bertscore.compute(
        predictions=[predictions[idx]],
        references=[references[idx]],
        model_type="distilbert-base-uncased",
    )
    # 'f1' holds one entry per input pair; exactly one pair was passed.
    f1_scores.append(pair_result['f1'][0])

# Report the unweighted mean over all examples.
avg_f1 = mean(f1_scores)
print(f"Average BERT F1 Score: {avg_f1:.4f}")


Downloading builder script:   0%|          | 0.00/7.95k [00:00<?, ?B/s]

Computing BERTScores:   0%|          | 0/352 [00:00<?, ?it/s]

tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/483 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]



model.safetensors:   0%|          | 0.00/268M [00:00<?, ?B/s]

Computing BERTScores: 100%|██████████| 352/352 [00:12<00:00, 28.64it/s]

Average BERT F1 Score: 0.7831





In [105]:
print(predictions[:3])
print(references[:3])

[' the author is pissed at <user> for having a good design.', ' the author hates having to work late from home.', ' the author is pissed at <user> for not fixing their half empty glass.']
['the author hates the design of this convention center, it makes him dizzy.', 'the author hates working late from home.', 'your anxiety is not cured when someone says "don\'t be anxious".']
