# Imports

In [None]:
# Colab setup: install Hugging Face transformers, which provides the DistilBERT model and
# tokenizer imported below.
# NOTE(review): the version is unpinned, so a fresh runtime may install a different release
# than the one this notebook was developed against — consider pinning.
!pip install transformers
# # !pip install torch==1.5.0 
# !pip install torchvision==0.4.0 -f https://download.pytorch.org/whl/torch_stable.html



In [None]:
# Mount Google Drive at /gdrive; every data and checkpoint path used below lives under it.
from google.colab import drive
drive.mount('/gdrive')

Drive already mounted at /gdrive; to attempt to forcibly remount, call drive.mount("/gdrive", force_remount=True).


In [None]:
import pandas as pd
import pickle
import random
import torch
from torch import nn
from torch.utils.data import DataLoader, Dataset
import os
from enum import Enum
from torch.nn import functional as F
import time
import logging
import numpy as np


from transformers import DistilBertTokenizerFast, DistilBertModel

logger = logging.getLogger(__name__)

# Seed every RNG the notebook relies on. Python's `random` alone is not enough:
# DataLoader shuffling and layer initialisation draw from torch's generator, and
# NumPy keeps its own global state.
SEED = 13
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)

# Utilities

In [None]:
def pytorch_cos_sim(a: torch.as_tensor, b: torch.as_tensor):
    """
    Pairwise cosine similarity between every row of ``a`` and every row of ``b``.

    Inputs that are not tensors are converted with ``torch.as_tensor``; 1-D inputs
    are treated as a single row.

    :param a: tensor (or array-like) of shape (n, d) or (d,)
    :param b: tensor (or array-like) of shape (m, d) or (d,)
    :return: Matrix with res[i][j]  = cos_sim(a[i], b[j])
    """
    def _as_matrix(value):
        # Convert to a tensor if needed, then promote a vector to a one-row matrix.
        tensor = value if isinstance(value, torch.Tensor) else torch.as_tensor(value)
        return tensor.unsqueeze(0) if len(tensor.shape) == 1 else tensor

    lhs = _as_matrix(a)
    rhs = _as_matrix(b)

    # After L2-normalising the rows, a plain matrix product yields the cosines.
    lhs_unit = torch.nn.functional.normalize(lhs, p=2, dim=1)
    rhs_unit = torch.nn.functional.normalize(rhs, p=2, dim=1)
    return torch.mm(lhs_unit, rhs_unit.transpose(0, 1))

In [None]:
def print_full(x):
    """Print a pandas object in full (all rows/columns, untruncated cell text).

    The display options are changed only for the duration of the print via
    ``pd.option_context``, so whatever options the caller had set beforehand are
    restored afterwards — also when printing raises. (Previously the options were
    reset to pandas' defaults, discarding any customisation made earlier.)

    :param x: DataFrame or Series to print; ``len(x)`` sets the row limit.
    """
    with pd.option_context(
        'display.max_rows', len(x),
        'display.max_columns', None,
        'display.width', 2000,
        'display.float_format', '{:20,.2f}'.format,
        'display.max_colwidth', None,
    ):
        print(x)

# Dataset and Dataloader

### Config

In [None]:
# Locations on the mounted Drive: the text split pickles, the multimodal project root
# (splits and checkpoints), and the directory holding the attachment images.
folder_quora = '/gdrive/MyDrive/quora_android_2'
folder = '/gdrive/MyDrive/MultiModal'
img_dir = '/gdrive/MyDrive/MultiModal/data'
# Per-step batch size used by the DataLoaders; the module-level attention mask is also
# sized with it, so it must match the batch actually fed to the model.
BATCH_SIZE = 1
# Effective batch size: train_model accumulates gradients over fin_BATCH_SIZE/BATCH_SIZE steps.
fin_BATCH_SIZE = 32
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(device)

cuda


### Loading Data

In [None]:
# The file holds three objects pickled back to back, read in order: train, dev, test.
# They are used as pandas DataFrames further down (.at/.loc lookups, pd.concat).
# NOTE(review): pickle.load executes arbitrary code — only load files from a trusted source.
with open(folder_quora+'/data/splits/pandas_split.txt','rb') as a:
    train_qr=pickle.load(a)
    dev_qr = pickle.load(a)
    test_qr = pickle.load(a)

In [None]:
# Six objects pickled back to back: positive and negative pair lists for train, dev and test.
# Each entry is indexed as pair[0], pair[1] by custom_dataset.__getitem__.
# NOTE(review): pickle.load executes arbitrary code — only load files from a trusted source.
with open(folder+'/data/splits/data_pos_neg.txt','rb') as a:
    train_data_pos = pickle.load(a)
    train_data_neg = pickle.load(a)
    dev_data_pos = pickle.load(a)
    dev_data_neg = pickle.load(a)
    test_data_pos = pickle.load(a)
    test_data_neg = pickle.load(a)

In [None]:
# Relatedness-score table; custom_dataset reads its 'Related_Scores' and 'New_Related' columns.
# It must be loaded before the custom_dataset class is defined, because the class uses
# `score_qr` as a default argument value.
# NOTE(review): pickle.load executes arbitrary code — only load files from a trusted source.
with open('/gdrive/MyDrive/sbert_supervised(for_data_only)/android_nosw_rearranged.txt','rb') as a:
    score_qr = pickle.load(a)

### Creating Dataset and Dataloader class 

In [None]:
# !pip install sentence-transformers

In [None]:
# from sentence_transformers import models, losses, util, SentenceTransformer, SentencesDataset, InputExample, evaluation
# from sentence_transformers.cross_encoder import CrossEncoder

In [None]:
from PIL import Image
from torchvision import transforms
# Image preprocessing applied to every attachment: resize the short side to 256,
# centre-crop to 224x224, convert to a float tensor and normalise per channel.
# The mean/std values are the usual ImageNet statistics used with torchvision's
# pretrained ResNet weights.
transform_pipe = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

In [None]:
class custom_dataset(Dataset):
    """Dataset of post pairs: one image and one token tensor per post, plus label and score.

    :param qr: DataFrame indexed by post id; the columns 'Attachments', 'Title',
        'Tags' and 'Text' are read.
    :param qr_idx: sequence of pairs; ``qr_idx[i][0]`` and ``qr_idx[i][1]`` are post ids.
    :param img_dir: directory containing the attachment image files.
    :param label: per-pair labels (1 = related, anything else = unrelated); its
        length defines the dataset length.
    :param transform: optional callable applied to each loaded PIL image.
    :param score_qr: table whose 'New_Related' / 'Related_Scores' columns give the
        relatedness score of a positive pair.

    Each item is a dict with keys 'image' ([img1, img2]), 'token' ([ids1, ids2],
    each of length 512), 'label' and 'score' (0-dim float32 tensor).
    """

    def __init__(self,qr,qr_idx,img_dir,label,transform = None,score_qr = score_qr):
        self.qr = qr
        self.qr_idx = qr_idx
        self.img_dir = img_dir
        self.label = label
        self.transform = transform
        self.score_qr = score_qr
        # Created on first use and then reused; loading the tokenizer on every
        # __getitem__ call (as before) repeats the same expensive work per sample.
        self._tokenizer = None

    def _get_tokenizer(self):
        """Return the DistilBERT tokenizer, loading it once per dataset instance."""
        if self._tokenizer is None:
            self._tokenizer = DistilBertTokenizerFast.from_pretrained('distilbert-base-uncased')
        return self._tokenizer

    def image_adder(self,id1):
        """Load all attachment images of post ``id1``.

        Always returns a non-empty list: a zero tensor of shape (3, 224, 224) is
        substituted when the post has no attachments or none could be loaded.
        Per-image load errors are printed and skipped.
        """
        images = list()
        attachments = self.qr.at[id1,'Attachments']
        if attachments is not None:
            for file_name in attachments:
                try:
                    img_path = os.path.join(self.img_dir,file_name)
                    # Context manager closes the file handle once the RGB copy exists.
                    with Image.open(img_path) as raw:
                        img = raw.convert('RGB')
                    if(self.transform):
                        img = self.transform(img)
                    images.append(img)
                except Exception as e:
                    print(e)
        else:
            images.append(torch.zeros(3,224,224))

        # Reached when the attachment list is empty or every image failed to load.
        if(len(images)==0):
            print(f'Something went wrong with the image of id {id1}')
            images.append(torch.zeros(3,224,224))

        return images

    def _encode(self, post_id):
        """Tokenise title + tags + text of one post into a length-512 id tensor."""
        # NOTE(review): tokenizer.encode adds special tokens by default, so the literal
        # '[CLS]'/'[SEP]' here likely duplicates them. Left unchanged to stay compatible
        # with existing checkpoints — confirm and clean up together with a retrain.
        text = ('[CLS]' + self.qr.loc[post_id,'Title'] + ' '
                + ' '.join(self.qr.loc[post_id,'Tags']) + ' '
                + self.qr.loc[post_id,'Text'] + '[SEP]')
        ids = self._get_tokenizer().encode(text,max_length = 512,truncation = True)
        # Right-pad with id 0 up to the fixed length.
        ids = ids + [0] * (512 - len(ids))
        return torch.as_tensor(ids)[:512]

    def _related_score(self, id1, id2, label):
        """Relatedness score of the pair: table lookup for positives, 0.0 otherwise."""
        if not (label == 1):
            return torch.tensor(0.0)
        try:
            position = self.score_qr.loc[id1,'New_Related'].index(id2)
            raw_score = self.score_qr.loc[id1,'Related_Scores'][position]
            # Same dtype as the fallback so batches collate to one consistent type.
            return torch.as_tensor(raw_score, dtype=torch.float32)
        except (KeyError, ValueError, IndexError, TypeError, AttributeError):
            # Pair missing from the score table: fall back to 0.0 (best effort).
            return torch.tensor(0.0)

    def __getitem__(self,idx):
        id1 = self.qr_idx[idx][0]
        id2 = self.qr_idx[idx][1]
        img_id1 = self.image_adder(id1)
        img_id2 = self.image_adder(id2)

        # The score must follow THIS pair's label; the previous code looped over
        # every label and ended up using the last one in the dataset.
        pair_label = self.label[idx]

        return {
            'image': [img_id1[0],img_id2[0]],    #Currently taking only one input image
            'label': pair_label,
            'token': [self._encode(id1), self._encode(id2)],
            'score': self._related_score(id1, id2, pair_label),
        }

    def __len__(self):
        return len(self.label)

### Initializing dataset and dataloader

In [None]:
# Labels: 1 for every positive pair followed by 0 for every negative pair, in the same
# order as the concatenated pair list. torch.cat is required here — `ones + zeros` adds
# the tensors element-wise (all-ones of the positive length, or a shape error).
train_dataset = custom_dataset(train_qr,train_data_pos + train_data_neg,
                               img_dir,
                               torch.cat([torch.ones(len(train_data_pos)),
                                          torch.zeros(len(train_data_neg))]),
                               transform = transform_pipe)

In [None]:
# Shuffled training batches of BATCH_SIZE pairs, loaded by 8 worker processes into
# pinned host memory.
train_loader = DataLoader(
    train_dataset,
    batch_size=BATCH_SIZE,
    pin_memory=True,
    num_workers = 8,
    shuffle = True
    )

In [None]:
# Dev pairs may reference training posts, hence the concatenated lookup frame.
# Labels are built with torch.cat (1s for positives, then 0s for negatives); `ones + zeros`
# would add the tensors element-wise instead of concatenating them.
dev_dataset = custom_dataset(pd.concat([train_qr,dev_qr]),dev_data_pos + dev_data_neg,
                               img_dir,
                               torch.cat([torch.ones(len(dev_data_pos)),
                                          torch.zeros(len(dev_data_neg))]),
                               transform = transform_pipe)

In [None]:
# Dev batches of BATCH_SIZE pairs.
# NOTE(review): shuffle=True is not needed for evaluation and makes runs harder to compare.
dev_loader = DataLoader(
    dev_dataset,
    batch_size=BATCH_SIZE,
    pin_memory=True,
    num_workers = 8,
    shuffle = True
    )

In [None]:
# Test pairs may reference training posts, hence the concatenated lookup frame.
# Labels are built with torch.cat (1s for positives, then 0s for negatives); `ones + zeros`
# would add the tensors element-wise instead of concatenating them.
test_dataset = custom_dataset(pd.concat([train_qr,test_qr]),test_data_pos + test_data_neg,
                               img_dir,
                               torch.cat([torch.ones(len(test_data_pos)),
                                          torch.zeros(len(test_data_neg))]),
                               transform = transform_pipe)

In [None]:
# Test batches of BATCH_SIZE pairs.
# NOTE(review): shuffle=True is not needed for evaluation and makes runs harder to compare.
test_loader = DataLoader(
    test_dataset,
    batch_size=BATCH_SIZE,
    pin_memory=True,
    num_workers = 8,
    shuffle = True
    )

# Model

### Downloading models

In [None]:
from torchvision import models
# Pretrained ResNet-50 image backbone, moved to the selected device. BridgeModel reuses
# this module-level instance rather than creating its own copy.
res50 = models.resnet50(pretrained = True)
res50 = res50.to(device)
# All parameters are left trainable.
# NOTE(review): the forward hook built by get_activation stores output.detach(), so the
# features BridgeModel reads from the hooks carry no gradient back into res50 — confirm
# whether fine-tuning the backbone was actually intended.
for param in res50.parameters():
    param.requires_grad = True
# res50.eval()

# Pretrained DistilBERT text backbone, also shared with BridgeModel; all parameters trainable.
distilbert = DistilBertModel.from_pretrained('distilbert-base-uncased')
distilbert = distilbert.to(device)
for param in distilbert.parameters():
    param.requires_grad = True


### Forward Hook

In [None]:
def get_activation(name, activation):
    """Create a forward hook that records a module's output.

    :param name: key under which the output is stored.
    :param activation: dict that receives ``activation[name]``; the stored tensor is
        detached from the autograd graph.
    :return: a callable with the ``(module, input, output)`` forward-hook signature.
    """
    def _record(module, inputs, result):
        # Overwritten on every forward call of the hooked module.
        activation[name] = result.detach()

    return _record

### 2d intra attention layer

In [None]:
import math
class Final2d(torch.nn.Module):
    """ Custom Linear layer for equation (9) in paper --> 2D intra attention layer

    Holds a single bias-free weight matrix ``weights`` of shape (size_out, size_in)
    and returns the attention-weighted sum of the projected positions.
    """
    def __init__(self, size_in, size_out):
        super().__init__()
        self.size_in, self.size_out = size_in, size_out
        # Uninitialised storage; filled by the Kaiming initialisation just below.
        self.weights = torch.nn.Parameter(torch.empty(size_out, size_in))
        torch.nn.init.kaiming_uniform_(self.weights, a=math.sqrt(5)) # weight init

    def forward(self, x, a):
        """
        :param x: features of shape (batch, size_in, n)
        :param a: attention weights of shape (batch, n)
        :return: tensor of shape (batch, size_out) with
            ``out[k] = sum_i a[k, i] * (weights @ x[k, :, i])``
        """
        z = torch.matmul(self.weights, x)  # (batch, size_out, n)
        # Broadcast the weights over the feature axis and reduce over positions.
        # Done in one vectorised step on the input's own device, so the layer no
        # longer depends on a module-level `device` or on size_in == size_out.
        return torch.sum(a.unsqueeze(1) * z, dim = 2)

### Mask

In [None]:
# Boolean mask over the flattened n_val x n_val grid of position pairs (row-major, so the
# pair (i, j) sits at flat index i*n_val + j). True marks pairs that BridgeModel.forward
# overwrites with -1000 before the softmax:
#   * the diagonal, i.e. every position paired with itself;
#   * every pair among the last 50 positions.
n_val = 360+49 #360 is the text input vector length, and 49 is the image vector length
sz = (BATCH_SIZE, n_val*n_val, 1)
positions = torch.arange(n_val, device = device)
in_tail = positions >= n_val-50
pair_grid = positions.unsqueeze(1) == positions.unsqueeze(0)
pair_grid = pair_grid | (in_tail.unsqueeze(1) & in_tail.unsqueeze(0))
# Same grid for every sample in the batch.
mask = pair_grid.reshape(1, n_val*n_val, 1).expand(sz).clone()

### Model class

In [None]:
class BridgeModel(torch.nn.Module):
    """Joint image/text encoder producing one 500-d embedding per (image, text) input.

    ResNet-50 feature maps are captured with forward hooks, projected to 768
    channels by 1x1 convolutions and appended as 50 extra positions (1 pooled + 49
    spatial) to the first ``mod_v`` DistilBERT token embeddings. The sequence goes
    through the six DistilBERT transformer blocks, a pairwise ("2D") intra-attention
    summarises it, and a final linear layer maps the concatenated features to 500-d.

    Relies on module-level objects: ``res50``, ``distilbert``, ``device``,
    ``get_activation``, ``Final2d`` and ``mask``. Because ``mask`` is built for
    ``BATCH_SIZE`` samples, ``forward`` must be called with exactly that batch size.

    NOTE(review): every instance registers two more forward hooks on the shared
    module-level ``res50``; hooks from earlier instances are never removed.
    """
    # define model elements
    def __init__(self):
        super(BridgeModel, self).__init__()
        # Filled by the two forward hooks each time self.res50 runs.
        self.activation = {}
        self.res50 = res50.to(device)
        # 'conv1': output of the last bottleneck's ReLU; 'conv2': output of the global avg-pool.
        self.res50.layer4[2].relu.register_forward_hook(get_activation('conv1', self.activation))
        self.res50.avgpool.register_forward_hook(get_activation('conv2', self.activation))
        # 1x1 convolutions mapping the 2048 ResNet channels to DistilBERT's width of 768.
        self.bridge_seven_conv = torch.nn.Conv2d(in_channels=2048, out_channels=768, kernel_size=1, stride=1, padding = 0)
        self.bridge_one_conv = torch.nn.Conv2d(in_channels=2048, out_channels=768, kernel_size=1, stride=1, padding = 0)
        self.flatten = torch.nn.Flatten(start_dim = 2)
        self.distilbert = distilbert.to(device)
        self.mod_v = 360 #Overall text token length
        self.linear_2d = torch.nn.Linear(768*2, 1)
        self.soft_2d = torch.nn.Softmax(dim = 1)
        self.final_2d = Final2d(768, 768) #2D intra attention layer
        self.fc = torch.nn.Linear(768+2048+768, 500)

    # forward propagate input
    def forward(self, X1, X2):
        """
        :param X1: image batch fed to ResNet-50.
        :param X2: token-id batch; only the first ``mod_v`` positions are used.
        :return: tensor of shape (batch_size, 500).
        """
        # Run the backbone only for its hooks; the classification logits are unused.
        self.res50(X1)
        seven_conv = self.activation['conv1'] # torch.Size([batch_size, 2048, 7, 7])
        one_conv = self.activation['conv2'] # torch.Size([batch_size, 2048, 1, 1])
        out3 = one_conv[:, :, 0, 0].clone() # torch.Size([batch_size, 2048])

        seven_conv = self.flatten(self.bridge_seven_conv(seven_conv)) # torch.Size([batch_size, 768, 49])
        one_conv = self.flatten(self.bridge_one_conv(one_conv)) # torch.Size([batch_size, 768, 1])
        bridge_cat = torch.cat((one_conv, seven_conv), dim = 2).permute(0, 2, 1) # torch.Size([batch_size, 50, 768])

        distilbert_embed = self.distilbert.embeddings(input_ids = X2)
        distilbert_embed = distilbert_embed[:,:self.mod_v,:] # torch.Size([batch_size, mod_v, 768])
        attn_mask = torch.ones(distilbert_embed.shape[0], self.mod_v + 50, device = distilbert_embed.device)
        prev = torch.cat((distilbert_embed, bridge_cat), dim = 1) # torch.Size([batch_size, mod_v + 50, 768])

        for i in range(6):
            part1 = self.distilbert.transformer.layer[i](prev,attn_mask)[0]
            # The 50 image positions are additionally multiplied by this block's value-projection weight.
            part2 = torch.matmul(part1[:, self.mod_v:, :], self.distilbert.transformer.layer[i].attention.v_lin.weight)
            prev = torch.cat((part1[:, :self.mod_v, :], part2), dim = 1)

        out1 = prev[:, 0, :] # first position, torch.Size([batch_size, 768])
        prev = prev[:, 1:, :] # remaining mod_v + 49 positions

        n = self.mod_v + 49 # length(n) = |v| + |g|

        # All n*n ordered position pairs, each as a 2*768 vector: row i*n + j is
        # cat(prev[j], prev[i]). Built directly — the former torch.zeros(batch, n*n, 1536)
        # placeholder (about 1 GB for n = 409) was overwritten before ever being read.
        prev_repeat1 = prev.repeat(1, n, 1)
        prev_repeat2 = torch.repeat_interleave(prev, repeats = n, dim = 1)
        hij = torch.cat((prev_repeat1, prev_repeat2), dim = 2)

        sij = self.linear_2d(hij) # batch_size, n*n, 1

        # Suppress the pairs flagged by the module-level mask before normalising.
        sij[mask] = -1000

        # Softmax over all n*n pairs, then view as an n x n grid.
        aij = self.soft_2d(sij).reshape(sij.shape[0], n, n) # batch_size, n, n

        # Per-position weight: mean of the mass received as row and as column.
        ai_cap = (torch.sum(aij, dim = 1) + torch.sum(aij, dim = 2))/2 # batch_size, n

        out2 = self.final_2d(prev.permute(0, 2, 1), ai_cap) # torch.Size([batch_size, 768])

        out = torch.cat((out1, out2, out3), dim = 1) # torch.Size([batch_size, 768+768+2048])
        out = self.fc(out) # torch.Size([batch_size, 500])
        # out = torch.nn.functional.normalize(out, p=2, dim=1, eps=1e-12) #L2 normalizing the final tensor

        return out

    # def mask_creater(self,n_val,BATCH_SIZE=BATCH_SIZE):
    #     print(n_val)
    #     sz = (BATCH_SIZE, n_val*n_val, 1)
    #     mask = torch.zeros(sz, dtype=torch.bool, device = device)
    #     for i in range(n_val):
    #         mask[:, i*n_val+i] = 1
                
    #     for i in range(n_val-50, n_val):
    #         for j in range(n_val-1, n_val):
    #             mask[:, i*n_val+j] = 1
        
    #     return mask

# Losses

## Multiple Negative Ranking loss

In [None]:
class negrankloss(nn.Module):
    """Multiple-negatives ranking loss with in-batch negatives.

    For a batch of pairs (emb1[i], emb2[i]), emb2[i] is treated as the correct match
    for emb1[i] and every other emb2[j] in the batch as a negative. The loss is the
    cross-entropy over the scaled cosine-similarity matrix with the diagonal as target.

    :param scale: factor applied to the cosine similarities before the softmax.
        Cosines lie in [-1, 1]; without scaling the softmax is nearly uniform and
        the loss barely moves.
    :param reduction: reduction mode passed to ``nn.CrossEntropyLoss``.
    """
    def __init__(self,scale: float = 20.0,reduction:str = 'mean'):
        super().__init__()
        self.red = reduction
        self.scale = scale
        self.cross_entropy = nn.CrossEntropyLoss(reduction=self.red)

    def cal_loss(self,emb1: torch.as_tensor,emb2: torch.as_tensor, label: torch.Tensor):
        """
        :param emb1: embeddings of the first items, shape (batch, dim).
        :param emb2: embeddings of the second items, shape (batch, dim).
        :param label: unused; kept so all losses share one call signature.
        :return: scalar loss (for the default 'mean' reduction).
        """
        # `scale` was previously stored but never applied to the scores.
        scores = pytorch_cos_sim(emb1,emb2) * self.scale
        # Row i's correct "class" is column i.
        labels = torch.arange(len(scores), dtype=torch.long, device=scores.device)
        return self.cross_entropy(scores, labels)

In [None]:
# Shared loss instance used by train_model (default scale=20.0, reduction='mean').
MNRloss = negrankloss()

## Online Contrastive Loss


In [None]:
class SiameseDistanceMetric(Enum):
    """
    Distance functions selectable for the contrastive loss.

    Each attribute is a plain callable taking two batches of embeddings and
    returning one distance per row.
    """
    def EUCLIDEAN(x, y):
        # Row-wise L2 distance.
        return F.pairwise_distance(x, y, p=2)

    def MANHATTAN(x, y):
        # Row-wise L1 distance.
        return F.pairwise_distance(x, y, p=1)

    def COSINE_DISTANCE(x, y):
        # One minus the row-wise cosine similarity.
        return 1-F.cosine_similarity(x, y)


In [None]:
class OnlineConstrantiveLoss(nn.Module):
    """Contrastive loss over a batch of embedding pairs.

    Positive pairs (label 1) are penalised by their squared distance; negative
    pairs (label 0) by the squared amount they fall inside ``margin``. Hard-pair
    selection is currently disabled, so every pair in the batch contributes.

    :param distance_metric: callable ``(emb1, emb2) -> per-row distance``.
    :param margin: distance below which negative pairs are penalised.
    """
    def __init__(self,distance_metric=SiameseDistanceMetric.COSINE_DISTANCE,margin: float = 0.5):
        super().__init__()
        self.margin = margin
        self.distance_metric = distance_metric

    def cal_loss(self,emb1,emb2,labels,size_average=False):
        """
        :param emb1: first embeddings, one row per pair.
        :param emb2: second embeddings, one row per pair.
        :param labels: per-pair labels; 1 = positive, 0 = negative.
        :param size_average: unused.
        :return: scalar tensor, sum of the positive and negative terms.
        """
        distances = self.distance_metric(emb1,emb2)

        # Hard positive / hard negative mining is not possible with a batch of one
        # pair, so all pairs of each kind are used as-is.
        is_negative = labels == 0
        is_positive = labels == 1

        pull_term = distances[is_positive].pow(2).sum()
        push_term = F.relu(self.margin - distances[is_negative]).pow(2).sum()
        return pull_term + push_term
    

In [None]:
# Shared loss instance used by train_model (cosine distance, margin 0.5).
OCloss = OnlineConstrantiveLoss()

## Cosine Similarity Loss

In [None]:
class CosineSimilarityLoss(nn.Module):
    """Regress the cosine similarity of two embeddings onto a target score.

    :param loss_fct: loss comparing predicted similarity and target (default MSE).
    :param cos_score_transformation: module applied to the cosine similarity before
        the loss (default identity).
    """
    def __init__(self, loss_fct = nn.MSELoss(), cos_score_transformation=nn.Identity()):
        super(CosineSimilarityLoss, self).__init__()
        self.loss_fct = loss_fct
        self.cos_score_transformation = cos_score_transformation


    def cal_loss(self,emb1:torch.Tensor,emb2:torch.Tensor, labels: torch.Tensor):
        """
        :param emb1: first embeddings, shape (batch, dim).
        :param emb2: second embeddings, shape (batch, dim).
        :param labels: target similarity per pair; any numeric dtype and shape that
            flattens to (batch,).
        :return: loss value produced by ``loss_fct``.
        """
        output = self.cos_score_transformation(torch.cosine_similarity(emb1,emb2))
        # Targets collated from Python floats arrive as float64 (or as integers);
        # align them with the prediction so the loss sees matching dtype and device.
        target = labels.reshape(-1).to(dtype=output.dtype, device=output.device)
        return self.loss_fct(output, target)

In [None]:
# Shared loss instance used by train_model (MSE on the untransformed cosine similarity).
Cosloss = CosineSimilarityLoss()

# Calculating loss and optimizing

In [None]:
from torch.optim import Adam
from torch.nn import BCELoss
import time
import copy
from transformers import get_linear_schedule_with_warmup

def train_model(train_loader, model,num_epochs):
    """Train ``model`` on pairs from ``train_loader`` with three summed losses.

    Each batch provides two (image, token) inputs; both go through ``model`` and the
    resulting embeddings are scored by the module-level ``OCloss``, ``MNRloss`` and
    ``Cosloss``. Gradients are accumulated over ``fin_BATCH_SIZE // BATCH_SIZE``
    batches per optimizer step. After every epoch the state dict is written to
    ``<folder>/model_state_dict/distilbert_cossim.bin``.

    NOTE(review): MNRloss ranks against in-batch negatives, so with a batch of one
    pair its score matrix is 1x1 and the loss is zero — it only contributes when
    BATCH_SIZE > 1.

    :param train_loader: DataLoader yielding dicts with 'image', 'token', 'label', 'score'.
    :param model: module called as ``model(image, token)``.
    :param num_epochs: number of passes over ``train_loader``.
    """
    model.train()

    since = time.time()

    optimizer = Adam(model.parameters())

    # Number of batches whose gradients are summed before one optimizer step.
    acc_steps = max(1, fin_BATCH_SIZE // BATCH_SIZE)

    use_amp = False
    scaler = torch.cuda.amp.GradScaler(enabled=use_amp)

    save_dir = folder + '/model_state_dict'
    optimizer.zero_grad()

    def _apply_step():
        # One optimizer update from the accumulated gradients, then clear them.
        scaler.step(optimizer)
        scaler.update()
        optimizer.zero_grad()

    # enumerate epochs
    for epoch in range(num_epochs):

        print('Epoch {}/{}'.format(epoch, num_epochs - 1))
        print('-' * 10)

        running_OCloss = 0.0
        running_MNRloss = 0.0
        running_Cosloss = 0.0
        grads_pending = False

        for i, batch in enumerate(train_loader):

            with torch.cuda.amp.autocast(enabled=use_amp):
                images = batch['image']
                token = batch['token']
                label = batch['label'].to(device)
                score = batch['score'].to(device)

                images[0],images[1] = images[0].to(device),images[1].to(device)
                token[0],token[1] = token[0].to(device), token[1].to(device)

                # compute the model output
                yhat1 = model(images[0],token[0])
                yhat2 = model(images[1],token[1])

                OCloss_val = OCloss.cal_loss(yhat1,yhat2,label)
                MNRloss_val = MNRloss.cal_loss(yhat1,yhat2,label)
                Cosloss_val = Cosloss.cal_loss(yhat1,yhat2,score)

            running_OCloss +=  OCloss_val.item()*BATCH_SIZE
            running_MNRloss += MNRloss_val.item()*BATCH_SIZE
            running_Cosloss += Cosloss_val.item()*BATCH_SIZE

            # One backward pass over the summed loss gives the same gradients as three
            # separate passes, without having to retain the graph in between.
            total_loss = (OCloss_val + MNRloss_val + Cosloss_val)/acc_steps
            scaler.scale(total_loss).backward()
            grads_pending = True

            if((i+1)%acc_steps == 0):
                _apply_step()
                grads_pending = False

            if(i%30 == 0 ):
                print(' OCloss is {} and MNRloss is {} and Cosloss is {} and time taken is {} after {} iterations'.format(
                    running_OCloss/((i+1)*BATCH_SIZE),
                    running_MNRloss/((i+1)*BATCH_SIZE),
                    running_Cosloss/((i+1)*BATCH_SIZE),
                    time.time()-since,
                    i))

            # Drop references so the batch's tensors can be freed before the next forward pass.
            del yhat1,yhat2, images, label, token, score, OCloss_val, Cosloss_val, MNRloss_val, total_loss

        # Apply gradients from a trailing, incomplete accumulation window so they are
        # part of the checkpoint instead of leaking into the next epoch (or being lost).
        if grads_pending:
            _apply_step()

        os.makedirs(save_dir, exist_ok=True) 
        torch.save(model.state_dict(), save_dir + '/distilbert_cossim.bin') 
        print(f"Model Saved after epoch {epoch}")

# Training the model

In [None]:
# Release cached, currently unused GPU memory held by PyTorch's allocator before training.
torch.cuda.empty_cache()
# torch.cuda.ipc_collect()

In [None]:
# Show which GPU the runtime provides and how much of its memory is already in use.
!nvidia-smi

Mon Feb 15 18:11:25 2021       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 460.39       Driver Version: 460.32.03    CUDA Version: 11.2     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla T4            Off  | 00000000:00:04.0 Off |                    0 |
| N/A   63C    P0    30W /  70W |   1328MiB / 15109MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

In [None]:
# Build the model on the selected device and print its total parameter count
# (this includes the shared res50 and distilbert backbones held as submodules).
model = BridgeModel().to(device)
print(sum(p.numel() for p in model.parameters()))

97451037


In [None]:
# Train for one epoch; train_model saves a checkpoint at the end of each epoch.
train_model(train_loader,model,1)

Epoch 0/0
----------
 OCloss is 0.002375280950218439 and MNRloss is 0.6944969296455383 and Cosloss is 0.18439336121082306 and time taken is 42.082067012786865 after 0 iterations
 OCloss is 0.03494626876944551 and MNRloss is 0.7028353329627745 and Cosloss is 0.25314821399027304 and time taken is 180.1316978931427 after 30 iterations
 OCloss is 0.01776020457117371 and MNRloss is 0.6980558262496698 and Cosloss is 0.2520949334883299 and time taken is 348.59856247901917 after 60 iterations
 OCloss is 0.011906538631371128 and MNRloss is 0.6964812173948183 and Cosloss is 0.24214543582318904 and time taken is 515.3554110527039 after 90 iterations
 OCloss is 0.00895527454179868 and MNRloss is 0.6956738164602232 and Cosloss is 0.24628667419483838 and time taken is 682.9870879650116 after 120 iterations
 OCloss is 0.007177171607264861 and MNRloss is 0.69517648614795 and Cosloss is 0.24278666071643104 and time taken is 815.4570307731628 after 150 iterations
 OCloss is 0.005990727226712252 and MNRl

  "Palette images with Transparency expressed in bytes should be "


 OCloss is 0.0009252511661413488 and MNRloss is 0.6931818946759225 and Cosloss is 0.23302959788341243 and time taken is 17452.928606987 after 3240 iterations
 OCloss is 0.0013239819040468915 and MNRloss is 0.6932093867720613 and Cosloss is 0.2333045216366644 and time taken is 17609.810970067978 after 3270 iterations
 OCloss is 0.0013119691908857769 and MNRloss is 0.6932097517949312 and Cosloss is 0.2332696698727896 and time taken is 17752.01113176346 after 3300 iterations
 OCloss is 0.0013001601946285273 and MNRloss is 0.6932071743516799 and Cosloss is 0.23293365239403657 and time taken is 17910.309165477753 after 3330 iterations
 OCloss is 0.001288576554039919 and MNRloss is 0.6932069812101893 and Cosloss is 0.23320273558779048 and time taken is 18073.736450195312 after 3360 iterations
 OCloss is 0.0012771946008460495 and MNRloss is 0.6932072223234725 and Cosloss is 0.2331420189191004 and time taken is 18238.942098379135 after 3390 iterations
 OCloss is 0.001266010320667285 and MNRlos

In [None]:
# Restore weights from a saved checkpoint.
# NOTE(review): train_model writes 'distilbert_cossim.bin', but this loads 'distilbert.bin' —
# confirm that loading a different checkpoint than the one just trained is intended.
model.load_state_dict(torch.load(folder+'/model_state_dict/distilbert.bin'))

<All keys matched successfully>

# Evaluator

In [None]:
# Fresh model instance for evaluation.
# NOTE(review): BridgeModel.__init__ registers forward hooks on the shared module-level
# res50, so each new instance adds another pair of hooks on top of those from earlier ones.
model = BridgeModel().to(device)

In [None]:
# Load the evaluation checkpoint. The path is hard-coded here but resolves to the same
# file as folder + '/model_state_dict/distilbert.bin' used earlier.
# NOTE(review): train_model saves to 'distilbert_cossim.bin'; confirm 'distilbert.bin' is
# the checkpoint that should be evaluated.
model.load_state_dict(torch.load('/gdrive/MyDrive/MultiModal/model_state_dict/distilbert.bin'))

<All keys matched successfully>

## Binary Classification Evaluator

### Imports

In [None]:
# from . import SentenceEvaluator
import logging
import os
import csv
from sklearn.metrics.pairwise import paired_cosine_distances, paired_euclidean_distances, paired_manhattan_distances
from sklearn.metrics import average_precision_score
import numpy as np
from typing import List
# from ..readers import InputExample


logger = logging.getLogger(__name__)


### Function

In [None]:
class BinaryClassificationEvaluator():
    """
    Evaluate a model on labelled pairs by thresholding the similarity of the two embeddings.

    Each batch produced from ``dataset`` must be a dict with the keys ``"image"`` and
    ``"token"`` (both indexable with 0 and 1 for the two sides of the pair) and ``"label"``.
    The label needs to be 0 for dissimilar pairs and 1 for similar pairs.

    For cosine similarity, Manhattan distance and Euclidean distance the evaluator reports
    the best accuracy, the best F1 (with precision/recall) and the average precision.
    The value returned by ``__call__`` is the average precision obtained with cosine similarity.
    The results are written in a CSV. If a CSV already exists, then values are appended.

    :param dataset: Dataset of pairs, wrapped in a shuffling DataLoader
    :param name: Name for the output
    :param batch_size: Batch size used to compute embeddings
    :param show_progress_bar: Stored on the instance; progress is currently reported with print()
    :param write_csv: Write results to a CSV file
    :param max_samples: Number of (shuffled) pairs the metrics are computed on; None uses all pairs
    """

    def __init__(self,
                 dataset,
                 name: str = '',
                 batch_size: int = 32,
                 show_progress_bar: bool = False,
                 write_csv: bool = True,
                 max_samples: int = 400
                 ):

        self.dataset = dataset
        self.labels = list()
        self.write_csv = write_csv
        self.name = name
        self.batch_size = batch_size
        self.max_samples = max_samples
        self.dataloader = DataLoader(
            self.dataset,
            batch_size=self.batch_size,
            pin_memory=True,
            num_workers = 8,
            shuffle = True
        )

        if show_progress_bar is None:
            show_progress_bar = (logger.getEffectiveLevel() == logging.INFO or logger.getEffectiveLevel() == logging.DEBUG)
        self.show_progress_bar = show_progress_bar

        self.csv_file = "binary_classification_evaluation" + ("_"+name if name else '') + "_results.csv"
        self.csv_headers = ["epoch", "steps",
                            "cosine_acc", "cosine_acc_threshold", "cosine_f1", "cosine_precision", "cosine_recall", "cosine_f1_threshold", "cosine_average_precision",
                            "manhatten_acc", "manhatten_acc_threshold", "manhatten_f1", "manhatten_precision", "manhatten_recall", "manhatten_f1_threshold", "manhatten_average_precision",
                            "eucledian_acc", "eucledian_acc_threshold", "eucledian_f1", "eucledian_precision", "eucledian_recall", "eucledian_f1_threshold", "eucledian_average_precision"]

    def __call__(self, model, output_path: str = None, epoch: int = -1, steps: int = -1) -> float:
        """
        Embed both sides of every pair with ``model`` and score the pairs.

        :param model: Callable as ``model(images, tokens)``; must provide ``eval()``
        :param output_path: Existing directory for the CSV file; None disables writing
        :param epoch: Epoch number used in the log line and the CSV (-1 if not applicable)
        :param steps: Step number used in the log line and the CSV (-1 if not applicable)
        :return: Average precision obtained with cosine similarity
        """
        if epoch != -1:
            if steps == -1:
                out_txt = f" after epoch {epoch}:"
            else:
                out_txt = f" in epoch {epoch} after {steps} steps:"
        else:
            out_txt = ":"

        logger.info("Binary Accuracy Evaluation of the model on " + self.name + " dataset" + out_txt)

        embeddings1 = list()
        embeddings2 = list()
        # Reset on every call: the loader shuffles, so labels kept from a previous
        # evaluation would no longer line up with the embeddings collected below.
        self.labels = list()

        with torch.no_grad():
            model.eval()
            for i, batch in enumerate(self.dataloader):
                images = batch["image"]
                token = batch["token"]
                label = batch["label"].float()

                # compute the model output for both sides of the pair
                yhat1 = model(images[0].to(device), token[0].to(device))
                yhat2 = model(images[1].to(device), token[1].to(device))

                embeddings1.extend(row.cpu().detach().numpy() for row in yhat1)
                embeddings2.extend(row.cpu().detach().numpy() for row in yhat2)
                self.labels.extend(float(value) for value in label)

                if i % 30 == 0 and i != 0:
                    print(f'Completed {i} iterations')

        # Metrics are computed on the first `max_samples` pairs only (all pairs when None).
        limit = self.max_samples
        embeddings1 = embeddings1[:limit]
        embeddings2 = embeddings2[:limit]
        labels = np.asarray(self.labels[:limit])

        cosine_scores = 1-paired_cosine_distances(embeddings1, embeddings2)
        manhattan_distances = paired_manhattan_distances(embeddings1, embeddings2)
        euclidean_distances = paired_euclidean_distances(embeddings1, embeddings2)

        file_output_data = [epoch, steps]

        main_score = None
        for name, scores, reverse in [['Cosine-Similarity', cosine_scores, True], ['Manhatten-Distance', manhattan_distances, False], ['Euclidean-Distance', euclidean_distances, False]]:
            acc, acc_threshold = self.find_best_acc_and_threshold(scores, labels, reverse)
            f1, precision, recall, f1_threshold = self.find_best_f1_and_threshold(scores, labels, reverse)
            # Distances are negated so that a larger value always means "more similar".
            ap = average_precision_score(labels, scores * (1 if reverse else -1))

            logger.info("Accuracy with {}:           {:.2f}\t(Threshold: {:.4f})".format(name, acc * 100, acc_threshold))
            logger.info("F1 with {}:                 {:.2f}\t(Threshold: {:.4f})".format(name, f1 * 100, f1_threshold))
            logger.info("Precision with {}:          {:.2f}".format(name, precision * 100))
            logger.info("Recall with {}:             {:.2f}".format(name, recall * 100))
            logger.info("Average Precision with {}:  {:.2f}\n".format(name, ap * 100))

            file_output_data.extend([acc, acc_threshold, f1, precision, recall, f1_threshold, ap])

            if main_score is None: #Use AveragePrecision with Cosine-Similarity as main score
                main_score = ap

        if output_path is not None and self.write_csv:
            csv_path = os.path.join(output_path, self.csv_file)
            is_new_file = not os.path.isfile(csv_path)
            # newline="" is what the csv module expects; it avoids blank rows on Windows.
            with open(csv_path, mode="w" if is_new_file else "a", encoding="utf-8", newline="") as f:
                writer = csv.writer(f)
                if is_new_file:
                    writer.writerow(self.csv_headers)
                writer.writerow(file_output_data)

        return main_score

    @staticmethod
    def find_best_acc_and_threshold(scores, labels, high_score_more_similar: bool):
        """
        Sweep a decision threshold over the sorted scores and keep the most accurate one.

        :param scores: One score per pair
        :param labels: 0/1 label per pair (any array-like)
        :param high_score_more_similar: True for similarities, False for distances
        :return: (best accuracy, threshold halfway between the two scores at the best cut)
        """
        scores = np.asarray(scores)
        # Array conversion makes `labels == 0` element-wise even when a plain list is passed.
        labels = np.asarray(labels)

        rows = sorted(zip(scores, labels), key=lambda x: x[0], reverse=high_score_more_similar)

        max_acc = 0
        best_threshold = -1

        positive_so_far = 0
        remaining_negatives = int(np.sum(labels == 0))

        # Everything up to and including row i is predicted positive, the rest negative.
        for i in range(len(rows)-1):
            score, label = rows[i]
            if label == 1:
                positive_so_far += 1
            else:
                remaining_negatives -= 1

            acc = (positive_so_far + remaining_negatives) / len(labels)
            if acc > max_acc:
                max_acc = acc
                best_threshold = (rows[i][0] + rows[i+1][0]) / 2

        return max_acc, best_threshold

    @staticmethod
    def find_best_f1_and_threshold(scores, labels, high_score_more_similar: bool):
        """
        Sweep a decision threshold over the sorted scores and keep the one with the best F1.

        :param scores: One score per pair
        :param labels: 0/1 label per pair (any array-like)
        :param high_score_more_similar: True for similarities, False for distances
        :return: (best F1, precision at that cut, recall at that cut, threshold)
        """
        scores = np.asarray(scores)
        labels = np.asarray(labels)

        rows = sorted(zip(scores, labels), key=lambda x: x[0], reverse=high_score_more_similar)

        best_f1 = best_precision = best_recall = 0
        threshold = 0
        nextract = 0
        ncorrect = 0
        total_num_duplicates = sum(labels)

        for i in range(len(rows)-1):
            score, label = rows[i]
            nextract += 1

            if label == 1:
                ncorrect += 1

            # Precision/recall are only defined once at least one positive was extracted.
            if ncorrect > 0:
                precision = ncorrect / nextract
                recall = ncorrect / total_num_duplicates
                f1 = 2 * precision * recall / (precision + recall)
                if f1 > best_f1:
                    best_f1 = f1
                    best_precision = precision
                    best_recall = recall
                    threshold = (rows[i][0] + rows[i + 1][0]) / 2

        return best_f1, best_precision, best_recall, threshold

### Evaluation

In [None]:
# Binary-classification evaluator over the dev pairs.
dev_BCEvaluator = BinaryClassificationEvaluator(
    dev_dataset,
    batch_size=BATCH_SIZE,
    show_progress_bar=True,
)

In [None]:
# The evaluator writes its CSV inside output_path but does not create the directory,
# so make sure it exists first (same pattern as the '/info' output folder).
dev_output_dir = folder+'/dev'
os.makedirs(dev_output_dir, exist_ok=True)
dev_BCEvaluator(model,output_path=dev_output_dir)

Completed 30 iterations
Completed 60 iterations
Completed 90 iterations
Completed 120 iterations
Completed 150 iterations
Completed 180 iterations
Completed 210 iterations
Completed 240 iterations
Completed 270 iterations
Completed 300 iterations
Completed 330 iterations
Completed 360 iterations
Completed 390 iterations
Completed 420 iterations
Completed 450 iterations
Completed 480 iterations
Completed 510 iterations
Completed 540 iterations
Completed 570 iterations
Completed 600 iterations
Completed 630 iterations
Completed 660 iterations
Completed 690 iterations
Completed 720 iterations
Completed 750 iterations
Completed 780 iterations
Completed 810 iterations
Completed 840 iterations
Completed 870 iterations
Completed 900 iterations
Completed 930 iterations
Completed 960 iterations
Completed 990 iterations
Completed 1020 iterations
Completed 1050 iterations
Completed 1080 iterations
Completed 1110 iterations
Completed 1140 iterations
Completed 1170 iterations
Completed 1200 iterati

1.0

## Information retrieval evaluator

In [None]:
import torch
import logging
from tqdm import tqdm, trange
import os
import numpy as np
from typing import List, Tuple, Dict, Set

In [None]:
class infodataset(Dataset):
    """
    Dataset that yields one image tensor and one fixed-length token-id tensor per row id.

    :param qr: DataFrame read through the columns 'Attachments', 'Title', 'Tags' and 'Text'
    :param qr_idx: Sequence of index labels of ``qr``; item ``idx`` of the dataset is row ``qr_idx[idx]``
    :param img_dir: Directory the attachment file names are resolved against
    :param transform: Optional callable applied to every loaded image
    """

    # Length every token-id sequence is padded / truncated to.
    _MAX_TOKENS = 512

    def __init__(self,qr,qr_idx,img_dir,transform = None):
        self.qr = qr
        self.qr_idx = qr_idx
        self.img_dir = img_dir
        self.transform = transform
        # Created on first use so the pretrained tokenizer is loaded once per dataset copy
        # (e.g. once per DataLoader worker) instead of once per sample.
        self._tokenizer = None

    def _get_tokenizer(self):
        """Return the cached DistilBERT tokenizer, loading it on first access."""
        if self._tokenizer is None:
            self._tokenizer = DistilBertTokenizerFast.from_pretrained('distilbert-base-uncased')
        return self._tokenizer

    def image_adder(self,id1):
        """
        Load every attachment of row ``id1`` and return the images as a non-empty list.

        A row whose 'Attachments' value is None, or for which no attachment could be
        loaded, yields a single all-zero (3, 224, 224) tensor instead.
        """
        img_id1 = list()
        attachments = self.qr.at[id1,'Attachments']
        if attachments is not None:
            for attachment in attachments:
                try:
                    img_path = os.path.join(self.img_dir,attachment)
                    # Context manager closes the underlying file once the pixels are converted.
                    with Image.open(img_path) as raw_img:
                        img = raw_img.convert('RGB')
                    if(self.transform):
                        img = self.transform(img)
                        # reshape raises when the transform did not yield 3*224*224 values;
                        # such an image is then skipped by the handler below.
                        img = img.reshape(3,224,224)
                    img_id1.append(img)
                except Exception as e:
                    # Best effort: an unreadable attachment is reported and skipped.
                    print(e)
        else:
            img_id1.append(torch.zeros(3,224,224))

        # Empty attachment lists and rows where every attachment failed end up here.
        if(len(img_id1)==0):
            print(f'Something went wrong with the image of id {id1}')
            img_id1.append(torch.zeros(3,224,224))

        return img_id1

    def __getitem__(self,idx):
        """Return ``{'image': tensor, 'token': tensor of 512 ids}`` for position ``idx``."""
        id1 = self.qr_idx[idx]
        img_id1 = self.image_adder(id1)

        sample = {
            'image': img_id1[0]    #Currently taking only one input image
        }

        t1 = '[CLS]' + self.qr.loc[id1,'Title'] + ' ' + ' '.join(self.qr.loc[id1,'Tags']) + ' ' + self.qr.loc[id1,'Text'] + '[SEP]'
        tokenizer = self._get_tokenizer()
        t1_token = tokenizer.tokenize(t1)
        indexed_t1 = tokenizer.convert_tokens_to_ids(t1_token)

        # Truncate long sequences and right-pad short ones with id 0.
        indexed_t1 = indexed_t1[:self._MAX_TOKENS]
        indexed_t1 = indexed_t1 + [0] * (self._MAX_TOKENS - len(indexed_t1))

        sample["token"] = torch.as_tensor(indexed_t1)  # torch.Size([512]) per sample

        return sample

    def __len__(self):
        return len(self.qr_idx)

In [None]:
class InformationRetreivalEvaluator():
    """
    Rank the corpus against every query by cosine similarity of the model embeddings
    and report Accuracy@k, Precision@k, Recall@k, MRR@k, NDCG@k and MAP@k.

    Only queries that have at least one relevant document are evaluated.
    The results are logged and, optionally, appended to a CSV file.

    :param qr: DataFrame the rows of queries and corpus entries are read from when embedding
    :param queries: qid => query
    :param corpus: cid => doc
    :param relevant_docs: qid => Set[cid]
    :param corpus_chunk_size: Number of corpus entries embedded and scored at a time
    :param mrr_at_k: Cut-offs for MRR
    :param ndcg_at_k: Cut-offs for NDCG
    :param accuracy_at_k: Cut-offs for accuracy
    :param precision_recall_at_k: Cut-offs for precision and recall
    :param map_at_k: Cut-offs for MAP
    :param show_progress_bar: Wrap the corpus-chunk loop in tqdm
    :param batch_size: Stored on the instance
    :param name: Name used in the log line and the CSV file name
    :param write_csv: Write results to a CSV file
    """

    def __init__(self,
                 qr,
                 queries: Dict[str, str],  #qid => query
                 corpus: Dict[str, str],  #cid => doc
                 relevant_docs: Dict[str, Set[str]],  #qid => Set[cid]
                 corpus_chunk_size: int = 50000,
                 mrr_at_k: List[int] = [10],
                 ndcg_at_k: List[int] = [10],
                 accuracy_at_k: List[int] = [1, 3, 5, 10],
                 precision_recall_at_k: List[int] = [1, 3, 5, 10],
                 map_at_k: List[int] = [100],
                 show_progress_bar: bool = False,
                 batch_size: int = 32,
                 name: str = '',
                 write_csv: bool = True
                 ):

        self.qr = qr
        # Queries without any relevant document cannot be scored and are dropped.
        self.queries_ids = [qid for qid in queries
                            if qid in relevant_docs and len(relevant_docs[qid]) > 0]
        self.queries = [queries[qid] for qid in self.queries_ids]

        self.corpus_ids = list(corpus.keys())
        self.corpus = [corpus[cid] for cid in self.corpus_ids]

        self.relevant_docs = relevant_docs
        self.corpus_chunk_size = corpus_chunk_size
        # The list defaults are shared between instances; they are only read, never mutated.
        self.mrr_at_k = mrr_at_k
        self.ndcg_at_k = ndcg_at_k
        self.accuracy_at_k = accuracy_at_k
        self.precision_recall_at_k = precision_recall_at_k
        self.map_at_k = map_at_k

        self.show_progress_bar = show_progress_bar
        self.batch_size = batch_size
        self.name = name
        self.write_csv = write_csv

        if name:
            name = "_" + name

        self.csv_file: str = "Information-Retrieval_evaluation" + name + "_results.csv"
        self.csv_headers = ["epoch", "steps"]

        for k in accuracy_at_k:
            self.csv_headers.append("Accuracy@{}".format(k))

        for k in precision_recall_at_k:
            self.csv_headers.append("Precision@{}".format(k))
            self.csv_headers.append("Recall@{}".format(k))

        for k in mrr_at_k:
            self.csv_headers.append("MRR@{}".format(k))

        for k in ndcg_at_k:
            self.csv_headers.append("NDCG@{}".format(k))

        for k in map_at_k:
            self.csv_headers.append("MAP@{}".format(k))

    # The annotation is a string so that defining this class does not require
    # BridgeModel to be defined first.
    def __call__(self,model : "BridgeModel",output_path: str = None,epoch: int = -1, steps: int = -1) ->float:
        """
        Embed queries and corpus with ``model``, rank the corpus and compute all metrics.

        :param model: Model used by ``get_embeddings``
        :param output_path: Existing directory for the CSV file; None disables writing
        :param epoch: Epoch number used in the log line and the CSV (-1 if not applicable)
        :param steps: Step number used in the log line and the CSV (-1 if not applicable)
        :return: MAP at the largest configured ``map_at_k``
        """
        if epoch != -1:
            out_txt = " after epoch {}:".format(epoch) if steps == -1 else " in epoch {} after {} steps:".format(epoch, steps)
        else:
            out_txt = ":"

        logger.info("Information Retrieval Evaluation on " + self.name + " dataset" + out_txt)

        max_k = max(max(self.mrr_at_k), max(self.ndcg_at_k), max(self.accuracy_at_k), max(self.precision_recall_at_k), max(self.map_at_k))

        # Stacked into one array so torch converts a single buffer instead of a list of arrays.
        query_embeddings = np.asarray(self.get_embeddings(model,self.qr,self.queries_ids))

        queries_result_list = [[] for _ in range(len(query_embeddings))]

        itr = range(0, len(self.corpus), self.corpus_chunk_size)

        if self.show_progress_bar:
            itr = tqdm(itr, desc='Corpus Chunks')

        #Iterate over chunks of the corpus
        for corpus_start_idx in itr:
            corpus_end_idx = min(corpus_start_idx + self.corpus_chunk_size, len(self.corpus))

            #Encode chunk of corpus
            sub_corpus_embeddings = np.asarray(
                self.get_embeddings(model,self.qr,self.corpus_ids[corpus_start_idx:corpus_end_idx]))

            #Compute cosine similarites
            cos_scores = pytorch_cos_sim(query_embeddings, sub_corpus_embeddings)
            del sub_corpus_embeddings

            #Get top-k values (unsorted; compute_metrics sorts the merged candidates)
            cos_scores_top_k_values, cos_scores_top_k_idx = torch.topk(cos_scores, min(max_k, len(cos_scores[0])), dim=1, largest=True, sorted=False)
            cos_scores_top_k_values = cos_scores_top_k_values.cpu().tolist()
            cos_scores_top_k_idx = cos_scores_top_k_idx.cpu().tolist()
            del cos_scores

            for query_itr in range(len(query_embeddings)):
                for sub_corpus_id, score in zip(cos_scores_top_k_idx[query_itr], cos_scores_top_k_values[query_itr]):
                    # Chunk-local position -> global corpus id.
                    corpus_id = self.corpus_ids[corpus_start_idx+sub_corpus_id]
                    queries_result_list[query_itr].append({'corpus_id': corpus_id, 'score': score})

        #Compute scores
        scores = self.compute_metrics(queries_result_list)

        #Output
        self.output_scores(scores)

        if output_path is not None and self.write_csv:
            csv_path = os.path.join(output_path, self.csv_file)
            is_new_file = not os.path.isfile(csv_path)

            output_data = [epoch, steps]
            for k in self.accuracy_at_k:
                output_data.append(scores['accuracy@k'][k])

            for k in self.precision_recall_at_k:
                output_data.append(scores['precision@k'][k])
                output_data.append(scores['recall@k'][k])

            for k in self.mrr_at_k:
                output_data.append(scores['mrr@k'][k])

            for k in self.ndcg_at_k:
                output_data.append(scores['ndcg@k'][k])

            for k in self.map_at_k:
                output_data.append(scores['map@k'][k])

            # `with` guarantees the handle is closed even if a write fails.
            with open(csv_path, mode="w" if is_new_file else "a", encoding="utf-8") as fOut:
                if is_new_file:
                    fOut.write(",".join(self.csv_headers))
                    fOut.write("\n")
                fOut.write(",".join(map(str,output_data)))
                fOut.write("\n")

        return scores['map@k'][max(self.map_at_k)]


    def compute_metrics(self, queries_result_list: List[object]):
        """
        Turn the per-query candidate lists into averaged retrieval metrics.

        :param queries_result_list: For every query (same order as ``self.queries_ids``)
            a list of ``{'corpus_id': ..., 'score': ...}`` dicts
        :return: Dict with the keys 'accuracy@k', 'precision@k', 'recall@k', 'ndcg@k',
            'mrr@k' and 'map@k', each mapping k to the averaged value
        """
        # Init score computation values
        num_hits_at_k = {k: 0 for k in self.accuracy_at_k}
        precisions_at_k = {k: [] for k in self.precision_recall_at_k}
        recall_at_k = {k: [] for k in self.precision_recall_at_k}
        MRR = {k: 0 for k in self.mrr_at_k}
        ndcg = {k: [] for k in self.ndcg_at_k}
        AveP_at_k = {k: [] for k in self.map_at_k}

        # Compute scores on results
        for query_itr in range(len(queries_result_list)):
            query_id = self.queries_ids[query_itr]

            # Sort scores
            top_hits = sorted(queries_result_list[query_itr], key=lambda x: x['score'], reverse=True)
            query_relevant_docs = self.relevant_docs[query_id]

            # Accuracy@k - We count the result correct, if at least one relevant doc is accross the top-k documents
            for k_val in self.accuracy_at_k:
                for hit in top_hits[0:k_val]:
                    if hit['corpus_id'] in query_relevant_docs:
                        num_hits_at_k[k_val] += 1
                        break

            # Precision and Recall@k
            for k_val in self.precision_recall_at_k:
                num_correct = 0
                for hit in top_hits[0:k_val]:
                    if hit['corpus_id'] in query_relevant_docs:
                        num_correct += 1

                precisions_at_k[k_val].append(num_correct / k_val)
                recall_at_k[k_val].append(num_correct / len(query_relevant_docs))

            # MRR@k
            for k_val in self.mrr_at_k:
                for rank, hit in enumerate(top_hits[0:k_val]):
                    if hit['corpus_id'] in query_relevant_docs:
                        MRR[k_val] += 1.0 / (rank + 1)
                        break

            # NDCG@k
            for k_val in self.ndcg_at_k:
                predicted_relevance = [1 if top_hit['corpus_id'] in query_relevant_docs else 0 for top_hit in top_hits[0:k_val]]
                true_relevances = [1] * len(query_relevant_docs)

                ndcg_value = self.compute_dcg_at_k(predicted_relevance, k_val) / self.compute_dcg_at_k(true_relevances, k_val)
                ndcg[k_val].append(ndcg_value)

            # MAP@k
            for k_val in self.map_at_k:
                num_correct = 0
                sum_precisions = 0

                for rank, hit in enumerate(top_hits[0:k_val]):
                    if hit['corpus_id'] in query_relevant_docs:
                        num_correct += 1
                        sum_precisions += num_correct / (rank + 1)

                avg_precision = sum_precisions / min(k_val, len(query_relevant_docs))
                AveP_at_k[k_val].append(avg_precision)

        # Compute averages
        for k in num_hits_at_k:
            num_hits_at_k[k] /= len(self.queries_ids)

        for k in precisions_at_k:
            precisions_at_k[k] = np.mean(precisions_at_k[k])

        for k in recall_at_k:
            recall_at_k[k] = np.mean(recall_at_k[k])

        for k in ndcg:
            ndcg[k] = np.mean(ndcg[k])

        for k in MRR:
            MRR[k] /= len(self.queries_ids)

        for k in AveP_at_k:
            AveP_at_k[k] = np.mean(AveP_at_k[k])


        return {'accuracy@k': num_hits_at_k, 'precision@k': precisions_at_k, 'recall@k': recall_at_k, 'ndcg@k': ndcg, 'mrr@k': MRR, 'map@k': AveP_at_k}


    def output_scores(self, scores):
        """Log every metric of ``scores`` (the dict returned by ``compute_metrics``)."""
        for k in scores['accuracy@k']:
            logger.info("Accuracy@{}: {:.2f}%".format(k, scores['accuracy@k'][k]*100))

        for k in scores['precision@k']:
            logger.info("Precision@{}: {:.2f}%".format(k, scores['precision@k'][k]*100))

        for k in scores['recall@k']:
            logger.info("Recall@{}: {:.2f}%".format(k, scores['recall@k'][k]*100))

        for k in scores['mrr@k']:
            logger.info("MRR@{}: {:.4f}".format(k, scores['mrr@k'][k]))

        for k in scores['ndcg@k']:
            logger.info("NDCG@{}: {:.4f}".format(k, scores['ndcg@k'][k]))

        for k in scores['map@k']:
            logger.info("MAP@{}: {:.4f}".format(k, scores['map@k'][k]))


    @staticmethod
    def compute_dcg_at_k(relevances, k):
        """Discounted cumulative gain of the first ``k`` entries of ``relevances``."""
        dcg = 0
        for i in range(min(len(relevances), k)):
            dcg += relevances[i] / np.log2(i + 2)  #+2 as we start our idx at 0
        return dcg

    def get_embeddings(self,model,qr,qr_idx):
        """
        Embed the rows ``qr_idx`` of ``qr`` with ``model`` and return a list of numpy arrays.

        Relies on the notebook globals ``img_dir``, ``transform_pipe``, ``BATCH_SIZE`` and ``device``.
        NOTE(review): the loader uses the global BATCH_SIZE, not ``self.batch_size`` - confirm
        which one is intended.
        """
        info_dataset = infodataset(qr,qr_idx,img_dir,transform = transform_pipe)
        info_loader = DataLoader(
            info_dataset,
            batch_size=BATCH_SIZE,
            pin_memory=True,
            num_workers = 8,
            )

        embeddings = list()
        since = time.time()

        # Switching to eval mode once is enough; it does not change between batches.
        model.eval()
        with torch.no_grad():
            for i,batch in enumerate(info_loader):
                text = torch.as_tensor(batch['token']).to(device)
                images = torch.as_tensor(batch['image']).to(device)

                yhat = model(images,text)

                embeddings.extend(row.cpu().detach().numpy() for row in yhat)

                if(i%40==0):
                    print(f'{i} iterations hase been completed, and model is running for {time.time()-since}')

        return embeddings


In [None]:
def _load_queries_and_relevant_docs(path):
    """Read the two objects pickled back to back in ``path``: queries first, then relevant docs."""
    # pickle can execute arbitrary code while loading - only use on files you created yourself.
    with open(path, 'rb') as handle:
        loaded_queries = pickle.load(handle)
        loaded_rel_docs = pickle.load(handle)
    return loaded_queries, loaded_rel_docs


queries_dev, rel_docs_dev = _load_queries_and_relevant_docs(folder_quora+'/devinfo_100.txt')
queries_test, rel_docs_test = _load_queries_and_relevant_docs(folder_quora+'/testinfo_100.txt')
queries_train, rel_docs_train = _load_queries_and_relevant_docs(folder_quora+'/traininfo_100.txt')

In [None]:
def string_sentence(i,qr):
    """Join title, space-separated tags and text of row ``i`` of ``qr`` into one string."""
    title, tags, text = (qr.loc[i, column] for column in ('Title', 'Tags', 'Text'))
    return ' '.join((title, ' '.join(tags), text))

In [None]:
def corpus(qr):
    """Build a dict that maps every index label of ``qr`` to its ``string_sentence`` text."""
    return {row_id: string_sentence(row_id, qr) for row_id in qr.index.values}

In [None]:
# Retrieval evaluation on the training questions; results go to the '/info' folder.
info_output_dir = folder+'/info'
train_corpus = corpus(train_qr)
train_inforet = InformationRetreivalEvaluator(
    train_qr,
    queries_train,
    train_corpus,
    rel_docs_train,
)
os.makedirs(info_output_dir,exist_ok=True)
train_inforet(model,info_output_dir)

0 iterations hase been completed, and model is running for 0.8856837749481201
40 iterations hase been completed, and model is running for 3.478123188018799


Token indices sequence length is longer than the specified maximum sequence length for this model (802 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (1843 > 512). Running this sequence through the model will result in indexing errors


80 iterations hase been completed, and model is running for 6.241954565048218
0 iterations hase been completed, and model is running for 0.9674324989318848
40 iterations hase been completed, and model is running for 3.6424734592437744


Token indices sequence length is longer than the specified maximum sequence length for this model (893 > 512). Running this sequence through the model will result in indexing errors


80 iterations hase been completed, and model is running for 6.236653089523315
120 iterations hase been completed, and model is running for 8.907212257385254
160 iterations hase been completed, and model is running for 11.506620645523071


Token indices sequence length is longer than the specified maximum sequence length for this model (1708 > 512). Running this sequence through the model will result in indexing errors


200 iterations hase been completed, and model is running for 13.969954013824463


Token indices sequence length is longer than the specified maximum sequence length for this model (1957 > 512). Running this sequence through the model will result in indexing errors


240 iterations hase been completed, and model is running for 16.54749846458435
280 iterations hase been completed, and model is running for 19.120650053024292


Token indices sequence length is longer than the specified maximum sequence length for this model (1759 > 512). Running this sequence through the model will result in indexing errors


320 iterations hase been completed, and model is running for 21.68238854408264
360 iterations hase been completed, and model is running for 24.283759593963623


Token indices sequence length is longer than the specified maximum sequence length for this model (4157 > 512). Running this sequence through the model will result in indexing errors


400 iterations hase been completed, and model is running for 26.956613063812256


Token indices sequence length is longer than the specified maximum sequence length for this model (578 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (1137 > 512). Running this sequence through the model will result in indexing errors


440 iterations hase been completed, and model is running for 29.503349781036377
480 iterations hase been completed, and model is running for 32.1271710395813


Token indices sequence length is longer than the specified maximum sequence length for this model (3270 > 512). Running this sequence through the model will result in indexing errors


520 iterations hase been completed, and model is running for 34.6437668800354
560 iterations hase been completed, and model is running for 37.2528018951416
600 iterations hase been completed, and model is running for 39.737855434417725
640 iterations hase been completed, and model is running for 42.31553792953491
680 iterations hase been completed, and model is running for 44.842400789260864


Token indices sequence length is longer than the specified maximum sequence length for this model (2115 > 512). Running this sequence through the model will result in indexing errors


720 iterations hase been completed, and model is running for 47.46142554283142


Token indices sequence length is longer than the specified maximum sequence length for this model (550 > 512). Running this sequence through the model will result in indexing errors


760 iterations hase been completed, and model is running for 49.96222448348999
800 iterations hase been completed, and model is running for 52.40080547332764


Token indices sequence length is longer than the specified maximum sequence length for this model (523 > 512). Running this sequence through the model will result in indexing errors


image file is truncated (3 bytes not processed)
Something went wrong with the image of id 164569
840 iterations hase been completed, and model is running for 54.87905836105347
880 iterations hase been completed, and model is running for 57.41687870025635
920 iterations hase been completed, and model is running for 60.00661587715149
960 iterations hase been completed, and model is running for 62.49047827720642
1000 iterations hase been completed, and model is running for 65.07870864868164


Token indices sequence length is longer than the specified maximum sequence length for this model (1339 > 512). Running this sequence through the model will result in indexing errors


1040 iterations hase been completed, and model is running for 67.5641942024231
1080 iterations hase been completed, and model is running for 70.05850458145142
1120 iterations hase been completed, and model is running for 72.69360303878784
1160 iterations hase been completed, and model is running for 75.23489332199097
1200 iterations hase been completed, and model is running for 77.69236445426941
1240 iterations hase been completed, and model is running for 80.37102842330933


Token indices sequence length is longer than the specified maximum sequence length for this model (1908 > 512). Running this sequence through the model will result in indexing errors


1280 iterations hase been completed, and model is running for 82.93906140327454
1320 iterations hase been completed, and model is running for 85.43973636627197
1360 iterations hase been completed, and model is running for 88.11757683753967
1400 iterations hase been completed, and model is running for 90.62862396240234


Token indices sequence length is longer than the specified maximum sequence length for this model (4089 > 512). Running this sequence through the model will result in indexing errors


1440 iterations hase been completed, and model is running for 93.25732207298279


Token indices sequence length is longer than the specified maximum sequence length for this model (715 > 512). Running this sequence through the model will result in indexing errors


1480 iterations hase been completed, and model is running for 96.01798963546753


Token indices sequence length is longer than the specified maximum sequence length for this model (527 > 512). Running this sequence through the model will result in indexing errors


1520 iterations hase been completed, and model is running for 99.782719373703


Token indices sequence length is longer than the specified maximum sequence length for this model (2386 > 512). Running this sequence through the model will result in indexing errors


1560 iterations hase been completed, and model is running for 102.40466094017029


Token indices sequence length is longer than the specified maximum sequence length for this model (585 > 512). Running this sequence through the model will result in indexing errors


1600 iterations hase been completed, and model is running for 104.8958432674408
1640 iterations hase been completed, and model is running for 107.45791983604431
1680 iterations hase been completed, and model is running for 110.0976824760437
1720 iterations hase been completed, and model is running for 112.73395323753357
1760 iterations hase been completed, and model is running for 115.32906341552734
1800 iterations hase been completed, and model is running for 118.29383659362793


Token indices sequence length is longer than the specified maximum sequence length for this model (1843 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (3436 > 512). Running this sequence through the model will result in indexing errors


1840 iterations hase been completed, and model is running for 120.97662997245789
1880 iterations hase been completed, and model is running for 123.78635239601135


Token indices sequence length is longer than the specified maximum sequence length for this model (550 > 512). Running this sequence through the model will result in indexing errors


1920 iterations hase been completed, and model is running for 126.34797668457031
1960 iterations hase been completed, and model is running for 128.80212664604187
2000 iterations hase been completed, and model is running for 131.4257185459137
2040 iterations hase been completed, and model is running for 133.89185762405396
2080 iterations hase been completed, and model is running for 136.45922708511353


Token indices sequence length is longer than the specified maximum sequence length for this model (554 > 512). Running this sequence through the model will result in indexing errors


2120 iterations hase been completed, and model is running for 139.6079216003418


Token indices sequence length is longer than the specified maximum sequence length for this model (628 > 512). Running this sequence through the model will result in indexing errors


2160 iterations hase been completed, and model is running for 142.32287073135376
2200 iterations hase been completed, and model is running for 144.99061727523804


Token indices sequence length is longer than the specified maximum sequence length for this model (4009 > 512). Running this sequence through the model will result in indexing errors


2240 iterations hase been completed, and model is running for 147.4653604030609
2280 iterations hase been completed, and model is running for 150.0389745235443
2320 iterations hase been completed, and model is running for 152.69701170921326


Token indices sequence length is longer than the specified maximum sequence length for this model (613 > 512). Running this sequence through the model will result in indexing errors


2360 iterations hase been completed, and model is running for 155.3216483592987
2400 iterations hase been completed, and model is running for 158.21587944030762


Token indices sequence length is longer than the specified maximum sequence length for this model (538 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (969 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (664 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (3527 > 512). Running this sequence through the model will result in indexing errors


2440 iterations hase been completed, and model is running for 161.16766214370728


Token indices sequence length is longer than the specified maximum sequence length for this model (2095 > 512). Running this sequence through the model will result in indexing errors


2480 iterations hase been completed, and model is running for 163.84451842308044
2520 iterations hase been completed, and model is running for 166.54182314872742


Token indices sequence length is longer than the specified maximum sequence length for this model (4285 > 512). Running this sequence through the model will result in indexing errors


2560 iterations hase been completed, and model is running for 169.3038854598999
2600 iterations hase been completed, and model is running for 171.87788891792297
2640 iterations hase been completed, and model is running for 174.56079363822937


Token indices sequence length is longer than the specified maximum sequence length for this model (555 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (606 > 512). Running this sequence through the model will result in indexing errors


2680 iterations hase been completed, and model is running for 177.16135692596436


Token indices sequence length is longer than the specified maximum sequence length for this model (10008 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (655 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (1656 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (806 > 512). Running this sequence through the model will result in indexing errors


2720 iterations hase been completed, and model is running for 179.80821800231934
2760 iterations hase been completed, and model is running for 182.4702832698822
2800 iterations hase been completed, and model is running for 185.17908382415771


Token indices sequence length is longer than the specified maximum sequence length for this model (580 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (2841 > 512). Running this sequence through the model will result in indexing errors


2840 iterations hase been completed, and model is running for 187.91712498664856
2880 iterations hase been completed, and model is running for 190.4338505268097


Token indices sequence length is longer than the specified maximum sequence length for this model (1119 > 512). Running this sequence through the model will result in indexing errors


2920 iterations hase been completed, and model is running for 193.2725019454956


Token indices sequence length is longer than the specified maximum sequence length for this model (1612 > 512). Running this sequence through the model will result in indexing errors


2960 iterations hase been completed, and model is running for 195.98811173439026


Token indices sequence length is longer than the specified maximum sequence length for this model (694 > 512). Running this sequence through the model will result in indexing errors


3000 iterations hase been completed, and model is running for 198.6402509212494


Token indices sequence length is longer than the specified maximum sequence length for this model (1140 > 512). Running this sequence through the model will result in indexing errors


3040 iterations hase been completed, and model is running for 201.35849714279175


Token indices sequence length is longer than the specified maximum sequence length for this model (2514 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (808 > 512). Running this sequence through the model will result in indexing errors


3080 iterations hase been completed, and model is running for 205.36569929122925


Token indices sequence length is longer than the specified maximum sequence length for this model (1014 > 512). Running this sequence through the model will result in indexing errors


3120 iterations hase been completed, and model is running for 208.04548406600952
3160 iterations hase been completed, and model is running for 210.60832285881042
3200 iterations hase been completed, and model is running for 213.16331100463867


Token indices sequence length is longer than the specified maximum sequence length for this model (625 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (897 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (1490 > 512). Running this sequence through the model will result in indexing errors


3240 iterations hase been completed, and model is running for 215.69271159172058


Token indices sequence length is longer than the specified maximum sequence length for this model (536 > 512). Running this sequence through the model will result in indexing errors


3280 iterations hase been completed, and model is running for 218.24050307273865


Token indices sequence length is longer than the specified maximum sequence length for this model (534 > 512). Running this sequence through the model will result in indexing errors


3320 iterations hase been completed, and model is running for 220.88585305213928


Token indices sequence length is longer than the specified maximum sequence length for this model (792 > 512). Running this sequence through the model will result in indexing errors


3360 iterations hase been completed, and model is running for 223.5522825717926
3400 iterations hase been completed, and model is running for 226.57529187202454
3440 iterations hase been completed, and model is running for 229.0826392173767
3480 iterations hase been completed, and model is running for 231.7031877040863
3520 iterations hase been completed, and model is running for 235.18933987617493
3560 iterations hase been completed, and model is running for 237.80187940597534


Token indices sequence length is longer than the specified maximum sequence length for this model (877 > 512). Running this sequence through the model will result in indexing errors


3600 iterations hase been completed, and model is running for 240.50283670425415
3640 iterations hase been completed, and model is running for 243.22559475898743


Token indices sequence length is longer than the specified maximum sequence length for this model (832 > 512). Running this sequence through the model will result in indexing errors


3680 iterations hase been completed, and model is running for 245.92620992660522


Token indices sequence length is longer than the specified maximum sequence length for this model (1085 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (2411 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (890 > 512). Running this sequence through the model will result in indexing errors


3720 iterations hase been completed, and model is running for 248.92879843711853


Token indices sequence length is longer than the specified maximum sequence length for this model (1823 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (766 > 512). Running this sequence through the model will result in indexing errors


3760 iterations hase been completed, and model is running for 252.3208270072937


Token indices sequence length is longer than the specified maximum sequence length for this model (5978 > 512). Running this sequence through the model will result in indexing errors


3800 iterations hase been completed, and model is running for 255.09083938598633


Token indices sequence length is longer than the specified maximum sequence length for this model (554 > 512). Running this sequence through the model will result in indexing errors


3840 iterations hase been completed, and model is running for 257.778799533844
3880 iterations hase been completed, and model is running for 260.4871551990509
3920 iterations hase been completed, and model is running for 263.0950622558594
3960 iterations hase been completed, and model is running for 265.6756157875061
4000 iterations hase been completed, and model is running for 268.29007029533386


Token indices sequence length is longer than the specified maximum sequence length for this model (1398 > 512). Running this sequence through the model will result in indexing errors


4040 iterations hase been completed, and model is running for 270.99666142463684


Token indices sequence length is longer than the specified maximum sequence length for this model (3989 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (533 > 512). Running this sequence through the model will result in indexing errors


4080 iterations hase been completed, and model is running for 273.81050086021423
4120 iterations hase been completed, and model is running for 276.5716519355774


Token indices sequence length is longer than the specified maximum sequence length for this model (576 > 512). Running this sequence through the model will result in indexing errors


4160 iterations hase been completed, and model is running for 279.1690547466278
4200 iterations hase been completed, and model is running for 281.7795042991638
4240 iterations hase been completed, and model is running for 284.361932516098


Token indices sequence length is longer than the specified maximum sequence length for this model (562 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (2560 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (590 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (651 > 512). Running this sequence through the model will result in indexing errors


4280 iterations hase been completed, and model is running for 286.9014928340912


Token indices sequence length is longer than the specified maximum sequence length for this model (1097 > 512). Running this sequence through the model will result in indexing errors


4320 iterations hase been completed, and model is running for 289.84517645835876


Token indices sequence length is longer than the specified maximum sequence length for this model (564 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (802 > 512). Running this sequence through the model will result in indexing errors


4360 iterations hase been completed, and model is running for 292.47769808769226
4400 iterations hase been completed, and model is running for 295.154988527298
4440 iterations hase been completed, and model is running for 297.8566460609436
4480 iterations hase been completed, and model is running for 300.2699739933014


Token indices sequence length is longer than the specified maximum sequence length for this model (528 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (876 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (593 > 512). Running this sequence through the model will result in indexing errors


4520 iterations hase been completed, and model is running for 303.19382309913635
4560 iterations hase been completed, and model is running for 305.8096787929535
4600 iterations hase been completed, and model is running for 309.006573677063
4640 iterations hase been completed, and model is running for 311.62696957588196


Token indices sequence length is longer than the specified maximum sequence length for this model (809 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (570 > 512). Running this sequence through the model will result in indexing errors


4680 iterations hase been completed, and model is running for 314.9603228569031
4720 iterations hase been completed, and model is running for 317.5916030406952
4760 iterations hase been completed, and model is running for 320.07470083236694
4800 iterations hase been completed, and model is running for 322.6126093864441


Token indices sequence length is longer than the specified maximum sequence length for this model (519 > 512). Running this sequence through the model will result in indexing errors


4840 iterations hase been completed, and model is running for 325.1393883228302


Token indices sequence length is longer than the specified maximum sequence length for this model (9739 > 512). Running this sequence through the model will result in indexing errors


4880 iterations hase been completed, and model is running for 327.7905304431915


Token indices sequence length is longer than the specified maximum sequence length for this model (1326 > 512). Running this sequence through the model will result in indexing errors


4920 iterations hase been completed, and model is running for 330.46126222610474


Token indices sequence length is longer than the specified maximum sequence length for this model (664 > 512). Running this sequence through the model will result in indexing errors


4960 iterations hase been completed, and model is running for 333.72746896743774


Token indices sequence length is longer than the specified maximum sequence length for this model (574 > 512). Running this sequence through the model will result in indexing errors


5000 iterations hase been completed, and model is running for 336.25618290901184


Token indices sequence length is longer than the specified maximum sequence length for this model (565 > 512). Running this sequence through the model will result in indexing errors


5040 iterations hase been completed, and model is running for 339.10152554512024


Token indices sequence length is longer than the specified maximum sequence length for this model (586 > 512). Running this sequence through the model will result in indexing errors


5080 iterations hase been completed, and model is running for 341.76183009147644
5120 iterations hase been completed, and model is running for 344.32851576805115


Token indices sequence length is longer than the specified maximum sequence length for this model (1601 > 512). Running this sequence through the model will result in indexing errors


5160 iterations hase been completed, and model is running for 346.9846725463867


Token indices sequence length is longer than the specified maximum sequence length for this model (618 > 512). Running this sequence through the model will result in indexing errors


5200 iterations hase been completed, and model is running for 349.68683218955994


Token indices sequence length is longer than the specified maximum sequence length for this model (740 > 512). Running this sequence through the model will result in indexing errors


5240 iterations hase been completed, and model is running for 352.36608028411865


Token indices sequence length is longer than the specified maximum sequence length for this model (716 > 512). Running this sequence through the model will result in indexing errors


5280 iterations hase been completed, and model is running for 355.61923575401306
5320 iterations hase been completed, and model is running for 358.3277733325958
5360 iterations hase been completed, and model is running for 360.964396238327
5400 iterations hase been completed, and model is running for 363.49634552001953
5440 iterations hase been completed, and model is running for 366.13134145736694


Token indices sequence length is longer than the specified maximum sequence length for this model (1074 > 512). Running this sequence through the model will result in indexing errors


5480 iterations hase been completed, and model is running for 368.8617489337921
5520 iterations hase been completed, and model is running for 371.5421905517578


Token indices sequence length is longer than the specified maximum sequence length for this model (1034 > 512). Running this sequence through the model will result in indexing errors


5560 iterations hase been completed, and model is running for 374.1155436038971


Token indices sequence length is longer than the specified maximum sequence length for this model (535 > 512). Running this sequence through the model will result in indexing errors


5600 iterations hase been completed, and model is running for 376.6978540420532
5640 iterations hase been completed, and model is running for 379.32415413856506
5680 iterations hase been completed, and model is running for 382.0591011047363
5720 iterations hase been completed, and model is running for 384.6441538333893
5760 iterations hase been completed, and model is running for 387.16873693466187
5800 iterations hase been completed, and model is running for 389.6358675956726


Token indices sequence length is longer than the specified maximum sequence length for this model (1446 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (527 > 512). Running this sequence through the model will result in indexing errors


5840 iterations hase been completed, and model is running for 392.26647758483887
5880 iterations hase been completed, and model is running for 394.8036277294159


Token indices sequence length is longer than the specified maximum sequence length for this model (1323 > 512). Running this sequence through the model will result in indexing errors


5920 iterations hase been completed, and model is running for 397.4328444004059


Token indices sequence length is longer than the specified maximum sequence length for this model (2384 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (703 > 512). Running this sequence through the model will result in indexing errors


5960 iterations hase been completed, and model is running for 399.9716296195984
6000 iterations hase been completed, and model is running for 402.5628294944763


Token indices sequence length is longer than the specified maximum sequence length for this model (522 > 512). Running this sequence through the model will result in indexing errors


6040 iterations hase been completed, and model is running for 406.0761706829071


Token indices sequence length is longer than the specified maximum sequence length for this model (566 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (1450 > 512). Running this sequence through the model will result in indexing errors


6080 iterations hase been completed, and model is running for 408.6216151714325
6120 iterations hase been completed, and model is running for 411.24605441093445


Token indices sequence length is longer than the specified maximum sequence length for this model (516 > 512). Running this sequence through the model will result in indexing errors


6160 iterations hase been completed, and model is running for 413.909930229187
6200 iterations hase been completed, and model is running for 416.4760594367981
6240 iterations hase been completed, and model is running for 419.2959339618683


Token indices sequence length is longer than the specified maximum sequence length for this model (570 > 512). Running this sequence through the model will result in indexing errors


6280 iterations hase been completed, and model is running for 421.9280688762665


Token indices sequence length is longer than the specified maximum sequence length for this model (695 > 512). Running this sequence through the model will result in indexing errors


6320 iterations hase been completed, and model is running for 424.51249957084656
6360 iterations hase been completed, and model is running for 427.172447681427


Token indices sequence length is longer than the specified maximum sequence length for this model (1818 > 512). Running this sequence through the model will result in indexing errors


6400 iterations hase been completed, and model is running for 429.7524244785309
6440 iterations hase been completed, and model is running for 432.4273257255554


Token indices sequence length is longer than the specified maximum sequence length for this model (687 > 512). Running this sequence through the model will result in indexing errors


6480 iterations hase been completed, and model is running for 435.11587166786194


Token indices sequence length is longer than the specified maximum sequence length for this model (640 > 512). Running this sequence through the model will result in indexing errors


6520 iterations hase been completed, and model is running for 437.74440121650696


Token indices sequence length is longer than the specified maximum sequence length for this model (898 > 512). Running this sequence through the model will result in indexing errors


6560 iterations hase been completed, and model is running for 440.54165506362915


Token indices sequence length is longer than the specified maximum sequence length for this model (732 > 512). Running this sequence through the model will result in indexing errors


6600 iterations hase been completed, and model is running for 443.107709646225


Token indices sequence length is longer than the specified maximum sequence length for this model (2097 > 512). Running this sequence through the model will result in indexing errors


6640 iterations hase been completed, and model is running for 445.6573135852814
6680 iterations hase been completed, and model is running for 448.13474702835083
6720 iterations hase been completed, and model is running for 450.7053611278534
6760 iterations hase been completed, and model is running for 453.33682799339294
6800 iterations hase been completed, and model is running for 455.9411656856537


Token indices sequence length is longer than the specified maximum sequence length for this model (684 > 512). Running this sequence through the model will result in indexing errors


6840 iterations hase been completed, and model is running for 458.44456219673157
6880 iterations hase been completed, and model is running for 461.0957806110382
6920 iterations hase been completed, and model is running for 464.2354726791382
6960 iterations hase been completed, and model is running for 466.85004925727844
7000 iterations hase been completed, and model is running for 469.48578691482544


Token indices sequence length is longer than the specified maximum sequence length for this model (533 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (800 > 512). Running this sequence through the model will result in indexing errors


7040 iterations hase been completed, and model is running for 472.02378153800964
7080 iterations hase been completed, and model is running for 474.7024838924408


Token indices sequence length is longer than the specified maximum sequence length for this model (517 > 512). Running this sequence through the model will result in indexing errors


7120 iterations hase been completed, and model is running for 477.3007667064667


Token indices sequence length is longer than the specified maximum sequence length for this model (590 > 512). Running this sequence through the model will result in indexing errors


7160 iterations hase been completed, and model is running for 479.86666917800903


Token indices sequence length is longer than the specified maximum sequence length for this model (743 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (2098 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (637 > 512). Running this sequence through the model will result in indexing errors


7200 iterations hase been completed, and model is running for 482.9584107398987


Token indices sequence length is longer than the specified maximum sequence length for this model (558 > 512). Running this sequence through the model will result in indexing errors


7240 iterations hase been completed, and model is running for 485.8827347755432


Token indices sequence length is longer than the specified maximum sequence length for this model (6048 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (586 > 512). Running this sequence through the model will result in indexing errors


7280 iterations hase been completed, and model is running for 488.4747393131256
7320 iterations hase been completed, and model is running for 490.95707178115845


Token indices sequence length is longer than the specified maximum sequence length for this model (612 > 512). Running this sequence through the model will result in indexing errors


7360 iterations hase been completed, and model is running for 493.84721326828003


Token indices sequence length is longer than the specified maximum sequence length for this model (2166 > 512). Running this sequence through the model will result in indexing errors


7400 iterations hase been completed, and model is running for 496.64507126808167


Token indices sequence length is longer than the specified maximum sequence length for this model (1128 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (767 > 512). Running this sequence through the model will result in indexing errors


7440 iterations hase been completed, and model is running for 499.2661302089691


Token indices sequence length is longer than the specified maximum sequence length for this model (609 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (851 > 512). Running this sequence through the model will result in indexing errors


7480 iterations hase been completed, and model is running for 502.07439517974854
7520 iterations hase been completed, and model is running for 504.57206082344055


Token indices sequence length is longer than the specified maximum sequence length for this model (600 > 512). Running this sequence through the model will result in indexing errors


7560 iterations hase been completed, and model is running for 507.19387674331665
7600 iterations hase been completed, and model is running for 509.7615566253662


Token indices sequence length is longer than the specified maximum sequence length for this model (6565 > 512). Running this sequence through the model will result in indexing errors


7640 iterations hase been completed, and model is running for 512.2549993991852


Token indices sequence length is longer than the specified maximum sequence length for this model (622 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (1540 > 512). Running this sequence through the model will result in indexing errors


7680 iterations hase been completed, and model is running for 514.8569846153259
7720 iterations hase been completed, and model is running for 517.3507027626038


Token indices sequence length is longer than the specified maximum sequence length for this model (2017 > 512). Running this sequence through the model will result in indexing errors


7760 iterations hase been completed, and model is running for 519.8612995147705


Token indices sequence length is longer than the specified maximum sequence length for this model (641 > 512). Running this sequence through the model will result in indexing errors


7800 iterations hase been completed, and model is running for 522.583331823349


Token indices sequence length is longer than the specified maximum sequence length for this model (946 > 512). Running this sequence through the model will result in indexing errors


7840 iterations hase been completed, and model is running for 525.0733304023743
7880 iterations hase been completed, and model is running for 529.42032122612


Token indices sequence length is longer than the specified maximum sequence length for this model (582 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (798 > 512). Running this sequence through the model will result in indexing errors


7920 iterations hase been completed, and model is running for 532.4331471920013
7960 iterations hase been completed, and model is running for 535.1090633869171


Token indices sequence length is longer than the specified maximum sequence length for this model (978 > 512). Running this sequence through the model will result in indexing errors


8000 iterations hase been completed, and model is running for 537.8709704875946


Token indices sequence length is longer than the specified maximum sequence length for this model (1444 > 512). Running this sequence through the model will result in indexing errors


8040 iterations hase been completed, and model is running for 540.4366838932037
8080 iterations hase been completed, and model is running for 542.9886493682861


Token indices sequence length is longer than the specified maximum sequence length for this model (594 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (531 > 512). Running this sequence through the model will result in indexing errors


8120 iterations hase been completed, and model is running for 545.8001339435577
8160 iterations hase been completed, and model is running for 548.3953199386597


Token indices sequence length is longer than the specified maximum sequence length for this model (949 > 512). Running this sequence through the model will result in indexing errors


8200 iterations hase been completed, and model is running for 551.5856301784515


Token indices sequence length is longer than the specified maximum sequence length for this model (897 > 512). Running this sequence through the model will result in indexing errors


8240 iterations hase been completed, and model is running for 554.1380264759064


Token indices sequence length is longer than the specified maximum sequence length for this model (2194 > 512). Running this sequence through the model will result in indexing errors


8280 iterations hase been completed, and model is running for 556.8071329593658


Token indices sequence length is longer than the specified maximum sequence length for this model (2452 > 512). Running this sequence through the model will result in indexing errors


8320 iterations hase been completed, and model is running for 559.4950938224792


Token indices sequence length is longer than the specified maximum sequence length for this model (517 > 512). Running this sequence through the model will result in indexing errors


8360 iterations hase been completed, and model is running for 562.093873500824


Token indices sequence length is longer than the specified maximum sequence length for this model (1260 > 512). Running this sequence through the model will result in indexing errors


8400 iterations hase been completed, and model is running for 564.6042695045471
8440 iterations hase been completed, and model is running for 567.7225923538208


Token indices sequence length is longer than the specified maximum sequence length for this model (607 > 512). Running this sequence through the model will result in indexing errors


8480 iterations hase been completed, and model is running for 570.3503630161285


Token indices sequence length is longer than the specified maximum sequence length for this model (516 > 512). Running this sequence through the model will result in indexing errors


8520 iterations hase been completed, and model is running for 572.8838589191437
8560 iterations hase been completed, and model is running for 575.4593870639801
8600 iterations hase been completed, and model is running for 578.2190747261047


Token indices sequence length is longer than the specified maximum sequence length for this model (572 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (530 > 512). Running this sequence through the model will result in indexing errors


8640 iterations hase been completed, and model is running for 581.0213322639465
8680 iterations hase been completed, and model is running for 584.4047393798828


Token indices sequence length is longer than the specified maximum sequence length for this model (4049 > 512). Running this sequence through the model will result in indexing errors


8720 iterations hase been completed, and model is running for 586.9005341529846
8760 iterations hase been completed, and model is running for 589.5471160411835
8800 iterations hase been completed, and model is running for 592.2239496707916
8840 iterations hase been completed, and model is running for 595.238753080368
8880 iterations hase been completed, and model is running for 597.8540415763855


Token indices sequence length is longer than the specified maximum sequence length for this model (853 > 512). Running this sequence through the model will result in indexing errors


8920 iterations hase been completed, and model is running for 600.5504133701324


Token indices sequence length is longer than the specified maximum sequence length for this model (1249 > 512). Running this sequence through the model will result in indexing errors


8960 iterations hase been completed, and model is running for 603.1194734573364


Token indices sequence length is longer than the specified maximum sequence length for this model (1101 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (564 > 512). Running this sequence through the model will result in indexing errors


9000 iterations hase been completed, and model is running for 605.7586433887482


Token indices sequence length is longer than the specified maximum sequence length for this model (1318 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (664 > 512). Running this sequence through the model will result in indexing errors


9040 iterations hase been completed, and model is running for 608.505049943924


Token indices sequence length is longer than the specified maximum sequence length for this model (1002 > 512). Running this sequence through the model will result in indexing errors


9080 iterations hase been completed, and model is running for 611.0702562332153
9120 iterations hase been completed, and model is running for 613.6226041316986
9160 iterations hase been completed, and model is running for 616.1886417865753


Token indices sequence length is longer than the specified maximum sequence length for this model (591 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (734 > 512). Running this sequence through the model will result in indexing errors


9200 iterations hase been completed, and model is running for 618.7480223178864
9240 iterations hase been completed, and model is running for 621.5348496437073
9280 iterations hase been completed, and model is running for 624.298686504364
9320 iterations hase been completed, and model is running for 626.8227035999298


Token indices sequence length is longer than the specified maximum sequence length for this model (740 > 512). Running this sequence through the model will result in indexing errors


9360 iterations hase been completed, and model is running for 629.5237257480621


Token indices sequence length is longer than the specified maximum sequence length for this model (565 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (2879 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (3045 > 512). Running this sequence through the model will result in indexing errors


9400 iterations hase been completed, and model is running for 632.2836787700653


Token indices sequence length is longer than the specified maximum sequence length for this model (2147 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (777 > 512). Running this sequence through the model will result in indexing errors


9440 iterations hase been completed, and model is running for 635.9755458831787


Token indices sequence length is longer than the specified maximum sequence length for this model (553 > 512). Running this sequence through the model will result in indexing errors


9480 iterations hase been completed, and model is running for 638.7524785995483
9520 iterations hase been completed, and model is running for 641.2879300117493


Token indices sequence length is longer than the specified maximum sequence length for this model (2334 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (4282 > 512). Running this sequence through the model will result in indexing errors


9560 iterations hase been completed, and model is running for 644.2368543148041


Token indices sequence length is longer than the specified maximum sequence length for this model (530 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (914 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (526 > 512). Running this sequence through the model will result in indexing errors


9600 iterations hase been completed, and model is running for 647.0702791213989
9640 iterations hase been completed, and model is running for 649.6199643611908


Token indices sequence length is longer than the specified maximum sequence length for this model (668 > 512). Running this sequence through the model will result in indexing errors


9680 iterations hase been completed, and model is running for 652.2019488811493


Token indices sequence length is longer than the specified maximum sequence length for this model (1156 > 512). Running this sequence through the model will result in indexing errors


9720 iterations hase been completed, and model is running for 654.7404277324677
9760 iterations hase been completed, and model is running for 657.4567105770111


Token indices sequence length is longer than the specified maximum sequence length for this model (1487 > 512). Running this sequence through the model will result in indexing errors


9800 iterations hase been completed, and model is running for 660.1388881206512
9840 iterations hase been completed, and model is running for 662.8936741352081


Token indices sequence length is longer than the specified maximum sequence length for this model (743 > 512). Running this sequence through the model will result in indexing errors


9880 iterations hase been completed, and model is running for 665.5986750125885


Token indices sequence length is longer than the specified maximum sequence length for this model (831 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (1037 > 512). Running this sequence through the model will result in indexing errors


9920 iterations hase been completed, and model is running for 668.1550042629242
9960 iterations hase been completed, and model is running for 670.6542382240295


Token indices sequence length is longer than the specified maximum sequence length for this model (642 > 512). Running this sequence through the model will result in indexing errors


10000 iterations hase been completed, and model is running for 673.132119178772


Token indices sequence length is longer than the specified maximum sequence length for this model (591 > 512). Running this sequence through the model will result in indexing errors


10040 iterations hase been completed, and model is running for 675.68514752388
10080 iterations hase been completed, and model is running for 678.4440426826477
10120 iterations hase been completed, and model is running for 681.019781589508


Token indices sequence length is longer than the specified maximum sequence length for this model (1068 > 512). Running this sequence through the model will result in indexing errors


10160 iterations hase been completed, and model is running for 683.7633554935455


Token indices sequence length is longer than the specified maximum sequence length for this model (6808 > 512). Running this sequence through the model will result in indexing errors


10200 iterations hase been completed, and model is running for 686.642404794693


Token indices sequence length is longer than the specified maximum sequence length for this model (5480 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (640 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (3952 > 512). Running this sequence through the model will result in indexing errors


10240 iterations hase been completed, and model is running for 689.8802690505981
10280 iterations hase been completed, and model is running for 692.3814628124237


Token indices sequence length is longer than the specified maximum sequence length for this model (1139 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (583 > 512). Running this sequence through the model will result in indexing errors


10320 iterations hase been completed, and model is running for 694.865713596344
10360 iterations hase been completed, and model is running for 697.3613646030426
10400 iterations hase been completed, and model is running for 699.9473583698273


Token indices sequence length is longer than the specified maximum sequence length for this model (3519 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (524 > 512). Running this sequence through the model will result in indexing errors


10440 iterations hase been completed, and model is running for 702.6770477294922


Token indices sequence length is longer than the specified maximum sequence length for this model (978 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (2147 > 512). Running this sequence through the model will result in indexing errors


10480 iterations hase been completed, and model is running for 705.2827270030975
10520 iterations hase been completed, and model is running for 707.9017548561096
10560 iterations hase been completed, and model is running for 710.4367198944092


Token indices sequence length is longer than the specified maximum sequence length for this model (845 > 512). Running this sequence through the model will result in indexing errors


10600 iterations hase been completed, and model is running for 713.1932446956635


Token indices sequence length is longer than the specified maximum sequence length for this model (817 > 512). Running this sequence through the model will result in indexing errors


10640 iterations hase been completed, and model is running for 715.921689748764


Token indices sequence length is longer than the specified maximum sequence length for this model (887 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (1176 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (590 > 512). Running this sequence through the model will result in indexing errors


10680 iterations hase been completed, and model is running for 718.5348875522614
10720 iterations hase been completed, and model is running for 721.2549171447754
10760 iterations hase been completed, and model is running for 724.7239398956299


Token indices sequence length is longer than the specified maximum sequence length for this model (1099 > 512). Running this sequence through the model will result in indexing errors


10800 iterations hase been completed, and model is running for 727.4743309020996


Token indices sequence length is longer than the specified maximum sequence length for this model (960 > 512). Running this sequence through the model will result in indexing errors


10840 iterations hase been completed, and model is running for 730.2281744480133


Token indices sequence length is longer than the specified maximum sequence length for this model (2163 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (572 > 512). Running this sequence through the model will result in indexing errors


10880 iterations hase been completed, and model is running for 732.761396408081


Token indices sequence length is longer than the specified maximum sequence length for this model (4475 > 512). Running this sequence through the model will result in indexing errors


10920 iterations hase been completed, and model is running for 735.3219878673553


Token indices sequence length is longer than the specified maximum sequence length for this model (803 > 512). Running this sequence through the model will result in indexing errors


10960 iterations hase been completed, and model is running for 737.854838848114
11000 iterations hase been completed, and model is running for 740.5405945777893


Token indices sequence length is longer than the specified maximum sequence length for this model (2251 > 512). Running this sequence through the model will result in indexing errors


11040 iterations hase been completed, and model is running for 743.2512814998627
11080 iterations hase been completed, and model is running for 746.0111434459686
11120 iterations hase been completed, and model is running for 748.9023287296295


Token indices sequence length is longer than the specified maximum sequence length for this model (595 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (598 > 512). Running this sequence through the model will result in indexing errors


11160 iterations hase been completed, and model is running for 751.8160936832428
11200 iterations hase been completed, and model is running for 754.4149954319


Token indices sequence length is longer than the specified maximum sequence length for this model (580 > 512). Running this sequence through the model will result in indexing errors


11240 iterations hase been completed, and model is running for 756.9940860271454
11280 iterations hase been completed, and model is running for 759.7167963981628
11320 iterations hase been completed, and model is running for 762.2649853229523


Token indices sequence length is longer than the specified maximum sequence length for this model (1023 > 512). Running this sequence through the model will result in indexing errors


11360 iterations hase been completed, and model is running for 765.1017549037933


Token indices sequence length is longer than the specified maximum sequence length for this model (731 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (693 > 512). Running this sequence through the model will result in indexing errors


11400 iterations hase been completed, and model is running for 767.6605455875397
11440 iterations hase been completed, and model is running for 770.3920795917511
11480 iterations hase been completed, and model is running for 772.9864735603333


Token indices sequence length is longer than the specified maximum sequence length for this model (846 > 512). Running this sequence through the model will result in indexing errors


11520 iterations hase been completed, and model is running for 775.5555212497711
11560 iterations hase been completed, and model is running for 778.0402495861053


Token indices sequence length is longer than the specified maximum sequence length for this model (3758 > 512). Running this sequence through the model will result in indexing errors


11600 iterations hase been completed, and model is running for 780.7287964820862
11640 iterations hase been completed, and model is running for 783.3599708080292
11680 iterations hase been completed, and model is running for 786.1294980049133


Token indices sequence length is longer than the specified maximum sequence length for this model (1402 > 512). Running this sequence through the model will result in indexing errors


11720 iterations hase been completed, and model is running for 788.8468742370605
11760 iterations hase been completed, and model is running for 791.8915059566498
11800 iterations hase been completed, and model is running for 795.3838450908661
11840 iterations hase been completed, and model is running for 797.876843214035
11880 iterations hase been completed, and model is running for 800.4648530483246


Token indices sequence length is longer than the specified maximum sequence length for this model (1527 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (534 > 512). Running this sequence through the model will result in indexing errors


11920 iterations hase been completed, and model is running for 803.6095492839813
11960 iterations hase been completed, and model is running for 806.2846527099609
12000 iterations hase been completed, and model is running for 808.8828899860382


Token indices sequence length is longer than the specified maximum sequence length for this model (623 > 512). Running this sequence through the model will result in indexing errors


12040 iterations hase been completed, and model is running for 813.3911592960358
12080 iterations hase been completed, and model is running for 816.0184669494629


Token indices sequence length is longer than the specified maximum sequence length for this model (2323 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (1329 > 512). Running this sequence through the model will result in indexing errors


12120 iterations hase been completed, and model is running for 818.7142775058746


Token indices sequence length is longer than the specified maximum sequence length for this model (746 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (926 > 512). Running this sequence through the model will result in indexing errors


12160 iterations hase been completed, and model is running for 821.428320646286
12200 iterations hase been completed, and model is running for 823.9428496360779


Token indices sequence length is longer than the specified maximum sequence length for this model (1188 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (1987 > 512). Running this sequence through the model will result in indexing errors


12240 iterations hase been completed, and model is running for 826.6577868461609


Token indices sequence length is longer than the specified maximum sequence length for this model (567 > 512). Running this sequence through the model will result in indexing errors


12280 iterations hase been completed, and model is running for 829.8635613918304


Token indices sequence length is longer than the specified maximum sequence length for this model (538 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (756 > 512). Running this sequence through the model will result in indexing errors


12320 iterations hase been completed, and model is running for 832.3852245807648
12360 iterations hase been completed, and model is running for 835.0407660007477
12400 iterations hase been completed, and model is running for 837.6575331687927


Token indices sequence length is longer than the specified maximum sequence length for this model (804 > 512). Running this sequence through the model will result in indexing errors


12440 iterations hase been completed, and model is running for 840.3048188686371


Token indices sequence length is longer than the specified maximum sequence length for this model (1226 > 512). Running this sequence through the model will result in indexing errors


12480 iterations hase been completed, and model is running for 843.0698907375336


Token indices sequence length is longer than the specified maximum sequence length for this model (3010 > 512). Running this sequence through the model will result in indexing errors


12520 iterations hase been completed, and model is running for 845.7290391921997
12560 iterations hase been completed, and model is running for 848.3724799156189


Token indices sequence length is longer than the specified maximum sequence length for this model (534 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (1987 > 512). Running this sequence through the model will result in indexing errors


12600 iterations hase been completed, and model is running for 851.0218110084534
12640 iterations hase been completed, and model is running for 853.6075642108917
12680 iterations hase been completed, and model is running for 856.2977437973022
12720 iterations hase been completed, and model is running for 858.944712638855


Token indices sequence length is longer than the specified maximum sequence length for this model (586 > 512). Running this sequence through the model will result in indexing errors


12760 iterations hase been completed, and model is running for 861.7107825279236
12800 iterations hase been completed, and model is running for 864.2051668167114


Token indices sequence length is longer than the specified maximum sequence length for this model (1125 > 512). Running this sequence through the model will result in indexing errors
  "Palette images with Transparency expressed in bytes should be "


12840 iterations hase been completed, and model is running for 866.8670344352722
12880 iterations hase been completed, and model is running for 869.5995664596558


Token indices sequence length is longer than the specified maximum sequence length for this model (1668 > 512). Running this sequence through the model will result in indexing errors


12920 iterations hase been completed, and model is running for 872.3227224349976


Token indices sequence length is longer than the specified maximum sequence length for this model (571 > 512). Running this sequence through the model will result in indexing errors


12960 iterations hase been completed, and model is running for 874.9327282905579


Token indices sequence length is longer than the specified maximum sequence length for this model (1087 > 512). Running this sequence through the model will result in indexing errors


13000 iterations hase been completed, and model is running for 877.5240044593811


Token indices sequence length is longer than the specified maximum sequence length for this model (1410 > 512). Running this sequence through the model will result in indexing errors


13040 iterations hase been completed, and model is running for 880.237340927124
13080 iterations hase been completed, and model is running for 882.839560508728


Token indices sequence length is longer than the specified maximum sequence length for this model (767 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (634 > 512). Running this sequence through the model will result in indexing errors


13120 iterations hase been completed, and model is running for 885.4738218784332


  "Palette images with Transparency expressed in bytes should be "


13160 iterations hase been completed, and model is running for 888.1906566619873
13200 iterations hase been completed, and model is running for 890.833509683609


Token indices sequence length is longer than the specified maximum sequence length for this model (1134 > 512). Running this sequence through the model will result in indexing errors


13240 iterations hase been completed, and model is running for 893.5493535995483


Token indices sequence length is longer than the specified maximum sequence length for this model (971 > 512). Running this sequence through the model will result in indexing errors


13280 iterations hase been completed, and model is running for 896.320937871933
13320 iterations hase been completed, and model is running for 898.8024871349335


Token indices sequence length is longer than the specified maximum sequence length for this model (1613 > 512). Running this sequence through the model will result in indexing errors


13360 iterations hase been completed, and model is running for 901.355489730835
13400 iterations hase been completed, and model is running for 904.1298384666443


Token indices sequence length is longer than the specified maximum sequence length for this model (978 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (2434 > 512). Running this sequence through the model will result in indexing errors


13440 iterations hase been completed, and model is running for 906.8283874988556
13480 iterations hase been completed, and model is running for 909.3740491867065
13520 iterations hase been completed, and model is running for 912.1686050891876
13560 iterations hase been completed, and model is running for 914.9293165206909
13600 iterations hase been completed, and model is running for 917.573942899704


Token indices sequence length is longer than the specified maximum sequence length for this model (883 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (925 > 512). Running this sequence through the model will result in indexing errors


13640 iterations hase been completed, and model is running for 920.2509450912476
13680 iterations hase been completed, and model is running for 922.8999650478363


Token indices sequence length is longer than the specified maximum sequence length for this model (549 > 512). Running this sequence through the model will result in indexing errors


13720 iterations hase been completed, and model is running for 926.4069163799286


Token indices sequence length is longer than the specified maximum sequence length for this model (606 > 512). Running this sequence through the model will result in indexing errors


13760 iterations hase been completed, and model is running for 929.0269839763641
13800 iterations hase been completed, and model is running for 931.5479428768158


Token indices sequence length is longer than the specified maximum sequence length for this model (2779 > 512). Running this sequence through the model will result in indexing errors


13840 iterations hase been completed, and model is running for 933.9749646186829
13880 iterations hase been completed, and model is running for 936.7073683738708
13920 iterations hase been completed, and model is running for 939.28409075737


Token indices sequence length is longer than the specified maximum sequence length for this model (706 > 512). Running this sequence through the model will result in indexing errors


13960 iterations hase been completed, and model is running for 941.788824558258
14000 iterations hase been completed, and model is running for 944.6902506351471
14040 iterations hase been completed, and model is running for 947.3900451660156


Token indices sequence length is longer than the specified maximum sequence length for this model (662 > 512). Running this sequence through the model will result in indexing errors


14080 iterations hase been completed, and model is running for 950.0482547283173
14120 iterations hase been completed, and model is running for 952.7841510772705
14160 iterations hase been completed, and model is running for 955.3656489849091
14200 iterations hase been completed, and model is running for 957.9762935638428


Token indices sequence length is longer than the specified maximum sequence length for this model (8020 > 512). Running this sequence through the model will result in indexing errors


14240 iterations hase been completed, and model is running for 960.6807560920715


Token indices sequence length is longer than the specified maximum sequence length for this model (2434 > 512). Running this sequence through the model will result in indexing errors


14280 iterations hase been completed, and model is running for 963.1128127574921


Token indices sequence length is longer than the specified maximum sequence length for this model (620 > 512). Running this sequence through the model will result in indexing errors


14320 iterations hase been completed, and model is running for 966.3335247039795


Token indices sequence length is longer than the specified maximum sequence length for this model (539 > 512). Running this sequence through the model will result in indexing errors


14360 iterations hase been completed, and model is running for 968.8877980709076


Token indices sequence length is longer than the specified maximum sequence length for this model (800 > 512). Running this sequence through the model will result in indexing errors


14400 iterations hase been completed, and model is running for 971.6214489936829
14440 iterations hase been completed, and model is running for 974.4842233657837
14480 iterations hase been completed, and model is running for 977.259352684021
14520 iterations hase been completed, and model is running for 979.9037673473358


Token indices sequence length is longer than the specified maximum sequence length for this model (756 > 512). Running this sequence through the model will result in indexing errors


14560 iterations hase been completed, and model is running for 982.5344793796539


Token indices sequence length is longer than the specified maximum sequence length for this model (783 > 512). Running this sequence through the model will result in indexing errors


14600 iterations hase been completed, and model is running for 985.0592339038849
14640 iterations hase been completed, and model is running for 987.774941444397


Token indices sequence length is longer than the specified maximum sequence length for this model (614 > 512). Running this sequence through the model will result in indexing errors


14680 iterations hase been completed, and model is running for 990.3093404769897


Token indices sequence length is longer than the specified maximum sequence length for this model (2960 > 512). Running this sequence through the model will result in indexing errors


14720 iterations hase been completed, and model is running for 992.9417741298676
14760 iterations hase been completed, and model is running for 995.6411328315735


Token indices sequence length is longer than the specified maximum sequence length for this model (1173 > 512). Running this sequence through the model will result in indexing errors


14800 iterations hase been completed, and model is running for 998.187974691391
14840 iterations hase been completed, and model is running for 1000.842520236969
14880 iterations hase been completed, and model is running for 1003.4431247711182


Token indices sequence length is longer than the specified maximum sequence length for this model (1571 > 512). Running this sequence through the model will result in indexing errors


14920 iterations hase been completed, and model is running for 1006.241973400116
14960 iterations hase been completed, and model is running for 1008.8976044654846
15000 iterations hase been completed, and model is running for 1011.6094899177551


Token indices sequence length is longer than the specified maximum sequence length for this model (581 > 512). Running this sequence through the model will result in indexing errors


15040 iterations hase been completed, and model is running for 1014.2237613201141
15080 iterations hase been completed, and model is running for 1016.7322328090668


Token indices sequence length is longer than the specified maximum sequence length for this model (8836 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (2009 > 512). Running this sequence through the model will result in indexing errors


15120 iterations hase been completed, and model is running for 1019.3913731575012
15160 iterations hase been completed, and model is running for 1022.1298689842224
15200 iterations hase been completed, and model is running for 1024.6344339847565


Token indices sequence length is longer than the specified maximum sequence length for this model (1624 > 512). Running this sequence through the model will result in indexing errors


15240 iterations hase been completed, and model is running for 1027.0838050842285
15280 iterations hase been completed, and model is running for 1029.5447525978088


Token indices sequence length is longer than the specified maximum sequence length for this model (2187 > 512). Running this sequence through the model will result in indexing errors


15320 iterations hase been completed, and model is running for 1032.2491858005524
15360 iterations hase been completed, and model is running for 1034.9084236621857


Token indices sequence length is longer than the specified maximum sequence length for this model (2954 > 512). Running this sequence through the model will result in indexing errors


15400 iterations hase been completed, and model is running for 1037.4209315776825
15440 iterations hase been completed, and model is running for 1040.0242447853088


Token indices sequence length is longer than the specified maximum sequence length for this model (630 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (521 > 512). Running this sequence through the model will result in indexing errors


15480 iterations hase been completed, and model is running for 1043.3974595069885
15520 iterations hase been completed, and model is running for 1046.0602066516876


Token indices sequence length is longer than the specified maximum sequence length for this model (683 > 512). Running this sequence through the model will result in indexing errors


15560 iterations hase been completed, and model is running for 1048.7634880542755


Token indices sequence length is longer than the specified maximum sequence length for this model (785 > 512). Running this sequence through the model will result in indexing errors


15600 iterations hase been completed, and model is running for 1053.04265499115


Token indices sequence length is longer than the specified maximum sequence length for this model (638 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (803 > 512). Running this sequence through the model will result in indexing errors


15640 iterations hase been completed, and model is running for 1055.885368347168
15680 iterations hase been completed, and model is running for 1058.4763016700745
15720 iterations hase been completed, and model is running for 1061.0568656921387


Token indices sequence length is longer than the specified maximum sequence length for this model (637 > 512). Running this sequence through the model will result in indexing errors


15760 iterations hase been completed, and model is running for 1063.7932069301605
15800 iterations hase been completed, and model is running for 1066.4520564079285


Token indices sequence length is longer than the specified maximum sequence length for this model (539 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (4234 > 512). Running this sequence through the model will result in indexing errors


15840 iterations hase been completed, and model is running for 1069.2252039909363
15880 iterations hase been completed, and model is running for 1071.9810767173767


Token indices sequence length is longer than the specified maximum sequence length for this model (514 > 512). Running this sequence through the model will result in indexing errors


15920 iterations hase been completed, and model is running for 1074.4127852916718
15960 iterations hase been completed, and model is running for 1077.3755156993866


Token indices sequence length is longer than the specified maximum sequence length for this model (1208 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (762 > 512). Running this sequence through the model will result in indexing errors


16000 iterations hase been completed, and model is running for 1080.1627659797668


Token indices sequence length is longer than the specified maximum sequence length for this model (1086 > 512). Running this sequence through the model will result in indexing errors


16040 iterations hase been completed, and model is running for 1084.9981126785278


Token indices sequence length is longer than the specified maximum sequence length for this model (575 > 512). Running this sequence through the model will result in indexing errors


16080 iterations hase been completed, and model is running for 1087.5120360851288


Token indices sequence length is longer than the specified maximum sequence length for this model (975 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (3859 > 512). Running this sequence through the model will result in indexing errors


16120 iterations hase been completed, and model is running for 1090.1891651153564
16160 iterations hase been completed, and model is running for 1092.7194130420685


Token indices sequence length is longer than the specified maximum sequence length for this model (582 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (1107 > 512). Running this sequence through the model will result in indexing errors


16200 iterations hase been completed, and model is running for 1095.3064110279083


Token indices sequence length is longer than the specified maximum sequence length for this model (6220 > 512). Running this sequence through the model will result in indexing errors


16240 iterations hase been completed, and model is running for 1097.872594833374


Token indices sequence length is longer than the specified maximum sequence length for this model (626 > 512). Running this sequence through the model will result in indexing errors


16280 iterations hase been completed, and model is running for 1100.9421467781067


Token indices sequence length is longer than the specified maximum sequence length for this model (879 > 512). Running this sequence through the model will result in indexing errors


16320 iterations hase been completed, and model is running for 1103.5968458652496
16360 iterations hase been completed, and model is running for 1106.2309594154358


Token indices sequence length is longer than the specified maximum sequence length for this model (1266 > 512). Running this sequence through the model will result in indexing errors


16400 iterations hase been completed, and model is running for 1109.1844940185547


Token indices sequence length is longer than the specified maximum sequence length for this model (552 > 512). Running this sequence through the model will result in indexing errors


16440 iterations hase been completed, and model is running for 1111.9995603561401


0.00011871156652301434

# Rough