In [1]:

# Mount Google Drive into the Colab filesystem at /content/drive so the
# dataset stored there can be read by the cells below.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [2]:
# Switch the working directory to "My Drive". The path is relative, so this
# cell depends on the directory the kernel is in when it runs.
%cd drive/My\ Drive/

/content/drive/My Drive


In [3]:
# !pip install pretrainedmodels
# !pip install pytorch_pretrained_bert pytorch-nlp

In [4]:
import torch
import torch.nn as nn
import torchvision
from pytorch_pretrained_bert import BertModel,BertAdam,BertTokenizer
import torchvision.models as M
from torch.utils.data import Dataset,DataLoader
from torchvision.transforms import transforms
from sklearn.metrics import accuracy_score,roc_auc_score,roc_curve
import pandas as pd
# from transformers import BertTokenizer
import tqdm.notebook as tq
import random
from tqdm import tqdm
import os
import numpy as np
from PIL import Image
import functools
import json
import pretrainedmodels.models
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

In [5]:
class Vocab(object):
    """Bidirectional token <-> index mapping.

    Attributes:
        stoi: dict mapping token string to integer index.
        itos: list mapping integer index to token string.
        vocab_sz: number of tokens currently stored.
    """

    def __init__(self, emptyInit=False):
        """Create the vocabulary.

        Args:
            emptyInit: when true start with no tokens at all; otherwise
                pre-populate with the five special tokens
                ``[PAD]``, ``[UNK]``, ``[CLS]``, ``[SEP]``, ``[MASK]``
                at indices 0-4.
        """
        specials = [] if emptyInit else ["[PAD]", "[UNK]", "[CLS]", "[SEP]", "[MASK]"]
        self.stoi = {token: index for index, token in enumerate(specials)}
        self.itos = list(specials)
        self.vocab_sz = len(self.itos)

    def add(self, words):
        """Append every not-yet-known item of ``words`` to the vocabulary.

        New tokens receive the next free index in iteration order; tokens
        already present are skipped.
        """
        for word in words:
            if word not in self.stoi:
                # The next free index is always the current list length.
                self.stoi[word] = len(self.itos)
                self.itos.append(word)
        self.vocab_sz = len(self.itos)

def truncate_seq_pair(tokens_a, tokens_b, max_length):
    """Shrink two token lists in place until their combined length fits.

    One token at a time is popped from the end of the strictly longer list;
    when both have the same length the token is taken from ``tokens_b``.
    Adapted from https://github.com/huggingface/pytorch-pretrained-BERT
    """
    while len(tokens_a) + len(tokens_b) > max_length:
        longer = tokens_a if len(tokens_a) > len(tokens_b) else tokens_b
        longer.pop()


def numpy_seed(seed, *addl_seeds):
    """Context manager that temporarily seeds NumPy's global PRNG.

    Usage: ``with numpy_seed(0): ...``. Inside the block the global NumPy
    generator is seeded deterministically; on exit (also on exceptions) the
    generator state that was active before entering is restored.

    Args:
        seed: base seed. ``None`` makes the context manager a no-op.
        *addl_seeds: optional extra values mixed into the seed via ``hash``.

    Returns:
        A context manager object.
    """
    # Imported locally so this block is self-contained; the original function
    # was a bare generator and could not be used in a ``with`` statement.
    from contextlib import contextmanager

    @contextmanager
    def _seeded():
        if seed is None:
            yield
            return
        effective_seed = seed
        if len(addl_seeds) > 0:
            # Fold all seeds into a single integer below one million.
            effective_seed = int(hash((seed, *addl_seeds)) % 1e6)
        saved_state = np.random.get_state()
        np.random.seed(effective_seed)
        try:
            yield
        finally:
            np.random.set_state(saved_state)

    return _seeded()


def get_vocab(hparams):
    """Build the vocabulary matching the configured model.

    Args:
        hparams: dict of hyper-parameters; only ``hparams["model"]`` is read
            (default ``'concatbert'``).

    Returns:
        A ``Vocab``. For the BERT-based models its ``stoi``/``itos`` are taken
        from the ``bert-base-uncased`` tokenizer; for any other model name the
        default ``Vocab`` (special tokens only) is returned.
    """
    vocab = Vocab()
    model=hparams.get('model','concatbert')
    # "ensemble" is built from BERT encoders as well, so it needs the BERT ids;
    # without it every token would be mapped to the default vocab's [UNK].
    if model in ["bert", "mmbt", "concatbert", "ensemble"]:
        bert_tokenizer = BertTokenizer.from_pretrained(
            'bert-base-uncased', do_lower_case=True
        )
        vocab.stoi = bert_tokenizer.vocab
        vocab.itos = bert_tokenizer.ids_to_tokens
        vocab.vocab_sz = len(vocab.itos)

    return vocab


def trainTransforms():
  """Return the augmenting image pipeline.

  Steps, in order: resize to 256, centre-crop to 224, random rotation of up
  to 15 degrees, random horizontal flip, colour jitter, tensor conversion and
  per-channel normalisation.
  """
  channel_mean = [0.46777044, 0.44531429, 0.40661017]
  channel_std = [0.12221994, 0.12145835, 0.14380469]
  steps = [
      transforms.Resize(256),
      transforms.CenterCrop(224),
      transforms.RandomRotation(15),
      transforms.RandomHorizontalFlip(),
      transforms.ColorJitter(brightness=0.4, contrast=0.4, saturation=0.4, hue=0.2),
      transforms.ToTensor(),
      transforms.Normalize(mean=channel_mean, std=channel_std),
  ]
  return transforms.Compose(steps)
def valTransforms():
    """Return the deterministic image pipeline.

    Steps, in order: resize to 299, centre-crop to 299, tensor conversion and
    per-channel normalisation. No random augmentation is applied.
    """
    channel_mean = [0.46777044, 0.44531429, 0.40661017]
    channel_std = [0.12221994, 0.12145835, 0.14380469]
    steps = [
        transforms.Resize(299),
        transforms.CenterCrop(299),
        transforms.ToTensor(),
        transforms.Normalize(mean=channel_mean, std=channel_std),
    ]
    return transforms.Compose(steps)
    
def set_seed(seed):
  """Seed Python, NumPy and PyTorch (CPU and CUDA) and pin cuDNN behaviour.

  cuDNN is switched to deterministic mode with benchmarking disabled.
  """
  seeders = (
      random.seed,
      np.random.seed,
      torch.manual_seed,
      torch.cuda.manual_seed,
      torch.cuda.manual_seed_all,
  )
  for seeder in seeders:
    seeder(seed)
  cudnn = torch.backends.cudnn
  cudnn.deterministic = True
  cudnn.benchmark = False

In [6]:
class memesDataset(Dataset):
  """Dataset over a JSON-lines file of meme records.

  Each record is a dict; this class reads the keys ``"text"``, ``"img"``
  (only for image-using models) and ``"label"`` (only when ``test`` is false).

  Items are 4-tuples ``(sentence, segment, image, label)``:
    * sentence: LongTensor of vocabulary ids, start token first.
    * segment:  tensor of segment ids with the same length as ``sentence``.
    * image:    transformed image, or ``None`` for text-only models.
    * label:    LongTensor of shape (1,), or ``None`` in test mode.
  """

  def __init__(self,data_path,img_dir, tokenizer, transforms, vocab, hparams,test=False):
    """
    Args:
      data_path: path of the JSON-lines file.
      img_dir: directory the records' ``"img"`` paths are relative to.
      tokenizer: callable mapping a text string to a list of tokens.
      transforms: callable applied to each loaded RGB image.
      vocab: object whose ``stoi`` maps tokens to ids (must contain "[UNK]"
        if any token can be unknown).
      hparams: dict; reads "model", "max_seq_len" and "num_image_embeds".
      test: when true no labels are read.
    """
    # Use a context manager so the file handle is closed deterministically,
    # and skip blank lines (e.g. a trailing newline) instead of crashing.
    with open(data_path) as data_file:
      self.data = [json.loads(line) for line in data_file if line.strip()]
    self.img_dir=img_dir
    self.tokenizer = tokenizer
    self.hparams = hparams
    self.vocab = vocab
    self.n_classes = 2
    self.model=self.hparams.get("model",'concatbert')
    # For "mmbt" the leading token is [SEP]; it is stripped again in
    # __getitem__ because the image embedding block supplies it.
    self.text_start_token = ["[CLS]"] if self.model != "mmbt" else ["[SEP]"]
    self.test=test

    self.max_seq_len = self.hparams.get("max_seq_len",200)
    if self.model == "mmbt":
        # Reserve room in the sequence for the image embeddings.
        self.max_seq_len -= self.hparams.get("num_image_embeds",1)

    self.transforms = transforms

  def __len__(self):
    """Number of records in the file."""
    return len(self.data)

  def __getitem__(self,index):
    """Return ``(sentence, segment, image, label)`` for record ``index``."""
    row = self.data[index]

    # Start token + at most (max_seq_len - 1) text tokens.
    tokens = self.text_start_token + self.tokenizer(row["text"])[: (self.max_seq_len - 1)]
    segment = torch.zeros(len(tokens))

    sentence = torch.LongTensor(
        [
            self.vocab.stoi[w] if w in self.vocab.stoi else self.vocab.stoi["[UNK]"]
            for w in tokens
        ]
    )

    label = None
    if not self.test:
      label = torch.LongTensor([row["label"]])

    image = None
    if self.model in ["img", "concatbow", "concatbert", "mmbt","ensemble"]:
      # Close the underlying file as soon as the pixel data has been
      # converted; convert() returns an independent in-memory image.
      with Image.open(os.path.join(self.img_dir, row["img"])) as img_file:
        image = img_file.convert("RGB")
      image = self.transforms(image)

    if self.model == "mmbt":
      # The first SEP is part of Image Token.
      segment = segment[1:]
      sentence = sentence[1:]
      # The first segment (0) is of images.
      segment += 1

    return sentence, segment, image, label
   

In [7]:

def collate_fn(batch, hparams,test=False):
    """Pad a list of dataset items into batch tensors.

    Args:
        batch: list of ``(sentence, segment, image, label)`` tuples.
        hparams: dict; ``hparams["model"]`` decides whether images are stacked.
        test: when true no target tensor is built.

    Returns:
        ``(text, segment, mask, img, tgt)`` where the first three are long
        tensors of shape (batch, longest sequence), zero padded on the right;
        ``mask`` is 1 on real tokens. ``img`` is ``None`` for text-only
        models and ``tgt`` is ``None`` in test mode.
    """
    seq_lengths = [len(sample[0]) for sample in batch]
    n_rows = len(batch)
    widest = max(seq_lengths)

    def blank():
        return torch.zeros(n_rows, widest).long()

    mask_tensor, text_tensor, segment_tensor = blank(), blank(), blank()

    uses_images = hparams.get('model','concatbert') in ["img", "concatbow", "concatbert", "mmbt","ensemble"]
    img_tensor = torch.stack([sample[2] for sample in batch]) if uses_images else None

    tgt_tensor = None
    if test==False:
        tgt_tensor = torch.cat([sample[3] for sample in batch]).long()

    for row_idx, sample in enumerate(batch):
        span = seq_lengths[row_idx]
        text_tensor[row_idx, :span] = sample[0]
        segment_tensor[row_idx, :span] = sample[1]
        mask_tensor[row_idx, :span] = 1

    return text_tensor, segment_tensor, mask_tensor, img_tensor, tgt_tensor

In [8]:
# vocab=None
# vocab_sz=None

def get_loaders(hparams):
  """Create the train / dev / test data loaders.

  Side effect: sets the module-level globals ``vocab`` and ``vocab_sz``.

  Args:
    hparams: dict; reads "train_path", "dev_path", "test_path", "img_dir",
      "batch_size" (default 8) and "num_workers" (default 20), and is
      forwarded to the datasets and the collate function.

  Returns:
    ``(train_dataloader, dev_dataloader, test_dataloader, len(train_set))``.
  """
  global vocab
  global vocab_sz

  tokenizer = BertTokenizer.from_pretrained('bert-base-uncased', do_lower_case=True).tokenize
  vocab = get_vocab(hparams)
  vocab_sz = vocab.vocab_sz

  img_dir = hparams.get("img_dir")
  # NOTE(review): the augmenting pipeline is constructed but not handed to
  # any dataset below; all three splits use the validation transforms.
  train_transform = trainTransforms()
  val_transform = valTransforms()

  def build_set(path_key, is_test=False):
    return memesDataset(hparams.get(path_key), img_dir, tokenizer, val_transform, vocab, hparams, test=is_test)

  train_set = build_set("train_path")
  dev_set = build_set("dev_path")
  test_set = build_set("test_path", is_test=True)

  loader_kwargs = dict(
      batch_size=hparams.get('batch_size',8),
      num_workers=hparams.get('num_workers',20),
  )
  labelled_collate = functools.partial(collate_fn, hparams=hparams, test=False)
  unlabelled_collate = functools.partial(collate_fn, hparams=hparams, test=True)

  # Only the training split is shuffled.
  train_dataloader = DataLoader(train_set, shuffle=True, collate_fn=labelled_collate, **loader_kwargs)
  dev_dataloader = DataLoader(dev_set, shuffle=False, collate_fn=labelled_collate, **loader_kwargs)
  test_dataloader = DataLoader(test_set, shuffle=False, collate_fn=unlabelled_collate, **loader_kwargs)

  return train_dataloader, dev_dataloader, test_dataloader, len(train_set)




In [9]:
class Identity(nn.Module):
    """Module whose forward pass hands its input back unchanged."""

    def __init__(self):
        super().__init__()

    def forward(self, x):
        """Return ``x`` itself."""
        return x
class imageEnc(nn.Module):
    """Image encoder built from a vision backbone with its last two children removed.

    The truncated backbone is followed by a global (1, 1) pooling layer:
    average pooling when ``hparams["pool"] == "avg"``, max pooling otherwise.
    """

    def __init__(self, hparams):
        """
        Args:
            hparams: dict; reads "vision_model" (an ``nn.Module`` backbone)
                and "pool".
        """
        super(imageEnc, self).__init__()
        self.hparams = hparams
        backbone = self.hparams.get("vision_model")
        if backbone is None:
            # Only touch the notebook-level `resnet152` when no backbone was
            # configured, so a missing global cannot break configured runs.
            backbone = resnet152
        # Keep the layer list local: storing it as `self.modules` would shadow
        # nn.Module.modules() on this instance.
        trunk = list(backbone.children())[:-2] #2 for resnet
        self.model = nn.Sequential(*trunk).to(device)

        self.pool = nn.AdaptiveAvgPool2d((1, 1)) if self.hparams.get("pool") == "avg" else nn.AdaptiveMaxPool2d((1, 1))

    def forward(self, x):
        """Encode a batch of images.

        The backbone output is pooled to spatial size (1, 1), the dimensions
        from index 2 on are flattened, and dimensions 1 and 2 are swapped —
        for a 4-D backbone output (B, C, H, W) this gives (B, 1, C).
        """
        x = self.pool(self.model(x))
        x = torch.flatten(x, start_dim=2) #2 for resnet
        x = x.transpose(1, 2).contiguous()  ##2 for resnet
        return x

class bertEnc(nn.Module):
    """Text encoder wrapping the pretrained ``bert-base-uncased`` model."""

    def __init__(self,hparmas):
        super().__init__()
        self.hparams = hparmas
        self.model = BertModel.from_pretrained("bert-base-uncased")

    def forward(self,txt, mask, segment):
        """Run BERT and return the second element of its output pair.

        ``segment`` is passed as ``token_type_ids`` and ``mask`` as
        ``attention_mask``; only the final encoder layer is requested.
        """
        bert_kwargs = dict(
            token_type_ids=segment,
            attention_mask=mask,
            output_all_encoded_layers=False,
        )
        _sequence_output, pooled = self.model(txt, **bert_kwargs)
        return pooled
# class BertEncoder(nn.Module):
#     def __init__(self, hparams):
#         super(BertEncoder, self).__init__()
#         self.hparams = hparams
#         self.bert = BertModel.from_pretrained('bert-base-uncased')

#     def forward(self, txt, mask, segment):
#         _, out = self.bert(
#             txt,
#             token_type_ids=segment,
#             attention_mask=mask,
#             output_all_encoded_layers=False,
#         )
#         return out


class BertClf(nn.Module):
    """Text-only classifier: BERT pooled output followed by a linear layer."""

    def __init__(self, hparams):
        """
        Args:
            hparams: dict of hyper-parameters, forwarded to the text encoder.
        """
        super(BertClf, self).__init__()
        self.hparams = hparams
        # The active text encoder in this file is `bertEnc`, which keeps the
        # BERT model under `.model`; the previously referenced `BertEncoder`
        # / `.bert` no longer exist.
        self.enc = bertEnc(hparams)
        self.clf = nn.Linear(768,2)
        # Initialise the classification head with BERT's own weight init.
        self.clf.apply(self.enc.model.init_bert_weights)

    def forward(self, txt, mask, segment):
        """Return the 2-way logits for a batch of token ids."""
        x = self.enc(txt, mask, segment)
        return self.clf(x)


In [10]:
class imageBert(nn.Module):
  """Embeds image features as a ``[CLS] image... [SEP]`` token block.

  The position, token-type and word embeddings as well as the LayerNorm are
  shared with the text embedding module passed in as ``embeddings``; only the
  linear projection of the image features is new.
  """

  def __init__(self,hparams,embeddings):
    """
    Args:
      hparams: dict; reads "image_hidden_size" (default 2048), "hidden_sz"
        (default 768) and, in forward, "num_image_embeds" (default 1).
      embeddings: module exposing ``position_embeddings``,
        ``token_type_embeddings``, ``word_embeddings`` and ``LayerNorm``.
    """
    super(imageBert,self).__init__()
    self.hparams=hparams
    self.image_hidden_size=self.hparams.get("image_hidden_size",2048)
    self.hidden_size=self.hparams.get('hidden_sz',768)
    self.img_embeddings=nn.Linear(self.image_hidden_size,self.hidden_size).to(device)
    self.position_embeddings=embeddings.position_embeddings
    self.token_type_embeddings=embeddings.token_type_embeddings
    self.word_embeddings=embeddings.word_embeddings
    self.LayerNorm = embeddings.LayerNorm
    self.dropout = nn.Dropout(p=0.1)

  def forward(self,input_imgs,token_type_ids):
    """Return the embedded image block.

    Args:
      input_imgs: image features; the last dimension is projected by the
        linear layer. Assumed to be (batch, num_image_embeds, feature) —
        TODO confirm against the image encoder in use.
      token_type_ids: long tensor of segment ids for the image block.

    Relies on the module-level ``vocab`` for the [CLS]/[SEP] ids.
    """
    bsz=input_imgs.size(0)
    # Build helper tensors on the same device as the incoming features
    # instead of hard-coding CUDA, so the module also runs on CPU.
    target_device=input_imgs.device
    num_image_embeds=self.hparams.get("num_image_embeds",1)
    seq_length=num_image_embeds+2

    cls_id = torch.full((bsz, 1), vocab.stoi["[CLS]"], dtype=torch.long, device=target_device)
    cls_token_embeds = self.word_embeddings(cls_id)

    sep_id = torch.full((bsz, 1), vocab.stoi["[SEP]"], dtype=torch.long, device=target_device)
    sep_token_embeds = self.word_embeddings(sep_id)

    imgs_embeddings = self.img_embeddings(input_imgs)
    token_embeddings = torch.cat(
        [cls_token_embeds, imgs_embeddings, sep_token_embeds], dim=1
    )

    position_ids = torch.arange(seq_length, dtype=torch.long, device=target_device)
    position_ids = position_ids.unsqueeze(0).expand(bsz, seq_length)
    position_embeddings = self.position_embeddings(position_ids)
    token_type_embeddings = self.token_type_embeddings(token_type_ids)

    embeddings = token_embeddings + position_embeddings + token_type_embeddings
    embeddings = self.LayerNorm(embeddings)
    embeddings = self.dropout(embeddings)
    return embeddings



class MultimodalBertEncoder(nn.Module):
    """BERT encoder over a concatenated image-block + text sequence.

    The text embeddings, transformer encoder and pooler come from the
    pretrained ``bert-base-uncased`` model; images are encoded by ``imageEnc``
    and embedded by ``imageBert``.
    """

    def __init__(self, hparams):
      """
      Args:
        hparams: dict; reads "hidden_sz" (default 768), "n_classes"
          (default 2) and "num_image_embeds" (default 1), and is forwarded to
          the image sub-modules.
      """
      super(MultimodalBertEncoder, self).__init__()
      self.hparams = hparams
      bert = BertModel.from_pretrained("bert-base-uncased")
      self.txt_embeddings = bert.embeddings

      self.img_embeddings = imageBert(self.hparams, self.txt_embeddings).to(device)
      self.img_encoder = imageEnc(self.hparams).to(device)
      self.encoder = bert.encoder
      self.pooler = bert.pooler
      self.hidden_sz=self.hparams.get("hidden_sz",768)
      self.n_classes=self.hparams.get('n_classes',2)
      # Not applied in forward(); retained so the module's parameters and
      # attributes stay as they were.
      self.clf = nn.Linear(self.hidden_sz, self.n_classes).to(device)
      self.num_image_embeds=self.hparams.get("num_image_embeds",1)


    def forward(self, input_txt, attention_mask, segment, input_img):
      """Return the pooled representation of the joint image/text sequence.

      Args:
        input_txt: long tensor of token ids, (batch, text length).
        attention_mask: tensor with 1 on real text tokens, same shape.
        segment: segment ids for the text tokens.
        input_img: batch of images for the image encoder.
      """
      bsz = input_txt.size(0)
      # Create helper tensors on the inputs' device rather than forcing CUDA.
      target_device = input_txt.device
      img_block_len = self.num_image_embeds + 2  # [CLS] + image embeds + [SEP]

      # Every position of the image block is attended to.
      attention_mask = torch.cat(
          [
              torch.ones(bsz, img_block_len, dtype=torch.long, device=target_device),
              attention_mask,
          ],
          dim=1,
      )
      extended_attention_mask = attention_mask.unsqueeze(1).unsqueeze(2)
      extended_attention_mask = extended_attention_mask.to(
          dtype=next(self.parameters()).dtype
      )
      # Masked positions get a large negative value added to their scores.
      extended_attention_mask = (1.0 - extended_attention_mask) * -10000.0

      # The image block uses segment id 0 throughout.
      img_tok = torch.zeros(bsz, img_block_len, dtype=torch.long, device=target_device)
      img = self.img_encoder(input_img)
      img_embed_out = self.img_embeddings(img, img_tok)
      txt_embed_out = self.txt_embeddings(input_txt, segment)
      encoder_input = torch.cat([img_embed_out, txt_embed_out], 1)  # Bx(TEXT+IMG)xHID

      encoded_layers = self.encoder(
          encoder_input, extended_attention_mask, output_all_encoded_layers=False
      )

      return self.pooler(encoded_layers[-1])


class MultimodalBertClf(nn.Module):
    """Two-class classifier on top of ``MultimodalBertEncoder``."""

    def __init__(self, hparams):
        super().__init__()
        self.hparams = hparams
        self.enc = MultimodalBertEncoder(self.hparams)
        # Linear head: hidden size (default 768) -> 2 classes.
        self.clf = nn.Linear(self.hparams.get('hidden_sz',768), 2).to(device)

    def forward(self, txt, mask, segment, img):
        """Encode the text/image pair and return the class logits."""
        pooled = self.enc(txt, mask, segment, img)
        logits = self.clf(pooled)
        return logits

class ConcatBert(nn.Module):
  """Late-fusion model: BERT text features concatenated with image features.

  ``forward`` returns the output of ``main_layers`` (a single linear layer
  mapping 768+2048 features to 768); ``self.clf`` is created but is not
  applied in ``forward``.
  """

  def __init__(self,hparams):
    super().__init__()
    self.hparams = hparams
    self.image = imageEnc(self.hparams)
    self.bert = bertEnc(self.hparams)
    self.last_size = 768+2048
    self.clf = nn.Linear(self.last_size,2).to(device)
    self.hiddens = [1000,500,300]
    fusion_projection = nn.Linear(self.last_size,768).to(device)
    self.main_layers = nn.ModuleList()
    self.main_layers.append(fusion_projection)

  def forward(self,txt, mask, segment,image):
    """Fuse text and image features and run them through ``main_layers``."""
    text_features = self.bert(txt, mask, segment)
    image_features = torch.flatten(self.image(image),start_dim=1)

    fused = torch.cat([text_features,image_features],-1)
    for stage in self.main_layers:
      fused = stage(fused)
    return fused
    

class ensembleBert(nn.Module):
  """Ensemble of the multimodal BERT encoder and ``ConcatBert``.

  The pooled encoder output and the ``ConcatBert`` output are concatenated,
  projected back to the hidden size and classified into two classes.
  """

  def __init__(self,hparams):
    """
    Args:
      hparams: dict; reads "hidden_sz" (default 768) and is forwarded to the
        sub-models.
    """
    super(ensembleBert,self).__init__()
    self.hparams=hparams
    self.concatBert=ConcatBert(self.hparams)
    # Not used by forward(); kept so existing attribute access keeps working.
    self.mmbt=MultimodalBertClf(self.hparams)
    self.enc = MultimodalBertEncoder(self.hparams)
    hidden_sz=self.hparams.get('hidden_sz',768)
    n_classes=2
    # The fusion layers are registered here so that they are trained, moved
    # between devices and saved with the model. Creating them inside forward()
    # produced new random, untrained weights on every call.
    # ConcatBert contributes 768 features, the encoder `hidden_sz`.
    self.fusion = nn.Linear(hidden_sz + 768, hidden_sz).to(device)
    self.clf = nn.Linear(hidden_sz, n_classes).to(device)

  def forward(self,txt,mask,segment,image):
    """Return the 2-way logits of the ensemble."""
    x1 = self.enc(txt, mask, segment, image)
    x2 = self.concatBert(txt,mask,segment,image)

    x = torch.cat([x1,x2],1)
    x = self.fusion(x)
    return self.clf(x)

    
    

In [11]:
def get_optimizer(model,hparams,length):
  """Create a ``BertAdam`` optimizer with two weight-decay groups.

  Parameters whose name contains "bias", "LayerNorm.bias" or
  "LayerNorm.weight" get weight decay 0.0, all others 0.01.

  Args:
    model: module whose named parameters are optimised.
    hparams: dict; reads "batch_size" (default 128),
      "gradient_accumulation_steps" (default 1), "max_epochs" (default 40),
      "lr" (default 0.0001) and "warmup" (default 0.1).
    length: number of training examples, used to derive ``t_total``.
  """
  updates_per_epoch = (
      length
      / hparams.get("batch_size",128)
      / hparams.get("gradient_accumulation_steps",1)
  )
  total_steps = updates_per_epoch * hparams.get("max_epochs",40)

  exempt_markers = ["bias", "LayerNorm.bias", "LayerNorm.weight"]
  decayed, exempt = [], []
  for param_name, param in list(model.named_parameters()):
    is_exempt = any(marker in param_name for marker in exempt_markers)
    (exempt if is_exempt else decayed).append(param)

  param_groups = [
      {"params": decayed, "weight_decay": 0.01},
      {"params": exempt, "weight_decay": 0.0},
  ]
  return BertAdam(
      param_groups,
      lr=hparams.get("lr",0.0001),
      warmup=hparams.get('warmup',0.1),
      t_total=total_steps,
  )


In [12]:
def _model_device(model):
    """Return the device of the model's first parameter (CPU if it has none)."""
    first_param = next(model.parameters(), None)
    return first_param.device if first_param is not None else torch.device("cpu")


def train(model,criterion,samples,hparams):
    """Run one forward pass on a batch and compute its loss.

    Args:
        model: the network; called as ``model(txt, mask, segment)`` for
            "bert" and ``model(txt, mask, segment, img)`` for "concatbert",
            "mmbt" and "ensemble".
        criterion: loss callable taking ``(out, label)``.
        samples: ``(txt, segment, mask, img, label)`` as built by the
            collate function.
        hparams: dict; reads "model" (default "concatbert"), "freeze_img"
            and "freeze_text" (both default 0).

    Returns:
        ``(out, loss, label)`` with ``label`` moved to the model's device.

    Raises:
        ValueError: if ``hparams["model"]`` is not one of the supported names.
    """
    # Only switch to training mode when autograd is recording. When this is
    # called under torch.no_grad() (evaluation / inference) the caller's mode
    # is left alone so dropout and batch-norm do not run in training mode.
    if torch.is_grad_enabled():
        model.train()
    freeze_img=hparams.get("freeze_img",0)
    freeze_txt=hparams.get("freeze_text",0)

    txt, segment, mask, img, label = samples
    modelIdentifier=hparams.get('model',"concatbert")
    # Send the batch to wherever the model lives instead of forcing CUDA.
    run_device=_model_device(model)

    if modelIdentifier in ("mmbt","ensemble"):
      # Optionally freeze the image backbone and/or the transformer encoder.
      for param in model.enc.img_encoder.parameters():
        param.requires_grad = not freeze_img
      for param in model.enc.encoder.parameters():
        param.requires_grad = not freeze_txt

    if modelIdentifier == "bert":
      txt, mask, segment = txt.to(run_device), mask.to(run_device), segment.to(run_device)
      out = model(txt, mask, segment)
    elif modelIdentifier in ("concatbert","mmbt","ensemble"):
      txt, mask, segment = txt.to(run_device), mask.to(run_device), segment.to(run_device)
      img = img.to(run_device)
      out = model(txt, mask, segment, img)
    else:
      raise ValueError("unsupported model: {!r}".format(modelIdentifier))

    label=label.to(run_device)
    loss=criterion(out,label)
    return out,loss,label
def eval(model,criterion,val_loader,hparams):
  """Evaluate ``model`` on ``val_loader`` without tracking gradients.

  Args:
    model: network to evaluate.
    criterion: loss callable, forwarded to ``train``.
    val_loader: iterable of labelled batches.
    hparams: dict of hyper-parameters, forwarded to ``train``.

  Returns:
    dict with "loss" (mean batch loss), "acc" (accuracy of the arg-max
    prediction) and "auc" (ROC AUC of the class-1 probability).
  """
  losses, preds, labels, scores = [], [], [], []
  with torch.no_grad():
    for samples in val_loader:
      out,loss,label=train(model,criterion,samples,hparams)
      losses.append(loss.item())

      probs = torch.nn.functional.softmax(out, dim=1).cpu()
      preds.extend(probs.argmax(dim=1).tolist())
      # Score with the class-1 probability: the raw class-1 logit alone does
      # not rank examples the way the classifier does (that depends on the
      # difference between the two logits).
      scores.extend(probs[:, 1].tolist())
      labels.extend(label.cpu().tolist())

  metrics = {"loss": np.mean(losses)}
  metrics["acc"] = accuracy_score(labels, preds)
  metrics['auc']=roc_auc_score(labels,scores)
  return metrics


def testPred(model,criterion,test_set,test_loader,hparams):
  model.eval()

  id=test_set[4]
  submission_frame = pd.DataFrame(columns=['id',"proba", "label"])
  # print(len(id))
  ids=[]
  pred=[]
  probs=[]
  with torch.no_grad():
    for batch in (tq.tqdm(test_loader)):
      
      
      preds,_,_,id=train(model,criterion,batch,hparams,test=True)
      preds=preds.cpu()
      
      ids+=id
      pred+=(preds.argmax(dim=1))
      probs+=(preds[:,1])
      
    
    submission_frame['id']=ids
    submission_frame['proba']=probs
    submission_frame['label']=pred
    
    submission_frame.proba = submission_frame.proba.astype(float)
    
    submission_frame.label = submission_frame.label.astype(int)
    return submission_frame




In [13]:
def training(hparams):
  """Train the configured model with gradient accumulation and early stopping.

  Args:
    hparams: dict; reads "model" (default "concatbert"), "max_epochs"
      (default 40), "gradient_accumulation_steps" (default 256),
      "save_model" and "save_name", and is forwarded to the model, the data
      loaders and the optimizer.

  After every epoch the model is evaluated on the dev split. The dev AUC
  drives the LR scheduler, checkpointing (when ``save_model == 'yes'``) and
  early stopping (4 epochs without improvement).

  Raises:
    ValueError: if ``hparams["model"]`` is not a supported model name.
  """
  criterion=nn.CrossEntropyLoss()
  modelIdentifier=hparams.get("model","concatbert")
  model_builders = {
      'concatbert': ConcatBert,
      'mmbt': MultimodalBertClf,
      'bert': BertClf,
      'ensemble': ensembleBert,
  }
  if modelIdentifier not in model_builders:
    raise ValueError("unsupported model: {!r}".format(modelIdentifier))
  model=model_builders[modelIdentifier](hparams)

  train_dataloader,dev_dataloader,_,length=get_loaders(hparams)
  # Move the model before building the optimizer and use the notebook's
  # `device`, which falls back to CPU when CUDA is unavailable.
  model.to(device)
  optimizer=get_optimizer(model,hparams,length)
  # The monitored quantity is AUC (higher is better), hence mode='max'.
  scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='max', patience=5, factor=0.1, verbose=True)

  accumulation_steps=max(1,hparams.get('gradient_accumulation_steps',256))
  global_step, n_no_improve, best_metric = 0, 0, -np.inf
  epochs=hparams.get("max_epochs",40)
  for i in range(1,epochs+1):
    train_losses = []
    model.train()
    optimizer.zero_grad()

    pending = 0  # batches whose gradients have not been applied yet
    for samples in tq.tqdm(train_dataloader):
      _,loss,_=train(model,criterion,samples,hparams)
      # Log the unscaled loss so the reported value is comparable across
      # different accumulation settings.
      train_losses.append(loss.item())
      if accumulation_steps>1:
        loss=loss/accumulation_steps
      loss.backward()
      global_step+=1
      pending+=1
      if pending>=accumulation_steps:
        optimizer.step()
        optimizer.zero_grad()
        pending=0
    if pending>0:
      # Apply the leftover gradients; otherwise the zero_grad() at the start
      # of the next epoch would silently throw them away.
      optimizer.step()
      optimizer.zero_grad()

    model.eval()
    metrics=eval(model,criterion,dev_dataloader,hparams)
    tuning_metric=metrics['auc']
    scheduler.step(tuning_metric)

    print("epoch | {:2d} | train loss {:5.4f}".format(i,np.mean(train_losses)) )
    print("epoch | {} | val loss {} | val accuracy {} | auc {} |  ".format(i,metrics['loss'],metrics['acc'],metrics['auc']))

    if tuning_metric>best_metric:
      # Track the best score whether or not checkpoints are being written,
      # so early stopping also works with saving disabled.
      best_metric=tuning_metric
      n_no_improve=0
      if hparams.get("save_model")=='yes':
        save_name=hparams.get("save_name")
        torch.save(model,save_name)
        print("model saved at epoch",i)
    else:
      n_no_improve+=1
    if n_no_improve>=4:
      print("early stopping triggered")
      break



In [14]:

# Run configuration. Paths are relative to the current working directory.
train_path = "data/train.jsonl"
dev_path = "data/dev.jsonl"
test_path="data/test.jsonl"
# Seed all RNGs before any model weights are created.
set_seed(40)
img_dir =os.path.join(os.getcwd(),'data')
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
# Pretrained ResNet-152 used as the vision backbone (passed in via hparams below).
resnet152=M.resnet152(pretrained=True)


hparams = {

    # Required hparams
    "train_path": train_path,
    "dev_path": dev_path,
    "img_dir": img_dir,
    "test_path":test_path,

    # Optional hparams
    "embeddings": 300,
    "language_feature_dim": 300,
    "vision_feature_dim": 300,
    "fusion_output_size": 256,
    "output_path": "model-outputs",
    "dev_limit": None,
    "lr": 0.0001,
    "max_epochs": 10,
    "n_gpu": 1,
    "batch_size": 64,
    # allows us to "simulate" having larger batches
    # NOTE(review): the visible cells look up "gradient_accumulation_steps",
    # not "accumulate_grad_batches", so this value appears to be unused — confirm.
    "accumulate_grad_batches": 256,
    # "early_stop_patience": 6,
    "device":device,
    "pool":"max",
    "save_model":"yes",
    "model":"mmbt",
    
    "hidden_sz":768,
    "vision_model":resnet152,
    "save_name":"mmbt_ensemble.pt",
    # Feature width of the backbone, taken from the input size of its final fc layer.
    "image_hidden_size":resnet152.fc.in_features
    
}
# Start training with the configuration above.
if __name__=="__main__":
  
  training(hparams)

HBox(children=(FloatProgress(value=0.0, max=133.0), HTML(value='')))

RuntimeError: ignored