# Drive Misc

In [None]:
!sudo add-apt-repository -y ppa:alessandro-strada/ppa 2>&1 > /dev/null
!sudo apt-get update -qq 2>&1 > /dev/null
!sudo apt -y install -qq google-drive-ocamlfuse 2>&1 > /dev/null
!google-drive-ocamlfuse

!sudo apt-get install -qq w3m # to act as web browser 
!xdg-settings set default-web-browser w3m.desktop # to set default browser
%cd /content
!mkdir drive
%cd drive
!mkdir MyDrive
%cd ..
!google-drive-ocamlfuse /content/drive/MyDrive

In [None]:
import json
# Parse the WebQA train/val annotation file into `q`.
path = "/content/drive/MyDrive/18-786-Intro-DL/IDL-Project/WebQA_data_first_release/WebQA_train_val.json"
# Use a context manager so the file handle is closed as soon as parsing ends
# (the previous `json.load(open(...))` left it open until garbage collection).
with open(path, 'r') as f:
    q = json.load(f)


# Dataset and imports

In [None]:
import torch
import torch.nn as nn
import json
import clip
from torch.utils.data import DataLoader, Dataset
%pip install transformers
from transformers import BertTokenizer



class QADataset(Dataset):
  """Tokenized questions with a per-question token count and a binary label.

  Args:
    data: mapping from instance id to a record that provides at least a
      ``'Q'`` entry (the text handed to the tokenizer) and an
      ``'img_posFacts'`` entry (anything that supports ``len``).
    tokenizer: optional callable used instead of the default
      ``BertTokenizer.from_pretrained('bert-base-cased')``. It is called with
      the same keyword arguments as below and must return a mapping that has
      ``'input_ids'`` and ``'attention_mask'`` tensors.
    max_length: length every question is padded / truncated to.

  Each item is ``(encoding, length, label)``; the encoding is whatever the
  tokenizer returned for that single question (so its tensors keep the
  leading dimension the tokenizer produced).
  """

  def __init__(self, data, tokenizer=None, max_length=512):
    self.data = data
    if tokenizer is None:
      tokenizer = BertTokenizer.from_pretrained('bert-base-cased')
    self.tokenizer = tokenizer
    self.instances = list(self.data.keys())
    self.Qs = [self.tokenizer(self.data[instance]['Q'],
                              padding='max_length', max_length=max_length,
                              truncation=True, return_tensors="pt")
               for instance in self.instances]
    # Number of real (non-padding) tokens per question, taken from the
    # attention mask. Searching input_ids for the first 0 (the previous
    # approach) raised IndexError for a question that fills all `max_length`
    # positions, because such a question has no padding token at all.
    self.lens = [int(Q['attention_mask'].sum()) for Q in self.Qs]
    # Label 0 when the record has a non-empty 'img_posFacts', otherwise 1.
    self.modals = [0 if len(self.data[instance]['img_posFacts']) else 1
                   for instance in self.instances]

  def __len__(self):
    """Number of instances in the dataset."""
    return len(self.instances)

  def __getitem__(self, idx):
    """Return ``(encoding, length, label)`` for the instance at ``idx``."""
    return self.Qs[idx], self.lens[idx], self.modals[idx]
    
# path = "/content/drive/MyDrive/18-786-Intro-DL/IDL-Project/WebQA_data_first_release/WebQA_train_val.json"
# dataset = QADataset(path)



# DataLoader Generation

In [None]:
import json
train_path = "/content/drive/MyDrive/18-786-Intro-DL/IDL-Project/WebQA_data_first_release/WebQA_train_val.json"
# Context manager closes the annotation file once it has been parsed
# (`json.load(open(...))` left the handle open).
with open(train_path, 'r') as f:
    q = json.load(f)
# test_path = "/content/drive/MyDrive/18-786-Intro-DL/IDL-Project/WebQA_data_first_release/WebQA_test.json"
# with open(test_path, 'r') as f:
#     qq = json.load(f)

# The single file holds both splits; partition the records by their 'split' field.
val_pts = {k: v for k, v in q.items() if v['split'] == 'val'}
train_pts = {k: v for k, v in q.items() if v['split'] == 'train'}

# Only the training loader is shuffled; validation order stays fixed.
train_loader = DataLoader(QADataset(train_pts), batch_size=128, shuffle=True)
val_loader = DataLoader(QADataset(val_pts), batch_size=128, shuffle=False)
# test_loader = DataLoader(QADataset(qq),batch_size=128,shuffle=False)

In [None]:
# Ask PyTorch to release its unused cached GPU memory first, so that the
# nvidia-smi report below is printed after the cache has been cleared.
torch.cuda.empty_cache() # Use this often
!nvidia-smi

# Model Definition

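This model is an initial attempt at the modality-disambiguation idea described in the ManyModalQA paper ([arXiv:2001.08034](https://arxiv.org/abs/2001.08034)): a BiLSTM classifier on top of BERT token embeddings predicts whether a question has positive image facts (label 0) or not (label 1).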

In [None]:
from torch import nn
from transformers import BertModel, BertConfig
from torch.nn.utils.rnn import pad_sequence, pack_padded_sequence, pad_packed_sequence

# Pretrained BERT encoder, moved to the GPU and put in eval mode. The notebook
# only ever calls it inside `torch.no_grad()` to produce token states.
bert = BertModel.from_pretrained('bert-base-cased').cuda().eval()
# Take the configuration from the loaded checkpoint. `BertConfig`'s first
# positional parameter is `vocab_size`, so `BertConfig('bert-base-cased')`
# did not load that checkpoint's settings; it stored the string as vocab_size.
configuration = bert.config

class BertClassifier(nn.Module):
    """Two-class BiLSTM head over per-token encoder states.

    Pipeline: Linear(768 -> 256) + dropout, a 2-layer bidirectional LSTM with
    hidden size 512, then an MLP (1024 -> 1024 -> 512 -> 2) that produces the
    class logits.

    Args:
        dropout: dropout probability applied after the 768 -> 256 projection.
    """

    def __init__(self, dropout=0.5):
        super().__init__()

        self.dropout = nn.Dropout(dropout)
        self.reducer = nn.Linear(768, 256)
        self.lstm = nn.LSTM(input_size=256, hidden_size=512,
                            num_layers=2, bidirectional=True, dropout=.2)
        self.linear = nn.Sequential(
            nn.Linear(512 * 2, 1024),
            nn.ReLU(),
            nn.Dropout(p=.2),
            nn.Linear(1024, 512),
            nn.ReLU(),
            nn.Dropout(p=.2),
            nn.Linear(512, 2)
        )

    def forward(self, seq_output, lens):
        """Classify each sequence in the batch.

        Args:
            seq_output: indexable whose element 0 is a float tensor of shape
                (batch, seq_len, 768). The notebook passes the tuple returned
                by ``bert(..., return_dict=False)``.
            lens: number of valid (non-padding) timesteps for each sequence;
                a tensor or a sequence of ints, each >= 1.

        Returns:
            Tensor of shape (batch, 2) with unnormalized class scores.
        """
        token_states = seq_output[0]
        # pack_padded_sequence needs the lengths as a CPU int64 tensor.
        lengths = torch.as_tensor(lens, dtype=torch.int64).cpu()

        reduced = self.dropout(self.reducer(token_states))
        packed_input = pack_padded_sequence(reduced, lengths=lengths,
                                            batch_first=True, enforce_sorted=False)
        _, (h_n, _) = self.lstm(packed_input)

        # Summarize each sequence with the final hidden state of the last LSTM
        # layer in both directions (h_n[-2] = forward, h_n[-1] = backward).
        # Reading `out[:, -1, :]` from the re-padded output, as before, picks
        # up the zero padding for every sequence shorter than the longest one
        # in the batch, so most examples were classified from an all-zero vector.
        summary_state = torch.cat((h_n[-2], h_n[-1]), dim=1)
        return self.linear(summary_state)

model = BertClassifier().cuda()
try:
  import torchsummaryX
except:
  !pip install torchsummaryX
  import torchsummaryX
from torchsummaryX import summary

x = next(iter(train_loader))
with torch.no_grad():
  seq_output =  bert(input_ids= x[0]['input_ids'].squeeze().cuda(), attention_mask=x[0]['attention_mask'].cuda(),return_dict=False)
summary(model, seq_output, x[1])

# Optimizer and Losses

In [None]:
import torch
import numpy as np
import torch.nn as nn
import torch.nn.functional as F
from torchsummaryX import summary
from torch.utils.data import Dataset, DataLoader
from torch.nn.utils.rnn import pad_sequence, pack_padded_sequence, pad_packed_sequence
import torch.optim as optim


# Loss: cross-entropy over the classifier's output scores.
criterion = nn.CrossEntropyLoss()
# Optimizer: Adam over the classifier's parameters only.
optimizer = optim.Adam(
    params=model.parameters(),
    lr=1e-3,
    weight_decay=4e-6,
)

# Main Loops

In [None]:
from tqdm import tqdm
def validate():
  """Evaluate the global `model` on `val_loader`.

  Uses the notebook-level `bert`, `model`, `criterion` and `val_loader`.
  Leaves `model` in eval mode.

  Returns:
    (accuracy, loss): accuracy as a percentage of the samples actually seen,
    and the mean per-batch loss.
  """
  model.eval()
  batch_bar = tqdm(total=len(val_loader), dynamic_ncols=True, position=0, leave=False, desc='Val')
  num_correct = 0
  num_seen = 0
  total_loss = 0.
  # No gradients are needed anywhere during evaluation, so keep the classifier
  # forward pass inside no_grad as well as the encoder.
  with torch.no_grad():
    for q, lens, target in val_loader:
      # Drop only the singleton axis at dim 1; a bare `.squeeze()` would also
      # drop the batch axis of a size-1 final batch.
      input_id = q['input_ids'].squeeze(1).cuda()
      mask = q['attention_mask'].squeeze(1).cuda()
      target = target.cuda()

      seq_output = bert(input_ids=input_id, attention_mask=mask, return_dict=False)
      output = model(seq_output, lens)
      loss = criterion(output, target)

      # Previously the loss was computed but never accumulated, so the
      # returned validation loss was always 0.
      total_loss += float(loss)
      num_correct += int((torch.argmax(output, axis=1) == target).sum())
      # Count real samples: the last batch may hold fewer than batch_size items.
      num_seen += target.size(0)

      batch_bar.set_postfix(acc="{:.04f}%".format(100 * num_correct / num_seen))
      batch_bar.update()
  batch_bar.close()

  # max(..., 1) keeps an empty loader from raising ZeroDivisionError.
  val_acc = 100 * num_correct / max(num_seen, 1)
  val_loss = total_loss / max(len(val_loader), 1)
  print("\n")
  print("Validation: {:.04f}%".format(val_acc))
  return val_acc, val_loss

# Train the classifier head; BERT is only run under no_grad to produce token
# states, and the optimizer was built from `model.parameters()` alone.
NUM_EPOCHS = 50
for epoch in range(NUM_EPOCHS):
  batch_bar = tqdm(total=len(train_loader), dynamic_ncols=True, leave=False, position=0, desc='Train')
  num_correct = 0
  num_seen = 0
  total_loss = 0
  model.train()
  for i, (q, lens, target) in enumerate(train_loader):
    # Drop only the singleton axis at dim 1; a bare `.squeeze()` would also
    # drop the batch axis of a size-1 final batch.
    input_id = q['input_ids'].squeeze(1).cuda()
    mask = q['attention_mask'].squeeze(1).cuda()
    target = target.cuda()
    with torch.no_grad():
      seq_output = bert(input_ids=input_id, attention_mask=mask, return_dict=False)
    output = model(seq_output, lens)
    loss = criterion(output, target)

    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    num_correct += int((torch.argmax(output, axis=1) == target).sum())
    # Count real samples so accuracy stays exact when the last batch is partial.
    num_seen += target.size(0)
    total_loss += float(loss)
    batch_bar.set_postfix(
              acc="{:.04f}%".format(100 * num_correct / num_seen),
              loss="{:.04f}".format(float(total_loss / (i + 1))),
              num_correct=num_correct,
              lr="{:.04f}".format(float(optimizer.param_groups[0]['lr'])))
    batch_bar.update()

  batch_bar.close()
  # max(..., 1) keeps an empty loader from raising ZeroDivisionError.
  acc = 100 * num_correct / max(num_seen, 1)
  tr_loss = float(total_loss / max(len(train_loader), 1))

  print("\n")
  print("Epoch {}/{}: Train Acc {:.04f}%, Train Loss {:.04f}".format(
        epoch + 1,
        NUM_EPOCHS,
        acc,
        tr_loss))
  val_acc, val_loss = validate()
  print(f"valid Acc: {val_acc}")



