In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
%cd /content/drive/MyDrive/VRD-IU

/content/drive/MyDrive/VRD-IU


In [3]:
!cp train_visual_features.pkl train_textual_features.pkl  obj_cat.csv /content/

In [4]:
%cd /content/

/content


In [5]:
import pickle
with open("train_visual_features.pkl", 'rb') as f:
    visual_features = pickle.load(f)
with open("train_textual_features.pkl", 'rb') as f:
    textual_features = pickle.load(f)

In [6]:
from torch.utils.data import Dataset, DataLoader
import os
import torch
import numpy as np
import pandas as pd
class FeatureDataset(Dataset):
    def __init__(self, df,visual_features, textual_features):
        super().__init__()
        self.df = df
        self.visual_features = torch.tensor(list(visual_features[visual_features['id'].isin(train_df['id'])]['features']))
        self.textual_features = torch.tensor(list(textual_features[textual_features['id'].isin(train_df['id'])]['features']))

    def __len__(self):
        return len(self.df)

    def __getitem__(self, index):
        data = self.df.loc[index]
        visual_feats = self.visual_features[index]
        textual_feats = self.textual_features[index]
        return visual_feats, textual_feats, int(data['class'])


In [7]:
df = pd.read_csv("obj_cat.csv")
from sklearn.model_selection import train_test_split
train_df, val_df = train_test_split(df,test_size=0.2)

In [8]:
train_df = train_df.reset_index(drop=True)
val_df = val_df.reset_index(drop=True)

In [9]:
train_dataset = FeatureDataset(train_df,visual_features, textual_features)

In [10]:
val_dataset = FeatureDataset(val_df,visual_features, textual_features)

In [11]:
train_dataloader = DataLoader(train_dataset,batch_size=256,shuffle=True,num_workers=2)

In [12]:
val_dataloader = DataLoader(val_dataset,batch_size=256,shuffle=True,num_workers=2)

In [13]:
import torch.nn as nn
class Classifer(nn.Module):
    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.fuser =  nn.Sequential(nn.Linear(1536+768,3072),
                                   nn.ReLU(),
                                   nn.Linear(3072,768))
        self.fc = nn.Linear(768,25)

    def forward(self, visual_feats, text_feats):
        embed = self.fuser(torch.cat((visual_feats,text_feats),dim=1))
        pred = self.fc(embed)
        return pred

In [14]:
from torch import optim
model = Classifer()
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(),lr=1e-3)

In [19]:
def train(model, dataloader, criterion, optimizer):
    model.train()
    total_loss = 0.0
    for visual_feats, text_feats, labels in dataloader:
        visual_feats = visual_feats.to(device)
        text_feats = text_feats.to(device)
        labels = labels.to(device)
        pred = model(visual_feats,text_feats)
        loss = criterion(pred,labels)
        total_loss += loss.item()
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    return total_loss/len(dataloader)


In [20]:
from sklearn.metrics import f1_score, accuracy_score
def evaluate(model, dataloader, criterion):
    model.eval()
    total_loss = 0.0
    total_acc = 0.0
    total_f1 = 0.0
    with torch.no_grad():
        for visual_feats, text_feats, labels in dataloader:
            visual_feats = visual_feats.to(device)
            text_feats = text_feats.to(device)
            labels = labels.to(device)
            pred = model(visual_feats,text_feats)
            loss = criterion(pred,labels)
            total_loss += loss.item()
            predicted_labels = pred.argmax(dim=1)
            f1 = f1_score(labels.detach().cpu().numpy(), predicted_labels.detach().cpu().numpy(),average="macro")
            acc = accuracy_score(labels.detach().cpu().numpy(), predicted_labels.detach().cpu().numpy())
            total_f1+=f1
            total_acc+=acc

    return total_loss/len(dataloader),total_acc/len(dataloader),total_f1/len(dataloader)

In [21]:
epochs = 100
from tqdm import tqdm
for epoch in tqdm(range(epochs)):
    train_loss = train(model,train_dataloader,criterion,optimizer)
    val_loss, acc, f1 = evaluate(model,val_dataloader,criterion)
    print(f'Epoch: {epoch} Train Loss: {train_loss} Val Loss: {val_loss} Val Accuracy: {acc} Val F1: {f1}')

  1%|          | 1/100 [00:03<05:23,  3.27s/it]

Epoch: 0 Train Loss: 2.1443084013812683 Val Loss: 2.137494858573465 Val Accuracy: 0.4676598714082407 Val F1: 0.030455218442424034


  2%|▏         | 2/100 [00:06<05:10,  3.16s/it]

Epoch: 1 Train Loss: 2.1357286511098637 Val Loss: 2.1322663461460785 Val Accuracy: 0.46840215420845754 Val F1: 0.030253217368888797


  3%|▎         | 3/100 [00:09<05:04,  3.14s/it]

Epoch: 2 Train Loss: 2.131291496403077 Val Loss: 2.1400526656824 Val Accuracy: 0.4674623881810789 Val F1: 0.031099832310136792


  4%|▍         | 4/100 [00:14<05:57,  3.72s/it]

Epoch: 3 Train Loss: 2.1320231206276836 Val Loss: 2.146492768736447 Val Accuracy: 0.4678690024396856 Val F1: 0.030247577561704618


  5%|▌         | 5/100 [00:17<05:36,  3.55s/it]

Epoch: 4 Train Loss: 2.126316413283348 Val Loss: 2.14267534718794 Val Accuracy: 0.4679838921455679 Val F1: 0.03023105930747808


  6%|▌         | 6/100 [00:20<05:20,  3.41s/it]

Epoch: 5 Train Loss: 2.1240774077527664 Val Loss: 2.14742575673496 Val Accuracy: 0.4675036849417186 Val F1: 0.030817596233722235


  7%|▋         | 7/100 [00:23<05:11,  3.35s/it]

Epoch: 6 Train Loss: 2.119578911977656 Val Loss: 2.164682125343996 Val Accuracy: 0.4668349950867444 Val F1: 0.030884636355198877


  8%|▊         | 8/100 [00:28<05:45,  3.75s/it]

Epoch: 7 Train Loss: 2.1172071484958424 Val Loss: 2.157144430805655 Val Accuracy: 0.4672972011385199 Val F1: 0.03095553586951735


  9%|▉         | 9/100 [00:31<05:26,  3.59s/it]

Epoch: 8 Train Loss: 2.110756856553695 Val Loss: 2.170809062088237 Val Accuracy: 0.466708457576579 Val F1: 0.03147917714436877


 10%|█         | 10/100 [00:34<05:12,  3.47s/it]

Epoch: 9 Train Loss: 2.1105248437208286 Val Loss: 2.1791907759273754 Val Accuracy: 0.4655505599417186 Val F1: 0.030726124768201104


 11%|█         | 11/100 [00:37<05:00,  3.37s/it]

Epoch: 10 Train Loss: 2.1055372813168693 Val Loss: 2.167041631305919 Val Accuracy: 0.4667910510978585 Val F1: 0.03135366233476894


 12%|█▏        | 12/100 [00:42<05:29,  3.75s/it]

Epoch: 11 Train Loss: 2.1030287400764576 Val Loss: 2.1691038187812355 Val Accuracy: 0.4660810644822445 Val F1: 0.030740503768517082


 13%|█▎        | 13/100 [00:45<05:10,  3.57s/it]

Epoch: 12 Train Loss: 2.0983270704746246 Val Loss: 2.17443542620715 Val Accuracy: 0.46564744849552725 Val F1: 0.030873939461698977


 14%|█▍        | 14/100 [00:48<04:55,  3.43s/it]

Epoch: 13 Train Loss: 2.0956783487516293 Val Loss: 2.2004389692755306 Val Accuracy: 0.4655505599417186 Val F1: 0.030871331669265003


 15%|█▌        | 15/100 [00:53<05:18,  3.74s/it]

Epoch: 14 Train Loss: 2.0952216956545326 Val Loss: 2.1911462124656227 Val Accuracy: 0.4653620772905937 Val F1: 0.03038351089125211


 16%|█▌        | 16/100 [00:57<05:34,  3.98s/it]

Epoch: 15 Train Loss: 2.089754350045148 Val Loss: 2.19591663164251 Val Accuracy: 0.465300132149634 Val F1: 0.031147292544020266


 17%|█▋        | 17/100 [01:00<05:10,  3.74s/it]

Epoch: 16 Train Loss: 2.084392586175133 Val Loss: 2.215751679504619 Val Accuracy: 0.4641279394822445 Val F1: 0.0313128279460712


 18%|█▊        | 18/100 [01:04<04:52,  3.57s/it]

Epoch: 17 Train Loss: 2.0847566145307876 Val Loss: 2.2237322470721077 Val Accuracy: 0.4637912120493359 Val F1: 0.03156484332587625


 19%|█▉        | 19/100 [01:07<04:40,  3.47s/it]

Epoch: 18 Train Loss: 2.0808235363048664 Val Loss: 2.220393917139839 Val Accuracy: 0.46390345452697207 Val F1: 0.03149896808261651


 19%|█▉        | 19/100 [01:09<04:55,  3.65s/it]


KeyboardInterrupt: 