In [None]:
# Mount the user's Google Drive at /content/drive (only available inside Colab).
from google.colab import drive
drive.mount('/content/drive')

In [None]:
# Change into the project folder on the mounted Drive.
%cd /content/drive/MyDrive/VRD-IU

In [None]:
# Copy the two feature pickles and obj_cat.csv from the current directory into /content/.
!cp train_visual_features.pkl train_textual_features.pkl  obj_cat.csv /content/

In [None]:
# Work from /content/ so the relative file names used by later cells resolve there.
%cd /content/

In [1]:
from torch.utils.data import Dataset, DataLoader
import pickle
import dgl
import os
import torch
import numpy as np
import pandas as pd
class FeatureDataset(Dataset):
    """Pairs each row of ``df`` with its pre-extracted visual and textual features.

    Both feature files are pickles holding a table with an ``id`` column and a
    ``features`` column; rows of ``df`` are matched to them through ``df['id']``.
    Ids are assumed to be unique inside each feature table (if an id is
    repeated, the last entry is the one that is used) -- TODO confirm against
    the feature-extraction step.

    Args:
        df: Table with at least the columns ``id`` and ``class``.
        visual_feature_file: Path of the pickled visual feature table.
        textual_feature_file: Path of the pickled textual feature table.
    """

    def __init__(self, df,visual_feature_file, textual_feature_file):
        super().__init__()
        self.df = df
        # SECURITY: pickle.load can execute arbitrary code -- only open trusted files.
        with open(visual_feature_file, 'rb') as f:
            self.visual_features = pickle.load(f)
        with open(textual_feature_file, 'rb') as f:
            self.textual_features = pickle.load(f)
        # Index the features by id once, so fetching a sample is a dictionary
        # lookup instead of a boolean-mask scan over the whole table.
        self._visual_by_id = self._index_by_id(self.visual_features)
        self._textual_by_id = self._index_by_id(self.textual_features)

    @staticmethod
    def _index_by_id(table):
        """Map every value of ``table['id']`` to the matching ``table['features']`` entry."""
        return dict(zip(table['id'], table['features']))

    @staticmethod
    def _features_for(features_by_id, sample_id, kind):
        """Return the features stored for ``sample_id`` as a tensor, or raise ``KeyError``."""
        if sample_id not in features_by_id:
            raise KeyError(f"no {kind} features stored for id {sample_id!r}")
        return torch.tensor(features_by_id[sample_id])

    def __len__(self):
        """Number of rows in ``df``."""
        return len(self.df)

    def __getitem__(self, index):
        """Return ``(visual_feats, textual_feats, label)`` for the row at position ``index``.

        Raises:
            IndexError: if ``index`` is outside ``range(len(self))``.
            KeyError: if the row's id has no entry in one of the feature tables.
        """
        # Samplers hand out positions, so index by position (.iloc); label-based
        # .loc only works when the frame happens to carry a 0..n-1 index.
        data = self.df.iloc[index]
        sample_id = data['id']
        visual_feats = self._features_for(self._visual_by_id, sample_id, "visual")
        textual_feats = self._features_for(self._textual_by_id, sample_id, "textual")
        return visual_feats, textual_feats, data['class']


In [2]:
from sklearn.model_selection import train_test_split

# Label table: one row per object.
df = pd.read_csv("obj_cat.csv")
# Hold out 20% of the rows for validation. The fixed random_state keeps the
# split identical across kernel restarts, so validation numbers are comparable
# between runs.
train_df, val_df = train_test_split(df, test_size=0.2, random_state=42)

In [3]:
# Drop the row labels carried over from the split and renumber each part from 0.
train_df, val_df = (part.reset_index(drop=True) for part in (train_df, val_df))

In [4]:
# Training split joined with the pickled visual and textual feature tables.
train_dataset = FeatureDataset(
    df=train_df,
    visual_feature_file="train_visual_features.pkl",
    textual_feature_file="train_textual_features.pkl",
)

In [12]:
# Validation split; it is built from the same feature pickles as the training split.
val_dataset = FeatureDataset(
    df=val_df,
    visual_feature_file="train_visual_features.pkl",
    textual_feature_file="train_textual_features.pkl",
)

In [10]:
# Batches of 32 training samples, reshuffled every epoch, fetched by 8 worker processes.
train_dataloader = DataLoader(
    dataset=train_dataset,
    batch_size=32,
    shuffle=True,
    num_workers=8,
)

In [13]:
# Validation batches are read in a fixed order: shuffling brings nothing at
# evaluation time and makes any per-batch statistic depend on the draw.
val_dataloader = DataLoader(val_dataset, batch_size=32, shuffle=False, num_workers=8)

In [14]:
import torch.nn as nn
class Classifer(nn.Module):
    """Concatenates visual and textual features, fuses them with an MLP and classifies.

    Args:
        visual_dim: Width of the visual feature vectors.
        text_dim: Width of the textual feature vectors.
        hidden_dim: Width of the hidden layer of the fusion MLP.
        embed_dim: Width of the fused embedding fed to the classification head.
        num_classes: Number of output logits.

    The defaults reproduce the original fixed architecture
    (1536 + 768 -> 3072 -> 768 -> 25).
    """

    def __init__(self, visual_dim=1536, text_dim=768, hidden_dim=3072,
                 embed_dim=768, num_classes=25):
        super().__init__()
        self.fuser = nn.Sequential(
            nn.Linear(visual_dim + text_dim, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, embed_dim),
        )
        self.fc = nn.Linear(embed_dim, num_classes)

    def forward(self, visual_feats, text_feats):
        """Return the class logits for a batch.

        ``visual_feats`` and ``text_feats`` are concatenated along dim 1, so
        they must agree on every other dimension.
        """
        embed = self.fuser(torch.cat((visual_feats, text_feats), dim=1))
        pred = self.fc(embed)
        return pred

In [None]:
from torch import optim
model = Classifer()
# Use the GPU when one is present, otherwise fall back to the CPU so the cell
# still runs on a runtime without CUDA.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(),lr=1e-3)

In [None]:
def train(model, dataloader, criterion, optimizer, device=None):
    """Run one optimisation pass over ``dataloader`` and return the mean batch loss.

    Args:
        model: Module called as ``model(visual_feats, text_feats)``.
        dataloader: Iterable of ``(visual_feats, text_feats, labels)`` batches.
        criterion: Loss called as ``criterion(pred, labels)``.
        optimizer: Optimizer that updates the parameters of ``model``.
        device: Device the batches are moved to. When omitted, the device of
            the model's first parameter is used (CPU for a parameter-free
            model), so the function does not rely on a notebook-level global.

    Returns:
        The sum of the per-batch losses divided by the number of batches.

    Raises:
        ValueError: if ``dataloader`` yields no batch.
    """
    if device is None:
        first_param = next(model.parameters(), None)
        device = first_param.device if first_param is not None else torch.device("cpu")

    model.train()
    total_loss = 0.0
    num_batches = 0
    for visual_feats, text_feats, labels in dataloader:
        visual_feats = visual_feats.to(device)
        text_feats = text_feats.to(device)
        labels = labels.to(device)

        optimizer.zero_grad()
        pred = model(visual_feats, text_feats)
        loss = criterion(pred, labels)
        loss.backward()
        optimizer.step()

        total_loss += loss.item()
        num_batches += 1

    if num_batches == 0:
        raise ValueError("dataloader yielded no batches")
    return total_loss / num_batches


In [None]:
from sklearn.metrics import f1_score, accuracy_score
def evaluate(model, dataloader, criterion, device=None, average="macro"):
    """Evaluate ``model`` on ``dataloader`` and return ``(loss, accuracy, f1)``.

    Predictions and labels are collected over the whole loader and the metrics
    are computed once on the complete set, rather than averaging per-batch
    scores (which over-weights a short last batch and, for F1, is not the F1
    of the dataset).

    Args:
        model: Module called as ``model(visual_feats, text_feats)``; it must
            return one row of class scores per sample.
        dataloader: Iterable of ``(visual_feats, text_feats, labels)`` batches.
        criterion: Loss called as ``criterion(pred, labels)``.
        device: Device the batches are moved to. When omitted, the device of
            the model's first parameter is used (CPU for a parameter-free model).
        average: Averaging mode forwarded to ``sklearn.metrics.f1_score``.
            scikit-learn's own default, ``'binary'``, raises ``ValueError`` on
            multiclass targets, hence the explicit multiclass-safe default.

    Returns:
        Tuple ``(mean batch loss, accuracy, F1)``.

    Raises:
        ValueError: if ``dataloader`` yields no batch.
    """
    if device is None:
        first_param = next(model.parameters(), None)
        device = first_param.device if first_param is not None else torch.device("cpu")

    model.eval()
    total_loss = 0.0
    num_batches = 0
    all_labels = []
    all_predictions = []
    with torch.no_grad():
        for visual_feats, text_feats, labels in dataloader:
            visual_feats = visual_feats.to(device)
            text_feats = text_feats.to(device)
            labels = labels.to(device)
            pred = model(visual_feats, text_feats)
            total_loss += criterion(pred, labels).item()
            num_batches += 1
            # Keep the per-sample results on the CPU until the loop is done.
            all_labels.append(labels.cpu())
            all_predictions.append(pred.argmax(dim=1).cpu())

    if num_batches == 0:
        raise ValueError("dataloader yielded no batches")

    y_true = torch.cat(all_labels).numpy()
    y_pred = torch.cat(all_predictions).numpy()
    acc = accuracy_score(y_true, y_pred)
    f1 = f1_score(y_true, y_pred, average=average)
    return total_loss / num_batches, acc, f1

In [None]:
# One training pass followed by one validation pass per epoch; metrics are logged each time.
epochs = 100
for epoch in range(epochs):
    train_loss = train(
        model=model,
        dataloader=train_dataloader,
        criterion=criterion,
        optimizer=optimizer,
    )
    val_loss, acc, f1 = evaluate(
        model=model,
        dataloader=val_dataloader,
        criterion=criterion,
    )
    print(f'Epoch: {epoch} Train Loss: {train_loss} Val Loss: {val_loss} Val Accuracy: {acc} Val F1: {f1}')