In [1]:
!pip install  dgl -f https://data.dgl.ai/wheels/torch-2.4/cu124/repo.html

Looking in links: https://data.dgl.ai/wheels/torch-2.4/cu124/repo.html
Collecting dgl
  Downloading https://data.dgl.ai/wheels/torch-2.4/cu124/dgl-2.4.0%2Bcu124-cp310-cp310-manylinux1_x86_64.whl (347.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m347.8/347.8 MB[0m [31m4.0 MB/s[0m eta [36m0:00:00[0m
Collecting torch<=2.4.0 (from dgl)
  Downloading torch-2.4.0-cp310-cp310-manylinux1_x86_64.whl.metadata (26 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.1.105 (from torch<=2.4.0->dgl)
  Downloading nvidia_cuda_nvrtc_cu12-12.1.105-py3-none-manylinux1_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.1.105 (from torch<=2.4.0->dgl)
  Downloading nvidia_cuda_runtime_cu12-12.1.105-py3-none-manylinux1_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.1.105 (from torch<=2.4.0->dgl)
  Downloading nvidia_cuda_cupti_cu12-12.1.105-py3-none-manylinux1_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch<=2.4.0->dgl)


In [2]:
# Colab-only: mount Google Drive at /content/drive so files stored there can be read.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
# Switch the working directory to the VRD-IU folder on the mounted Drive.
%cd /content/drive/MyDrive/VRD-IU

/content/drive/MyDrive/VRD-IU


In [4]:
# Copy the graph file, the pickled component data and the zipped visual features into /content/.
!cp train_data_graphs.bin train_data.pkl train_visual_features.zip /content/

In [5]:
# Work from /content/ from here on; the relative paths used below resolve against it.
%cd /content/

/content


In [6]:
# Extract the visual-feature archive into the current directory (-q: quiet output).
!unzip -q train_visual_features.zip

In [None]:
from torch.utils.data import Dataset, DataLoader
from PIL import Image
import os
import pickle
from transformers import AutoTokenizer
import torch
class CompTextDataset(Dataset):
    """Flat dataset of document components yielding ``(text, object_id)`` pairs.

    The pickle file must hold a mapping whose values each carry a
    ``'components'`` list of dicts. Every component needs ``'bbox'`` and
    ``'object_id'``; its ``'text'`` is used when present, otherwise its
    ``'category'``. Components whose bbox is ``[0.0, 0.0, 0.0, 0.0]`` are
    dropped.
    """

    def __init__(self, pickle_file):
        """Load ``pickle_file`` and keep every component with a non-zero bbox.

        NOTE: ``pickle.load`` can execute arbitrary code embedded in the file;
        only point this at trusted data.
        """
        super().__init__()
        with open(pickle_file, 'rb') as file:
            data = pickle.load(file)
        self.components = [
            comp
            for doc in data.values()
            for comp in doc['components']
            if comp['bbox'] != [0.0, 0.0, 0.0, 0.0]
        ]

    def __len__(self):
        """Number of retained components."""
        return len(self.components)

    def __getitem__(self, index):
        """Return ``(text, object_id)`` for the component at ``index``."""
        comp = self.components[index]
        try:
            text = comp['text']
        except KeyError:
            # Components without a 'text' entry are represented by their category.
            text = comp['category']
        return text, comp['object_id']

In [None]:
# Text dataset over the pickled training components.
# Note: the name `train_dataset` is rebound later to the graph training split.
train_dataset = CompTextDataset('train_data.pkl')

In [None]:
# Load the pretrained XLM-RoBERTa base encoder used to embed component text.
# Note: `model` and `device` are rebound further down for the edge predictor.
from transformers import XLMRobertaModel
model = XLMRobertaModel.from_pretrained("FacebookAI/xlm-roberta-base")
# Use the GPU when one is available, otherwise the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using: {device}")
model.to(device)
# Feature extraction only: put the encoder in evaluation mode.
model.eval()

In [None]:
# Tokenizer for the same "FacebookAI/xlm-roberta-base" checkpoint as the encoder.
tokenizer = AutoTokenizer.from_pretrained("FacebookAI/xlm-roberta-base")

In [None]:
from tqdm import tqdm
import os
def extract_features(dataloader, feature_path):
    """Embed every text yielded by ``dataloader`` and save one file per object id.

    Uses the notebook-level ``tokenizer``, ``model`` and ``device``.

    Parameters
    ----------
    dataloader :
        Iterable yielding ``(texts, object_ids)`` batches.
    feature_path :
        Directory that receives one ``<object_id>.pt`` tensor file per
        component; created when it does not exist.
    """
    os.makedirs(feature_path, exist_ok=True)
    with torch.no_grad():
        for texts, object_ids in tqdm(dataloader):
            text_inputs = tokenizer(texts, return_tensors="pt", padding=True, truncation=True).to(device)
            outputs = model(**text_inputs)
            # NOTE(review): pooler_output passes through the model's pooling head;
            # confirm the checkpoint ships trained pooler weights.
            features = outputs.pooler_output.cpu()
            for obj_id, feature in zip(object_ids, features):
                # A row of `features` is a view sharing the whole batch's storage,
                # and torch.save serialises the full storage. Clone so each file
                # holds only its own vector.
                torch.save(feature.clone(), os.path.join(feature_path, f"{obj_id}.pt"))

In [None]:
# Batches of 512 components, loaded by 6 worker processes, in dataset order.
train_dataloader = DataLoader(train_dataset,batch_size=512, num_workers=6)

In [None]:
# Write one textual feature file per component into ./train_textual_features.
extract_features(train_dataloader,  'train_textual_features')
print("Extraction completed for training set!")

In [1]:
from torch.utils.data import Dataset, DataLoader
import pickle
import dgl
import os
import torch
import numpy as np
class GraphDataset(Dataset):
    """Dataset of DGL graphs paired with per-node feature tensors read from disk.

    Each item is ``(graph, visual_feats)`` when no textual feature directory
    was given, and ``(graph, visual_feats, textual_feats)`` otherwise.

    Parameters
    ----------
    graph_file :
        Path passed to ``dgl.load_graphs``.
    visual_feature_dir :
        Directory holding ``<obj_id>.pt`` visual feature files.
    textual_feature_dir :
        Optional directory holding ``<obj_id>.pt`` textual feature files.
    visual_dim, textual_dim :
        Width of the zero vector substituted for a missing feature file.
    """

    def __init__(self, graph_file, visual_feature_dir, textual_feature_dir=None,
                 visual_dim=1536, textual_dim=768):
        super().__init__()
        self.graphs, _ = dgl.load_graphs(graph_file)

        self.visual_feature_dir = visual_feature_dir
        self.textual_feature_dir = textual_feature_dir
        self.visual_dim = visual_dim
        self.textual_dim = textual_dim

    def __len__(self):
        """Number of graphs."""
        return len(self.graphs)

    def load_feat(self, feature_dir, nodes, feat_dim):
        """Stack the saved feature of every id in ``nodes`` into ``(len(nodes), feat_dim)``.

        An id without a ``<id>.pt`` file contributes a zero row. Any other load
        failure (for example a corrupt file) is raised rather than silently
        replaced by zeros.
        """
        tensors = []
        for node in nodes:
            path = os.path.join(feature_dir, f"{node}.pt")
            try:
                # weights_only=False unpickles arbitrary objects; only load trusted files.
                feat = torch.load(path, map_location=torch.device("cpu"), weights_only=False)
            except FileNotFoundError:
                feat = torch.zeros(feat_dim)
            tensors.append(feat.unsqueeze(0))
        if not tensors:
            # torch.cat rejects an empty list; a graph without nodes gets an empty matrix.
            return torch.zeros((0, feat_dim))
        return torch.cat(tensors, dim=0)

    def __getitem__(self, index):
        """Return the graph at ``index`` together with its node features."""
        g = self.graphs[index]
        nodes = g.ndata['obj_id']
        visual_feats = self.load_feat(self.visual_feature_dir, nodes, self.visual_dim)
        if self.textual_feature_dir is None:
            return g, visual_feats
        textual_feats = self.load_feat(self.textual_feature_dir, nodes, self.textual_dim)
        return g, visual_feats, textual_feats


In [2]:
import torch
import torch.nn as nn
import dgl
import torch.nn.functional as F
class MLPPredictor(nn.Module):
    """Scores each edge of a graph from the features of its two endpoints.

    The concatenated source/destination features go through a linear layer
    with ReLU, then an ``nn.LSTMCell`` called without a previous state, and
    finally a linear layer that emits one logit per edge.
    """

    def __init__(self, h_feats):
        super().__init__()
        pair_width = h_feats * 2
        # Layers are created in this order on purpose: it fixes both the
        # parameter ordering and the RNG draws used for initialisation.
        self.W2 = nn.Linear(pair_width, 1)
        self.W1 = nn.Linear(pair_width, h_feats)
        self.lstm_cell = nn.LSTMCell(h_feats, pair_width)

    def apply_edges(self, edges):
        """Edge UDF: produce one scalar score per edge.

        Parameters
        ----------
        edges :
            Edge batch with ``src``, ``dst`` and ``data`` members, each a
            dictionary of features for the source nodes, the destination
            nodes and the edges themselves. Only ``src['h']`` and
            ``dst['h']`` are read.

        Returns
        -------
        dict
            ``{'score': tensor}`` holding the new edge feature.
        """
        endpoints = torch.cat((edges.src['h'], edges.dst['h']), 1)
        projected = F.relu(self.W1(endpoints))
        hidden_state, _cell_state = self.lstm_cell(projected)
        return {'score': self.W2(hidden_state).squeeze(1)}

    def forward(self, g, h):
        """Return the per-edge scores of ``g`` given node features ``h``."""
        # local_scope keeps the temporary 'h' and 'score' entries off the caller's graph.
        with g.local_scope():
            g.ndata['h'] = h
            g.apply_edges(self.apply_edges)
            edge_scores = g.edata['score']
            return edge_scores

In [3]:
# Graphs plus on-disk visual features; no textual feature directory is passed here.
dataset = GraphDataset('train_data_graphs.bin','train_visual_features')

In [4]:
from torch.utils.data import random_split
# Seed the CPU and CUDA generators so the random split below is repeatable.
torch.manual_seed(42)
torch.cuda.manual_seed_all(42)
# 90% / 10% train/validation split; rebinds `train_dataset`.
train_dataset, val_dataset = random_split(dataset,[0.9,0.1])

In [5]:
def _unwrap_single(batch):
    """Collate function for ``batch_size=1``: return the lone sample, not a one-element list."""
    return batch[0]


# One graph per step. Training graphs are reshuffled every epoch;
# validation keeps a fixed order.
train_dataloader = DataLoader(train_dataset, batch_size=1, shuffle=True, collate_fn=_unwrap_single)
val_dataloader = DataLoader(val_dataset, batch_size=1, collate_fn=_unwrap_single)

In [9]:
# Edge scorer over 1536-dimensional node features. Rebinds `model`.
model = MLPPredictor(1536)
# Automatic GPU selection is disabled below; training is pinned to the CPU.
# device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
device = torch.device("cpu")
model.to(device)
from torch import optim
# Adam over all predictor parameters with a learning rate of 1e-3.
optimizer = optim.Adam(model.parameters(), lr=0.001)

In [10]:
def sigmoid_focal_loss(inputs, targets, alpha=0.25, gamma=2):
    """Mean sigmoid focal loss over all elements.

    Parameters
    ----------
    inputs :
        Raw logits.
    targets :
        Binary labels with the same shape as ``inputs``. Integer or boolean
        labels are accepted and cast to the dtype of ``inputs``.
    alpha :
        Weight of the positive class; negatives get ``1 - alpha``. Keep it in
        ``[0, 1]``: a value above 1 makes the negative-class weight negative.
        A negative value disables the class weighting.
    gamma :
        Exponent of the modulating factor ``(1 - p_t)`` that down-weights
        well-classified elements.

    Returns
    -------
    Scalar tensor holding the mean loss.
    """
    # binary_cross_entropy_with_logits needs floating-point targets.
    targets = targets.to(dtype=inputs.dtype)
    p = torch.sigmoid(inputs)
    ce_loss = F.binary_cross_entropy_with_logits(inputs, targets, reduction="none")
    # p_t is the probability the model assigns to the true class.
    p_t = p * targets + (1 - p) * (1 - targets)
    loss = ce_loss * ((1 - p_t) ** gamma)
    if alpha >= 0:
        alpha_t = alpha * targets + (1 - alpha) * (1 - targets)
        loss = alpha_t * loss
    return loss.mean()


In [None]:
from sklearn.metrics import f1_score
from tqdm import tqdm

NUM_EPOCHS = 10
# sigmoid_focal_loss weights positives by alpha and negatives by (1 - alpha),
# so alpha must stay within [0, 1]. A value above 1 (such as 2) gives negative
# edges a negative weight and rewards wrong predictions on them.
# TODO(review): 0.75 up-weights positive edges 3:1; tune as needed.
FOCAL_ALPHA = 0.75
FOCAL_GAMMA = 1

best_f1 = 0.0
for epoch in tqdm(range(NUM_EPOCHS)):
    # ---- training pass: optimise on the training split only ----
    total_loss = 0.0
    model.train()
    for g, feats in train_dataloader:
        scores = model(g.to(device), feats.to(device))
        labels = g.edata['labels'].to(device)
        loss = sigmoid_focal_loss(scores, labels.float(), alpha=FOCAL_ALPHA, gamma=FOCAL_GAMMA)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
    total_loss /= len(train_dataloader)
    print(f'Epoch: {epoch} Train Loss: {total_loss}')

    # ---- validation pass: loss and per-graph F1, averaged over graphs ----
    model.eval()
    total_loss = 0.0
    f1 = 0.0
    with torch.no_grad():
        for g, feats in val_dataloader:
            scores = model(g.to(device), feats.to(device))
            # An edge is predicted positive when its sigmoid probability reaches 0.5.
            preds = (torch.sigmoid(scores) >= 0.5).long()
            labels = g.edata['labels'].to(device)
            loss = sigmoid_focal_loss(scores, labels.float(), alpha=FOCAL_ALPHA, gamma=FOCAL_GAMMA)
            total_loss += loss.item()
            f1 += f1_score(labels.cpu().numpy(), preds.cpu().numpy())
        total_loss /= len(val_dataloader)
        f1 /= len(val_dataloader)
    print(f'Epoch: {epoch} Val Loss: {total_loss} F1: {f1}')

    # Keep the weights of the best-scoring epoch.
    if f1 > best_f1:
        best_f1 = f1
        torch.save(model.state_dict(), "predictor.pth")



  0%|          | 0/10 [00:00<?, ?it/s]

In [None]:
# Best validation F1 reached during training.
best_f1