In [1]:
!pip install  dgl -f https://data.dgl.ai/wheels/torch-2.4/cu124/repo.html

Looking in links: https://data.dgl.ai/wheels/torch-2.4/cu124/repo.html
Collecting dgl
  Downloading https://data.dgl.ai/wheels/torch-2.4/cu124/dgl-2.4.0%2Bcu124-cp310-cp310-manylinux1_x86_64.whl (347.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m347.8/347.8 MB[0m [31m2.4 MB/s[0m eta [36m0:00:00[0m
Collecting torch<=2.4.0 (from dgl)
  Downloading torch-2.4.0-cp310-cp310-manylinux1_x86_64.whl.metadata (26 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.1.105 (from torch<=2.4.0->dgl)
  Downloading nvidia_cuda_nvrtc_cu12-12.1.105-py3-none-manylinux1_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.1.105 (from torch<=2.4.0->dgl)
  Downloading nvidia_cuda_runtime_cu12-12.1.105-py3-none-manylinux1_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.1.105 (from torch<=2.4.0->dgl)
  Downloading nvidia_cuda_cupti_cu12-12.1.105-py3-none-manylinux1_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch<=2.4.0->dgl)


In [16]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [17]:
%cd /content/drive/MyDrive/VRD-IU

/content/drive/MyDrive/VRD-IU


In [25]:
!cp train_data_graphs.bin train_visual_features.zip /content/

In [26]:
%cd /content/

/content


In [27]:
!unzip train_visual_features.zip

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
  inflating: train_visual_features/1560.pt  
  inflating: train_visual_features/1563.pt  
  inflating: train_visual_features/1565.pt  
  inflating: train_visual_features/15598.pt  
  inflating: train_visual_features/15590.pt  
  inflating: train_visual_features/15591.pt  
  inflating: train_visual_features/15594.pt  
  inflating: train_visual_features/15589.pt  
  inflating: train_visual_features/15592.pt  
  inflating: train_visual_features/15595.pt  
  inflating: train_visual_features/15587.pt  
  inflating: train_visual_features/15596.pt  
  inflating: train_visual_features/15588.pt  
  inflating: train_visual_features/15593.pt  
  inflating: train_visual_features/15597.pt  
  inflating: train_visual_features/15585.pt  
  inflating: train_visual_features/23107.pt  
  inflating: train_visual_features/15586.pt  
  inflating: train_visual_features/17139.pt  
  inflating: train_visual_features/17137.pt  
  inflating: train

In [28]:
from torch.utils.data import Dataset, DataLoader
import pickle
import dgl
import os
import torch
import numpy as np
class GraphDataset(Dataset):
    def __init__(self, graph_file, feature_dir):
        super().__init__()
        self.graphs,_ = dgl.load_graphs(graph_file)

        self.feature_dir = feature_dir

    def __len__(self):
        return len(self.graphs)

    def load_feat(self,nodes):
        tensors = []
        for idx,node in enumerate(nodes):
            try:
                tensors.append(torch.load(f"{self.feature_dir}/{node}.pt",map_location=torch.device("cpu"),weights_only=False).unsqueeze(0))
            except:
                tensors.append(torch.zeros((1,1536)))
        return torch.cat(tensors,dim=0)

    def __getitem__(self, index):
        g = self.graphs[index]
        nodes = g.ndata['obj_id']
        feats = self.load_feat(nodes)
        return g, feats


In [29]:
import torch
import torch.nn as nn
import dgl
import torch.nn.functional as F
class MLPPredictor(nn.Module):
    def __init__(self, h_feats):
        super().__init__()
        self.W1 = nn.Linear(h_feats * 2, h_feats)
        self.W2 = nn.Linear(h_feats, 1)

    def apply_edges(self, edges):
        """
        Computes a scalar score for each edge of the given graph.

        Parameters
        ----------
        edges :
            Has three members ``src``, ``dst`` and ``data``, each of
            which is a dictionary representing the features of the
            source nodes, the destination nodes, and the edges
            themselves.

        Returns
        -------
        dict
            A dictionary of new edge features.
        """
        h = torch.cat([edges.src['h'], edges.dst['h']], 1)
        return {'score': self.W2(F.relu(self.W1(h))).squeeze(1)}

    def forward(self, g, h):
        with g.local_scope():
            g.ndata['h'] = h
            g.apply_edges(self.apply_edges)
            return g.edata['score']

In [30]:
dataset = GraphDataset('train_data_graphs.bin','train_visual_features')

In [31]:
from torch.utils.data import random_split
torch.manual_seed(42)
torch.cuda.manual_seed_all(42)
train_dataset, val_dataset = random_split(dataset,[0.9,0.1])

In [32]:
train_dataloader = DataLoader(train_dataset, batch_size=1, collate_fn=lambda batch: batch[0])
val_dataloader = DataLoader(val_dataset, batch_size=1, collate_fn=lambda batch: batch[0])

In [33]:
model = MLPPredictor(1536)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model.to(device)
from torch import optim
optimizer = optim.Adam(model.parameters(), lr=0.001)

In [34]:
from sklearn.metrics import roc_auc_score
from tqdm import tqdm
best_auc = 0.0
for epoch in tqdm(range(10)):
    total_loss = 0.0
    model.train()
    for g, feats in tqdm(train_dataloader):
        scores = model(g.to(device),feats.to(device))
        labels = g.edata['labels']
        loss = F.binary_cross_entropy_with_logits(scores, labels.to(device))
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        total_loss+=loss.item()
        g.to(torch.device('cpu'))
        feats.to(torch.device('cpu'))
        torch.cuda.empty_cache()
    total_loss/=len(train_dataloader)
    print(f'Epoch: {epoch} Train Loss: {total_loss}')
    model.eval()
    total_loss = 0.0
    auc = 0.0
    with torch.no_grad():
        for g, feats in val_dataloader:
            scores = model(g.to(device),feats.to(device))
            labels = g.edata['labels']
            loss = F.binary_cross_entropy_with_logits(scores, labels.to(device))
            total_loss+=loss.item()
            auc+= roc_auc_score(labels.detach().numpy(), scores.detach().numpy())
        total_loss/=len(val_dataloader)
        auc/=len(val_dataloader)
    if auc > best_auc:
        auc = best_auc
        torch.save(model.state_dict(),"predictor.pth")
    print(f'Epoch: {epoch} Val Loss: {total_loss} AUC: {auc}')


  0%|          | 0/10 [00:00<?, ?it/s]
  0%|          | 0/514 [00:00<?, ?it/s][A
  0%|          | 1/514 [00:01<13:08,  1.54s/it][A
  1%|          | 3/514 [00:01<03:45,  2.26it/s][A
  1%|          | 5/514 [00:01<02:03,  4.13it/s][A
  2%|▏         | 8/514 [00:01<01:08,  7.37it/s][A
  2%|▏         | 11/514 [00:02<01:52,  4.47it/s]
  0%|          | 0/10 [00:02<?, ?it/s]


OutOfMemoryError: CUDA out of memory. Tried to allocate 3.86 GiB. GPU 0 has a total capacity of 22.17 GiB of which 2.49 GiB is free. Process 20328 has 19.67 GiB memory in use. Of the allocated memory 19.41 GiB is allocated by PyTorch, and 30.07 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)