# IIC-3641 GML UC

- Versiones de librerías: Python 3.10.2
- DGL: https://www.dgl.ai/pages/start.html


In [1]:
import torch
# Print the installed PyTorch version string so it is recorded with the notebook.
print(torch.__version__)

2.4.1+cu118


## DGL requiere un framework de backend. Aquí se usa PyTorch con soporte CUDA (GPU).

In [2]:
import os

# Select PyTorch as DGL's backend framework through the DGLBACKEND environment variable.
os.environ["DGLBACKEND"] = "pytorch"

## Definiremos una capa GAT

In [3]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from dgl.nn.pytorch import GATConv


class GATLayer(nn.Module):
    """Single-head graph attention layer written with DGL message passing.

    The layer holds a reference to the graph ``g``. Each forward pass writes
    the projected node features to ``g.ndata["z"]``, scores every edge with
    ``edge_attention`` and aggregates neighbour messages into ``"h"``.

    Args:
        g: Graph object exposing ``ndata``, ``apply_edges`` and ``update_all``.
        in_dim: Width of the input node features.
        out_dim: Width of the output node features.
    """

    def __init__(self, g, in_dim, out_dim):
        super().__init__()
        self.g = g
        # Linear projection shared by all nodes.
        self.fc = nn.Linear(in_dim, out_dim, bias=False)
        # Maps a concatenated (source, destination) feature pair to one score.
        self.attn_fc = nn.Linear(2 * out_dim, 1, bias=False)
        self.reset_parameters()

    def reset_parameters(self):
        """Re-draw both weight matrices with Xavier-normal init (ReLU gain)."""
        relu_gain = nn.init.calculate_gain("relu")
        # Order matters for RNG reproducibility: projection first, then attention.
        for weight in (self.fc.weight, self.attn_fc.weight):
            nn.init.xavier_normal_(weight, gain=relu_gain)

    def edge_attention(self, edges):
        """Return ``{"e": score}`` with the unnormalized attention of each edge.

        The score is LeakyReLU(attn_fc([z_src || z_dst])).
        """
        src_z = edges.src["z"]
        dst_z = edges.dst["z"]
        pair = torch.cat((src_z, dst_z), dim=1)
        return {"e": F.leaky_relu(self.attn_fc(pair))}

    def message_func(self, edges):
        """Send the source feature ``"z"`` and the edge score ``"e"`` as the message."""
        return dict(z=edges.src["z"], e=edges.data["e"])

    def reduce_func(self, nodes):
        """Return ``{"h": ...}``: softmax-weighted sum of the received ``"z"`` messages.

        The softmax and the sum both run over dim 1 of the mailbox tensors.
        """
        inbox = nodes.mailbox
        weights = F.softmax(inbox["e"], dim=1)
        aggregated = (weights * inbox["z"]).sum(dim=1)
        return {"h": aggregated}

    def forward(self, h):
        """Project ``h``, run one round of attention message passing, return ``"h"``."""
        graph = self.g
        graph.ndata["z"] = self.fc(h)
        graph.apply_edges(self.edge_attention)
        graph.update_all(self.message_func, self.reduce_func)
        # pop() removes the result from the graph so only the caller holds it.
        return graph.ndata.pop("h")

## Multi-head attention

In [4]:
class MultiHeadGATLayer(nn.Module):
    """Run several independent ``GATLayer`` heads and merge their outputs.

    Args:
        g: Graph passed to every head.
        in_dim: Width of the input node features.
        out_dim: Width of each head's output features.
        num_heads: Number of attention heads to create.
        merge: ``"cat"`` concatenates the head outputs along the feature
            dimension (output width ``out_dim * num_heads``). Any other value
            averages the heads element-wise (output width ``out_dim``).
    """

    def __init__(self, g, in_dim, out_dim, num_heads, merge="cat"):
        super().__init__()
        self.heads = nn.ModuleList(
            [GATLayer(g, in_dim, out_dim) for _ in range(num_heads)]
        )
        self.merge = merge

    def forward(self, h):
        """Apply every head to ``h`` and merge the results according to ``self.merge``."""
        head_outs = [attn_head(h) for attn_head in self.heads]
        if self.merge == "cat":
            return torch.cat(head_outs, dim=1)
        # Average over the head axis only (dim 0 of the stack). Without `dim`,
        # torch.mean reduces over every element and returns a single scalar,
        # destroying the per-node output.
        return torch.mean(torch.stack(head_outs), dim=0)

## Definimos la GAT

In [5]:
class GAT(nn.Module):
    """Two-layer graph attention network.

    Args:
        g: Graph shared by both layers.
        in_dim: Width of the input node features.
        hidden_dim: Per-head width of the hidden layer.
        out_dim: Width of the output layer.
        num_heads: Number of attention heads in the hidden layer.
    """

    def __init__(self, g, in_dim, hidden_dim, out_dim, num_heads):
        super().__init__()
        self.layer1 = MultiHeadGATLayer(g, in_dim, hidden_dim, num_heads)
        # layer1 uses the default "cat" merge, so its output width is the
        # per-head width times the number of heads.
        concat_dim = hidden_dim * num_heads
        self.layer2 = MultiHeadGATLayer(g, concat_dim, out_dim, 1)

    def forward(self, h):
        """Return the output of layer2 applied to ELU(layer1(h))."""
        hidden = F.elu(self.layer1(h))
        return self.layer2(hidden)


## Leemos el dataset

In [6]:
from dgl import DGLGraph
from dgl.data import citation_graph as citegrh
import time
import numpy as np

def load_cora_data():
    """Load the Cora citation dataset and return its graph and node tensors.

    Returns:
        tuple: ``(g, features, labels, train_mask, test_mask)`` where ``g`` is
        the first graph of the dataset, ``features`` and ``labels`` are
        ``g.ndata["feat"]`` and ``g.ndata["label"]``, and the two masks are
        boolean tensors built from ``g.ndata["train_mask"]`` and
        ``g.ndata["test_mask"]``.
    """
    dataset = citegrh.load_cora()
    g = dataset[0]
    # torch.as_tensor replaces the legacy torch.BoolTensor(...) constructor:
    # it converts masks of any dtype or device to bool, and it does not copy
    # when the mask is already a bool tensor.
    train_mask = torch.as_tensor(g.ndata["train_mask"], dtype=torch.bool)
    test_mask = torch.as_tensor(g.ndata["test_mask"], dtype=torch.bool)

    return g, g.ndata["feat"], g.ndata["label"], train_mask, test_mask


# Load Cora once; the graph, features, labels and masks become notebook-level variables.
g, features, labels, train_mask, test_mask = load_cora_data()

  NumNodes: 2708
  NumEdges: 10556
  NumFeats: 1433
  NumClasses: 7
  NumTrainingSamples: 140
  NumValidationSamples: 500
  NumTestSamples: 1000
Done loading data from cached files.


In [7]:
import torch as th

def evaluate(model, g, features, labels, mask):
    """Return the classification accuracy of ``model`` on the nodes selected by ``mask``.

    The model is switched to eval mode for the forward pass, and its previous
    train/eval mode is restored afterwards, so calling this inside a training
    loop does not leave the model in eval mode.

    Args:
        model: Callable module mapping ``features`` to per-node class logits.
        g: Unused by this function; kept so existing callers keep working.
        features: Node feature tensor passed to ``model``.
        labels: Ground-truth class index per node.
        mask: Boolean (or index) tensor selecting the nodes to score.

    Returns:
        float: Fraction of selected nodes whose arg-max logit equals the label.

    Raises:
        ZeroDivisionError: If ``mask`` selects no nodes.
    """
    was_training = model.training
    model.eval()
    try:
        with th.no_grad():
            logits = model(features)[mask]
            targets = labels[mask]
            predictions = logits.argmax(dim=1)
            correct = (predictions == targets).sum().item()
        return correct * 1.0 / len(targets)
    finally:
        # Restore whichever mode the caller had set before evaluation.
        model.train(was_training)

## Definimos la red y el ciclo de entrenamiento

In [8]:
NUM_EPOCHS = 100
# The first epochs are excluded from the timing average.
WARMUP_EPOCHS = 3

net = GAT(g, in_dim=features.size()[1], hidden_dim=8, out_dim=7, num_heads=2)

optimizer = torch.optim.Adam(net.parameters(), lr=1e-3)


dur = []
for epoch in range(NUM_EPOCHS):
    # evaluate() calls model.eval(); re-enter training mode explicitly so the
    # optimization step never runs with the model in eval mode.
    net.train()
    # Always bind t0 so it is never read before assignment.
    t0 = time.time()

    logits = net(features)
    logp = F.log_softmax(logits, 1)
    loss = F.nll_loss(logp[train_mask], labels[train_mask])

    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    if epoch >= WARMUP_EPOCHS:
        dur.append(time.time() - t0)
    acc = evaluate(net, g, features, labels, test_mask)
    # np.mean([]) emits a RuntimeWarning; report NaN directly during warm-up so
    # the printed text is unchanged but the warning disappears.
    mean_dur = np.mean(dur) if dur else float("nan")
    print(
        "Epoch {:05d} | Loss {:.4f} | Test Acc {:.4f} | Time(s) {:.4f}".format(
            epoch, loss.item(), acc, mean_dur
        )
    )

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Epoch 00000 | Loss 1.9471 | Test Acc 0.1310 | Time(s) nan
Epoch 00001 | Loss 1.9450 | Test Acc 0.1660 | Time(s) nan
Epoch 00002 | Loss 1.9430 | Test Acc 0.1940 | Time(s) nan
Epoch 00003 | Loss 1.9409 | Test Acc 0.2380 | Time(s) 0.0592
Epoch 00004 | Loss 1.9388 | Test Acc 0.2700 | Time(s) 0.0572
Epoch 00005 | Loss 1.9368 | Test Acc 0.3100 | Time(s) 0.0572
Epoch 00006 | Loss 1.9347 | Test Acc 0.3410 | Time(s) 0.0570
Epoch 00007 | Loss 1.9326 | Test Acc 0.3760 | Time(s) 0.0566
Epoch 00008 | Loss 1.9305 | Test Acc 0.4200 | Time(s) 0.0563
Epoch 00009 | Loss 1.9284 | Test Acc 0.4460 | Time(s) 0.0566
Epoch 00010 | Loss 1.9264 | Test Acc 0.4640 | Time(s) 0.0564
Epoch 00011 | Loss 1.9243 | Test Acc 0.4780 | Time(s) 0.0562
Epoch 00012 | Loss 1.9222 | Test Acc 0.5010 | Time(s) 0.0563
Epoch 00013 | Loss 1.9200 | Test Acc 0.5150 | Time(s) 0.0562
Epoch 00014 | Loss 1.9179 | Test Acc 0.5340 | Time(s) 0.0561
Epoch 00015 | Loss 1.9158 | Test Acc 0.5540 | Time(s) 0.0562
Epoch 00016 | Loss 1.9137 | Test 