# IIC-3641 GML UC

## Actividad en clase

Vamos a usar el modelo GIN para trabajar en esta actividad.

- Corra el modelo GIN y vea que puede reproducir el ejemplo de la clase.
- Use el dataset **imdb-binary** que viene en TUDataset. 
- Entrene el modelo GIN. Evalúe.
- Cuando termine, me avisa para entregarle una **L (logrado)**.
- Recuerde que las L otorgan un bono en la nota final de la asignatura.

***Tiene hasta el final de la clase.***

In [1]:
import torch
print(torch.__version__)

# Prefer the GPU when one is usable, but fall back to the CPU so that the
# later `.to(device)` calls do not fail on machines without CUDA.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

2.4.1+cu118


In [2]:
import os
import torch.nn.functional as F
import torch_geometric.transforms as T
from torch.nn import BatchNorm1d as BN
from torch.nn import Linear, ReLU, Sequential
from torch.optim import Adam
from torch_geometric.datasets import TUDataset
from torch_geometric.loader import DataLoader
from torch_geometric.nn import GINConv, global_mean_pool
from torch_geometric.utils import degree

# Load the IMDB-BINARY collection from TUDataset, rooted at data/.
dataset = TUDataset(root="data/", name="IMDB-BINARY", cleaned=False)
# Discard any edge attributes stored on the dataset.
dataset.data.edge_attr = None

  _torch_pytree._register_pytree_node(
Downloading https://www.chrsmrrs.com/graphkerneldatasets/IMDB-BINARY.zip
Processing...
Done!


In [3]:
class NormalizedDegree:
    """Transform that sets node features to the standardized node degree.

    Args:
        mean: Value subtracted from every node degree.
        std: Value the centered degree is divided by.
    """

    def __init__(self, mean, std):
        self.mean = mean
        self.std = std

    def __call__(self, data):
        """Overwrite ``data.x`` with a one-column degree feature; return ``data``."""
        # Pass the node count explicitly: without it, `degree` infers the
        # count from the largest index present in edge_index, so isolated
        # nodes at the end of the graph would be dropped and data.x would
        # end up with fewer rows than the graph has nodes.
        deg = degree(data.edge_index[0], data.num_nodes, dtype=torch.float)
        deg = (deg - self.mean) / self.std
        # One feature column per node.
        data.x = deg.view(-1, 1)
        return data



In [4]:
# The dataset has no node features: derive them from node degrees.
if dataset.data.x is None:
    degs = []
    max_degree = 0
    for data in dataset:
        node_degrees = degree(data.edge_index[0], dtype=torch.long)
        degs.append(node_degrees)
        max_degree = max(max_degree, node_degrees.max().item())

    print(max_degree)

    if max_degree >= 1000:
        # Too many distinct degrees for a one-hot encoding: use a single
        # standardized degree feature instead.
        deg = torch.cat(degs, dim=0).to(torch.float)
        mean, std = deg.mean().item(), deg.std().item()
        dataset.transform = NormalizedDegree(mean, std)
    else:
        dataset.transform = T.OneHotDegree(max_degree)

# Mini-batches of 32 graphs, reshuffled on every pass over the dataset.
train_loader = DataLoader(dataset, batch_size=32, shuffle=True)

135


In [5]:
def num_graphs(data):
    """Return ``data.num_graphs`` if present, else the first dimension of ``data.x``."""
    if not hasattr(data, "num_graphs"):
        return data.x.size(0)
    return data.num_graphs

In [6]:
class GIN0(torch.nn.Module):
    """Graph classifier built from a stack of ``GINConv`` layers.

    Every ``GINConv`` is created with ``train_eps=False`` and wraps the MLP
    Linear -> ReLU -> Linear -> ReLU -> BatchNorm1d. Node embeddings are
    mean-pooled per graph and passed through a two-layer head.

    Args:
        dataset: Object exposing ``num_features`` and ``num_classes``.
        num_layers: Total number of ``GINConv`` layers (the first one plus
            ``num_layers - 1`` hidden-to-hidden ones).
        hidden: Width of every hidden representation.
    """

    def __init__(self, dataset, num_layers, hidden):
        super().__init__()

        def make_gin(in_channels):
            # One GIN layer: the wrapped MLP maps in_channels -> hidden.
            mlp = Sequential(
                Linear(in_channels, hidden),
                ReLU(),
                Linear(hidden, hidden),
                ReLU(),
                BN(hidden),
            )
            return GINConv(mlp, train_eps=False)

        self.conv1 = make_gin(dataset.num_features)
        self.convs = torch.nn.ModuleList(
            [make_gin(hidden) for _ in range(num_layers - 1)]
        )
        self.lin1 = Linear(hidden, hidden)
        self.lin2 = Linear(hidden, dataset.num_classes)

    def reset_parameters(self):
        """Re-initialize every convolution and both linear head layers."""
        for layer in (self.conv1, *self.convs, self.lin1, self.lin2):
            layer.reset_parameters()

    def forward(self, data):
        """Return per-graph log-probabilities over the classes for a batch."""
        edge_index, batch = data.edge_index, data.batch

        # Message passing over all GIN layers.
        h = self.conv1(data.x, edge_index)
        for conv in self.convs:
            h = conv(h, edge_index)

        # Collapse node embeddings into one vector per graph.
        h = global_mean_pool(h, batch)

        # Classification head; dropout is active only in training mode.
        h = F.relu(self.lin1(h))
        h = F.dropout(h, p=0.5, training=self.training)
        logits = self.lin2(h)
        return F.log_softmax(logits, dim=-1)


# Instantiate the network with 5 GIN layers and a hidden width of 32.
model = GIN0(dataset, num_layers=5, hidden=32)

In [7]:
def train(model, optimizer, loader):
    """Run one optimization pass over ``loader``.

    Returns the summed per-batch NLL loss, each weighted by
    ``num_graphs(batch)``, divided by ``len(loader.dataset)``.
    """
    model.train()

    running_loss = 0
    for batch in loader:
        batch = batch.to(device)
        optimizer.zero_grad()
        log_probs = model(batch)
        loss = F.nll_loss(log_probs, batch.y.view(-1))
        loss.backward()
        optimizer.step()
        # F.nll_loss averages over the batch; re-weight by the batch size
        # so the final division yields a per-graph mean.
        running_loss += loss.item() * num_graphs(batch)
    return running_loss / len(loader.dataset)


def eval(model, loader):
    """Return the fraction of graphs in ``loader.dataset`` classified correctly."""
    model.eval()

    hits = 0
    # No gradients are needed for evaluation.
    with torch.no_grad():
        for batch in loader:
            batch = batch.to(device)
            predicted = model(batch).argmax(dim=1)
            hits += (predicted == batch.y.view(-1)).sum().item()
    return hits / len(loader.dataset)

In [8]:
# Move the model to the target device and start from fresh weights.
model.to(device)
model.reset_parameters()
optimizer = Adam(model.parameters(), lr=0.01)

# 50 epochs; accuracy is measured on the same data used for training.
for epoch in range(1, 51):
    train_loss = train(model, optimizer, train_loader)
    train_acc = eval(model, train_loader)
    print("Epoch:", epoch,"Training Loss: ", train_loss, "Training Acc: ", train_acc)

Epoch: 1 Training Loss:  0.6775359263420105 Training Acc:  0.717
Epoch: 2 Training Loss:  0.6251230449676514 Training Acc:  0.718
Epoch: 3 Training Loss:  0.591374265909195 Training Acc:  0.705
Epoch: 4 Training Loss:  0.6035452876091003 Training Acc:  0.675
Epoch: 5 Training Loss:  0.5860662040710449 Training Acc:  0.721
Epoch: 6 Training Loss:  0.5916634163856507 Training Acc:  0.724
Epoch: 7 Training Loss:  0.5866223349571228 Training Acc:  0.707
Epoch: 8 Training Loss:  0.5779521317481995 Training Acc:  0.71
Epoch: 9 Training Loss:  0.581490930557251 Training Acc:  0.724
Epoch: 10 Training Loss:  0.5866789922714234 Training Acc:  0.704
Epoch: 11 Training Loss:  0.5698707323074341 Training Acc:  0.74
Epoch: 12 Training Loss:  0.56762593126297 Training Acc:  0.727
Epoch: 13 Training Loss:  0.5723421115875245 Training Acc:  0.734
Epoch: 14 Training Loss:  0.5539430947303772 Training Acc:  0.741
Epoch: 15 Training Loss:  0.5754005908966064 Training Acc:  0.729
Epoch: 16 Training Loss: 