# IIC-3641 GML UC

In [1]:
!python3 -m pip list

[0mPackage                            Version
---------------------------------- -----------------------------------------
absl-py                            1.1.0
accelerate                         0.26.1
aggdraw                            1.3.16
aiohttp                            3.8.4
aiosignal                          1.2.0
alabaster                          0.7.12
ann-visualizer                     2.5
annotated-types                    0.6.0
antlr4-python3-runtime             4.9.3
anyio                              3.7.0
AnyQt                              0.2.0
anytree                            2.8.0
appdirs                            1.4.4
appnope                            0.1.3
apturl                             0.5.2
argon2-cffi                        21.3.0
argon2-cffi-bindings               21.2.0
array-record                       0.4.0
arrow                              1.3.0
astor                              0.8.1
asttokens                          2.4.1
astunparse  

Orange3-Bioinformatics             4.5.0
Orange3-Educational                0.5.0
Orange3-Explain                    0.6.0
Orange3-Geo                        0.3.0
Orange3-ImageAnalytics             0.8.0
Orange3-Network                    1.7.0
Orange3-Prototypes                 0.18.0
Orange3-Survival-Analysis          0.4.0
Orange3-Text                       1.11.0
Orange3-Textable                   3.1.11
Orange3-Timeseries                 0.5.1
Orange3-WorldHappiness             0.1.8
outcome                            1.2.0
overrides                          7.7.0
Owlready2                          0.38
packaging                          23.1
pandas                             1.3.5
pandas-datareader                  0.10.0
pandocfilters                      1.5.0
paramiko                           2.6.0
parse                              1.19.1
parsel                             1.8.1
parso                              0.8.3
pathspec                       

In [2]:
import torch
print(torch.__version__)

# Use the GPU when one is visible; otherwise fall back to the CPU so the
# later `.to(device)` calls still work on machines without CUDA.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

2.4.1+cu118


In [3]:
import os
import torch.nn.functional as F
import torch_geometric.transforms as T
from torch.nn import BatchNorm1d as BN
from torch.nn import Linear, ReLU, Sequential
from torch.optim import Adam
from torch_geometric.datasets import TUDataset
from torch_geometric.loader import DataLoader
from torch_geometric.nn import GINConv, global_mean_pool
from torch_geometric.utils import degree

  _torch_pytree._register_pytree_node(


## Vamos a trabajar con un dataset de Reddit (REDDIT-BINARY) para clasificación binaria de grafos

In [4]:
# Load the REDDIT-BINARY graph-classification benchmark (stored under data/)
# and drop any edge attributes, which the model below does not use.
dataset = TUDataset(root="data/", name="REDDIT-BINARY", cleaned=False)
dataset.data.edge_attr = None




### Referencia a datasets de TUDataset: https://grlplus.github.io/papers/79.pdf

### Ver documentación de los datasets en: https://pytorch-geometric.readthedocs.io/en/latest/generated/torch_geometric.datasets.TUDataset.html

## Aquí va una función de normalización de grado

In [5]:
class NormalizedDegree:
    """Transform that replaces node features with the standardized node degree.

    Args:
        mean: Value subtracted from every node degree.
        std: Value the centered degree is divided by.
    """

    def __init__(self, mean, std):
        self.mean = mean
        self.std = std

    def __call__(self, data):
        """Set ``data.x`` to a one-column standardized-degree feature and return ``data``."""
        # Pass the node count explicitly: without it `degree` sizes its output
        # from the largest index found in edge_index[0], so trailing isolated
        # nodes (or a graph without edges) would yield fewer rows than nodes.
        deg = degree(data.edge_index[0], data.num_nodes, dtype=torch.float)
        deg = (deg - self.mean) / self.std
        # One feature column per node.
        data.x = deg.view(-1, 1)
        return data



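## Como el dataset no tiene atributos de nodo (X), se construyen a partir del grado del nodo: codificación one-hot si el grado máximo es menor que 1000; en caso contrario (como aquí, con grado máximo 3062), se usa el grado normalizado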

In [6]:
# The graphs carry no node attributes, so node features are derived from the
# node degree: one-hot encoded when the largest degree is small, standardized
# otherwise.
if dataset.data.x is None:
    max_degree = 0
    degs = []
    for data in dataset:
        # Pass the node count so that isolated nodes are counted with degree 0
        # instead of being dropped from the statistics.
        degs.append(degree(data.edge_index[0], data.num_nodes, dtype=torch.long))
        max_degree = max(max_degree, degs[-1].max().item())

    print(max_degree)

    if max_degree < 1000:
        # Small degree range: one indicator column per possible degree.
        dataset.transform = T.OneHotDegree(max_degree)
    else:
        # Large degree range: a single standardized-degree column instead.
        deg = torch.cat(degs, dim=0).to(torch.float)
        mean, std = deg.mean().item(), deg.std().item()
        dataset.transform = NormalizedDegree(mean, std)

3062


In [7]:
# Mini-batches of 32 graphs, reshuffled every epoch. The loader is built over
# the whole dataset; no validation or test split is made in this cell.
train_loader = DataLoader(
    dataset,
    shuffle=True,
    batch_size=32,
)

In [8]:
def num_graphs(data):
    """Return the number of graphs represented by ``data``.

    Uses ``data.num_graphs`` when that attribute exists; otherwise falls back
    to the size of the first dimension of ``data.x``.
    """
    if not hasattr(data, "num_graphs"):
        return data.x.size(0)
    return data.num_graphs

## Se define el modelo GIN

In [9]:
class GIN0(torch.nn.Module):
    """Graph classifier: stacked GINConv layers, mean pooling and a two-layer head.

    Args:
        dataset: Object exposing ``num_features`` and ``num_classes``.
        num_layers: Total number of GINConv layers (the first one plus
            ``num_layers - 1`` further layers).
        hidden: Width of every hidden representation.
    """

    def __init__(self, dataset, num_layers, hidden):
        super().__init__()

        def make_conv(in_channels):
            # Linear -> ReLU -> Linear -> ReLU -> BatchNorm, wrapped in a
            # GINConv created with train_eps=False.
            mlp = Sequential(
                Linear(in_channels, hidden),
                ReLU(),
                Linear(hidden, hidden),
                ReLU(),
                BN(hidden),
            )
            return GINConv(mlp, train_eps=False)

        # Layers are created in the same order as they are applied in forward().
        self.conv1 = make_conv(dataset.num_features)
        self.convs = torch.nn.ModuleList(
            [make_conv(hidden) for _ in range(num_layers - 1)]
        )
        self.lin1 = Linear(hidden, hidden)
        self.lin2 = Linear(hidden, dataset.num_classes)

    def reset_parameters(self):
        """Re-initialize the parameters of every convolution and linear layer."""
        for layer in (self.conv1, *self.convs, self.lin1, self.lin2):
            layer.reset_parameters()

    def forward(self, data):
        """Return per-graph log-probabilities for the batch ``data``.

        Reads ``data.x``, ``data.edge_index`` and ``data.batch``.
        """
        h = self.conv1(data.x, data.edge_index)
        for conv in self.convs:
            h = conv(h, data.edge_index)
        # Collapse node embeddings into one vector per graph.
        pooled = global_mean_pool(h, data.batch)
        out = F.relu(self.lin1(pooled))
        out = F.dropout(out, p=0.5, training=self.training)
        return F.log_softmax(self.lin2(out), dim=-1)


# Build the classifier with 5 GIN layers and a hidden width of 32. The values
# are hard-coded here rather than read from config.num_layers /
# config.latent_dim.
model = GIN0(dataset, num_layers=5, hidden=32)

## Se definen los ciclos de training y evaluación

In [10]:
def train(model, optimizer, loader):
    """Run one training epoch and return the mean loss per dataset item.

    Args:
        model: Module called on each batch; its output is fed to ``F.nll_loss``.
        optimizer: Optimizer stepped once per batch.
        loader: Iterable of batches that also exposes ``loader.dataset``.

    Returns:
        Sum of the per-batch losses, each weighted by the batch's graph count,
        divided by ``len(loader.dataset)``.
    """
    model.train()

    running_loss = 0
    for batch in loader:
        batch = batch.to(device)
        optimizer.zero_grad()
        log_probs = model(batch)
        targets = batch.y.view(-1)
        loss = F.nll_loss(log_probs, targets)
        loss.backward()
        # nll_loss averages over the batch by default, so weight by batch size.
        running_loss += loss.item() * num_graphs(batch)
        optimizer.step()
    return running_loss / len(loader.dataset)


def eval(model, loader):
    """Return the fraction of items in ``loader.dataset`` that are classified correctly.

    Note: this name shadows Python's built-in ``eval`` within the notebook.

    Args:
        model: Module returning one score row per graph.
        loader: Iterable of batches that also exposes ``loader.dataset``.
    """
    model.eval()

    hits = 0
    with torch.no_grad():
        for batch in loader:
            batch = batch.to(device)
            # The predicted class is the index of the largest score in each row.
            _, predicted = model(batch).max(1)
            hits += predicted.eq(batch.y.view(-1)).sum().item()
    return hits / len(loader.dataset)

## Optimizer y entrenamiento

In [11]:
# Move the model to the selected device and start from fresh weights.
model = model.to(device)
model.reset_parameters()
optimizer = Adam(model.parameters(), lr=0.01)

# Train for 50 epochs. The accuracy reported here is measured on the same
# loader that is used for training, so it is a training accuracy.
for epoch in range(1, 51):
    train_loss = train(model, optimizer, train_loader)
    train_acc = eval(model, train_loader)
    print("Epoch:", epoch, "Training Loss: ", train_loss, "Training Acc: ", train_acc)

Epoch: 1 Training Loss:  0.581225879907608 Training Acc:  0.6855
Epoch: 2 Training Loss:  0.5407945499420166 Training Acc:  0.7655
Epoch: 3 Training Loss:  0.4885126895904541 Training Acc:  0.7725
Epoch: 4 Training Loss:  0.47090305578708647 Training Acc:  0.784
Epoch: 5 Training Loss:  0.44570968961715696 Training Acc:  0.836
Epoch: 6 Training Loss:  0.4549015798568726 Training Acc:  0.798
Epoch: 7 Training Loss:  0.43515063667297366 Training Acc:  0.7615
Epoch: 8 Training Loss:  0.46540514516830445 Training Acc:  0.7115
Epoch: 9 Training Loss:  0.40907820510864257 Training Acc:  0.831
Epoch: 10 Training Loss:  0.4171438689231873 Training Acc:  0.702
Epoch: 11 Training Loss:  0.45854467010498046 Training Acc:  0.803
Epoch: 12 Training Loss:  0.4493528497219086 Training Acc:  0.8495
Epoch: 13 Training Loss:  0.43879363012313843 Training Acc:  0.719
Epoch: 14 Training Loss:  0.38880557572841645 Training Acc:  0.8
Epoch: 15 Training Loss:  0.37944952273368837 Training Acc:  0.8125
Epoch: