<a href="https://colab.research.google.com/github/tamara-kostova/IIS/blob/master/lab5.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install torch
!pip install torch_geometric
!pip install torch_scatter torch_sparse torch_cluster torch_spline_conv -f https://data.pyg.org/whl/torch-2.2.0+cpu.html

Looking in links: https://data.pyg.org/whl/torch-2.2.0+cpu.html


# **EXERCISE 1**

GraphSAGE model

In [2]:
import torch
from torch.nn import Linear
from torch.nn.functional import dropout
from torch_geometric.nn import SAGEConv, global_mean_pool


class GraphSAGE(torch.nn.Module):
    def __init__(self, num_classes):
        super(GraphSAGE, self).__init__()

        self.conv1 = SAGEConv((-1, -1), 64)
        self.conv2 = SAGEConv((-1, -1), 128)
        self.conv3 = SAGEConv((-1, -1), 64)

        self.linear1 = Linear(64, num_classes)

    def forward(self, x, edge_index, batch):
        # 1. Obtain node embeddings
        x = self.conv1(x, edge_index)
        x = x.relu()
        x = self.conv2(x, edge_index)
        x = x.relu()
        x = self.conv3(x, edge_index)

        # 2. Readout layer
        x = global_mean_pool(x, batch)  # [batch_size, hidden_channels]

        # 3. Apply a final classifier
        x = dropout(x, p=0.5, training=self.training)
        x = self.linear1(x)

        return x

Function for training

In [3]:
def train(model, train_loader, val_loader, optimizer, criterion, epochs=5):
    for epoch in range(epochs):
        for i, batch in enumerate(train_loader):
            model.train()

            out = model(batch.x, batch.edge_index, batch.batch)

            loss = criterion(out, batch.y)
            loss.backward()
            train_loss = loss.item()

            optimizer.step()
            optimizer.zero_grad()

            print(f'Epoch: {epoch:03d}, Step: {i:03d}, Loss: {train_loss:.4f}')

        for i, batch in enumerate(val_loader):
            model.eval()

            out = model(batch.x, batch.edge_index, batch.batch)

            loss = criterion(out, batch.y)
            val_loss = loss.item()

            print(f'Epoch: {epoch:03d}, Step: {i:03d}, Val Loss: {val_loss:.4f}')

Function for testing

In [4]:
def test(model, test_loader, criterion):
    model.eval()
    test_loss = 0
    correct = 0
    predictions = []
    true_labels = []

    with torch.inference_mode():
        for i, batch in enumerate(test_loader):
            out = model(batch.x, batch.edge_index, batch.batch)

            pred = out.argmax(dim=1)

            predictions.extend(pred.cpu().numpy())
            true_labels.extend(batch.y.cpu().numpy())

    return predictions, true_labels

Load dataset

In [5]:
from torch_geometric.datasets import TUDataset
data = TUDataset(root='../data/TUDataset/DD', name='DD', use_node_attr=True)
data

DD(1178)

In [6]:
print(data.num_classes)
print(data.num_features)

2
89


In [7]:
dataset = data[0]
dataset

Data(edge_index=[2, 1798], x=[327, 89], y=[1])

In [29]:
from torch.utils.data import random_split
train_set, val_set, test_set = random_split(data, [0.7, 0.2, 0.1])

In [30]:
from torch_geometric.loader import DataLoader

train_loader = DataLoader(train_set, batch_size=2, shuffle=True)
val_loader = DataLoader(val_set, batch_size=2, shuffle = True)

In [10]:
model1 = GraphSAGE(num_classes=2)

In [11]:
from torch.optim import Adam
from torch.nn import CrossEntropyLoss

optimizer = Adam(model1.parameters(), lr=0.01)
criterion = CrossEntropyLoss()

Train the model

In [12]:
train(model1, train_loader, val_loader, optimizer, criterion, 20)

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
Epoch: 010, Step: 310, Loss: 0.4570
Epoch: 010, Step: 311, Loss: 1.3891
Epoch: 010, Step: 312, Loss: 0.4209
Epoch: 010, Step: 313, Loss: 0.4723
Epoch: 010, Step: 314, Loss: 0.3124
Epoch: 010, Step: 315, Loss: 0.9224
Epoch: 010, Step: 316, Loss: 1.5364
Epoch: 010, Step: 317, Loss: 0.5179
Epoch: 010, Step: 318, Loss: 1.0029
Epoch: 010, Step: 319, Loss: 0.2454
Epoch: 010, Step: 320, Loss: 0.9181
Epoch: 010, Step: 321, Loss: 0.3671
Epoch: 010, Step: 322, Loss: 0.2118
Epoch: 010, Step: 323, Loss: 0.8704
Epoch: 010, Step: 324, Loss: 0.5316
Epoch: 010, Step: 325, Loss: 0.3047
Epoch: 010, Step: 326, Loss: 0.2652
Epoch: 010, Step: 327, Loss: 0.6157
Epoch: 010, Step: 328, Loss: 0.9418
Epoch: 010, Step: 329, Loss: 0.9850
Epoch: 010, Step: 330, Loss: 0.6982
Epoch: 010, Step: 331, Loss: 0.5200
Epoch: 010, Step: 332, Loss: 0.8916
Epoch: 010, Step: 333, Loss: 0.6006
Epoch: 010, Step: 334, Loss: 1.5754
Epoch: 010, Step: 335, Loss: 0.6217

## **Results**

In [13]:
from sklearn.metrics import classification_report

test_loader = DataLoader(test_set, batch_size=2, shuffle=True)
test_y, preds = test(model1, test_loader, criterion)

print(classification_report(test_y, preds))

              precision    recall  f1-score   support

           0       0.87      0.65      0.75        95
           1       0.28      0.59      0.38        22

    accuracy                           0.64       117
   macro avg       0.58      0.62      0.56       117
weighted avg       0.76      0.64      0.68       117



# **EXERCISE 2**

GCN model

In [14]:
from torch_geometric.nn import global_mean_pool, GCNConv
class GCN(torch.nn.Module):
    def __init__(self, num_classes):
        super(GCN, self).__init__()

        self.conv1 = GCNConv(-1, 64)
        self.conv2 = GCNConv(-1, 128)
        self.conv3 = GCNConv(-1, 64)

        self.linear1 = Linear(64, num_classes)

    def forward(self, x, edge_index, batch):
        # 1. Obtain node embeddings
        x = self.conv1(x, edge_index)
        x = x.relu()
        x = self.conv2(x, edge_index)
        x = x.relu()
        x = self.conv3(x, edge_index)

        # 2. Readout layer
        x = global_mean_pool(x, batch)  # [batch_size, hidden_channels]

        # 3. Apply a final classifier
        x = dropout(x, p=0.5, training=self.training)
        x = self.linear1(x)

        return x

In [19]:
model2 = GCN(num_classes=2)

In [20]:
optimizer = Adam(model2.parameters(), lr=0.01)
criterion = CrossEntropyLoss()

In [31]:
train(model2, train_loader, val_loader, optimizer, criterion, 20)

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
Epoch: 010, Step: 310, Loss: 0.6954
Epoch: 010, Step: 311, Loss: 0.7054
Epoch: 010, Step: 312, Loss: 0.5386
Epoch: 010, Step: 313, Loss: 0.6950
Epoch: 010, Step: 314, Loss: 0.5502
Epoch: 010, Step: 315, Loss: 0.5287
Epoch: 010, Step: 316, Loss: 0.5265
Epoch: 010, Step: 317, Loss: 0.7108
Epoch: 010, Step: 318, Loss: 0.7111
Epoch: 010, Step: 319, Loss: 0.8734
Epoch: 010, Step: 320, Loss: 0.7116
Epoch: 010, Step: 321, Loss: 0.5410
Epoch: 010, Step: 322, Loss: 0.9100
Epoch: 010, Step: 323, Loss: 0.5293
Epoch: 010, Step: 324, Loss: 0.7073
Epoch: 010, Step: 325, Loss: 0.5284
Epoch: 010, Step: 326, Loss: 0.6954
Epoch: 010, Step: 327, Loss: 0.6957
Epoch: 010, Step: 328, Loss: 0.5253
Epoch: 010, Step: 329, Loss: 0.5358
Epoch: 010, Step: 330, Loss: 0.7164
Epoch: 010, Step: 331, Loss: 0.7171
Epoch: 010, Step: 332, Loss: 0.5154
Epoch: 010, Step: 333, Loss: 0.5293
Epoch: 010, Step: 334, Loss: 0.6943
Epoch: 010, Step: 335, Loss: 0.9436

## **Results**

In [33]:
test_y, preds = test(model2, test_loader, criterion)

print(classification_report(test_y, preds))

              precision    recall  f1-score   support

           0       1.00      0.61      0.76       117
           1       0.00      0.00      0.00         0

    accuracy                           0.61       117
   macro avg       0.50      0.30      0.38       117
weighted avg       1.00      0.61      0.76       117



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


# **Conclusion**

The first model performs better on this dataset.