In [1]:
!pip install pyg-lib torch-scatter torch-sparse -f https://data.pyg.org/whl/torch-1.13.0+cu116.html
!pip install torch-geometric
!pip install torchmetrics

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Looking in links: https://data.pyg.org/whl/torch-1.13.0+cu116.html
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [2]:
import torch
import torch.nn as nn
import torchmetrics
import torch.nn.functional as F

import numpy as np
import matplotlib.pyplot as plt
import pickle

from torch_geometric.data import Data
from torch_geometric.loader import DataLoader
from torch.nn import Linear
from torch_geometric.nn import GCNConv
from torch_geometric.nn import global_mean_pool



LOAD DATA AND PROCESS

In [3]:
train_raw = np.load('/content/drive/MyDrive/framebot_V5.1/data/train_data.npy', allow_pickle=True)
test_raw = np.load('/content/drive/MyDrive/framebot_V5.1/data/test_data.npy', allow_pickle=True)

In [4]:
def convert_data(file):
    """
    Function to covert the downloaded data files into model input
    Parameters:
    file: graph data

    Output:
    _data_list: list of graph data structures
    """
    _data_list = []

    for i in range(len(file)):

        _x = torch.tensor(file[i][0], dtype=torch.float)  
        _edge_index = torch.tensor(file[i][1], dtype=torch.long)
        _edge_attr = torch.tensor(file[i][2], dtype=torch.float)
        _y = torch.tensor(file[i][6], dtype=torch.long)
        
        _data = Data(_x, _edge_index, _edge_attr, _y)
        _data_list.append(_data)
        
    return _data_list

In [5]:
train_data = convert_data(train_raw)
test_data = convert_data(test_raw)

In [6]:
train_dataloader = DataLoader(train_data, batch_size=64, shuffle=True)
test_dataloader = DataLoader(test_data, batch_size=64)

In [8]:
class GCN(torch.nn.Module):
    def __init__(self,in_channels, hidden_channels, out_channels):
        super(GCN,self).__init__()
        self.conv1 = GCNConv(in_channels, hidden_channels)
        self.conv2 = GCNConv(hidden_channels, hidden_channels)
        self.conv3 = GCNConv(hidden_channels, hidden_channels)
        self.lin = Linear(hidden_channels, out_channels)

    def forward(self, x, edge_index, batch):
        # 1. Obtain node embeddings 
        x = self.conv1(x, edge_index)
        x = x.relu()
        x = self.conv2(x, edge_index)
        x = x.relu()
        x = self.conv3(x, edge_index)

        # 2. Readout layer
        x = global_mean_pool(x, batch)  # [batch_size, hidden_channels]

        # 3. Apply a final classifier
        x = F.dropout(x, p=0.5, training=self.training)
        x = self.lin(x)
        
        return x

In [9]:
model = GCN(in_channels=3, hidden_channels=64, out_channels=5)
print(model)

GCN(
  (conv1): GCNConv(3, 64)
  (conv2): GCNConv(64, 64)
  (conv3): GCNConv(64, 64)
  (lin): Linear(in_features=64, out_features=5, bias=True)
)


In [10]:
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
criterion = torch.nn.CrossEntropyLoss()

epochs = 10

In [11]:
def train(loader):
    model.train()

    correct = 0
    for data in loader:
        
        out = model(data.x, data.edge_index, data.batch)
        loss = criterion(out, data.y)
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()

        pred = out.argmax(dim=1)
        correct += int((pred == data.y).sum())
    return correct / len(loader.dataset)


def test(loader):
    model.eval()

    correct = 0
    for data in loader:
        out = model(data.x, data.edge_index, data.batch)
        pred = out.argmax(dim=1)
        correct += int((pred == data.y).sum())
    return correct / len(loader.dataset)

In [12]:
for epoch in range(1, epochs):
    train_acc = train(train_dataloader)
    test_acc = test(test_dataloader)
    print(f'Epoch: {epoch:03d}, Train Acc: {train_acc:.4f}, Test Acc: {test_acc:.4f}')

print("Done.")


Epoch: 001, Train Acc: 0.8734, Test Acc: 1.0000
Epoch: 002, Train Acc: 0.9728, Test Acc: 0.9737
Epoch: 003, Train Acc: 0.9785, Test Acc: 1.0000
Epoch: 004, Train Acc: 0.9810, Test Acc: 1.0000
Epoch: 005, Train Acc: 0.9886, Test Acc: 0.9553
Epoch: 006, Train Acc: 0.9876, Test Acc: 0.9590
Epoch: 007, Train Acc: 0.9896, Test Acc: 1.0000
Epoch: 008, Train Acc: 0.9908, Test Acc: 0.9740
Epoch: 009, Train Acc: 0.9877, Test Acc: 0.9983
Done.


In [13]:
torch.save(model,'/content/drive/MyDrive/framebot_V5/framebot_v5.1.pt')

In [14]:
# Load checkpoint for training
model = torch.load('/content/drive/MyDrive/framebot_V5/framebot_v5.1.pt')

In [15]:
test_raw = np.load('/content/drive/MyDrive/framebot_V5.1/data/test_gnn_data.npy', allow_pickle=True)
sample_data = convert_data(test_raw)

In [16]:
sample_data

[Data(x=[3, 3], edge_index=[2, 6], edge_attr=[6], y=0),
 Data(x=[3, 3], edge_index=[2, 6], edge_attr=[6], y=0),
 Data(x=[3, 3], edge_index=[2, 6], edge_attr=[6], y=1),
 Data(x=[3, 3], edge_index=[2, 6], edge_attr=[6], y=1),
 Data(x=[3, 3], edge_index=[2, 6], edge_attr=[6], y=2),
 Data(x=[3, 3], edge_index=[2, 6], edge_attr=[6], y=2),
 Data(x=[4, 3], edge_index=[2, 8], edge_attr=[8], y=3),
 Data(x=[4, 3], edge_index=[2, 8], edge_attr=[8], y=3),
 Data(x=[4, 3], edge_index=[2, 8], edge_attr=[8], y=4),
 Data(x=[4, 3], edge_index=[2, 8], edge_attr=[8], y=4)]

In [17]:
sample_dataloader = DataLoader(sample_data, batch_size=1, shuffle=True)

In [18]:
# get a sample from the dataset
sample = next(iter(sample_dataloader))

In [19]:
sample

DataBatch(x=[4, 3], edge_index=[2, 8], edge_attr=[8], y=[1], batch=[4], ptr=[2])

In [20]:
with torch.no_grad():
    model.eval()
    correct = 0

    out = model(sample.x, sample.edge_index, sample.batch)
    pred = out.argmax(dim=1)
    correct = int((pred == sample.y).sum())
    print("correct:", correct)

correct: 1


In [21]:
out

tensor([[ -2.6684,  -2.5894, -15.4398,  29.4051,   9.6581]])

In [22]:
pred

tensor([3])

In [23]:
sample.y

tensor([3])