<a href="https://colab.research.google.com/github/patero22/GNN-Reaserch_project/blob/main/Message_passing_main.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Implementacja w PyTorch Geometric (PyG)


In [None]:
!pip install torch_geometric

Collecting torch_geometric
  Downloading torch_geometric-2.5.3-py3-none-any.whl (1.1 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.1/1.1 MB[0m [31m9.1 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: torch_geometric
Successfully installed torch_geometric-2.5.3


In [None]:
from torch_geometric.datasets import Planetoid
from torch_geometric.transforms import NormalizeFeatures

# Load the Planetoid dataset named "Cora" (stored under data/Planetoid),
# applying the NormalizeFeatures transform.
dataset = Planetoid(root='data/Planetoid', name='Cora', transform=NormalizeFeatures())

# Dataset-level summary: number of graphs, feature count and class count.
print(f'Dataset: {dataset}:')
print('======================')
print(f'Number of graphs: {len(dataset)}')
print(f'Number of features: {dataset.num_features}')
print(f'Number of classes: {dataset.num_classes}')

data = dataset[0]  # Take the first graph object of the dataset.

print(data)
print('===========================================================================================================')
# Statistics of the selected graph `data`.
print(f'Number of nodes: {data.num_nodes}')
print(f'Number of edges: {data.num_edges}')
# Ratio of edge_index entries to nodes.
print(f'Average node degree: {data.num_edges / data.num_nodes:.2f}')
print(f'Number of training nodes: {data.train_mask.sum()}')
# Fraction of all nodes that belong to the training mask.
print(f'Training node label rate: {int(data.train_mask.sum()) / data.num_nodes:.2f}')
print(f'Has isolated nodes: {data.has_isolated_nodes()}')
print(f'Has self-loops: {data.has_self_loops()}')
print(f'Is undirected: {data.is_undirected()}')


Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.x
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.tx
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.allx
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.y
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.ty
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.ally
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.graph
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.test.index
Processing...


Dataset: Cora():
Number of graphs: 1
Number of features: 1433
Number of classes: 7
Data(x=[2708, 1433], edge_index=[2, 10556], y=[2708], train_mask=[2708], val_mask=[2708], test_mask=[2708])
Number of nodes: 2708
Number of edges: 10556
Average node degree: 3.90
Number of training nodes: 140
Training node label rate: 0.05
Has isolated nodes: False
Has self-loops: False
Is undirected: True


Done!


Implementacja Message Passing w PyTorch Geometric


In [None]:
import torch
import torch.nn.functional as F
from torch_geometric.nn import MessagePassing
from torch_geometric.utils import add_self_loops

class GNNConv(MessagePassing):
    """GCN-style layer built on ``MessagePassing`` with sum aggregation.

    The layer adds self-loops, applies a fixed all-ones linear map to the node
    features, and propagates messages scaled by ``deg^-1/2[row] * deg^-1/2[col]``.
    It has no learnable parameters.
    """

    def __init__(self):
        super().__init__(aggr='add')  # Aggregate incoming messages by summation.

    def forward(self, x, edge_index):
        """Run one propagation step.

        Args:
            x: Node feature matrix; the first dimension indexes nodes.
            edge_index: Edge list as a ``[2, num_edges]`` index tensor.

        Returns:
            The value returned by ``self.propagate`` for the transformed features.
        """
        # Add self-loops to the edge list.
        edge_index, _ = add_self_loops(edge_index, num_nodes=x.size(0))

        # Sample linear transformation with an all-ones weight. ``new_ones``
        # creates the weight on x's device and in x's dtype, so the layer also
        # works for CUDA or float64 inputs.
        weight = x.new_ones(x.size(1), x.size(1))
        x = F.linear(x, weight)

        # Normalisation coefficients, one per edge (self-loops included).
        row, col = edge_index
        deg = torch.bincount(row, minlength=x.size(0)).to(x.dtype)
        deg_inv_sqrt = deg.pow(-0.5)
        norm = deg_inv_sqrt[row] * deg_inv_sqrt[col]

        # Message propagation.
        return self.propagate(edge_index, x=x, norm=norm)

    def message(self, x_j, norm):
        """Scale each neighbour feature row ``x_j`` by its edge coefficient."""
        return norm.view(-1, 1) * x_j

    def update(self, aggr_out):
        """Return the aggregated messages unchanged as the new embeddings."""
        return aggr_out

# Instantiate the layer and apply it to the Cora node features and edge list.
conv = GNNConv()
output = conv(data.x, data.edge_index)
print(output)


tensor([[0.9736, 0.9736, 0.9736,  ..., 0.9736, 0.9736, 0.9736],
        [1.0964, 1.0964, 1.0964,  ..., 1.0964, 1.0964, 1.0964],
        [1.0307, 1.0307, 1.0307,  ..., 1.0307, 1.0307, 1.0307],
        ...,
        [1.0000, 1.0000, 1.0000,  ..., 1.0000, 1.0000, 1.0000],
        [1.0582, 1.0582, 1.0582,  ..., 1.0582, 1.0582, 1.0582],
        [0.8767, 0.8767, 0.8767,  ..., 0.8767, 0.8767, 0.8767]])


Implementacja Message Passing w Deep Graph Library (DGL)


In [None]:
!pip install dgl


Collecting dgl
  Downloading dgl-2.1.0-cp310-cp310-manylinux1_x86_64.whl (8.5 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m8.5/8.5 MB[0m [31m18.7 MB/s[0m eta [36m0:00:00[0m
Collecting torchdata>=0.5.0 (from dgl)
  Downloading torchdata-0.7.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (4.7 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m4.7/4.7 MB[0m [31m45.4 MB/s[0m eta [36m0:00:00[0m
Collecting nvidia-cuda-nvrtc-cu12==12.1.105 (from torch>=2->torchdata>=0.5.0->dgl)
  Using cached nvidia_cuda_nvrtc_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (23.7 MB)
Collecting nvidia-cuda-runtime-cu12==12.1.105 (from torch>=2->torchdata>=0.5.0->dgl)
  Using cached nvidia_cuda_runtime_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (823 kB)
Collecting nvidia-cuda-cupti-cu12==12.1.105 (from torch>=2->torchdata>=0.5.0->dgl)
  Using cached nvidia_cuda_cupti_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (14.1 MB)
Collecting nvidia-cudnn-cu1

In [None]:
!pip install torch==2.0.1
!pip install dgl==1.1.0 -f https://data.dgl.ai/wheels/repo.html



Looking in links: https://data.dgl.ai/wheels/repo.html
Collecting dgl==1.1.0
  Using cached dgl-1.1.0-cp310-cp310-manylinux1_x86_64.whl (5.9 MB)
Installing collected packages: dgl
  Attempting uninstall: dgl
    Found existing installation: dgl 2.1.0
    Uninstalling dgl-2.1.0:
      Successfully uninstalled dgl-2.1.0
Successfully installed dgl-1.1.0


In [None]:
import torch
import torch.nn.functional as F
from torch_geometric.datasets import Planetoid
from torch_geometric.transforms import NormalizeFeatures
from torch_geometric.utils import add_self_loops
import dgl
import dgl.function as fn

# Load the Planetoid dataset named "Cora" (stored under data/Planetoid),
# applying the NormalizeFeatures transform.
# NOTE(review): this repeats the dataset loading already done in an earlier cell.
dataset = Planetoid(root='data/Planetoid', name='Cora', transform=NormalizeFeatures())

# Dataset-level summary: number of graphs, feature count and class count.
print(f'Dataset: {dataset}:')
print('======================')
print(f'Number of graphs: {len(dataset)}')
print(f'Number of features: {dataset.num_features}')
print(f'Number of classes: {dataset.num_classes}')

# Take the first graph object of the dataset.
data = dataset[0]

print()
print(data)
print('===========================================================================================================')
# Statistics of the selected graph `data`.
print(f'Number of nodes: {data.num_nodes}')
print(f'Number of edges: {data.num_edges}')
# Ratio of edge_index entries to nodes.
print(f'Average node degree: {data.num_edges / data.num_nodes:.2f}')
print(f'Number of training nodes: {data.train_mask.sum()}')
# Fraction of all nodes that belong to the training mask.
print(f'Training node label rate: {int(data.train_mask.sum()) / data.num_nodes:.2f}')
print(f'Has isolated nodes: {data.has_isolated_nodes()}')
print(f'Has self-loops: {data.has_self_loops()}')
print(f'Is undirected: {data.is_undirected()}')

# Implementacja Message Passing w PyTorch Geometric
class GNNConv(torch.nn.Module):
    """Hand-written GCN-style layer that implements its own ``propagate``.

    This class derives from ``torch.nn.Module`` (not ``MessagePassing``). It adds
    self-loops, applies a fixed all-ones linear map to the node features, and
    sums neighbour features weighted by ``deg^-1/2[row] * deg^-1/2[col]``.
    It has no learnable parameters.
    """

    def __init__(self):
        super().__init__()
        self.aggr = 'add'  # Aggregation by summation.

    def forward(self, x, edge_index):
        """Run one propagation step.

        Args:
            x: 2-D node feature matrix ``[num_nodes, num_features]``.
            edge_index: Edge list as a ``[2, num_edges]`` index tensor.

        Returns:
            Tensor with the same shape and dtype as ``x``.
        """
        # Add self-loops to the edge list.
        edge_index, _ = add_self_loops(edge_index, num_nodes=x.size(0))

        # Sample linear transformation with an all-ones weight. ``new_ones``
        # creates the weight on x's device and in x's dtype, so the layer also
        # works for CUDA or float64 inputs.
        weight = x.new_ones(x.size(1), x.size(1))
        x = F.linear(x, weight)

        # Normalisation coefficients, one per edge (self-loops included).
        row, col = edge_index
        deg = torch.bincount(row, minlength=x.size(0)).to(x.dtype)
        deg_inv_sqrt = deg.pow(-0.5)
        norm = deg_inv_sqrt[row] * deg_inv_sqrt[col]

        # Message propagation.
        return self.propagate(edge_index, x=x, norm=norm)

    def propagate(self, edge_index, x, norm):
        """Compute ``out[row[i]] += norm[i] * x[col[i]]`` over all edges ``i``.

        The sum is evaluated as one sparse-dense matrix product instead of a
        Python loop over edges. Repeated ``(row, col)`` pairs are summed, as in
        the loop formulation.

        Args:
            edge_index: ``[2, num_edges]`` index tensor of ``(row, col)`` pairs.
            x: 2-D feature matrix ``[num_nodes, num_features]``.
            norm: One coefficient per edge.

        Returns:
            Dense tensor with the same shape and dtype as ``x``.
        """
        num_nodes = x.size(0)
        # Sparse matrix A with A[row[i], col[i]] = norm[i]; then out = A @ x.
        adjacency = torch.sparse_coo_tensor(
            edge_index, norm.to(x.dtype), (num_nodes, num_nodes)
        )
        return torch.sparse.mm(adjacency, x)

    def message(self, x_j, norm):
        """Scale each neighbour feature row ``x_j`` by its edge coefficient.

        Not called by ``propagate`` in this class.
        """
        return norm.view(-1, 1) * x_j

    def update(self, aggr_out):
        """Return the aggregated messages unchanged.

        Not called by ``propagate`` in this class.
        """
        return aggr_out

# Instantiate the layer and apply it to the Cora node features and edge list.
conv = GNNConv()
output = conv(data.x, data.edge_index)
print(output)

# Message passing in Deep Graph Library (DGL):
# build a DGL graph from the PyTorch Geometric edge list.
src, dst = data.edge_index
# Pass num_nodes explicitly. Otherwise DGL infers the node count from the largest
# node ID in the edge list, and trailing isolated nodes would leave the graph
# with fewer nodes than rows in data.x.
g = dgl.graph((src, dst), num_nodes=data.num_nodes)
g = dgl.add_self_loop(g)
g.ndata['feat'] = data.x

class GNNLayer(torch.nn.Module):
    """GCN-style layer for DGL graphs with symmetric in-degree normalisation.

    Applies a fixed all-ones linear map to the node features, scales them by
    ``in_degree^-1/2``, sums them over incoming edges, and scales the result by
    ``in_degree^-1/2`` again. It has no learnable parameters.
    """

    def __init__(self):
        super().__init__()

    def forward(self, g, feature):
        """Run one propagation step.

        Args:
            g: DGL graph providing ``in_degrees``, ``ndata`` and ``update_all``.
            feature: 2-D node feature matrix ``[num_nodes, num_features]``.

        Returns:
            The propagated node features, one row per node.
        """
        # Sample linear transformation with an all-ones weight. ``new_ones``
        # creates the weight on feature's device and in feature's dtype.
        weight = feature.new_ones(feature.size(1), feature.size(1))
        feature = F.linear(feature, weight)

        # Normalisation; in-degrees are clamped to at least 1 so that nodes
        # without incoming edges do not yield an infinite coefficient.
        degs = g.in_degrees().to(feature).clamp(min=1)
        norm = torch.pow(degs, -0.5).unsqueeze(1)

        # local_scope keeps the temporary 'h'/'m' fields from leaking into, or
        # overwriting, node data that the caller already stored on the graph.
        with g.local_scope():
            g.ndata['h'] = feature * norm
            # Message propagation: copy source 'h' along edges and sum at the target.
            g.update_all(fn.copy_u(u='h', out='m'), fn.sum(msg='m', out='h'))
            return g.ndata['h'] * norm

# Create the DGL layer and run it on the graph's 'feat' node data.
conv = GNNLayer()
output = conv(g, g.ndata['feat'])
print(output)

# Performance comparison
import time

# Timing for the "PyTorch Geometric" variant: mean wall-clock time over 100 forward passes.
# NOTE(review): GNNConv as defined in this cell subclasses torch.nn.Module and
# implements propagate itself, so this figure does not measure PyG's MessagePassing.
conv = GNNConv()
start_time = time.time()
for _ in range(100):
    output = conv(data.x, data.edge_index)
print(f'PyTorch Geometric: {(time.time() - start_time) / 100:.6f} seconds per iteration')

# Timing for DGL: mean wall-clock time over 100 forward passes.
conv = GNNLayer()
start_time = time.time()
for _ in range(100):
    output = conv(g, g.ndata['feat'])
print(f'DGL: {(time.time() - start_time) / 100:.6f} seconds per iteration')


Dataset: Cora():
Number of graphs: 1
Number of features: 1433
Number of classes: 7

Data(x=[2708, 1433], edge_index=[2, 10556], y=[2708], train_mask=[2708], val_mask=[2708], test_mask=[2708])
Number of nodes: 2708
Number of edges: 10556
Average node degree: 3.90
Number of training nodes: 140
Training node label rate: 0.05
Has isolated nodes: False
Has self-loops: False
Is undirected: True
tensor([[0.9736, 0.9736, 0.9736,  ..., 0.9736, 0.9736, 0.9736],
        [1.0964, 1.0964, 1.0964,  ..., 1.0964, 1.0964, 1.0964],
        [1.0307, 1.0307, 1.0307,  ..., 1.0307, 1.0307, 1.0307],
        ...,
        [1.0000, 1.0000, 1.0000,  ..., 1.0000, 1.0000, 1.0000],
        [1.0582, 1.0582, 1.0582,  ..., 1.0582, 1.0582, 1.0582],
        [0.8767, 0.8767, 0.8767,  ..., 0.8767, 0.8767, 0.8767]])


  assert input.numel() == input.storage().size(), (


tensor([[0.9736, 0.9736, 0.9736,  ..., 0.9736, 0.9736, 0.9736],
        [1.0964, 1.0964, 1.0964,  ..., 1.0964, 1.0964, 1.0964],
        [1.0307, 1.0307, 1.0307,  ..., 1.0307, 1.0307, 1.0307],
        ...,
        [1.0000, 1.0000, 1.0000,  ..., 1.0000, 1.0000, 1.0000],
        [1.0582, 1.0582, 1.0582,  ..., 1.0582, 1.0582, 1.0582],
        [0.8767, 0.8767, 0.8767,  ..., 0.8767, 0.8767, 0.8767]])
PyTorch Geometric: 0.659531 seconds per iteration
DGL: 0.214324 seconds per iteration


Implementacje Message Passing
PyTorch Geometric


In [None]:
class GNNConv(torch.nn.Module):
    """Hand-written GCN-style layer that implements its own ``propagate``.

    This class derives from ``torch.nn.Module`` (not ``MessagePassing``). It adds
    self-loops, applies a fixed all-ones linear map to the node features, and
    sums neighbour features weighted by ``deg^-1/2[row] * deg^-1/2[col]``.
    It has no learnable parameters.
    """

    def __init__(self):
        super().__init__()
        self.aggr = 'add'  # Aggregation by summation.

    def forward(self, x, edge_index):
        """Run one propagation step.

        Args:
            x: 2-D node feature matrix ``[num_nodes, num_features]``.
            edge_index: Edge list as a ``[2, num_edges]`` index tensor.

        Returns:
            Tensor with the same shape and dtype as ``x``.
        """
        # Add self-loops to the edge list.
        edge_index, _ = add_self_loops(edge_index, num_nodes=x.size(0))

        # Sample linear transformation with an all-ones weight. ``new_ones``
        # creates the weight on x's device and in x's dtype, so the layer also
        # works for CUDA or float64 inputs.
        weight = x.new_ones(x.size(1), x.size(1))
        x = F.linear(x, weight)

        # Normalisation coefficients, one per edge (self-loops included).
        row, col = edge_index
        deg = torch.bincount(row, minlength=x.size(0)).to(x.dtype)
        deg_inv_sqrt = deg.pow(-0.5)
        norm = deg_inv_sqrt[row] * deg_inv_sqrt[col]

        # Message propagation.
        return self.propagate(edge_index, x=x, norm=norm)

    def propagate(self, edge_index, x, norm):
        """Compute ``out[row[i]] += norm[i] * x[col[i]]`` over all edges ``i``.

        The sum is evaluated as one sparse-dense matrix product instead of a
        Python loop over edges. Repeated ``(row, col)`` pairs are summed, as in
        the loop formulation.

        Args:
            edge_index: ``[2, num_edges]`` index tensor of ``(row, col)`` pairs.
            x: 2-D feature matrix ``[num_nodes, num_features]``.
            norm: One coefficient per edge.

        Returns:
            Dense tensor with the same shape and dtype as ``x``.
        """
        num_nodes = x.size(0)
        # Sparse matrix A with A[row[i], col[i]] = norm[i]; then out = A @ x.
        adjacency = torch.sparse_coo_tensor(
            edge_index, norm.to(x.dtype), (num_nodes, num_nodes)
        )
        return torch.sparse.mm(adjacency, x)

    def message(self, x_j, norm):
        """Scale each neighbour feature row ``x_j`` by its edge coefficient.

        Not called by ``propagate`` in this class.
        """
        return norm.view(-1, 1) * x_j

    def update(self, aggr_out):
        """Return the aggregated messages unchanged.

        Not called by ``propagate`` in this class.
        """
        return aggr_out


Deep Graph Library (DGL)

In [None]:
class GNNLayer(torch.nn.Module):
    """GCN-style layer for DGL graphs with symmetric in-degree normalisation.

    Applies a fixed all-ones linear map to the node features, scales them by
    ``in_degree^-1/2``, sums them over incoming edges, and scales the result by
    ``in_degree^-1/2`` again. It has no learnable parameters.
    """

    def __init__(self):
        super().__init__()

    def forward(self, g, feature):
        """Run one propagation step.

        Args:
            g: DGL graph providing ``in_degrees``, ``ndata`` and ``update_all``.
            feature: 2-D node feature matrix ``[num_nodes, num_features]``.

        Returns:
            The propagated node features, one row per node.
        """
        # Sample linear transformation with an all-ones weight. ``new_ones``
        # creates the weight on feature's device and in feature's dtype.
        weight = feature.new_ones(feature.size(1), feature.size(1))
        feature = F.linear(feature, weight)

        # Normalisation; in-degrees are clamped to at least 1 so that nodes
        # without incoming edges do not yield an infinite coefficient.
        degs = g.in_degrees().to(feature).clamp(min=1)
        norm = torch.pow(degs, -0.5).unsqueeze(1)

        # local_scope keeps the temporary 'h'/'m' fields from leaking into, or
        # overwriting, node data that the caller already stored on the graph.
        with g.local_scope():
            g.ndata['h'] = feature * norm
            # Message propagation: copy source 'h' along edges and sum at the target.
            g.update_all(fn.copy_u(u='h', out='m'), fn.sum(msg='m', out='h'))
            return g.ndata['h'] * norm


Porównanie wydajności

In [None]:
import time

# Timing for the "PyTorch Geometric" variant: mean wall-clock time over 100 forward passes.
# NOTE(review): the GNNConv defined in the cell above subclasses torch.nn.Module and
# implements propagate itself, so this figure does not measure PyG's MessagePassing.
conv = GNNConv()
start_time = time.time()
for _ in range(100):
    output = conv(data.x, data.edge_index)
print(f'PyTorch Geometric: {(time.time() - start_time) / 100:.6f} seconds per iteration')

# Timing for DGL: mean wall-clock time over 100 forward passes.
conv = GNNLayer()
start_time = time.time()
for _ in range(100):
    output = conv(g, g.ndata['feat'])
print(f'DGL: {(time.time() - start_time) / 100:.6f} seconds per iteration')


PyTorch Geometric: 0.586509 seconds per iteration
DGL: 0.229661 seconds per iteration


Format danych: COO vs. CSR

    COO (Coordinate Format):
        Reprezentuje graf za pomocą listy krawędzi, gdzie każda krawędź jest określona przez parę (źródło, cel).
        Stosowany do dynamicznych grafów, gdzie krawędzie mogą być często dodawane lub usuwane.
        PyTorch Geometric używa formatu COO do reprezentowania grafów (macierz edge_index).

    CSR (Compressed Sparse Row):
        Reprezentuje graf za pomocą trzech tablic: wskaźników początku wierszy (indptr), indeksów kolumn, czyli sąsiadów każdego węzła (indices), oraz opcjonalnych wartości krawędzi (data).
        Lepszy do operacji macierzowych, np. mnożenia macierzy.
        DGL wspiera oba formaty, ale często używa CSR dla wydajności operacji macierzowych.

PyTorch Geometric (PyG)

PyTorch Geometric używa formatu COO do przechowywania grafów, co można zobaczyć w strukturze edge_index. Ten format jest bezpośrednio używany w procesie propagacji wiadomości:

edge_index, _ = add_self_loops(edge_index, num_nodes=x.size(0))
row, col = edge_index


Deep Graph Library (DGL)

DGL wspiera formaty COO, CSR i CSC. Graf jest tworzony z danych w formacie COO, a reprezentacje CSR/CSC są budowane w razie potrzeby i używane tam, gdzie są wydajniejsze, przede wszystkim przy operacjach algebraicznych na dużych grafach:

src, dst = data.edge_index
g = dgl.graph((src, dst))
g = dgl.add_self_loop(g)


Wnioski

    Wydajność:
        PyTorch Geometric (COO): Jest bardziej elastyczny przy dynamicznych zmianach w strukturze grafu, ale może być mniej wydajny przy operacjach algebraicznych na dużych grafach.
        DGL (CSR): Jest bardziej wydajny przy operacjach algebraicznych na dużych grafach, ale mniej elastyczny przy dynamicznych zmianach.

    Użycie:
        Wybór między PyTorch Geometric a DGL zależy od konkretnego zastosowania. Jeśli operacje na grafie są głównie statyczne i algebraiczne, DGL może być bardziej odpowiedni. Jeśli struktura grafu często się zmienia, PyTorch Geometric może być lepszym wyborem.

Kilka popularnych dużych zbiorów danych dla grafów to:

    Amazon Computers and Photo:
        Graf zakupów na Amazonie, gdzie węzły reprezentują produkty, a krawędzie współzakupy.
        Dane dostępne w torch_geometric.datasets.Amazon.

    Reddit:
        Graf interakcji użytkowników Reddita.
        Dane dostępne w torch_geometric.datasets.Reddit.

    OGB (Open Graph Benchmark):
        Zbiory danych dla dużych grafów, takie jak ogbn-arxiv, ogbn-products, itd.
        Dane dostępne w ogb.nodeproppred (dla klasyfikacji węzłów).

Implementacja na Zbiorze Danych Amazon Computers i Photo
PyTorch Geometric

In [None]:
from torch_geometric.datasets import Amazon
from torch_geometric.transforms import NormalizeFeatures
import time

# Amazon Computers: load with the NormalizeFeatures transform and take the first graph.
dataset_computers = Amazon(root='data/AmazonComputers', name='Computers', transform=NormalizeFeatures())
data_computers = dataset_computers[0]

# Amazon Photo: load with the NormalizeFeatures transform and take the first graph.
dataset_photo = Amazon(root='data/AmazonPhoto', name='Photo', transform=NormalizeFeatures())
data_photo = dataset_photo[0]

# Definicja funkcji testującej
def test_pyg(dataset):
    conv_pyg = GNNConv()
    start_time = time.time()
    for _ in range(100):
        output = conv_pyg(dataset.x, dataset.edge_index)
    return (time.time() - start_time) / 100

# Report the mean per-iteration time returned by test_pyg for both Amazon graphs.
print(f'PyTorch Geometric Amazon Computers: {test_pyg(data_computers):.6f} seconds per iteration')
print(f'PyTorch Geometric Amazon Photo: {test_pyg(data_photo):.6f} seconds per iteration')


Downloading https://github.com/shchur/gnn-benchmark/raw/master/data/npz/amazon_electronics_computers.npz
Processing...
Done!
Downloading https://github.com/shchur/gnn-benchmark/raw/master/data/npz/amazon_electronics_photo.npz
Processing...
Done!


PyTorch Geometric Amazon Computers: 14.390730 seconds per iteration


KeyboardInterrupt: 

Implementacja na Zbiorze Danych Amazon Computers i Photo
DGL - Deep Graph Library

In [None]:
from dgl.data import AmazonCoBuyComputerDataset, AmazonCoBuyPhotoDataset

# Amazon Computers: load the DGL dataset and take its first graph.
dataset_computers_dgl = AmazonCoBuyComputerDataset()
g_computers_dgl = dataset_computers_dgl[0]

# Amazon Photo: load the DGL dataset and take its first graph.
dataset_photo_dgl = AmazonCoBuyPhotoDataset()
g_photo_dgl = dataset_photo_dgl[0]

# Add self-loops to both graphs.
g_computers_dgl = dgl.add_self_loop(g_computers_dgl)
g_photo_dgl = dgl.add_self_loop(g_photo_dgl)

# Definicja funkcji testującej
def test_dgl(graph):
    conv_dgl = GNNLayer()
    start_time = time.time()
    for _ in range(100):
        output = conv_dgl(graph, graph.ndata['feat'])
    return (time.time() - start_time) / 100

# Report the mean per-iteration time returned by test_dgl for both Amazon graphs.
print(f'DGL Amazon Computers: {test_dgl(g_computers_dgl):.6f} seconds per iteration')
print(f'DGL Amazon Photo: {test_dgl(g_photo_dgl):.6f} seconds per iteration')


Implementacja na Zbiorze Danych Reddit
PyTorch Geometric

In [None]:
from torch_geometric.datasets import Reddit

# Reddit: load the dataset (no feature transform is passed here) and take its first graph.
dataset_reddit = Reddit(root='data/Reddit')
data_reddit = dataset_reddit[0]

print(f'Number of nodes: {data_reddit.num_nodes}')
print(f'Number of edges: {data_reddit.num_edges}')

# NOTE(review): test_pyg is called with a single argument here, i.e. the signature
# defined in the Amazon Computers/Photo cell; a later cell redefines test_pyg(dataset, device).
print(f'PyTorch Geometric Reddit: {test_pyg(data_reddit):.6f} seconds per iteration')


Downloading https://data.dgl.ai/dataset/reddit.zip


KeyboardInterrupt: 

DGL

In [None]:
from dgl.data import RedditDataset

# Reddit: load the DGL dataset and take its first graph.
dataset_reddit_dgl = RedditDataset()
g_reddit_dgl = dataset_reddit_dgl[0]

# Add self-loops.
g_reddit_dgl = dgl.add_self_loop(g_reddit_dgl)

# NOTE(review): test_dgl is called with a single argument here, i.e. the signature
# defined in the Amazon Computers/Photo DGL cell; later cells redefine test_dgl.
print(f'DGL Reddit: {test_dgl(g_reddit_dgl):.6f} seconds per iteration')


## PODZIAŁ NA GPU I CPU

Implementacja na Zbiorze Danych Karate Club i Citeseer
PyTorch Geometric

PyTorch Geometric z Obsługą CPU i GPU

In [None]:
import torch
from torch_geometric.datasets import KarateClub, Planetoid
from torch_geometric.transforms import NormalizeFeatures
import time

# Karate Club: load the dataset and take its first graph.
dataset_karate = KarateClub()
data_karate = dataset_karate[0]

# Citeseer: load with the NormalizeFeatures transform and take its first graph.
dataset_citeseer = Planetoid(root='data/Planetoid', name='Citeseer', transform=NormalizeFeatures())
data_citeseer = dataset_citeseer[0]

# Funkcja testująca z obsługą CPU i GPU
def test_pyg(dataset, device):
    conv_pyg = GNNConv().to(device)
    dataset = dataset.to(device)
    start_time = time.time()
    for _ in range(100):
        output = conv_pyg(dataset.x, dataset.edge_index)
    return (time.time() - start_time) / 100

# Device selection.
# NOTE(review): device_gpu falls back to the CPU when CUDA is unavailable, in which
# case the "on GPU" lines below report CPU timings.
device_cpu = torch.device('cpu')
device_gpu = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Report the mean per-iteration time for each dataset/device combination.
print(f'PyTorch Geometric Karate Club on CPU: {test_pyg(data_karate, device_cpu):.6f} seconds per iteration')
print(f'PyTorch Geometric Karate Club on GPU: {test_pyg(data_karate, device_gpu):.6f} seconds per iteration')
print(f'PyTorch Geometric Citeseer on CPU: {test_pyg(data_citeseer, device_cpu):.6f} seconds per iteration')
print(f'PyTorch Geometric Citeseer on GPU: {test_pyg(data_citeseer, device_gpu):.6f} seconds per iteration')


Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.citeseer.x
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.citeseer.tx
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.citeseer.allx
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.citeseer.y
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.citeseer.ty
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.citeseer.ally
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.citeseer.graph
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.citeseer.test.index
Processing...
Done!


PyTorch Geometric Karate Club on CPU: 0.004782 seconds per iteration
PyTorch Geometric Karate Club on GPU: 0.004629 seconds per iteration
PyTorch Geometric Citeseer on CPU: 2.034567 seconds per iteration
PyTorch Geometric Citeseer on GPU: 2.055309 seconds per iteration


DGL z Obsługą CPU i GPU

In [None]:
import dgl
import torch
import time
from dgl.data import KarateClubDataset, CiteseerGraphDataset
from dgl.nn import GraphConv

# Karate Club Dataset: take the first graph and add self-loops.
dataset_karate_dgl = KarateClubDataset()
g_karate_dgl = dataset_karate_dgl[0]
g_karate_dgl = dgl.add_self_loop(g_karate_dgl)
# Random node features of dimension 1433 (matches the input size of GNNLayerKarate).
# NOTE(review): torch.randn is unseeded here, so the feature values differ between runs.
g_karate_dgl.ndata['feat'] = torch.randn(g_karate_dgl.num_nodes(), 1433)

# Citeseer Dataset: take the first graph and add self-loops.
dataset_citeseer_dgl = CiteseerGraphDataset()
g_citeseer_dgl = dataset_citeseer_dgl[0]
g_citeseer_dgl = dgl.add_self_loop(g_citeseer_dgl)

class GNNLayerKarate(torch.nn.Module):
    """Two-layer ``GraphConv`` network with a ReLU between the layers.

    Args:
        in_feats: Input feature size (default 1433).
        hidden_feats: Hidden layer size (default 16).
        num_classes: Output size (default 7).
    """

    def __init__(self, in_feats=1433, hidden_feats=16, num_classes=7):
        super().__init__()
        self.conv1 = GraphConv(in_feats, hidden_feats)
        self.conv2 = GraphConv(hidden_feats, num_classes)

    def forward(self, g, feature):
        """Apply ``conv1``, ReLU and ``conv2`` to ``feature`` on graph ``g``."""
        h = torch.relu(self.conv1(g, feature))
        return self.conv2(g, h)

class GNNLayerCiteseer(torch.nn.Module):
    """Two-layer ``GraphConv`` network with a ReLU between the layers.

    Args:
        in_feats: Input feature size (default 3703).
        hidden_feats: Hidden layer size (default 16).
        num_classes: Output size (default 6).
    """

    def __init__(self, in_feats=3703, hidden_feats=16, num_classes=6):
        super().__init__()
        self.conv1 = GraphConv(in_feats, hidden_feats)
        self.conv2 = GraphConv(hidden_feats, num_classes)

    def forward(self, g, feature):
        """Apply ``conv1``, ReLU and ``conv2`` to ``feature`` on graph ``g``."""
        h = torch.relu(self.conv1(g, feature))
        return self.conv2(g, h)

def test_dgl(graph, model, device):
    model = model.to(device)
    graph = graph.to(device)
    feature = graph.ndata['feat'].to(device)
    start_time = time.time()
    for _ in range(100):
        output = model(graph, feature)
    return (time.time() - start_time) / 100

# Device selection.
# NOTE(review): device_gpu falls back to the CPU when CUDA is unavailable, in which
# case the "on GPU" lines below report CPU timings.
device_cpu = torch.device('cpu')
device_gpu = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Benchmarks: a fresh model instance is created for every dataset/device combination.
print(f'DGL Karate Club on CPU: {test_dgl(g_karate_dgl, GNNLayerKarate(), device_cpu):.6f} seconds per iteration')
print(f'DGL Karate Club on GPU: {test_dgl(g_karate_dgl, GNNLayerKarate(), device_gpu):.6f} seconds per iteration')
print(f'DGL Citeseer on CPU: {test_dgl(g_citeseer_dgl, GNNLayerCiteseer(), device_cpu):.6f} seconds per iteration')
print(f'DGL Citeseer on GPU: {test_dgl(g_citeseer_dgl, GNNLayerCiteseer(), device_gpu):.6f} seconds per iteration')


Downloading /root/.dgl/citeseer.zip from https://data.dgl.ai/dataset/citeseer.zip...
Extracting file to /root/.dgl/citeseer_d6836239


  r_inv = np.power(rowsum, -1).flatten()


Finished data loading and preprocessing.
  NumNodes: 3327
  NumEdges: 9228
  NumFeats: 3703
  NumClasses: 6
  NumTrainingSamples: 120
  NumValidationSamples: 500
  NumTestSamples: 1000
Done saving data into cached files.
DGL Karate Club on CPU: 0.009004 seconds per iteration
DGL Karate Club on GPU: 0.002409 seconds per iteration
DGL Citeseer on CPU: 0.059976 seconds per iteration
DGL Citeseer on GPU: 0.067811 seconds per iteration


## PyTorch Geometric - Amazon Dataset

In [None]:
import torch
import torch.nn.functional as F
from torch_geometric.datasets import Amazon
from torch_geometric.nn import GCNConv
import time

# Load Amazon Computers dataset (no feature transform is passed here) and take its first graph.
dataset_amazon = Amazon(root='data/Amazon', name='Computers')
data_amazon = dataset_amazon[0]

# Define a simple GCN model
class GCN(torch.nn.Module):
    """Two-layer ``GCNConv`` network sized from the global ``dataset_amazon``.

    The input width is ``dataset_amazon.num_features``, the hidden width is 16 and
    the output width is ``dataset_amazon.num_classes``.
    """

    def __init__(self):
        super().__init__()
        hidden_dim = 16
        self.conv1 = GCNConv(dataset_amazon.num_features, hidden_dim)
        self.conv2 = GCNConv(hidden_dim, dataset_amazon.num_classes)

    def forward(self, data):
        """Apply ``conv1``, ReLU and ``conv2`` using ``data.x`` and ``data.edge_index``."""
        hidden = F.relu(self.conv1(data.x, data.edge_index))
        return self.conv2(hidden, data.edge_index)

# Module-level instance: test_pyg(data, device) below reads this global `model`.
model = GCN()

# Test function for PyG
def test_pyg(data, device):
    data = data.to(device)
    model.to(device)
    start_time = time.time()
    for _ in range(100):
        out = model(data)
    return (time.time() - start_time) / 100

# NOTE(review): device_gpu falls back to the CPU when CUDA is unavailable, in which
# case the "on GPU" line below reports a CPU timing.
device_cpu = torch.device('cpu')
device_gpu = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Report the mean per-iteration time on each device.
print(f'PyG Amazon Computers on CPU: {test_pyg(data_amazon, device_cpu):.6f} seconds per iteration')
print(f'PyG Amazon Computers on GPU: {test_pyg(data_amazon, device_gpu):.6f} seconds per iteration')


Downloading https://github.com/shchur/gnn-benchmark/raw/master/data/npz/amazon_electronics_computers.npz
Processing...
Done!


PyG Amazon Computers on CPU: 0.164049 seconds per iteration
PyG Amazon Computers on GPU: 0.167240 seconds per iteration


## DGL - Amazon Dataset

In [None]:
import dgl
from dgl.data import AmazonCoBuyComputerDataset
import torch.nn as nn

# Load Amazon Computers dataset, take its first graph and add self-loops.
dataset_amazon_dgl = AmazonCoBuyComputerDataset()
g_amazon_dgl = dataset_amazon_dgl[0]
g_amazon_dgl = dgl.add_self_loop(g_amazon_dgl)
# Assign random node features whose width matches the PyG dataset's feature count.
# NOTE(review): this replaces any 'feat' already stored on the graph with unseeded
# random values; confirm that benchmarking on random features is intended.
g_amazon_dgl.ndata['feat'] = torch.randn(g_amazon_dgl.num_nodes(), dataset_amazon.num_features)

# Define a simple GCN model for DGL
class GCN(nn.Module):
    """Two-layer ``GraphConv`` network sized from the global ``dataset_amazon``.

    The input width is ``dataset_amazon.num_features``, the hidden width is 16 and
    the output width is ``dataset_amazon.num_classes``.
    """

    def __init__(self):
        super().__init__()
        hidden_dim = 16
        self.conv1 = GraphConv(dataset_amazon.num_features, hidden_dim)
        self.conv2 = GraphConv(hidden_dim, dataset_amazon.num_classes)

    def forward(self, g, features):
        """Apply ``conv1``, ReLU and ``conv2`` to ``features`` on graph ``g``."""
        hidden = F.relu(self.conv1(g, features))
        return self.conv2(g, hidden)

# Rebinds the notebook-global `model` to the DGL network; test_dgl(g, device) below reads it.
model = GCN()

# Test function for DGL
def test_dgl(g, device):
    g = g.to(device)
    model.to(device)
    features = g.ndata['feat'].to(device)
    start_time = time.time()
    for _ in range(100):
        out = model(g, features)
    return (time.time() - start_time) / 100

# Report the mean per-iteration time on each device (device_cpu / device_gpu come from an earlier cell).
print(f'DGL Amazon Computers on CPU: {test_dgl(g_amazon_dgl, device_cpu):.6f} seconds per iteration')
print(f'DGL Amazon Computers on GPU: {test_dgl(g_amazon_dgl, device_gpu):.6f} seconds per iteration')


Downloading /root/.dgl/amazon_co_buy_computer.zip from https://data.dgl.ai/dataset/amazon_co_buy_computer.zip...
Extracting file to /root/.dgl/amazon_co_buy_computer_b5999b2e
DGL Amazon Computers on CPU: 0.066257 seconds per iteration
DGL Amazon Computers on GPU: 0.053029 seconds per iteration


# REDDIT DATASET

## PyTorch Geometric - Reddit Dataset

In [None]:
pip install torch torch-geometric dgl




In [None]:
from torch_geometric.datasets import Reddit

# Load Reddit dataset (no feature transform is passed here) and take its first graph.
dataset_reddit = Reddit(root='data/Reddit')
data_reddit = dataset_reddit[0]

# Define a simple GCN model
class GCN(torch.nn.Module):
    """Two-layer ``GCNConv`` network sized from the global ``dataset_reddit``.

    The input width is ``dataset_reddit.num_features``, the hidden width is 16 and
    the output width is ``dataset_reddit.num_classes``.
    """

    def __init__(self):
        super().__init__()
        hidden_dim = 16
        self.conv1 = GCNConv(dataset_reddit.num_features, hidden_dim)
        self.conv2 = GCNConv(hidden_dim, dataset_reddit.num_classes)

    def forward(self, data):
        """Apply ``conv1``, ReLU and ``conv2`` using ``data.x`` and ``data.edge_index``."""
        hidden = F.relu(self.conv1(data.x, data.edge_index))
        return self.conv2(hidden, data.edge_index)

# Rebinds the notebook-global `model` to the Reddit-sized PyG network. The
# test_pyg(data, device) helper defined in the PyG Amazon cell reads `model` at call time.
# NOTE(review): the DGL helper test_dgl(g, device) reads the same global name, so it
# must not be called while `model` is bound to this PyG network.
model = GCN()

print(f'PyG Reddit on CPU: {test_pyg(data_reddit, device_cpu):.6f} seconds per iteration')
print(f'PyG Reddit on GPU: {test_pyg(data_reddit, device_gpu):.6f} seconds per iteration')


DGL - Reddit Dataset

In [None]:
from dgl.data import RedditDataset

# Load Reddit dataset as a DGL graph and add self-loops.
dataset_reddit_dgl = RedditDataset()
g_reddit_dgl = dataset_reddit_dgl[0]
g_reddit_dgl = dgl.add_self_loop(g_reddit_dgl)
# Random node features whose width matches the PyG Reddit dataset's feature count.
g_reddit_dgl.ndata['feat'] = torch.randn(g_reddit_dgl.num_nodes(), dataset_reddit.num_features)

from dgl.nn import GraphConv


class RedditGCNDGL(torch.nn.Module):
    """Two-layer DGL ``GraphConv`` network sized from the global ``dataset_reddit``."""

    def __init__(self):
        super().__init__()
        self.conv1 = GraphConv(dataset_reddit.num_features, 16)
        self.conv2 = GraphConv(16, dataset_reddit.num_classes)

    def forward(self, g, features):
        """Apply ``conv1``, ReLU and ``conv2`` to ``features`` on graph ``g``."""
        hidden = F.relu(self.conv1(g, features))
        return self.conv2(g, hidden)


# test_dgl(g, device) benchmarks the notebook-global `model` as model(g, features).
# The PyG Reddit cell rebound `model` to a PyG GCN whose forward takes a single
# `data` argument, so `model` must be rebound to a DGL network before timing;
# otherwise the call fails with a TypeError.
model = RedditGCNDGL()

print(f'DGL Reddit on CPU: {test_dgl(g_reddit_dgl, device_cpu):.6f} seconds per iteration')
print(f'DGL Reddit on GPU: {test_dgl(g_reddit_dgl, device_gpu):.6f} seconds per iteration')


Downloading /root/.dgl/reddit.zip from https://data.dgl.ai/dataset/reddit.zip...
Extracting file to /root/.dgl/reddit_69f818f5
