<a href="https://colab.research.google.com/github/patero22/GNN-Reaserch_project/blob/main/GNN_V3.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install torch==2.2.1 torchvision torchaudio
!pip install torch-geometric
!pip install dgl==2.1.0
!pip install memory-profiler

Collecting torch==2.2.1
  Downloading torch-2.2.1-cp310-cp310-manylinux1_x86_64.whl (755.5 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m755.5/755.5 MB[0m [31m973.4 kB/s[0m eta [36m0:00:00[0m
Collecting nvidia-cuda-nvrtc-cu12==12.1.105 (from torch==2.2.1)
  Using cached nvidia_cuda_nvrtc_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (23.7 MB)
Collecting nvidia-cuda-runtime-cu12==12.1.105 (from torch==2.2.1)
  Using cached nvidia_cuda_runtime_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (823 kB)
Collecting nvidia-cuda-cupti-cu12==12.1.105 (from torch==2.2.1)
  Using cached nvidia_cuda_cupti_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (14.1 MB)
Collecting nvidia-cudnn-cu12==8.9.2.26 (from torch==2.2.1)
  Using cached nvidia_cudnn_cu12-8.9.2.26-py3-none-manylinux1_x86_64.whl (731.7 MB)
Collecting nvidia-cublas-cu12==12.1.3.1 (from torch==2.2.1)
  Using cached nvidia_cublas_cu12-12.1.3.1-py3-none-manylinux1_x86_64.whl (410.6 MB)
Collecting nvidia-cufft-cu12==11.0

In [None]:
import torch
import torch.nn.functional as F
from torch_geometric.nn import GCNConv, GATConv, SAGEConv, PNAConv
from dgl.nn.pytorch import GraphConv, GATConv as GATConvDGL, SAGEConv as SAGEConvDGL
from dgl.nn import PNAConv

DGL backend not selected or invalid.  Assuming PyTorch for now.


Setting the default backend to "pytorch". You can change it in the ~/.dgl/config.json file or export the DGLBACKEND environment variable.  Valid options are: pytorch, mxnet, tensorflow (all lowercase)


In [None]:
from scipy.sparse import coo_matrix, csr_matrix

# Conversion Functions
def convert_to_coo(edge_index, num_nodes):
    """Build an unweighted SciPy COO adjacency matrix from an edge index.

    Args:
        edge_index: Object that unpacks into two index tensors ``(row, col)``.
            Tensors living on an accelerator are copied to the host first.
        num_nodes: Side length of the square matrix.

    Returns:
        ``scipy.sparse.coo_matrix`` of shape ``(num_nodes, num_nodes)`` holding
        a 1 for every listed (row, col) pair.
    """
    row, col = edge_index
    # Tensor.numpy() only works on host tensors that do not require grad, so
    # normalise both index tensors before handing them to SciPy.
    row = row.detach().cpu()
    col = col.detach().cpu()
    data = torch.ones(row.size(0))
    coo = coo_matrix((data.numpy(), (row.numpy(), col.numpy())), shape=(num_nodes, num_nodes))
    return coo

def convert_to_csr(edge_index, num_nodes):
    """Build an unweighted SciPy CSR adjacency matrix from an edge index.

    Args:
        edge_index: Object that unpacks into two index tensors ``(row, col)``.
            Tensors living on an accelerator are copied to the host first.
        num_nodes: Side length of the square matrix.

    Returns:
        ``scipy.sparse.csr_matrix`` of shape ``(num_nodes, num_nodes)`` built
        from a 1 for every listed (row, col) pair.
    """
    row, col = edge_index
    # Tensor.numpy() only works on host tensors that do not require grad, so
    # normalise both index tensors before handing them to SciPy.
    row = row.detach().cpu()
    col = col.detach().cpu()
    data = torch.ones(row.size(0))
    csr = csr_matrix((data.numpy(), (row.numpy(), col.numpy())), shape=(num_nodes, num_nodes))
    return csr

In [None]:
# Profiling Function
from memory_profiler import memory_usage
import time

def profile_model(model, data, device, dgl=False, format='coo'):
    """Time repeated forward passes of ``model`` and sample peak process memory.

    Args:
        model: Module to profile; it is moved to ``device``.
        data: Model input; it is moved to ``device``. With ``dgl=True`` the
            model is called as ``model(data, data.ndata['feat'])``, otherwise
            as ``model(data)``.
        device: Device the model and data are moved to.
        dgl: Selects the two-argument DGL calling convention described above.
        format: Accepted for the callers' bookkeeping only. It is NOT used
            here, so runs that differ only in ``format`` execute the same
            computation.

    Returns:
        Tuple ``(seconds_per_forward, peak_memory)``: the mean wall-clock time
        of 100 forward passes and the largest sample returned by
        ``memory_profiler.memory_usage`` for one more forward pass.
        NOTE(review): memory_profiler samples this process's host memory, so
        GPU allocations are presumably not reflected — confirm.
    """
    data = data.to(device)
    model = model.to(device)
    target = torch.device(device)

    def forward_pass():
        if dgl:
            model(data, data.ndata['feat'])
        else:
            model(data)

    def wait_for_device():
        # CUDA kernels are launched asynchronously; without a sync the timer
        # would only measure launch overhead rather than the actual compute.
        if target.type == 'cuda':
            torch.cuda.synchronize(target)

    # Measure time (perf_counter is monotonic and higher-resolution than time.time)
    wait_for_device()
    start_time = time.perf_counter()
    for _ in range(100):
        forward_pass()
    wait_for_device()
    end_time = time.perf_counter()

    # Measure peak memory usage
    mem_usage = memory_usage(forward_pass, interval=0.1)

    return (end_time - start_time) / 100, max(mem_usage)

In [None]:
from torch_geometric.datasets import KarateClub, Planetoid
from dgl.data import KarateClubDataset, CiteseerGraphDataset

In [None]:
# Load PyG Datasets
# Each dataset object is kept alongside its first element (index 0); later
# cells read both the dataset (num_features / num_classes) and the data object.
dataset_karate_pyg = KarateClub()
data_karate_pyg = dataset_karate_pyg[0]

# Citeseer is stored under the relative directory 'data/Citeseer'.
dataset_citeseer_pyg = Planetoid(root='data/Citeseer', name='Citeseer')
data_citeseer_pyg = dataset_citeseer_pyg[0]

Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.citeseer.x
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.citeseer.tx
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.citeseer.allx
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.citeseer.y
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.citeseer.ty
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.citeseer.ally
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.citeseer.graph
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.citeseer.test.index
Processing...
Done!


In [None]:
# Load DGL Datasets
def load_karate_dgl():
    """Return the first element (index 0) of DGL's ``KarateClubDataset``."""
    return KarateClubDataset()[0]

def load_citeseer_dgl():
    """Return the first element (index 0) of DGL's ``CiteseerGraphDataset``."""
    return CiteseerGraphDataset()[0]

# Materialise the DGL graphs once; later cells attach PyG features/labels to them.
graph_karate_dgl = load_karate_dgl()
graph_citeseer_dgl = load_citeseer_dgl()

Downloading /root/.dgl/citeseer.zip from https://data.dgl.ai/dataset/citeseer.zip...


/root/.dgl/citeseer.zip:   0%|          | 0.00/239k [00:00<?, ?B/s]

Extracting file to /root/.dgl/citeseer_d6836239
Finished data loading and preprocessing.
  NumNodes: 3327
  NumEdges: 9228
  NumFeats: 3703
  NumClasses: 6
  NumTrainingSamples: 120
  NumValidationSamples: 500
  NumTestSamples: 1000
Done saving data into cached files.


In [None]:
import torch
import torch.nn.functional as F
import torch_geometric
from torch_geometric.data import Data
import dgl
from dgl.nn.pytorch import GATConv as GATConvDGL, GraphConv, SAGEConv as SAGEConvDGL, PNAConv as PNAConvDGL

class GCN(torch.nn.Module):
    """Two-layer GCN operating on a PyG data object.

    Args:
        in_channels: Input feature size of the first ``GCNConv``.
        out_channels: Output size of the second ``GCNConv``.
        hidden_channels: Width of the hidden layer. Defaults to 16, the value
            that used to be hard-coded.
    """

    def __init__(self, in_channels, out_channels, hidden_channels=16):
        super().__init__()
        self.conv1 = GCNConv(in_channels, hidden_channels)
        self.conv2 = GCNConv(hidden_channels, out_channels)

    def forward(self, data):
        """Apply conv1 -> ReLU -> conv2 to ``data.x`` over ``data.edge_index``.

        Returns the raw output of the second layer (no softmax is applied).
        """
        x, edge_index = data.x, data.edge_index
        x = F.relu(self.conv1(x, edge_index))
        return self.conv2(x, edge_index)

class GCN_DGL(torch.nn.Module):
    """Two-layer GCN built from DGL ``GraphConv`` layers.

    Args:
        in_feats: Input feature size of the first ``GraphConv``.
        out_feats: Output size of the second ``GraphConv``.
        hidden_feats: Width of the hidden layer. Defaults to 16, the value
            that used to be hard-coded.
    """

    def __init__(self, in_feats, out_feats, hidden_feats=16):
        super().__init__()
        self.conv1 = GraphConv(in_feats, hidden_feats)
        self.conv2 = GraphConv(hidden_feats, out_feats)

    def forward(self, g, features):
        """Apply conv1 -> ReLU -> conv2 to ``features`` on graph ``g``.

        Returns the raw output of the second layer (no softmax is applied).
        """
        x = F.relu(self.conv1(g, features))
        return self.conv2(g, x)


# Define GAT model for PyG
class GAT(torch.nn.Module):
    """Two-layer GAT operating on a PyG data object.

    Args:
        in_channels: Input feature size of the first ``GATConv``.
        out_channels: Output size of the second (single-head) ``GATConv``.
        hidden_channels: Per-head width of the first layer. Defaults to 8, the
            value that used to be hard-coded.
        heads: Number of attention heads in the first layer. Defaults to 8,
            the value that used to be hard-coded.
    """

    def __init__(self, in_channels, out_channels, hidden_channels=8, heads=8):
        super().__init__()
        self.conv1 = torch_geometric.nn.GATConv(in_channels, hidden_channels, heads=heads)
        # The second layer consumes hidden_channels * heads input features,
        # i.e. the per-head outputs of the first layer side by side.
        self.conv2 = torch_geometric.nn.GATConv(hidden_channels * heads, out_channels, heads=1)

    def forward(self, data):
        """Apply conv1 -> ELU -> conv2 to ``data.x`` over ``data.edge_index``."""
        x, edge_index = data.x, data.edge_index
        x = F.elu(self.conv1(x, edge_index))
        return self.conv2(x, edge_index)

# Define GAT model for DGL
class GAT_DGL(torch.nn.Module):
    """Two-layer GAT built from DGL ``GATConv`` layers.

    Args:
        in_channels: Input feature size of the first ``GATConv``.
        out_channels: Output size of the second (single-head) ``GATConv``.
        hidden_channels: Per-head width of the first layer. Defaults to 8, the
            value that used to be hard-coded.
        num_heads: Number of attention heads in the first layer. Defaults to
            8, the value that used to be hard-coded.
    """

    def __init__(self, in_channels, out_channels, hidden_channels=8, num_heads=8):
        super().__init__()
        self.conv1 = GATConvDGL(in_channels, hidden_channels, num_heads=num_heads)
        self.conv2 = GATConvDGL(hidden_channels * num_heads, out_channels, num_heads=1)

    def forward(self, g, features):
        """Apply conv1 -> ELU -> conv2 to ``features`` on graph ``g``."""
        # flatten(1) merges every axis after the first so the result matches
        # the hidden_channels * num_heads input size expected by conv2.
        x = self.conv1(g, features).flatten(1)
        x = F.elu(x)
        # Average over axis 1 of the second layer's output (a single head here).
        return self.conv2(g, x).mean(1)

# Define GraphSAGE model for PyG
class GraphSAGE(torch.nn.Module):
    """Two-layer GraphSAGE (mean aggregation) operating on a PyG data object.

    Args:
        in_channels: Input feature size of the first ``SAGEConv``.
        out_channels: Output size of the second ``SAGEConv``.
        hidden_channels: Width of the hidden layer. Defaults to 16, the value
            that used to be hard-coded.
    """

    def __init__(self, in_channels, out_channels, hidden_channels=16):
        super().__init__()
        self.conv1 = torch_geometric.nn.SAGEConv(in_channels, hidden_channels, aggr='mean')
        self.conv2 = torch_geometric.nn.SAGEConv(hidden_channels, out_channels, aggr='mean')

    def forward(self, data):
        """Apply conv1 -> ELU -> conv2 to ``data.x`` over ``data.edge_index``."""
        x, edge_index = data.x, data.edge_index
        x = F.elu(self.conv1(x, edge_index))
        return self.conv2(x, edge_index)

# Define GraphSAGE model for DGL
class GraphSAGE_DGL(torch.nn.Module):
    """Two-layer GraphSAGE (mean aggregator) built from DGL ``SAGEConv`` layers.

    Args:
        in_channels: Input feature size of the first ``SAGEConv``.
        out_channels: Output size of the second ``SAGEConv``.
        hidden_channels: Width of the hidden layer. Defaults to 16, the value
            that used to be hard-coded.
    """

    def __init__(self, in_channels, out_channels, hidden_channels=16):
        super().__init__()
        self.conv1 = SAGEConvDGL(in_channels, hidden_channels, aggregator_type='mean')
        self.conv2 = SAGEConvDGL(hidden_channels, out_channels, aggregator_type='mean')

    def forward(self, g, features):
        """Apply conv1 -> ELU -> conv2 to ``features`` on graph ``g``."""
        x = F.elu(self.conv1(g, features))
        return self.conv2(g, x)

# Define PNA model for PyG
# class PNA(torch.nn.Module):
#     def __init__(self, in_channels, out_channels, deg):
#         super(PNA, self).__init__()
#         self.conv1 = torch_geometric.nn.PNAConv(in_channels, 16, aggregators=['mean', 'max', 'min'], scalers=['identity'], deg=deg)
#         self.conv2 = torch_geometric.nn.PNAConv(16, out_channels, aggregators=['mean', 'max', 'min'], scalers=['identity'], deg=deg)

#     def forward(self, data):
#         x, edge_index = data.x, data.edge_index
#         x = self.conv1(x, edge_index)
#         x = F.elu(x)
#         x = self.conv2(x, edge_index)
#         return x

# Define PNA model for DGL
# class PNA_DGL(torch.nn.Module):
#     def __init__(self, in_channels, out_channels):
#         super(PNA_DGL, self).__init__()
#         self.conv1 = PNAConvDGL(in_channels, 16)
#         self.conv2 = PNAConvDGL(16, out_channels)

#     def forward(self, g, features):
#         x = self.conv1(g, features)
#         x = F.elu(x)
#         x = self.conv2(g, x)
#         return x


In [None]:

# Ensure features and labels are set for DGL graphs
def ensure_dgl_features_and_labels(graph, pyg_data):
    """Copy ``pyg_data.x`` / ``pyg_data.y`` onto ``graph.ndata`` and return ``graph``.

    The tensors are stored under the keys ``'feat'`` and ``'label'`` as
    detached clones, replacing whatever was stored under those keys before.
    """
    for ndata_key, source_attr in (('feat', 'x'), ('label', 'y')):
        graph.ndata[ndata_key] = getattr(pyg_data, source_attr).clone().detach()
    return graph

graph_karate_dgl = ensure_dgl_features_and_labels(graph_karate_dgl, data_karate_pyg)
graph_citeseer_dgl = ensure_dgl_features_and_labels(graph_citeseer_dgl, data_citeseer_pyg)

# Define devices
device_cpu = torch.device('cpu')
device_gpu = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# `device_gpu` silently falls back to the CPU when CUDA is unavailable, so a
# "GPU" run is only scheduled when it really is a CUDA device; otherwise CPU
# timings would be reported under a GPU label.
profile_targets = [('CPU', device_cpu)]
if device_gpu.type == 'cuda':
    profile_targets.append(('GPU', device_gpu))

# Define and test models on COO and CSR formats
# `fmt` is forwarded to profile_model and shown in the printed label.
formats = ['coo', 'csr']
models = {
    'GCN': (GCN, GCN_DGL),
    'GAT': (GAT, GAT_DGL),
    'GraphSAGE': (GraphSAGE, GraphSAGE_DGL),
    #'PNA': (PNA, PNA_DGL)
}

datasets_pyg = {
    'Karate Club': (dataset_karate_pyg, data_karate_pyg),
    'Citeseer': (dataset_citeseer_pyg, data_citeseer_pyg)
}

datasets_dgl = {
    'Karate Club': graph_karate_dgl,
    'Citeseer': graph_citeseer_dgl
}

for model_name, (ModelPyG, ModelDGL) in models.items():
    for dataset_name, (dataset_pyg, data_pyg) in datasets_pyg.items():
        # Calculate degree tensor for PyG PNA model
        # NOTE(review): PyG documents PNAConv's `deg` as an in-degree histogram,
        # not a per-node degree vector — confirm before re-enabling PNA.
        if model_name == 'PNA':
            deg = torch_geometric.utils.degree(data_pyg.edge_index[0], data_pyg.num_nodes).float()
        for fmt in formats:
            if model_name == 'PNA':
                model_pyg = ModelPyG(dataset_pyg.num_features, dataset_pyg.num_classes, deg=deg)
            else:
                model_pyg = ModelPyG(dataset_pyg.num_features, dataset_pyg.num_classes)
            for device_label, device in profile_targets:
                time_per_iter, peak_mem = profile_model(model_pyg, data_pyg, device, format=fmt)
                print(f'PyG {dataset_name} {model_name} ({fmt.upper()}) on {device_label}: {time_per_iter:.6f} seconds per iteration, {peak_mem:.2f} MB peak memory')

    for dataset_name, graph_dgl in datasets_dgl.items():
        # Model dimensions depend only on the graph, not on the format label.
        input_dim = graph_dgl.ndata['feat'].shape[1]
        output_dim = graph_dgl.ndata['label'].max().item() + 1
        for fmt in formats:
            model_dgl = ModelDGL(input_dim, output_dim)
            for device_label, device in profile_targets:
                time_per_iter, peak_mem = profile_model(model_dgl, graph_dgl, device, dgl=True, format=fmt)
                print(f'DGL {dataset_name} {model_name} ({fmt.upper()}) on {device_label}: {time_per_iter:.6f} seconds per iteration, {peak_mem:.2f} MB peak memory')


PyG Karate Club GCN (COO) on CPU: 0.001191 seconds per iteration, 7164.95 MB peak memory
PyG Karate Club GCN (COO) on GPU: 0.002448 seconds per iteration, 7164.95 MB peak memory
PyG Karate Club GCN (CSR) on CPU: 0.004803 seconds per iteration, 7164.95 MB peak memory
PyG Karate Club GCN (CSR) on GPU: 0.001869 seconds per iteration, 7164.95 MB peak memory
PyG Citeseer GCN (COO) on CPU: 0.020012 seconds per iteration, 7164.96 MB peak memory
PyG Citeseer GCN (COO) on GPU: 0.019383 seconds per iteration, 7164.96 MB peak memory
PyG Citeseer GCN (CSR) on CPU: 0.019447 seconds per iteration, 7164.96 MB peak memory
PyG Citeseer GCN (CSR) on GPU: 0.022810 seconds per iteration, 7164.96 MB peak memory
DGL Karate Club GCN (COO) on CPU: 0.005057 seconds per iteration, 7165.03 MB peak memory
DGL Karate Club GCN (COO) on GPU: 0.003122 seconds per iteration, 7165.03 MB peak memory
DGL Karate Club GCN (CSR) on CPU: 0.003300 seconds per iteration, 7165.03 MB peak memory
DGL Karate Club GCN (CSR) on GPU:

In [None]:
import torch
import torch.nn.functional as F
import torch_geometric
from torch_geometric.data import Data
from torch_geometric.datasets import Planetoid, KarateClub
import dgl
from dgl.data import CoraGraphDataset, CiteseerGraphDataset, PubmedGraphDataset
from dgl.nn.pytorch import GATConv as GATConvDGL, SAGEConv as SAGEConvDGL

In [None]:
dataset_cora_pyg = Planetoid(root='/tmp/Cora', name='Cora')
data_cora_pyg = dataset_cora_pyg[0]

dataset_pubmed_pyg = Planetoid(root='/tmp/Pubmed', name='Pubmed')
data_pubmed_pyg = dataset_pubmed_pyg[0]

# Load datasets for DGL

graph_cora_dgl = CoraGraphDataset()[0]
graph_cora_dgl = ensure_dgl_features_and_labels(graph_cora_dgl, data_cora_pyg)

graph_pubmed_dgl = PubmedGraphDataset()[0]
graph_pubmed_dgl = ensure_dgl_features_and_labels(graph_pubmed_dgl, data_pubmed_pyg)

# Define devices
device_cpu = torch.device('cpu')
device_gpu = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# `device_gpu` silently falls back to the CPU when CUDA is unavailable, so a
# "GPU" run is only scheduled when it really is a CUDA device; otherwise CPU
# timings would be reported under a GPU label.
profile_targets = [('CPU', device_cpu)]
if device_gpu.type == 'cuda':
    profile_targets.append(('GPU', device_gpu))

# Define and test models on COO and CSR formats
# `fmt` is forwarded to profile_model and shown in the printed label.
formats = ['coo', 'csr']
models = {
    'GCN': (GCN, GCN_DGL),
    'GAT': (GAT, GAT_DGL),
    'GraphSAGE': (GraphSAGE, GraphSAGE_DGL),
    #'PNA': (PNA, PNA_DGL)
}

datasets_pyg = {
    'Karate Club': (dataset_karate_pyg, data_karate_pyg),
    'Citeseer': (dataset_citeseer_pyg, data_citeseer_pyg),
    'Cora': (dataset_cora_pyg, data_cora_pyg),
    'Pubmed': (dataset_pubmed_pyg, data_pubmed_pyg)
}

datasets_dgl = {
    'Karate Club': graph_karate_dgl,
    'Citeseer': graph_citeseer_dgl,
    'Cora': graph_cora_dgl,
    'Pubmed': graph_pubmed_dgl
}

for model_name, (ModelPyG, ModelDGL) in models.items():
    for dataset_name, (dataset_pyg, data_pyg) in datasets_pyg.items():
        # Calculate degree tensor for PyG PNA model
        # NOTE(review): PyG documents PNAConv's `deg` as an in-degree histogram,
        # not a per-node degree vector — confirm before re-enabling PNA.
        if model_name == 'PNA':
            deg = torch_geometric.utils.degree(data_pyg.edge_index[0], data_pyg.num_nodes).float()
        for fmt in formats:
            if model_name == 'PNA':
                model_pyg = ModelPyG(dataset_pyg.num_features, dataset_pyg.num_classes, deg=deg)
            else:
                model_pyg = ModelPyG(dataset_pyg.num_features, dataset_pyg.num_classes)
            for device_label, device in profile_targets:
                time_per_iter, peak_mem = profile_model(model_pyg, data_pyg, device, format=fmt)
                print(f'PyG {dataset_name} {model_name} ({fmt.upper()}) on {device_label}: {time_per_iter:.6f} seconds per iteration, {peak_mem:.2f} MB peak memory')

    for dataset_name, graph_dgl in datasets_dgl.items():
        # Model dimensions depend only on the graph, not on the format label.
        input_dim = graph_dgl.ndata['feat'].shape[1]
        output_dim = graph_dgl.ndata['label'].max().item() + 1
        for fmt in formats:
            model_dgl = ModelDGL(input_dim, output_dim)
            for device_label, device in profile_targets:
                time_per_iter, peak_mem = profile_model(model_dgl, graph_dgl, device, dgl=True, format=fmt)
                print(f'DGL {dataset_name} {model_name} ({fmt.upper()}) on {device_label}: {time_per_iter:.6f} seconds per iteration, {peak_mem:.2f} MB peak memory')

  NumNodes: 2708
  NumEdges: 10556
  NumFeats: 1433
  NumClasses: 7
  NumTrainingSamples: 140
  NumValidationSamples: 500
  NumTestSamples: 1000
Done loading data from cached files.
  NumNodes: 19717
  NumEdges: 88651
  NumFeats: 500
  NumClasses: 3
  NumTrainingSamples: 60
  NumValidationSamples: 500
  NumTestSamples: 1000
Done loading data from cached files.
PyG Karate Club GCN (COO) on CPU: 0.002038 seconds per iteration, 7158.20 MB peak memory
PyG Karate Club GCN (COO) on GPU: 0.001737 seconds per iteration, 7158.20 MB peak memory
PyG Karate Club GCN (CSR) on CPU: 0.001650 seconds per iteration, 7158.20 MB peak memory
PyG Karate Club GCN (CSR) on GPU: 0.001771 seconds per iteration, 7158.20 MB peak memory
PyG Citeseer GCN (COO) on CPU: 0.019733 seconds per iteration, 7158.20 MB peak memory
PyG Citeseer GCN (COO) on GPU: 0.019869 seconds per iteration, 7158.20 MB peak memory
PyG Citeseer GCN (CSR) on CPU: 0.021990 seconds per iteration, 7158.20 MB peak memory
PyG Citeseer GCN (CSR) 

In [None]:
import torch
import torch_geometric
from torch_geometric.datasets import Amazon, Coauthor, PPI, Flickr
import dgl
from dgl.data import PPIDataset, FlickrDataset

### Amazon Computers Dataset
correct





In [None]:
import torch
import torch.nn.functional as F
import torch_geometric
from torch_geometric.data import Data
from torch_geometric.datasets import Planetoid, KarateClub, Amazon, Coauthor, PPI, Reddit, Flickr
import dgl
from dgl.data import CiteseerGraphDataset, CoraGraphDataset, PubmedGraphDataset, PPIDataset, RedditDataset, FlickrDataset
from dgl.nn.pytorch import GATConv as GATConvDGL, SAGEConv as SAGEConvDGL

# Load datasets for PyG
dataset_amazon_computers_pyg = Amazon(root='/tmp/AmazonComputers', name='Computers')
data_amazon_computers_pyg = dataset_amazon_computers_pyg[0]
# Load datasets for DGL
# Pass the node count explicitly: when built from edges alone, DGL infers it
# from the largest node id, so trailing isolated nodes would be dropped and the
# per-node features could no longer be attached.
graph_amazon_computers_dgl = dgl.graph(
    (data_amazon_computers_pyg.edge_index[0], data_amazon_computers_pyg.edge_index[1]),
    num_nodes=data_amazon_computers_pyg.num_nodes,
)

# Ensure features and labels are set for DGL graphs
def ensure_dgl_features_and_labels(graph, pyg_data):
    """Copy ``pyg_data.x`` / ``pyg_data.y`` onto ``graph.ndata`` and return ``graph``.

    The tensors are stored under the keys ``'feat'`` and ``'label'`` as
    detached clones, replacing whatever was stored under those keys before.
    """
    for ndata_key, source_attr in (('feat', 'x'), ('label', 'y')):
        graph.ndata[ndata_key] = getattr(pyg_data, source_attr).clone().detach()
    return graph

# Attach the PyG features/labels to the DGL graph built from the same edges.
graph_amazon_computers_dgl = ensure_dgl_features_and_labels(graph_amazon_computers_dgl, data_amazon_computers_pyg)

# Add self-loops to the DGL graphs to handle 0-in-degree nodes
graphs_dgl = {
    'Amazon Computers': graph_amazon_computers_dgl
}

# The result of add_self_loop is stored back into the dict only;
# `graph_amazon_computers_dgl` itself is not reassigned.
for name, graph in graphs_dgl.items():
    graphs_dgl[name] = dgl.add_self_loop(graph)

# Name -> (dataset, data) mapping consumed by the PyG benchmark loop.
datasets_pyg_amazon_computers = {
    'Amazon Computers': (dataset_amazon_computers_pyg, data_amazon_computers_pyg)
}

# Name -> graph mapping consumed by the DGL benchmark loop (self-looped graph).
datasets_dgl_amazon_computers = {
    'Amazon Computers': graphs_dgl['Amazon Computers']
}

In [None]:
# Define devices
device_cpu = torch.device('cpu')
device_gpu = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# `device_gpu` silently falls back to the CPU when CUDA is unavailable, so a
# "GPU" run is only scheduled when it really is a CUDA device; otherwise CPU
# timings would be reported under a GPU label.
profile_targets = [('CPU', device_cpu)]
if device_gpu.type == 'cuda':
    profile_targets.append(('GPU', device_gpu))

# Define and test models on COO and CSR formats
# `fmt` is forwarded to profile_model and shown in the printed label.
formats = ['coo', 'csr']
models = {
    'GCN': (GCN, GCN_DGL),
    'GAT': (GAT, GAT_DGL),
    'GraphSAGE': (GraphSAGE, GraphSAGE_DGL),
    #'PNA': (PNA, PNA_DGL)
}

for model_name, (ModelPyG, ModelDGL) in models.items():
    for dataset_name, (dataset_pyg, data_pyg) in datasets_pyg_amazon_computers.items():
        # Calculate degree tensor for PyG PNA model
        # NOTE(review): PyG documents PNAConv's `deg` as an in-degree histogram,
        # not a per-node degree vector — confirm before re-enabling PNA.
        if model_name == 'PNA':
            deg = torch_geometric.utils.degree(data_pyg.edge_index[0], data_pyg.num_nodes).float()
        for fmt in formats:
            if model_name == 'PNA':
                model_pyg = ModelPyG(dataset_pyg.num_features, dataset_pyg.num_classes, deg=deg)
            else:
                model_pyg = ModelPyG(dataset_pyg.num_features, dataset_pyg.num_classes)
            for device_label, device in profile_targets:
                time_per_iter, peak_mem = profile_model(model_pyg, data_pyg, device, format=fmt)
                print(f'PyG {dataset_name} {model_name} ({fmt.upper()}) on {device_label}: {time_per_iter:.6f} seconds per iteration, {peak_mem:.2f} MB peak memory')

    for dataset_name, graph_dgl in datasets_dgl_amazon_computers.items():
        # Model dimensions depend only on the graph, not on the format label.
        input_dim = graph_dgl.ndata['feat'].shape[1]
        output_dim = graph_dgl.ndata['label'].max().item() + 1
        for fmt in formats:
            model_dgl = ModelDGL(input_dim, output_dim)
            for device_label, device in profile_targets:
                time_per_iter, peak_mem = profile_model(model_dgl, graph_dgl, device, dgl=True, format=fmt)
                print(f'DGL {dataset_name} {model_name} ({fmt.upper()}) on {device_label}: {time_per_iter:.6f} seconds per iteration, {peak_mem:.2f} MB peak memory')

PyG Amazon Computers GCN (COO) on CPU: 0.211311 seconds per iteration, 5548.11 MB peak memory
PyG Amazon Computers GCN (COO) on GPU: 0.148071 seconds per iteration, 5548.14 MB peak memory
PyG Amazon Computers GCN (CSR) on CPU: 0.153019 seconds per iteration, 5548.14 MB peak memory
PyG Amazon Computers GCN (CSR) on GPU: 0.149438 seconds per iteration, 5548.14 MB peak memory
DGL Amazon Computers GCN (COO) on CPU: 0.032600 seconds per iteration, 5510.53 MB peak memory
DGL Amazon Computers GCN (COO) on GPU: 0.032824 seconds per iteration, 5510.53 MB peak memory
DGL Amazon Computers GCN (CSR) on CPU: 0.036971 seconds per iteration, 5510.53 MB peak memory
DGL Amazon Computers GCN (CSR) on GPU: 0.033462 seconds per iteration, 5510.53 MB peak memory
PyG Amazon Computers GAT (COO) on CPU: 0.550445 seconds per iteration, 5757.47 MB peak memory
PyG Amazon Computers GAT (COO) on GPU: 0.559175 seconds per iteration, 5757.47 MB peak memory
PyG Amazon Computers GAT (CSR) on CPU: 0.559044 seconds per 

### Amazon Photo Dataset

> *correct*



In [None]:
import torch
import torch.nn.functional as F
import torch_geometric
from torch_geometric.data import Data
from torch_geometric.datasets import Planetoid, KarateClub, Amazon, Coauthor, PPI, Reddit, Flickr
import dgl
from dgl.data import CiteseerGraphDataset, CoraGraphDataset, PubmedGraphDataset, PPIDataset, RedditDataset, FlickrDataset
from dgl.nn.pytorch import GATConv as GATConvDGL, SAGEConv as SAGEConvDGL

# Load datasets for PyG
dataset_amazon_photo_pyg = Amazon(root='/tmp/AmazonPhoto', name='Photo')
data_amazon_photo_pyg = dataset_amazon_photo_pyg[0]
# Load datasets for DGL
# Pass the node count explicitly: when built from edges alone, DGL infers it
# from the largest node id, so trailing isolated nodes would be dropped and the
# per-node features could no longer be attached.
graph_amazon_photo_dgl = dgl.graph(
    (data_amazon_photo_pyg.edge_index[0], data_amazon_photo_pyg.edge_index[1]),
    num_nodes=data_amazon_photo_pyg.num_nodes,
)

# Ensure features and labels are set for DGL graphs
def ensure_dgl_features_and_labels(graph, pyg_data):
    """Copy ``pyg_data.x`` / ``pyg_data.y`` onto ``graph.ndata`` and return ``graph``.

    The tensors are stored under the keys ``'feat'`` and ``'label'`` as
    detached clones, replacing whatever was stored under those keys before.
    """
    for ndata_key, source_attr in (('feat', 'x'), ('label', 'y')):
        graph.ndata[ndata_key] = getattr(pyg_data, source_attr).clone().detach()
    return graph

# Attach the PyG features/labels to the DGL graph built from the same edges.
graph_amazon_photo_dgl = ensure_dgl_features_and_labels(graph_amazon_photo_dgl, data_amazon_photo_pyg)


# Add self-loops to the DGL graphs to handle 0-in-degree nodes
graphs_dgl = {
    'Amazon Photo': graph_amazon_photo_dgl
}

# The result of add_self_loop is stored back into the dict only;
# `graph_amazon_photo_dgl` itself is not reassigned.
for name, graph in graphs_dgl.items():
    graphs_dgl[name] = dgl.add_self_loop(graph)

# Name -> (dataset, data) mapping consumed by the PyG benchmark loop.
datasets_pyg_amazon_photo = {
    'Amazon Photo': (dataset_amazon_photo_pyg, data_amazon_photo_pyg),
}

# Name -> graph mapping consumed by the DGL benchmark loop (self-looped graph).
datasets_dgl_amazon_photo = {
    'Amazon Photo': graphs_dgl['Amazon Photo'],
}

In [None]:
# Define devices
device_cpu = torch.device('cpu')
device_gpu = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# `device_gpu` silently falls back to the CPU when CUDA is unavailable, so a
# "GPU" run is only scheduled when it really is a CUDA device; otherwise CPU
# timings would be reported under a GPU label.
profile_targets = [('CPU', device_cpu)]
if device_gpu.type == 'cuda':
    profile_targets.append(('GPU', device_gpu))

# Define and test models on COO and CSR formats
# `fmt` is forwarded to profile_model and shown in the printed label.
formats = ['coo', 'csr']
models = {
    'GCN': (GCN, GCN_DGL),
    'GAT': (GAT, GAT_DGL),
    'GraphSAGE': (GraphSAGE, GraphSAGE_DGL),
    #'PNA': (PNA, PNA_DGL)
}

for model_name, (ModelPyG, ModelDGL) in models.items():
    for dataset_name, (dataset_pyg, data_pyg) in datasets_pyg_amazon_photo.items():
        # Calculate degree tensor for PyG PNA model
        # NOTE(review): PyG documents PNAConv's `deg` as an in-degree histogram,
        # not a per-node degree vector — confirm before re-enabling PNA.
        if model_name == 'PNA':
            deg = torch_geometric.utils.degree(data_pyg.edge_index[0], data_pyg.num_nodes).float()
        for fmt in formats:
            if model_name == 'PNA':
                model_pyg = ModelPyG(dataset_pyg.num_features, dataset_pyg.num_classes, deg=deg)
            else:
                model_pyg = ModelPyG(dataset_pyg.num_features, dataset_pyg.num_classes)
            for device_label, device in profile_targets:
                time_per_iter, peak_mem = profile_model(model_pyg, data_pyg, device, format=fmt)
                print(f'PyG {dataset_name} {model_name} ({fmt.upper()}) on {device_label}: {time_per_iter:.6f} seconds per iteration, {peak_mem:.2f} MB peak memory')

    for dataset_name, graph_dgl in datasets_dgl_amazon_photo.items():
        # Model dimensions depend only on the graph, not on the format label.
        input_dim = graph_dgl.ndata['feat'].shape[1]
        output_dim = graph_dgl.ndata['label'].max().item() + 1
        for fmt in formats:
            model_dgl = ModelDGL(input_dim, output_dim)
            for device_label, device in profile_targets:
                time_per_iter, peak_mem = profile_model(model_dgl, graph_dgl, device, dgl=True, format=fmt)
                print(f'DGL {dataset_name} {model_name} ({fmt.upper()}) on {device_label}: {time_per_iter:.6f} seconds per iteration, {peak_mem:.2f} MB peak memory')

PyG Amazon Photo GCN (COO) on CPU: 0.078241 seconds per iteration, 5218.05 MB peak memory
PyG Amazon Photo GCN (COO) on GPU: 0.064290 seconds per iteration, 5218.07 MB peak memory
PyG Amazon Photo GCN (CSR) on CPU: 0.075476 seconds per iteration, 5218.07 MB peak memory
PyG Amazon Photo GCN (CSR) on GPU: 0.082685 seconds per iteration, 5218.07 MB peak memory
DGL Amazon Photo GCN (COO) on CPU: 0.023630 seconds per iteration, 5218.10 MB peak memory
DGL Amazon Photo GCN (COO) on GPU: 0.018715 seconds per iteration, 5218.10 MB peak memory
DGL Amazon Photo GCN (CSR) on CPU: 0.018804 seconds per iteration, 5218.10 MB peak memory
DGL Amazon Photo GCN (CSR) on GPU: 0.019447 seconds per iteration, 5218.10 MB peak memory
PyG Amazon Photo GAT (COO) on CPU: 0.230910 seconds per iteration, 5278.18 MB peak memory
PyG Amazon Photo GAT (COO) on GPU: 0.281870 seconds per iteration, 5277.42 MB peak memory
PyG Amazon Photo GAT (CSR) on CPU: 0.239136 seconds per iteration, 5275.73 MB peak memory
PyG Amazon

### Coauthor CS Dataset

In [None]:
import torch
import torch.nn.functional as F
import torch_geometric
from torch_geometric.data import Data
from torch_geometric.datasets import Planetoid, KarateClub, Amazon, Coauthor, PPI, Reddit, Flickr
import dgl
from dgl.data import CiteseerGraphDataset, CoraGraphDataset, PubmedGraphDataset, PPIDataset, RedditDataset, FlickrDataset
from dgl.nn.pytorch import GATConv as GATConvDGL, SAGEConv as SAGEConvDGL

# Load datasets for PyG
dataset_coauthor_cs_pyg = Coauthor(root='/tmp/CoauthorCS', name='CS')
data_coauthor_cs_pyg = dataset_coauthor_cs_pyg[0]
# Load datasets for DGL
# Pass the node count explicitly: when built from edges alone, DGL infers it
# from the largest node id, so trailing isolated nodes would be dropped and the
# per-node features could no longer be attached.
graph_coauthor_cs_dgl = dgl.graph(
    (data_coauthor_cs_pyg.edge_index[0], data_coauthor_cs_pyg.edge_index[1]),
    num_nodes=data_coauthor_cs_pyg.num_nodes,
)

# Ensure features and labels are set for DGL graphs
def ensure_dgl_features_and_labels(graph, pyg_data):
    """Copy ``pyg_data.x`` / ``pyg_data.y`` onto ``graph.ndata`` and return ``graph``.

    The tensors are stored under the keys ``'feat'`` and ``'label'`` as
    detached clones, replacing whatever was stored under those keys before.
    """
    for ndata_key, source_attr in (('feat', 'x'), ('label', 'y')):
        graph.ndata[ndata_key] = getattr(pyg_data, source_attr).clone().detach()
    return graph

# Attach the PyG features/labels to the DGL graph built from the same edges.
graph_coauthor_cs_dgl = ensure_dgl_features_and_labels(graph_coauthor_cs_dgl, data_coauthor_cs_pyg)

# Add self-loops to the DGL graphs to handle 0-in-degree nodes
graphs_dgl = {
    'Coauthor CS': graph_coauthor_cs_dgl
}

# The result of add_self_loop is stored back into the dict only;
# `graph_coauthor_cs_dgl` itself is not reassigned.
for name, graph in graphs_dgl.items():
    graphs_dgl[name] = dgl.add_self_loop(graph)

# Name -> (dataset, data) mapping consumed by the PyG benchmark loop.
datasets_pyg_CoauthorCS = {
    'Coauthor CS': (dataset_coauthor_cs_pyg, data_coauthor_cs_pyg)
}

# Name -> graph mapping consumed by the DGL benchmark loop (self-looped graph).
datasets_dgl_CoauthorCS = {
    'Coauthor CS': graphs_dgl['Coauthor CS']
}

In [None]:
# Define devices
device_cpu = torch.device('cpu')
device_gpu = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# `device_gpu` silently falls back to the CPU when CUDA is unavailable, so a
# "GPU" run is only scheduled when it really is a CUDA device; otherwise CPU
# timings would be reported under a GPU label.
profile_targets = [('CPU', device_cpu)]
if device_gpu.type == 'cuda':
    profile_targets.append(('GPU', device_gpu))

# Define and test models on COO and CSR formats
# `fmt` is forwarded to profile_model and shown in the printed label.
formats = ['coo', 'csr']
models = {
    'GCN': (GCN, GCN_DGL),
    'GAT': (GAT, GAT_DGL),
    'GraphSAGE': (GraphSAGE, GraphSAGE_DGL),
    #'PNA': (PNA, PNA_DGL)
}

for model_name, (ModelPyG, ModelDGL) in models.items():
    for dataset_name, (dataset_pyg, data_pyg) in datasets_pyg_CoauthorCS.items():
        # Calculate degree tensor for PyG PNA model
        # NOTE(review): PyG documents PNAConv's `deg` as an in-degree histogram,
        # not a per-node degree vector — confirm before re-enabling PNA.
        if model_name == 'PNA':
            deg = torch_geometric.utils.degree(data_pyg.edge_index[0], data_pyg.num_nodes).float()
        for fmt in formats:
            if model_name == 'PNA':
                model_pyg = ModelPyG(dataset_pyg.num_features, dataset_pyg.num_classes, deg=deg)
            else:
                model_pyg = ModelPyG(dataset_pyg.num_features, dataset_pyg.num_classes)
            for device_label, device in profile_targets:
                time_per_iter, peak_mem = profile_model(model_pyg, data_pyg, device, format=fmt)
                print(f'PyG {dataset_name} {model_name} ({fmt.upper()}) on {device_label}: {time_per_iter:.6f} seconds per iteration, {peak_mem:.2f} MB peak memory')

    for dataset_name, graph_dgl in datasets_dgl_CoauthorCS.items():
        # Model dimensions depend only on the graph, not on the format label.
        input_dim = graph_dgl.ndata['feat'].shape[1]
        output_dim = graph_dgl.ndata['label'].max().item() + 1
        for fmt in formats:
            model_dgl = ModelDGL(input_dim, output_dim)
            for device_label, device in profile_targets:
                time_per_iter, peak_mem = profile_model(model_dgl, graph_dgl, device, dgl=True, format=fmt)
                print(f'DGL {dataset_name} {model_name} ({fmt.upper()}) on {device_label}: {time_per_iter:.6f} seconds per iteration, {peak_mem:.2f} MB peak memory')

PyG Coauthor CS GCN (COO) on CPU: 0.249697 seconds per iteration, 6169.25 MB peak memory
PyG Coauthor CS GCN (COO) on GPU: 0.257429 seconds per iteration, 6169.25 MB peak memory
PyG Coauthor CS GCN (CSR) on CPU: 0.258276 seconds per iteration, 6169.25 MB peak memory
PyG Coauthor CS GCN (CSR) on GPU: 0.256737 seconds per iteration, 6169.25 MB peak memory
DGL Coauthor CS GCN (COO) on CPU: 0.613521 seconds per iteration, 6645.16 MB peak memory
DGL Coauthor CS GCN (COO) on GPU: 0.599712 seconds per iteration, 6645.16 MB peak memory
DGL Coauthor CS GCN (CSR) on CPU: 0.601166 seconds per iteration, 6645.16 MB peak memory
DGL Coauthor CS GCN (CSR) on GPU: 0.597603 seconds per iteration, 6645.16 MB peak memory
PyG Coauthor CS GAT (COO) on CPU: 0.673392 seconds per iteration, 6169.25 MB peak memory
PyG Coauthor CS GAT (COO) on GPU: 0.689402 seconds per iteration, 6213.71 MB peak memory
PyG Coauthor CS GAT (CSR) on CPU: 0.664841 seconds per iteration, 6201.96 MB peak memory
PyG Coauthor CS GAT (

In [None]:
import torch
import torch_geometric
from torch_geometric.datasets import Amazon, Coauthor, PPI, Flickr
import dgl
from dgl.data import PPIDataset, FlickrDataset

# Load datasets for PyG
dataset_amazon_computers_pyg = Amazon(root='/tmp/AmazonComputers', name='Computers')
data_amazon_computers_pyg = dataset_amazon_computers_pyg[0]

dataset_amazon_photo_pyg = Amazon(root='/tmp/AmazonPhoto', name='Photo')
data_amazon_photo_pyg = dataset_amazon_photo_pyg[0]

dataset_coauthor_cs_pyg = Coauthor(root='/tmp/CoauthorCS', name='CS')
data_coauthor_cs_pyg = dataset_coauthor_cs_pyg[0]

dataset_coauthor_physics_pyg = Coauthor(root='/tmp/CoauthorPhysics', name='Physics')
data_coauthor_physics_pyg = dataset_coauthor_physics_pyg[0]

dataset_ppi_pyg = PPI(root='/tmp/PPI')
data_ppi_pyg = dataset_ppi_pyg[0]

dataset_flickr_pyg = Flickr(root='/tmp/Flickr')
data_flickr_pyg = dataset_flickr_pyg[0]


def _pyg_to_dgl_graph(pyg_data):
    """Build a DGL graph from ``pyg_data.edge_index`` with ``pyg_data.num_nodes`` nodes.

    The node count is passed explicitly: when built from edges alone, DGL
    infers it from the largest node id, so trailing isolated nodes would be
    dropped and the per-node features could no longer be attached.
    """
    src, dst = pyg_data.edge_index
    return dgl.graph((src, dst), num_nodes=pyg_data.num_nodes)


# Load datasets for DGL
graph_amazon_computers_dgl = _pyg_to_dgl_graph(data_amazon_computers_pyg)
graph_amazon_photo_dgl = _pyg_to_dgl_graph(data_amazon_photo_pyg)
graph_coauthor_cs_dgl = _pyg_to_dgl_graph(data_coauthor_cs_pyg)
graph_coauthor_physics_dgl = _pyg_to_dgl_graph(data_coauthor_physics_pyg)
# PPI and Flickr come from DGL's own dataset classes (first element of each).
graph_ppi_dgl = PPIDataset()[0]
graph_flickr_dgl = FlickrDataset()[0]

# Ensure features and labels are set for DGL graphs
def ensure_dgl_features_and_labels(graph, pyg_data):
    """Copy the PyG node features and labels onto a DGL graph.

    Detached copies of ``pyg_data.x`` and ``pyg_data.y`` are stored under
    ``graph.ndata['feat']`` and ``graph.ndata['label']`` respectively.

    Args:
        graph: Object exposing a mutable ``ndata`` mapping (a DGL graph).
        pyg_data: Object exposing tensors ``x`` and ``y`` (a PyG data object).

    Returns:
        The same ``graph`` object that was passed in, after modification.
    """
    sources = (('feat', pyg_data.x), ('label', pyg_data.y))
    for field, tensor in sources:
        # clone first so the stored tensor never shares storage with the PyG one
        graph.ndata[field] = tensor.clone().detach()
    return graph

# Attach the PyG node features and labels to each DGL graph.
graph_amazon_computers_dgl = ensure_dgl_features_and_labels(graph_amazon_computers_dgl, data_amazon_computers_pyg)
graph_amazon_photo_dgl = ensure_dgl_features_and_labels(graph_amazon_photo_dgl, data_amazon_photo_pyg)
graph_coauthor_cs_dgl = ensure_dgl_features_and_labels(graph_coauthor_cs_dgl, data_coauthor_cs_pyg)
graph_coauthor_physics_dgl = ensure_dgl_features_and_labels(graph_coauthor_physics_dgl, data_coauthor_physics_pyg)
# NOTE(review): the PPI and Flickr graphs come from DGL's own loaders, so this
# assumes they have the same node count and node order as the PyG data -- confirm.
graph_ppi_dgl = ensure_dgl_features_and_labels(graph_ppi_dgl, data_ppi_pyg)
graph_flickr_dgl = ensure_dgl_features_and_labels(graph_flickr_dgl, data_flickr_pyg)

# Add self-loops to the DGL graphs to handle 0-in-degree nodes
graphs_dgl = {
    'Amazon Computers': graph_amazon_computers_dgl,
    'Amazon Photo': graph_amazon_photo_dgl,
    'Coauthor CS': graph_coauthor_cs_dgl,
    'Coauthor Physics': graph_coauthor_physics_dgl,
    'PPI': graph_ppi_dgl,
    'Flickr': graph_flickr_dgl
}

for name, graph in graphs_dgl.items():
    # dgl.add_self_loop adds a loop to every node even when one already exists,
    # so existing self-loops are removed first to end up with exactly one per
    # node. Only values are replaced, so mutating the dict while iterating is safe.
    graphs_dgl[name] = dgl.add_self_loop(dgl.remove_self_loop(graph))

# Print dataset information
def print_pyg_dataset_info(name, dataset, data):
    """Print a summary of a PyG dataset and one of its graphs.

    Args:
        name: Label used in the heading line.
        dataset: Object providing ``num_features`` and ``num_classes``.
        data: Object providing ``num_nodes``, ``num_edges``, ``edge_index``,
            ``x`` and ``y``.

    The summary is written to stdout and followed by a blank line.
    """
    half_edges = data.num_edges // 2  # edges are doubled, so halve the count
    summary_lines = (
        f"PyG {name} Dataset:",
        f"  Number of nodes: {data.num_nodes}",
        f"  Number of edges: {half_edges}",
        f"  Number of features: {dataset.num_features}",
        f"  Number of classes: {dataset.num_classes}",
        f"  Edge index shape: {data.edge_index.shape}",
        f"  Features shape: {data.x.shape}",
        f"  Labels shape: {data.y.shape}\n",
    )
    print("\n".join(summary_lines))

def print_dgl_dataset_info(name, graph):
    """Print a summary of a DGL graph carrying ``feat`` and ``label`` node data.

    Args:
        name: Label used in the heading line.
        graph: Graph exposing ``num_nodes()``, ``edges(form='all')`` and an
            ``ndata`` mapping with ``'feat'`` and ``'label'`` tensors.

    The summary is written to stdout and followed by a blank line.
    """
    src, dst, _ = graph.edges(form='all')
    features = graph.ndata['feat']
    labels = graph.ndata['label']

    # Self-loops are left out of the edge count: this notebook adds them to the
    # DGL graphs before printing, and counting them would make the figure
    # disagree with the PyG summary of the same dataset. The remaining edges
    # are doubled, so the count is halved.
    num_edges = int((src != dst).sum()) // 2

    if labels.dim() > 1:
        # A 2-D label matrix has one column per class (multi-label targets);
        # counting unique values there would only report the distinct entries.
        num_classes = labels.shape[1]
    else:
        num_classes = len(torch.unique(labels))

    print(f"DGL {name} Dataset:")
    print(f"  Number of nodes: {graph.num_nodes()}")
    print(f"  Number of edges: {num_edges}")
    print(f"  Number of features: {features.shape[1]}")
    print(f"  Number of classes: {num_classes}")
    print(f"  Edge index shape: {src.shape}")
    print(f"  Features shape: {features.shape}")
    print(f"  Labels shape: {labels.shape}\n")

# PyG side: map each display name to its (dataset, first graph) pair, then
# print one summary per entry.
datasets_pyg = dict([
    ('Amazon Computers', (dataset_amazon_computers_pyg, data_amazon_computers_pyg)),
    ('Amazon Photo', (dataset_amazon_photo_pyg, data_amazon_photo_pyg)),
    ('Coauthor CS', (dataset_coauthor_cs_pyg, data_coauthor_cs_pyg)),
    ('Coauthor Physics', (dataset_coauthor_physics_pyg, data_coauthor_physics_pyg)),
    ('PPI', (dataset_ppi_pyg, data_ppi_pyg)),
    ('Flickr', (dataset_flickr_pyg, data_flickr_pyg)),
])

for name, (dataset, data) in datasets_pyg.items():
    print_pyg_dataset_info(name, dataset, data)

# DGL side: pick the graphs stored in `graphs_dgl` under the same display
# names, in the same order, and print one summary per graph.
datasets_dgl = {
    key: graphs_dgl[key]
    for key in (
        'Amazon Computers',
        'Amazon Photo',
        'Coauthor CS',
        'Coauthor Physics',
        'PPI',
        'Flickr',
    )
}

for name, graph in datasets_dgl.items():
    print_dgl_dataset_info(name, graph)


PyG Amazon Computers Dataset:
  Number of nodes: 13752
  Number of edges: 245861
  Number of features: 767
  Number of classes: 10
  Edge index shape: torch.Size([2, 491722])
  Features shape: torch.Size([13752, 767])
  Labels shape: torch.Size([13752])

PyG Amazon Photo Dataset:
  Number of nodes: 7650
  Number of edges: 119081
  Number of features: 745
  Number of classes: 8
  Edge index shape: torch.Size([2, 238162])
  Features shape: torch.Size([7650, 745])
  Labels shape: torch.Size([7650])

PyG Coauthor CS Dataset:
  Number of nodes: 18333
  Number of edges: 81894
  Number of features: 6805
  Number of classes: 15
  Edge index shape: torch.Size([2, 163788])
  Features shape: torch.Size([18333, 6805])
  Labels shape: torch.Size([18333])

PyG Coauthor Physics Dataset:
  Number of nodes: 34493
  Number of edges: 247962
  Number of features: 8415
  Number of classes: 5
  Edge index shape: torch.Size([2, 495924])
  Features shape: torch.Size([34493, 8415])
  Labels shape: torch.Size([

In [None]:
# Define devices
device_cpu = torch.device('cpu')
# Use CUDA when present; otherwise fall back to the CPU so the notebook still runs.
device_gpu = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
if device_gpu.type != 'cuda':
    # Without this notice the benchmark would silently print CPU timings under
    # an "on GPU" label.
    import warnings
    warnings.warn(
        "CUDA is not available: device_gpu falls back to the CPU, so results "
        "labelled 'on GPU' are CPU measurements.",
        RuntimeWarning,
    )

In [None]:
# Define and test models on COO and CSR formats
formats = ['coo', 'csr']
# Each entry maps a model name to its (PyG class, DGL class) pair.
models = {
    'GCN': (GCN, GCN_DGL),
    'GAT': (GAT, GAT_DGL),
    'GraphSAGE': (GraphSAGE, GraphSAGE_DGL),
    #'PNA': (PNA, PNA_DGL)
}

for model_name, (ModelPyG, ModelDGL) in models.items():
    # --- PyG: profile every dataset in every sparse format, on both devices ---
    for dataset_name, (dataset_pyg, data_pyg) in datasets_pyg.items():
        # Calculate degree tensor for PyG PNA model
        # NOTE(review): PyG's PNAConv documents `deg` as a histogram of
        # in-degrees, whereas this is a per-node degree vector -- confirm what
        # the PNA wrapper expects before re-enabling the 'PNA' entry.
        if model_name == 'PNA':
            deg = torch_geometric.utils.degree(data_pyg.edge_index[0], data_pyg.num_nodes).float()
        for fmt in formats:
            if model_name == 'PNA':
                model_pyg = ModelPyG(dataset_pyg.num_features, dataset_pyg.num_classes, deg=deg)
            else:
                model_pyg = ModelPyG(dataset_pyg.num_features, dataset_pyg.num_classes)
            time_cpu, mem_cpu = profile_model(model_pyg, data_pyg, device_cpu, format=fmt)
            time_gpu, mem_gpu = profile_model(model_pyg, data_pyg, device_gpu, format=fmt)
            print(f'PyG {dataset_name} {model_name} ({fmt.upper()}) on CPU: {time_cpu:.6f} seconds per iteration, {mem_cpu:.2f} MB peak memory')
            print(f'PyG {dataset_name} {model_name} ({fmt.upper()}) on GPU: {time_gpu:.6f} seconds per iteration, {mem_gpu:.2f} MB peak memory')

    # --- DGL: same sweep over the DGL graphs ---
    for dataset_name, graph_dgl in datasets_dgl.items():
        # The layer sizes depend only on the graph, so compute them once per
        # dataset instead of once per format.
        input_dim = graph_dgl.ndata['feat'].shape[1]
        labels_dgl = graph_dgl.ndata['label']
        if labels_dgl.dim() > 1:
            # 2-D labels: one column per class.
            output_dim = labels_dgl.shape[1]
        else:
            # `.item()` returns a float for floating-point labels; layer sizes
            # must be ints, otherwise weight allocation raises
            # "type must be tuple of ints, but got float".
            output_dim = int(labels_dgl.max().item()) + 1
        for fmt in formats:
            model_dgl = ModelDGL(input_dim, output_dim)
            time_cpu, mem_cpu = profile_model(model_dgl, graph_dgl, device_cpu, dgl=True, format=fmt)
            time_gpu, mem_gpu = profile_model(model_dgl, graph_dgl, device_gpu, dgl=True, format=fmt)
            print(f'DGL {dataset_name} {model_name} ({fmt.upper()}) on CPU: {time_cpu:.6f} seconds per iteration, {mem_cpu:.2f} MB peak memory')
            print(f'DGL {dataset_name} {model_name} ({fmt.upper()}) on GPU: {time_gpu:.6f} seconds per iteration, {mem_gpu:.2f} MB peak memory')


PyG Amazon Computers GCN (COO) on CPU: 0.211223 seconds per iteration, 5105.08 MB peak memory
PyG Amazon Computers GCN (COO) on GPU: 0.187418 seconds per iteration, 5105.54 MB peak memory
PyG Amazon Computers GCN (CSR) on CPU: 0.170395 seconds per iteration, 5124.83 MB peak memory
PyG Amazon Computers GCN (CSR) on GPU: 0.184569 seconds per iteration, 5105.34 MB peak memory
PyG Amazon Photo GCN (COO) on CPU: 0.067607 seconds per iteration, 5105.49 MB peak memory
PyG Amazon Photo GCN (COO) on GPU: 0.069725 seconds per iteration, 5105.53 MB peak memory
PyG Amazon Photo GCN (CSR) on CPU: 0.061381 seconds per iteration, 5039.46 MB peak memory
PyG Amazon Photo GCN (CSR) on GPU: 0.071416 seconds per iteration, 5039.46 MB peak memory
PyG Coauthor CS GCN (COO) on CPU: 0.231826 seconds per iteration, 5052.44 MB peak memory
PyG Coauthor CS GCN (COO) on GPU: 0.229854 seconds per iteration, 5052.44 MB peak memory
PyG Coauthor CS GCN (CSR) on CPU: 0.226484 seconds per iteration, 5073.97 MB peak memo

TypeError: new(): argument 'size' failed to unpack the object at pos 2 with error "type must be tuple of ints,but got float"

In [None]:
# Reload the PyG datasets (same roots and names as the earlier loading cell).
dataset_amazon_computers_pyg = Amazon(root='/tmp/AmazonComputers', name='Computers')
data_amazon_computers_pyg = dataset_amazon_computers_pyg[0]

dataset_amazon_photo_pyg = Amazon(root='/tmp/AmazonPhoto', name='Photo')
data_amazon_photo_pyg = dataset_amazon_photo_pyg[0]

dataset_coauthor_cs_pyg = Coauthor(root='/tmp/CoauthorCS', name='CS')
data_coauthor_cs_pyg = dataset_coauthor_cs_pyg[0]

dataset_coauthor_physics_pyg = Coauthor(root='/tmp/CoauthorPhysics', name='Physics')
data_coauthor_physics_pyg = dataset_coauthor_physics_pyg[0]

dataset_ppi_pyg = PPI(root='/tmp/PPI')
data_ppi_pyg = dataset_ppi_pyg[0]

# dataset_reddit_pyg = Reddit(root='/tmp/Reddit')
# data_reddit_pyg = dataset_reddit_pyg[0]

dataset_flickr_pyg = Flickr(root='/tmp/Flickr')
data_flickr_pyg = dataset_flickr_pyg[0]


def _attach_and_add_self_loops(graph, pyg_data):
    """Attach PyG features/labels to `graph` and give every node one self-loop."""
    graph = ensure_dgl_features_and_labels(graph, pyg_data)
    # The DGL layers used in the benchmark reject graphs containing
    # 0-in-degree nodes (DGLError). Existing self-loops are removed first
    # because dgl.add_self_loop would otherwise duplicate them.
    return dgl.add_self_loop(dgl.remove_self_loop(graph))


# Load datasets for DGL
# `num_nodes` is passed explicitly so that isolated nodes with the highest IDs
# are not dropped (DGL otherwise uses "largest node ID + 1").
graph_amazon_computers_dgl = dgl.graph(
    (data_amazon_computers_pyg.edge_index[0], data_amazon_computers_pyg.edge_index[1]),
    num_nodes=data_amazon_computers_pyg.num_nodes,
)
graph_amazon_computers_dgl = _attach_and_add_self_loops(graph_amazon_computers_dgl, data_amazon_computers_pyg)

graph_amazon_photo_dgl = dgl.graph(
    (data_amazon_photo_pyg.edge_index[0], data_amazon_photo_pyg.edge_index[1]),
    num_nodes=data_amazon_photo_pyg.num_nodes,
)
graph_amazon_photo_dgl = _attach_and_add_self_loops(graph_amazon_photo_dgl, data_amazon_photo_pyg)

graph_coauthor_cs_dgl = dgl.graph(
    (data_coauthor_cs_pyg.edge_index[0], data_coauthor_cs_pyg.edge_index[1]),
    num_nodes=data_coauthor_cs_pyg.num_nodes,
)
graph_coauthor_cs_dgl = _attach_and_add_self_loops(graph_coauthor_cs_dgl, data_coauthor_cs_pyg)

graph_coauthor_physics_dgl = dgl.graph(
    (data_coauthor_physics_pyg.edge_index[0], data_coauthor_physics_pyg.edge_index[1]),
    num_nodes=data_coauthor_physics_pyg.num_nodes,
)
graph_coauthor_physics_dgl = _attach_and_add_self_loops(graph_coauthor_physics_dgl, data_coauthor_physics_pyg)

# NOTE(review): PPI and Flickr come from DGL's own loaders; this assumes they
# share node count and node order with the PyG data -- confirm.
graph_ppi_dgl = PPIDataset()[0]
graph_ppi_dgl = _attach_and_add_self_loops(graph_ppi_dgl, data_ppi_pyg)

# graph_reddit_dgl = RedditDataset()[0]
# graph_reddit_dgl = ensure_dgl_features_and_labels(graph_reddit_dgl, data_reddit_pyg)

graph_flickr_dgl = FlickrDataset()[0]
graph_flickr_dgl = _attach_and_add_self_loops(graph_flickr_dgl, data_flickr_pyg)

In [None]:
# PyG benchmark inputs: display name -> (dataset, first graph).
# Karate Club, Citeseer, Cora, Pubmed and Reddit are currently left out.
datasets_pyg = dict([
    ('Amazon Computers', (dataset_amazon_computers_pyg, data_amazon_computers_pyg)),
    ('Amazon Photo', (dataset_amazon_photo_pyg, data_amazon_photo_pyg)),
    ('Coauthor CS', (dataset_coauthor_cs_pyg, data_coauthor_cs_pyg)),
    ('Coauthor Physics', (dataset_coauthor_physics_pyg, data_coauthor_physics_pyg)),
    ('PPI', (dataset_ppi_pyg, data_ppi_pyg)),
    ('Flickr', (dataset_flickr_pyg, data_flickr_pyg)),
])

In [None]:
# DGL benchmark inputs: display name -> graph.
# Karate Club, Citeseer, Cora, Pubmed and Reddit are currently left out.
datasets_dgl = dict([
    ('Amazon Computers', graph_amazon_computers_dgl),
    ('Amazon Photo', graph_amazon_photo_dgl),
    ('Coauthor CS', graph_coauthor_cs_dgl),
    ('Coauthor Physics', graph_coauthor_physics_dgl),
    ('PPI', graph_ppi_dgl),
    ('Flickr', graph_flickr_dgl),
])

In [None]:
# Benchmark every (model, dataset, sparse format) combination on both devices,
# first with the PyG implementations, then with the DGL ones.
for model_name, (ModelPyG, ModelDGL) in models.items():
    for dataset_name, (dataset_pyg, data_pyg) in datasets_pyg.items():
        # Calculate degree tensor for PyG PNA model
        # NOTE(review): PyG's PNAConv documents `deg` as a histogram of
        # in-degrees, whereas this is a per-node degree vector -- confirm what
        # the PNA wrapper expects before enabling a 'PNA' entry.
        if model_name == 'PNA':
            deg = torch_geometric.utils.degree(data_pyg.edge_index[0], data_pyg.num_nodes).float()
        for fmt in formats:
            if model_name == 'PNA':
                model_pyg = ModelPyG(dataset_pyg.num_features, dataset_pyg.num_classes, deg=deg)
            else:
                model_pyg = ModelPyG(dataset_pyg.num_features, dataset_pyg.num_classes)
            time_cpu, mem_cpu = profile_model(model_pyg, data_pyg, device_cpu, format=fmt)
            time_gpu, mem_gpu = profile_model(model_pyg, data_pyg, device_gpu, format=fmt)
            print(f'PyG {dataset_name} {model_name} ({fmt.upper()}) on CPU: {time_cpu:.6f} seconds per iteration, {mem_cpu:.2f} MB peak memory')
            print(f'PyG {dataset_name} {model_name} ({fmt.upper()}) on GPU: {time_gpu:.6f} seconds per iteration, {mem_gpu:.2f} MB peak memory')

    for dataset_name, graph_dgl in datasets_dgl.items():
        # The layer sizes depend only on the graph, so compute them once per
        # dataset instead of once per format.
        input_dim = graph_dgl.ndata['feat'].shape[1]
        labels_dgl = graph_dgl.ndata['label']
        if labels_dgl.dim() > 1:
            # 2-D labels: one column per class.
            output_dim = labels_dgl.shape[1]
        else:
            # `.item()` returns a float for floating-point labels; layer sizes
            # must be ints, otherwise weight allocation raises
            # "type must be tuple of ints, but got float".
            output_dim = int(labels_dgl.max().item()) + 1
        for fmt in formats:
            model_dgl = ModelDGL(input_dim, output_dim)
            time_cpu, mem_cpu = profile_model(model_dgl, graph_dgl, device_cpu, dgl=True, format=fmt)
            time_gpu, mem_gpu = profile_model(model_dgl, graph_dgl, device_gpu, dgl=True, format=fmt)
            print(f'DGL {dataset_name} {model_name} ({fmt.upper()}) on CPU: {time_cpu:.6f} seconds per iteration, {mem_cpu:.2f} MB peak memory')
            print(f'DGL {dataset_name} {model_name} ({fmt.upper()}) on GPU: {time_gpu:.6f} seconds per iteration, {mem_gpu:.2f} MB peak memory')

PyG Amazon Computers GCN (COO) on CPU: 0.173519 seconds per iteration, 7115.42 MB peak memory
PyG Amazon Computers GCN (COO) on GPU: 0.144428 seconds per iteration, 7103.64 MB peak memory
PyG Amazon Computers GCN (CSR) on CPU: 0.146450 seconds per iteration, 7103.64 MB peak memory
PyG Amazon Computers GCN (CSR) on GPU: 0.147744 seconds per iteration, 7103.64 MB peak memory
PyG Amazon Photo GCN (COO) on CPU: 0.064113 seconds per iteration, 7103.64 MB peak memory
PyG Amazon Photo GCN (COO) on GPU: 0.074729 seconds per iteration, 7103.64 MB peak memory
PyG Amazon Photo GCN (CSR) on CPU: 0.064957 seconds per iteration, 7103.64 MB peak memory
PyG Amazon Photo GCN (CSR) on GPU: 0.074434 seconds per iteration, 7103.64 MB peak memory
PyG Coauthor CS GCN (COO) on CPU: 0.240318 seconds per iteration, 7103.65 MB peak memory
PyG Coauthor CS GCN (COO) on GPU: 0.238295 seconds per iteration, 7103.65 MB peak memory
PyG Coauthor CS GCN (CSR) on CPU: 0.231548 seconds per iteration, 7103.65 MB peak memo

DGLError: There are 0-in-degree nodes in the graph, output for those nodes will be invalid. This is harmful for some applications, causing silent performance regression. Adding self-loop on the input graph by calling `g = dgl.add_self_loop(g)` will resolve the issue. Setting ``allow_zero_in_degree`` to be `True` when constructing this module will suppress the check and let the code run.