In [1]:
import torch
from torch_geometric.nn import GCNConv
from torch_geometric.datasets import Planetoid
import torch.nn.functional as F
import torch.nn as nn
from datetime import datetime

import itertools
import os
#from utils import *


os.environ["DGLBACKEND"] = "pytorch"

import time
import copy
import numpy as np
import matplotlib.pyplot as plt
import scipy.sparse as sp
import torch.nn.utils.prune as prune


In [2]:
import warnings
warnings.filterwarnings('ignore')
import sys, os
sys.path.append(os.path.abspath("../"))

import torch


In [3]:
dataset = Planetoid(root='/tmp/Cora', name='Cora')


In [34]:
def get_num_parameters(model: nn.Module, count_nonzero_only=False) -> int:
    """
    calculate the total number of parameters of model
    :param count_nonzero_only: only count nonzero weights
    """
    num_counted_elements = 0
    for param in model.parameters():
        if count_nonzero_only:
            num_counted_elements += param.count_nonzero()
        else:
            num_counted_elements += param.numel()
    return num_counted_elements


def get_model_size(model: nn.Module, data_width=32, count_nonzero_only=False) -> int:
    """
    calculate the model size in bits
    :param data_width: #bits per element
    :param count_nonzero_only: only count nonzero weights
    """
    return get_num_parameters(model, count_nonzero_only) * data_width

Byte = 8
KiB = 1024 * Byte
MiB = 1024 * KiB
GiB = 1024 * MiB



In [9]:


class GCN(torch.nn.Module):
    def __init__(self, num_features, num_classes):
        super(GCN, self).__init__()
        self.conv1 = GCNConv(num_features, 16)
        self.conv2 = GCNConv(16, num_classes)

    def forward(self, data):
        x, edge_index = data.x, data.edge_index

        x = self.conv1(x, edge_index)
        x = F.relu(x)
        x = F.dropout(x, training=self.training)
        x = self.conv2(x, edge_index)

        return F.log_softmax(x, dim=1)
    
    
model = GCN(dataset.num_features, dataset.num_classes)
optimizer = torch.optim.Adam(model.parameters(), lr=0.01, weight_decay=5e-4)


In [5]:
def test():
   
        
    logits, accs = model(dataset[0]), []
    for _, mask in dataset[0]('train_mask', 'val_mask', 'test_mask'):
        pred = logits[mask].max(1)[1]
        acc = pred.eq(dataset[0].y[mask]).sum().item() / mask.sum().item()
        accs.append(acc)
    return accs  

In [12]:
for epoch in range(1, 100):
    model.train()
    optimizer.zero_grad()
    F.nll_loss(model(dataset[0]), dataset[0].y).backward()
    optimizer.step()
    
    logits= model(dataset[0])
    pred = logits[mask].max(1)[1]
    val_acc = pred.eq(dataset[0].y['val_mask']).sum().item() /dataset[0].y['val_mask'].sum().item()
    test_acc = pred.eq(dataset[0].y['test_mask']).sum().item() /dataset[0].y['test_mask'].sum().item()
    
   
    if epoch % 20 == 0:
        print(f'Epoch: {epoch:03d},  Val: {val_acc:.4f}, Test: {test_acc:.4f}')


KeyError: 0

In [6]:
for epoch in range(1, 100):
    model.train()
    optimizer.zero_grad()
    F.nll_loss(model(dataset[0]), dataset[0].y).backward()
    optimizer.step()
    
    
    
    
    train_acc, val_acc, test_acc = test()
    if epoch % 20 == 0:
        print(f'Epoch: {epoch:03d}, Train: {train_acc:.4f}, Val: {val_acc:.4f}, Test: {test_acc:.4f}')


Epoch: 020, Train: 0.8357, Val: 0.8300, Test: 0.8520
Epoch: 040, Train: 0.8857, Val: 0.9000, Test: 0.8990
Epoch: 060, Train: 0.9357, Val: 0.9200, Test: 0.9210
Epoch: 080, Train: 0.9643, Val: 0.9220, Test: 0.9180


In [7]:
 train_acc, val_acc, test_acc = test()

In [8]:
print( train_acc, val_acc, test_acc )

0.9571428571428572 0.938 0.926


In [None]:
http://localhost:8888/notebooks/GNN/Node-Classification/GCN-Torch-Geo/Node_Classificaton_GCN_Geometric.ipynb

In [15]:
import os.path as osp

import torch
import torch.nn.functional as F

import torch_geometric.transforms as T
from torch_geometric.datasets import Planetoid
from torch_geometric.nn import SplineConv
from torch_geometric.typing import WITH_TORCH_SPLINE_CONV

In [16]:
if not WITH_TORCH_SPLINE_CONV:
    quit("This example requires 'torch-spline-conv'")

In [19]:
dataset = 'Cora'
transform = T.Compose([
    T.RandomNodeSplit(num_val=500, num_test=500),
    T.TargetIndegree(),
])
path = osp.join( 'data', dataset)
dataset = Planetoid(path, dataset, transform=transform)
data = dataset[0]

Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.x
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.tx
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.allx
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.y
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.ty
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.ally
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.graph
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.test.index
Processing...
Done!


In [20]:
class Net(torch.nn.Module):
    def __init__(self):
        super().__init__()
        self.conv1 = SplineConv(dataset.num_features, 16, dim=1, kernel_size=2)
        self.conv2 = SplineConv(16, dataset.num_classes, dim=1, kernel_size=2)

    def forward(self):
        x, edge_index, edge_attr = data.x, data.edge_index, data.edge_attr
        x = F.dropout(x, training=self.training)
        x = F.elu(self.conv1(x, edge_index, edge_attr))
        x = F.dropout(x, training=self.training)
        x = self.conv2(x, edge_index, edge_attr)
        return F.log_softmax(x, dim=1)

In [21]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model, data = Net().to(device), data.to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=0.01, weight_decay=5e-3)

In [None]:
def train():
    model.train()
    optimizer.zero_grad()
    F.nll_loss(model()[data.train_mask], data.y[data.train_mask]).backward()
    optimizer.step()


@torch.no_grad()
def test():
    model.eval()
    log_probs, accs = model(), []
    for _, mask in data('train_mask', 'test_mask'):
        pred = log_probs[mask].max(1)[1]
        acc = pred.eq(data.y[mask]).sum().item() / mask.sum().item()
        accs.append(acc)
    return accs


for epoch in range(1, 201):
    train()
    train_acc, test_acc = test()
    print(f'Epoch: {epoch:03d}, Train: {train_acc:.4f}, Test: {test_acc:.4f}')

Epoch: 001, Train: 0.6382, Test: 0.6080
Epoch: 002, Train: 0.6347, Test: 0.5960
Epoch: 003, Train: 0.6446, Test: 0.5880
Epoch: 004, Train: 0.6926, Test: 0.6300
Epoch: 005, Train: 0.7664, Test: 0.7080
Epoch: 006, Train: 0.8513, Test: 0.7880
Epoch: 007, Train: 0.8975, Test: 0.8320
Epoch: 008, Train: 0.9081, Test: 0.8400
Epoch: 009, Train: 0.9133, Test: 0.8520
Epoch: 010, Train: 0.9145, Test: 0.8560
Epoch: 011, Train: 0.9174, Test: 0.8660
Epoch: 012, Train: 0.9215, Test: 0.8720
Epoch: 013, Train: 0.9274, Test: 0.8820
Epoch: 014, Train: 0.9297, Test: 0.8900
Epoch: 015, Train: 0.9315, Test: 0.8900
Epoch: 016, Train: 0.9350, Test: 0.8900
Epoch: 017, Train: 0.9391, Test: 0.8920
Epoch: 018, Train: 0.9479, Test: 0.8880
Epoch: 019, Train: 0.9502, Test: 0.8880
Epoch: 020, Train: 0.9532, Test: 0.8980
Epoch: 021, Train: 0.9567, Test: 0.9000
Epoch: 022, Train: 0.9608, Test: 0.9020
Epoch: 023, Train: 0.9637, Test: 0.9040
Epoch: 024, Train: 0.9637, Test: 0.9120
Epoch: 025, Train: 0.9649, Test: 0.9140


Epoch: 059, Train: 0.9848, Test: 0.9020
Epoch: 104, Train: 0.9830, Test: 0.9100

## Manual Measurement

In [7]:
import statistics as stat

sparsity=0.0
Eva_final=dict()


Base_model_accuracy=[]
T_base_model=[]
Num_parm_base_model=[]
Base_model_size=[]

Pruned_model_accuracy=[]
T_pruned_model=[]
Num_parm_pruned_model=[]
Pruned_model_size=[]

Pruned_finetune_model_accuracy=[]
T_pruned_finetune_model=[]
Num_parm_pruned_finetune_model=[]
Pruned_finetune_model_size=[]

In [20]:
T_pruned_finetune_model

[0.015624761581420898,
 0.015621662139892578,
 0.01562047004699707,
 0.015625953674316406,
 0.015632152557373047,
 0.015627384185791016,
 0.015624284744262695,
 0.015650033950805664,
 0.01562786102294922,
 0.015625]

In [21]:
Eva_final=dict()
base_model_accuracy_mean = stat.mean(Base_model_accuracy)
base_model_accuracy_std =  stat.stdev(Base_model_accuracy)
#desc = "{:.3f} ± {:.3f}".format(acc_mean,acc_std)

Eva_final.update({'base model accuracy':float(format(base_model_accuracy_mean, '.4f'))})
                 
t_base_model_mean =stat.mean(T_base_model)
#t_base_model_std =t_base_model.std()
#desc = "{:.3f} ± {:.3f}".format(acc_mean,acc_std)
Eva_final.update({'time inference of base model':float(format(t_base_model_mean, '.6f'))})

num_parm_base_model_mean = stat.mean(Num_parm_base_model)
#num_parm_base_model_std = num_parm_base_model.std()
#desc = "{:.3f} ± {:.3f}".format(acc_mean,acc_std)
Eva_final.update({'number parmameters of base model':num_parm_base_model_mean})

base_model_size_mean = stat.mean(Base_model_size)
#base_model_size_std = base_model_size.std()
#desc = "{:.3f} ± {:.3f}".format(acc_mean,acc_std)
Eva_final.update({'base_model_size':base_model_size_mean})

#################################

pruned_model_accuracy_mean =stat.mean(Pruned_model_accuracy)
pruned_model_accuracy_std = stat.stdev(Pruned_model_accuracy)
#desc = "{:.3f} ± {:.3f}".format(acc_mean,acc_std)
Eva_final.update({'pruned model accuracy':float(format(pruned_model_accuracy_mean, '.4f'))})
                 

t_pruned_model_mean = stat.mean(T_pruned_model)
#t_base_model_std =t_dence_model.std()
#desc = "{:.3f} ± {:.3f}".format(acc_mean,acc_std)
Eva_final.update({'time inference of pruned model':float(format(t_pruned_model_mean, '.6f'))})

num_parm_pruned_model_mean = stat.mean(Num_parm_pruned_model)
#num_parm_base_model_std = num_parm_base_model.std()
#desc = "{:.3f} ± {:.3f}".format(acc_mean,acc_std)
Eva_final.update({'number parmameters of pruned model':num_parm_pruned_model_mean})

pruned_model_size_mean =stat.mean( Pruned_model_size)
#base_model_size_std = base_model_size.std()
#desc = "{:.3f} ± {:.3f}".format(acc_mean,acc_std)
Eva_final.update({'pruned model size':pruned_model_size_mean})

#################################
pruned_finetune_model_accuracy_mean =stat.mean(Pruned_finetune_model_accuracy)
pruned_finetune_model_accuracy_std = stat.stdev(Pruned_finetune_model_accuracy)
#desc = "{:.3f} ± {:.3f}".format(acc_mean,acc_std)
Eva_final.update({'pruned finetune model accuracy':float(format(pruned_finetune_model_accuracy_mean, '.4f'))})
                 

t_pruned_finetune_model_mean =stat.mean(T_pruned_finetune_model)
#t_base_model_std =t_dence_model.std()
#desc = "{:.3f} ± {:.3f}".format(acc_mean,acc_std)
Eva_final.update({'time inference of pruned finetune model':float(format(t_pruned_finetune_model_mean,'.6f'))})

num_parm_pruned_finetune_model_mean =stat.mean(Num_parm_pruned_finetune_model)
#num_parm_base_model_std = num_parm_base_model.std()
#desc = "{:.3f} ± {:.3f}".format(acc_mean,acc_std)
Eva_final.update({'number parmameters of pruned finetune model':num_parm_pruned_finetune_model_mean})

pruned_finetune_model_size_mean = stat.mean(Pruned_finetune_model_size)
#base_model_size_std = base_model_size.std()
#desc = "{:.3f} ± {:.3f}".format(acc_mean,acc_std)
Eva_final.update({'pruned finetune model size':pruned_finetune_model_size_mean})


#################################


print(f"All measurement about pruning process of sparsity:{sparsity*100}% ")   
Eva_final

All measurement about pruning process of sparsity:0.0% 


{'base model accuracy': 0.9488,
 'time inference of base model': 0.013151,
 'number parmameters of base model': 23063,
 'base_model_size': 738016,
 'pruned model accuracy': 0.9488,
 'time inference of pruned model': 0.014715,
 'number parmameters of pruned model': 23063,
 'pruned model size': 738016,
 'pruned finetune model accuracy': 0.9668,
 'time inference of pruned finetune model': 0.015628,
 'number parmameters of pruned finetune model': 23063,
 'pruned finetune model size': 738016}

In [22]:
Cora_Node_00={'base model accuracy': 0.9488,
 'time inference of base model': 0.013151,
 'number parmameters of base model': 23063,
 'base_model_size': 738016,
 'pruned model accuracy': 0.9488,
 'time inference of pruned model': 0.014715,
 'number parmameters of pruned model': 23063,
 'pruned model size': 738016,
 'pruned finetune model accuracy': 0.9668,
 'time inference of pruned finetune model': 0.015628,
 'number parmameters of pruned finetune model': 23063,
 'pruned finetune model size': 738016}