In [1]:
import torch
print(torch.__version__)

1.10.0


In [2]:
#First install dependences to pytorch geometric
"""
pip install torch-scatter torch-sparse torch-cluster torch-spline-conv torch-geometric-f https://data.pyg.org/whl/torch-1.10.0+cu113.html

https://pytorch-geometric.readthedocs.io/en/latest/notes/installation.html
"""

'\npip install torch-scatter torch-sparse torch-cluster torch-spline-conv torch-geometric-f https://data.pyg.org/whl/torch-1.10.0+cu113.html\n\nhttps://pytorch-geometric.readthedocs.io/en/latest/notes/installation.html\n'

In [3]:
# pip install torch-scatter -f https://data.pyg.org/whl/torch-1.10.0.html
# pip install torch-sparse -f https://data.pyg.org/whl/torch-1.10.0.html
# pip install torch-geometric

In [4]:
#Importing the librarys
import torch
from torch_geometric.data import Data

https://pytorch-geometric.readthedocs.io/en/latest/notes/introduction.html

![graph.svg](attachment:graph.svg)

In [5]:
"""
Represents the edges at the graph above
where first row represent one way to conecte(like 0 to 1)
and second row the 'back' with (1 to 0).
"""
edge_index = torch.tensor([[0, 1, 1, 2],
                           [1, 0, 2, 1]], dtype=torch.long)

edge_index

tensor([[0, 1, 1, 2],
        [1, 0, 2, 1]])

In [6]:
"""
We can write the edge index like a tensor off pairs
using edge_index.contiguous()
"""
edge_index.t().contiguous()

tensor([[0, 1],
        [1, 0],
        [1, 2],
        [2, 1]])

In [7]:
"""
X represetns the value of some metric to each node
following the numeric order of node (0,1,2 like in the example)
"""
x = torch.tensor([[-1], [0], [1]], dtype=torch.float)
x

tensor([[-1.],
        [ 0.],
        [ 1.]])

In [8]:
data = Data(x=x, edge_index=edge_index.t().contiguous())
data

Data(x=[3, 1], edge_index=[4, 2])

In [9]:
"""
Some tips of commands that help us to verify if the graph is correctly contructed
and how send this data to GPU for training (in future)
"""
print(f"Number of nodes at the graph: {data.num_nodes}")

print(f"Number of edges at the graph: {data.num_edges}")

print(f"Number of features (metrics) at the graph {data.num_node_features}")

# Transfer data object to GPU.
device = torch.device('cuda')
#Come back data to CPU
data = data.to('cpu')

Number of nodes at the graph: 3
Number of edges at the graph: 2
Number of features (metrics) at the graph 1


### Example with Protein dataset

In [10]:
from torch_geometric.datasets import TUDataset
# https://chrsmrrs.github.io/datasets/

In [11]:
#Load the enzine dataset that are compost with 600 graphs with 6 classes
dataset = TUDataset(root='/tmp/ENZYMES', name='ENZYMES')

In [12]:
print(f"Size of dataset: {len(dataset)}")
print(f"Number of classes: {dataset.num_classes}")
print(f"Features per node: {dataset.num_node_features}")

Size of dataset: 600
Number of classes: 6
Features per node: 3


In [111]:
"""
Acessing the first graph at the dataset we can see that have
84 undirect edges (168/2), with 33 nodes where each node gave
3 feature value and the classe of this graph is 1 (y value)
"""
dataset[0]

Data(edge_index=[2, 128], x=[33, 3], y=[1])

### Separing data in training and test

In [14]:
"""
Setting the training set with 90% of data and test with 10%
"""
#How much samples will have in traing
numb_traing_samples = int(len(dataset) * 0.9)

#Shuffling the dataset
dataset = dataset.shuffle()

#Defining how are the traing
train_dataset = dataset[:numb_traing_samples]

#And choosen the test
test_dataset = dataset[numb_traing_samples:]

## Creating DataLoader

In [15]:
from torch_geometric.loader import DataLoader

In [67]:
"""
Dataloader is the structure responsable to generate the batchs for network,
when called return a set of graphs (the batch size)
"""
loader = DataLoader(train_dataset, batch_size=32, shuffle=True)

In [68]:
#Way to see what return at each interaction in dataloader
#The variable batch shows the index of each node at the batch
next(iter(loader))

DataBatch(edge_index=[2, 3912], x=[1002, 3], y=[32], batch=[1002], ptr=[33])

In [69]:
print(f"Each batch have: {int(3912/2)} edges, 1002 nodes with 3 features each")

Each batch have: 1956 edges, 1002 nodes with 3 features each


## Network (GCN)

In [19]:
import torch
import torch.nn.functional as F
from torch_geometric.nn import GCNConv

In [28]:
"""
Define the GCN convolutional network, with 2 convolutional layers
with activation ReLU and softmax activation for total of classes.
"""
class GCN(torch.nn.Module):
    def __init__(self):
        super().__init__()
        self.conv1 = GCNConv(train_dataset.num_node_features, 16)
        self.conv2 = GCNConv(16, train_dataset.num_classes)

    def forward(self, data):
        x, edge_index, batch = data.x, data.edge_index, data.batch

        x = self.conv1(x, edge_index)
        x = F.relu(x)
        x = F.dropout(x, training=self.training)
        x = self.conv2(x, edge_index)

        return F.log_softmax(x, dim=1)

In [76]:
#If you have a GPU and cuda installed this part set to use GPU in training
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = GCN().to(device)
data = loader

#Select how optimizer will be used to adjust the network weights
optimizer = torch.optim.Adam(model.parameters(), lr=0.01, weight_decay=5e-4)

In [None]:
https://towardsdatascience.com/hands-on-graph-neural-networks-with-pytorch-pytorch-geometric-359487e221a8

In [109]:
train_dataset.num_node_features

3

In [135]:
from torch_geometric.nn import global_mean_pool as gap, global_max_pool as gmp

In [136]:
a.x

tensor([[1., 0., 0.],
        [1., 0., 0.],
        [1., 0., 0.],
        ...,
        [0., 1., 0.],
        [0., 1., 0.],
        [0., 1., 0.]])

In [137]:
a.batch

tensor([ 0,  0,  0,  ..., 31, 31, 31])

In [146]:
torch.cat([gmp(a.x, a.batch), gap(a.x, a.batch)],dim=1)

tensor([[1.0000, 1.0000, 0.0000, 0.4444, 0.5556, 0.0000],
        [1.0000, 1.0000, 0.0000, 0.4750, 0.5250, 0.0000],
        [1.0000, 1.0000, 0.0000, 0.3448, 0.6552, 0.0000],
        [1.0000, 0.0000, 0.0000, 1.0000, 0.0000, 0.0000],
        [1.0000, 1.0000, 0.0000, 0.4872, 0.5128, 0.0000],
        [1.0000, 1.0000, 0.0000, 0.5227, 0.4773, 0.0000],
        [1.0000, 1.0000, 0.0000, 0.3793, 0.6207, 0.0000],
        [1.0000, 1.0000, 0.0000, 0.4615, 0.5385, 0.0000],
        [1.0000, 1.0000, 0.0000, 0.4800, 0.5200, 0.0000],
        [1.0000, 1.0000, 0.0000, 0.6061, 0.3939, 0.0000],
        [1.0000, 1.0000, 0.0000, 0.2727, 0.7273, 0.0000],
        [1.0000, 1.0000, 0.0000, 0.3333, 0.6667, 0.0000],
        [1.0000, 1.0000, 0.0000, 0.7059, 0.2941, 0.0000],
        [1.0000, 0.0000, 0.0000, 1.0000, 0.0000, 0.0000],
        [1.0000, 1.0000, 0.0000, 0.5385, 0.4615, 0.0000],
        [1.0000, 1.0000, 0.0000, 0.5556, 0.4444, 0.0000],
        [1.0000, 1.0000, 0.0000, 0.5714, 0.4286, 0.0000],
        [1.000

In [None]:
train_dataset[539].x.shape[0]

In [81]:
out = model(next(iter(loader)).to(device))

In [125]:
out.shape

torch.Size([1021, 6])

In [126]:
a.y.shape

torch.Size([32])

In [66]:
train_dataset[0]

Data(edge_index=[2, 128], x=[33, 3], y=[1])

In [105]:
#Set the model in training model (use the forward function of the class)
#and adjust the weights value after each epoch
model.train()

for data in loader:
    #Inforns that optimizer need a gradient values adjust
    optimizer.zero_grad()
    #Get the result of network (softmax output)
    out = model(data.to(device))
    #Calculate the loss
    loss = F.nll_loss(out, data.y)
    #Do the backpropragation (adjusting the weight of layers)
    loss.backward()
    #Set the new weights and got to next step
    optimizer.step()

ValueError: Expected input batch_size (1021) to match target batch_size (32).