# Data handling of graphs
A graph is used to model pairwise relations (edges) between objects (nodes). A single graph in PyTorch Geometric is described by an instance of torch_geometric.data.Data, which holds the following attributes by default:

- data.x: Node feature matrix with shape [num_nodes, num_node_features]
- data.edge_index: Graph connectivity in COO format with shape [2, num_edges] and type torch.long
- data.edge_attr: Edge feature matrix with shape [num_edges, num_edge_features]
- data.y: Target to train against (may have arbitrary shape)
- data.pos: Node position matrix with shape [num_nodes, num_dimensions]

None of these attributes is required. In fact, the Data object is not even restricted to these attributes. We can, e.g., extend it by data.face to save the connectivity of triangles from a 3D mesh in a tensor with shape [3, num_faces] and type torch.long.

In [3]:
import torch
from torch_geometric.data import Data

edge_index = torch.tensor([[0, 1, 1, 2],
                           [1, 0, 2, 1]], dtype=torch.long)
x = torch.tensor([[-1], [0], [1]], dtype=torch.float)

data = Data(x=x, edge_index=edge_index)

print(data)

Data(edge_index=[2, 4], x=[3, 1])


In [4]:
# when edge_index defined as a list of tuples (closer to math notation)
# then use .contiguous()
import torch
from torch_geometric.data import Data

edge_index = torch.tensor([[0, 1],
                           [1, 0],
                           [1, 2],
                           [2, 1]], dtype=torch.long)
x = torch.tensor([[-1], [0], [1]], dtype=torch.float)

data = Data(x=x, edge_index=edge_index.t().contiguous())
print(data)

Data(edge_index=[2, 4], x=[3, 1])


In [17]:
# utility functions
print(data.keys)
print(data['x'])
for key,item in data:
    print('{} found in data'.format(key))
print(data.num_nodes)
print(data.num_edges)
print(data.num_features)
print(data.contains_isolated_nodes())
print(data.contains_self_loops())
print(data.is_directed())
device = torch.device('cuda')
data = data.to(device)

['x', 'edge_index']
tensor([[-1.],
        [ 0.],
        [ 1.]])
edge_index found in data
x found in data
3
4
1
False
False
False


###  Common benchmark datasets

In [18]:
# initialization of a dataset (download and process)
from torch_geometric.datasets import TUDataset

dataset = TUDataset(root='/tmp/ENZYMES', name='ENZYMES')
print(dataset)
print(len(dataset))
print(dataset.num_classes)
print(dataset.num_features)

Downloading https://ls11-www.cs.uni-dortmund.de/people/morris/graphkerneldatasets/ENZYMES.zip
Extracting /tmp/ENZYMES/ENZYMES.zip
Processing...
Done!
ENZYMES(600)
600
6
3


In [23]:
print(dataset[0])
print(data.is_undirected())

Data(edge_index=[2, 168], x=[37, 3], y=[1])
True


In [29]:
# using slices
train_dataset = dataset[:540]
print(train_dataset)
test_dataset = dataset[540:]
print(test_dataset)

# using shuffle
dataset = dataset.shuffle()
print(dataset)
# same as
perm = torch.randperm(len(dataset))
dataset = dataset[perm]
print(dataset)

# splitting dataset into train/test
train_dataset = dataset[60:]
test_dataset = dataset[:60]
print(train_dataset)
print(test_dataset)

ENZYMES(540)
ENZYMES(60)
ENZYMES(600)
ENZYMES(600)
ENZYMES(540)
ENZYMES(60)


In [30]:
# Cora dataset
from torch_geometric.datasets import Planetoid

dataset = Planetoid(root='/tmp/Cora', name='Cora')
print(dataset)
print(len(dataset))
print(dataset.num_classes)
print(dataset.num_features)

data = dataset[0]
print(data)
print(data.is_undirected())
print(data.train_mask.sum())
print(data.val_mask.sum())
print(data.test_mask.sum())

Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.x
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.tx
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.allx
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.y
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.ty
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.ally
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.graph
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.test.index
Processing...
Done!
Cora()
1
7
1433
Data(edge_index=[2, 10556], test_mask=[2708], train_mask=[2708], val_mask=[2708], x=[2708, 1433], y=[2708])
True
tensor(140)
tensor(500)
tensor(1000)


### Mini-batches

In [32]:
from torch_geometric.datasets import TUDataset
from torch_geometric.data import DataLoader

dataset = TUDataset(root='/tmp/ENZYMES', name='ENZYMES')
loader = DataLoader(dataset, batch_size=32, shuffle=True)

for batch in loader:
    print(batch)
    print(batch.num_graphs)
    

Batch(batch=[1112], edge_index=[2, 3986], x=[1112, 3], y=[32])
32
Batch(batch=[940], edge_index=[2, 3662], x=[940, 3], y=[32])
32
Batch(batch=[1154], edge_index=[2, 4234], x=[1154, 3], y=[32])
32
Batch(batch=[1141], edge_index=[2, 4374], x=[1141, 3], y=[32])
32
Batch(batch=[1061], edge_index=[2, 4058], x=[1061, 3], y=[32])
32
Batch(batch=[927], edge_index=[2, 3730], x=[927, 3], y=[32])
32
Batch(batch=[1064], edge_index=[2, 4078], x=[1064, 3], y=[32])
32
Batch(batch=[1041], edge_index=[2, 3992], x=[1041, 3], y=[32])
32
Batch(batch=[1164], edge_index=[2, 4286], x=[1164, 3], y=[32])
32
Batch(batch=[848], edge_index=[2, 3280], x=[848, 3], y=[32])
32
Batch(batch=[1048], edge_index=[2, 4104], x=[1048, 3], y=[32])
32
Batch(batch=[1109], edge_index=[2, 4228], x=[1109, 3], y=[32])
32
Batch(batch=[1017], edge_index=[2, 3826], x=[1017, 3], y=[32])
32
Batch(batch=[1149], edge_index=[2, 3940], x=[1149, 3], y=[32])
32
Batch(batch=[961], edge_index=[2, 3660], x=[961, 3], y=[32])
32
Batch(batch=[999],

In [34]:
# usign batch : a column vector of graph identifiers 
# for al nodes of all graphs in the batch
from torch_scatter import scatter_mean
from torch_geometric.datasets import TUDataset
from torch_geometric.data import DataLoader

dataset = TUDataset(root='/tmp/ENZYMES', name='ENZYMES')
loader = DataLoader(dataset, batch_size=32, shuffle=True)

for data in loader:
    print(data)
    print(data.num_graphs)
    x = scatter_mean(data.x, data.batch, dim=0)
    print(x.size())
    

Batch(batch=[998], edge_index=[2, 3776], x=[998, 3], y=[32])
32
torch.Size([32, 3])
Batch(batch=[1110], edge_index=[2, 3982], x=[1110, 3], y=[32])
32
torch.Size([32, 3])
Batch(batch=[1060], edge_index=[2, 4032], x=[1060, 3], y=[32])
32
torch.Size([32, 3])
Batch(batch=[1133], edge_index=[2, 4132], x=[1133, 3], y=[32])
32
torch.Size([32, 3])
Batch(batch=[1019], edge_index=[2, 4070], x=[1019, 3], y=[32])
32
torch.Size([32, 3])
Batch(batch=[1010], edge_index=[2, 3964], x=[1010, 3], y=[32])
32
torch.Size([32, 3])
Batch(batch=[1080], edge_index=[2, 3990], x=[1080, 3], y=[32])
32
torch.Size([32, 3])
Batch(batch=[1168], edge_index=[2, 4208], x=[1168, 3], y=[32])
32
torch.Size([32, 3])
Batch(batch=[1053], edge_index=[2, 4112], x=[1053, 3], y=[32])
32
torch.Size([32, 3])
Batch(batch=[987], edge_index=[2, 3896], x=[987, 3], y=[32])
32
torch.Size([32, 3])
Batch(batch=[1010], edge_index=[2, 3994], x=[1010, 3], y=[32])
32
torch.Size([32, 3])
Batch(batch=[1087], edge_index=[2, 3954], x=[1087, 3], y=[

### Data Transforms

In [36]:
# transform get a graph as Data and return a graph
from torch_geometric.datasets import ShapeNet

dataset = ShapeNet(root='/tmp/ShapeNet', category='Airplane')

print(dataset[0])

Data(pos=[2518, 3], y=[2518])


In [43]:
import torch_geometric.transforms as T
from torch_geometric.datasets import ShapeNet

print(dir(T))

data = ShapeNet(root='/tmp/ShapeNet', category='Airplane',
                    pre_transform=T.KNNGraph(k=6))

print(data[0])

['AddSelfLoops', 'Cartesian', 'Center', 'Compose', 'Constant', 'Distance', 'FaceToEdge', 'KNNGraph', 'LinearTransformation', 'LocalCartesian', 'NormalizeFeatures', 'NormalizeScale', 'OneHotDegree', 'Polar', 'RadiusGraph', 'RandomFlip', 'RandomRotate', 'RandomScale', 'RandomShear', 'RandomTranslate', 'SamplePoints', 'Spherical', 'TargetIndegree', 'ToDense', 'TwoHop', '__all__', '__builtins__', '__cached__', '__doc__', '__file__', '__loader__', '__name__', '__package__', '__path__', '__spec__', 'add_self_loops', 'cartesian', 'center', 'compose', 'constant', 'distance', 'face_to_edge', 'knn_graph', 'linear_transformation', 'local_cartesian', 'normalize_features', 'normalize_scale', 'one_hot_degree', 'polar', 'radius_graph', 'random_flip', 'random_rotate', 'random_scale', 'random_shear', 'random_translate', 'sample_points', 'spherical', 'target_indegree', 'to_dense', 'two_hop']
Data(pos=[2518, 3], y=[2518])


### Learning methods on graphs

In [45]:
# example of GCN on Cora datasest

from torch_geometric.datasets import Planetoid

dataset = Planetoid(root='/tmp/Cora', name='Cora')

In [47]:
import torch
import torch.nn.functional as F
from torch_geometric.nn import GCNConv

class Net(torch.nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = GCNConv(dataset.num_features, 16)
        self.conv2 = GCNConv(16, dataset.num_classes)

    def forward(self, data):
        x, edge_index = data.x, data.edge_index

        x = self.conv1(x, edge_index)
        x = F.relu(x)
        x = F.dropout(x, training=self.training)
        x = self.conv2(x, edge_index)

        return F.log_softmax(x, dim=1)

In [51]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = Net().to(device)
data = dataset[0].to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=0.01, weight_decay=5e-4)

model.train()
for epoch in range(200):
   
    optimizer.zero_grad()
    out = model(data)
    loss = F.nll_loss(out[data.train_mask], data.y[data.train_mask])
    loss.backward()
    optimizer.step()
    print(epoch, loss)

0 tensor(1.9391, device='cuda:0', grad_fn=<NllLossBackward>)
1 tensor(1.8491, device='cuda:0', grad_fn=<NllLossBackward>)
2 tensor(1.7233, device='cuda:0', grad_fn=<NllLossBackward>)
3 tensor(1.5967, device='cuda:0', grad_fn=<NllLossBackward>)
4 tensor(1.4528, device='cuda:0', grad_fn=<NllLossBackward>)
5 tensor(1.3172, device='cuda:0', grad_fn=<NllLossBackward>)
6 tensor(1.2246, device='cuda:0', grad_fn=<NllLossBackward>)
7 tensor(1.0772, device='cuda:0', grad_fn=<NllLossBackward>)
8 tensor(0.9786, device='cuda:0', grad_fn=<NllLossBackward>)
9 tensor(0.8883, device='cuda:0', grad_fn=<NllLossBackward>)
10 tensor(0.7864, device='cuda:0', grad_fn=<NllLossBackward>)
11 tensor(0.6765, device='cuda:0', grad_fn=<NllLossBackward>)
12 tensor(0.6059, device='cuda:0', grad_fn=<NllLossBackward>)
13 tensor(0.5335, device='cuda:0', grad_fn=<NllLossBackward>)
14 tensor(0.4865, device='cuda:0', grad_fn=<NllLossBackward>)
15 tensor(0.4145, device='cuda:0', grad_fn=<NllLossBackward>)
16 tensor(0.3495, 

163 tensor(0.0273, device='cuda:0', grad_fn=<NllLossBackward>)
164 tensor(0.0346, device='cuda:0', grad_fn=<NllLossBackward>)
165 tensor(0.0181, device='cuda:0', grad_fn=<NllLossBackward>)
166 tensor(0.0389, device='cuda:0', grad_fn=<NllLossBackward>)
167 tensor(0.0256, device='cuda:0', grad_fn=<NllLossBackward>)
168 tensor(0.0375, device='cuda:0', grad_fn=<NllLossBackward>)
169 tensor(0.0299, device='cuda:0', grad_fn=<NllLossBackward>)
170 tensor(0.0290, device='cuda:0', grad_fn=<NllLossBackward>)
171 tensor(0.0337, device='cuda:0', grad_fn=<NllLossBackward>)
172 tensor(0.0189, device='cuda:0', grad_fn=<NllLossBackward>)
173 tensor(0.0271, device='cuda:0', grad_fn=<NllLossBackward>)
174 tensor(0.0277, device='cuda:0', grad_fn=<NllLossBackward>)
175 tensor(0.0295, device='cuda:0', grad_fn=<NllLossBackward>)
176 tensor(0.0304, device='cuda:0', grad_fn=<NllLossBackward>)
177 tensor(0.0298, device='cuda:0', grad_fn=<NllLossBackward>)
178 tensor(0.0549, device='cuda:0', grad_fn=<NllLossBac

In [49]:
model.eval()
_, pred = model(data).max(dim=1)
correct = pred[data.test_mask].eq(data.y[data.test_mask]).sum().item()
acc = correct / data.test_mask.sum().item()
print('Accuracy: {:.4f}'.format(acc))

Accuracy: 0.8060
