In [1]:
import torch
from torch_geometric.data import Data
from torch_geometric.loader import DataLoader
from torch_geometric.data import InMemoryDataset

In [2]:
class MyDataset(InMemoryDataset):
    def __init__(self, root, data_list, transform=None):
        self.data_list = data_list
        super().__init__(root, transform)
        self.data, self.slices = torch.load(self.processed_paths[0])

    @property
    def processed_file_names(self):
        return 'data.pt'

    def process(self):
        torch.save(self.collate(self.data_list), self.processed_paths[0])

In [3]:
mydataset = MyDataset('./data',None)

In [4]:
print(f'Dataset: {mydataset}:')
print('====================')
print(f'Number of graphs: {len(mydataset)}')
print(f'Number of node features: {mydataset.num_features}')
print(f'Number of edge features: {mydataset.num_edge_features}')
print(f'Number of classes: {mydataset.num_classes}')

Dataset: MyDataset(924):
Number of graphs: 924
Number of node features: 1
Number of edge features: 5
Number of classes: 2


In [5]:
train_loader = DataLoader(mydataset, batch_size=16, shuffle=True)
test_loader = DataLoader(mydataset, batch_size=16, shuffle=False)

In [6]:
from typing import Tuple, Union

import torch
import torch.nn.functional as F
from torch import Tensor
from torch.nn import BatchNorm1d, Linear

from torch_geometric.nn.conv import MessagePassing
from torch_geometric.typing import Adj, OptTensor, PairTensor


class CGConv(MessagePassing):

    def __init__(self, channels: Union[int, Tuple[int, int]], dim: int = 0,
                 aggr: str = 'add', batch_norm: bool = False,
                 bias: bool = True, **kwargs):
        super().__init__(aggr=aggr, **kwargs)
        # print(channels)
        self.channels = channels
        self.dim = dim
        self.batch_norm = batch_norm

        if isinstance(channels, int):
            channels = (channels, channels)
        self.lin_f = Linear(2*channels[0] + dim, channels[1], bias=bias)
        # self.lin_s = Linear(2*channels[0] + dim, channels[1], bias=bias)
        if batch_norm:
            self.bn = BatchNorm1d(channels[1])
        else:
            self.bn = None

        self.reset_parameters()


    def reset_parameters(self):
        self.lin_f.reset_parameters()
        # self.lin_s.reset_parameters()
        if self.bn is not None:
            self.bn.reset_parameters()


    def forward(self, x: Union[Tensor, PairTensor], edge_index: Adj,
                edge_attr: OptTensor = None) -> Tensor:
        """"""
        if isinstance(x, Tensor):
            x: PairTensor = (x, x)

        # propagate_type: (x: PairTensor, edge_attr: OptTensor)
        out = self.propagate(edge_index, x=x, edge_attr=edge_attr, size=None)
        out = out if self.bn is None else self.bn(out)
        out += x[1]
        # print(out.shape)
        # print(x[1])
        return out


    def message(self, x_i, x_j, edge_attr: OptTensor) -> Tensor:
        if edge_attr is None:
            z = torch.cat([x_i, x_j], dim=-1)
        else:
            z = torch.cat([x_i, x_j, edge_attr], dim=-1)

        # print(z.shape)
        # print(self.lin_f)
        return self.lin_f(z)

    def __repr__(self) -> str:
        return f'{self.__class__.__name__}({self.channels}, dim={self.dim})'

In [8]:
from torch.nn import Linear
import torch.nn.functional as F
from torch_geometric.nn import GCNConv
from torch_geometric.nn import global_mean_pool

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

class GCN(torch.nn.Module):
    def __init__(self, hidden_channels):
        super(GCN, self).__init__()
#         torch.manual_seed(12345)
        self.conv1 = CGConv((mydataset.num_node_features, hidden_channels),mydataset.num_edge_features,batch_norm=True)
        self.conv2 = GCNConv(hidden_channels, hidden_channels)
        self.conv3 = CGConv((hidden_channels, hidden_channels),mydataset.num_edge_features)
        self.conv4 = GCNConv(hidden_channels, hidden_channels)
        self.lin = Linear(hidden_channels, mydataset.num_classes)

    def forward(self, x, edge_index, edge_attrib,batch):
        # 1. Obtain node embeddings 
        x = self.conv1(x, edge_index,edge_attrib)
        x = x.relu()
        x = self.conv2(x, edge_index)
        x = x.relu()
        x = self.conv3(x, edge_index,edge_attrib)
        x = x.relu()
        x = self.conv4(x, edge_index)
 

        # 2. Readout layer
        x = global_mean_pool(x, batch)  # [batch_size, hidden_channels]

        # 3. Apply a final classifier
#         x = F.dropout(x, p=0.5, training=self.training)
        x = self.lin(x)
        
        return x

model = GCN(hidden_channels=64)
print(model)

GCN(
  (conv1): CGConv((1, 64), dim=5)
  (conv2): GCNConv(64, 64)
  (conv3): CGConv((64, 64), dim=5)
  (conv4): GCNConv(64, 64)
  (lin): Linear(in_features=64, out_features=2, bias=True)
)


In [9]:
data =  next(iter(train_loader))

In [10]:
data

DataBatch(x=[1006435, 1], edge_index=[2, 1592636], edge_attr=[1592636, 5], y=[16], batch=[1006435], ptr=[17])

In [7]:
model = GCN(hidden_channels=64).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
criterion = torch.nn.CrossEntropyLoss()

NameError: name 'GCN' is not defined

In [11]:
count = 0
def train():
    model.train()
    count = 0
    for data in train_loader:  # Iterate in batches over the training dataset.
        if data.x.shape[0]==0:
            count+=1
            continue
        data.edge_attr = data.edge_attr.float() 
        data = data.to(device)
#         out = model(data.x, data.edge_index, data.batch)
#         print(data.shape)
        out = model(data.x, data.edge_index,data.edge_attr, data.batch)  # Perform a single forward pass.
#         print(out.shape)
#         break
        if out.shape[0] != data.y.shape[0]:
            count+=1
#             print((out.shape,data.y.shape))
            continue
#         print((out.shape[0],data.y.shape[0]))
#         break
        loss = criterion(out, data.y)  # Compute the loss.
        loss.backward()  # Derive gradients.
        optimizer.step()  # Update parameters based on gradients.
        optimizer.zero_grad()  # Clear gradients.
#     print(count)
    return loss.cpu().item()
            
def test(loader):
    model.eval()

    correct = 0
    for data in loader:  # Iterate in batches over the training/test dataset.
        if data.x.shape[0]==0:
            continue
        data.edge_attr = data.edge_attr.float() 
        data = data.to(device)
        out = model(data.x, data.edge_index,data.edge_attr, data.batch)  
        pred = out.argmax(dim=1)  # Use the class with highest probability.
        if pred.shape[0]!=data.y.shape[0]:
            continue
        correct += int((pred == data.y).sum())  # Check against ground-truth labels.
    return correct / len(loader.dataset)  # Derive ratio of correct predictions.
train_acc = test(test_loader)
print(train_acc)

0.34415584415584416


In [14]:

for epoch in range(1, 200):
    loss = train()
    if epoch%5==0:
        train_acc = test(test_loader)
#     test_acc = test(test_loader)
        print(f'Epoch: {epoch:03d}, Train Acc: {train_acc:.4f}, loss : {loss:.4f}')

Epoch: 005, Train Acc: 0.5087, loss : 0.4118
Epoch: 010, Train Acc: 0.5184, loss : 0.7804
Epoch: 015, Train Acc: 0.5281, loss : 0.6356
Epoch: 020, Train Acc: 0.5758, loss : 0.2986
Epoch: 025, Train Acc: 0.5649, loss : 1.3397
Epoch: 030, Train Acc: 0.5866, loss : 0.2545
Epoch: 035, Train Acc: 0.5768, loss : 0.2845
Epoch: 040, Train Acc: 0.5628, loss : 0.2956
Epoch: 045, Train Acc: 0.5898, loss : 0.1581
Epoch: 050, Train Acc: 0.5942, loss : 0.1718
Epoch: 055, Train Acc: 0.5866, loss : 0.2280
Epoch: 060, Train Acc: 0.5931, loss : 0.2670
Epoch: 065, Train Acc: 0.5974, loss : 0.2081
Epoch: 070, Train Acc: 0.5909, loss : 0.3082
Epoch: 075, Train Acc: 0.5952, loss : 0.1909
Epoch: 080, Train Acc: 0.5855, loss : 0.2110
Epoch: 085, Train Acc: 0.5974, loss : 0.3749
Epoch: 090, Train Acc: 0.5887, loss : 0.6457
Epoch: 095, Train Acc: 0.5758, loss : 0.3755
Epoch: 100, Train Acc: 0.6071, loss : 0.2431
Epoch: 105, Train Acc: 0.5920, loss : 0.1262
Epoch: 110, Train Acc: 0.5996, loss : 0.1372
Epoch: 115

KeyboardInterrupt: 

In [15]:
from sklearn.metrics import confusion_matrix

In [18]:
def test(loader):
    m=model.eval().cpu()    
    ground_lb=[]
    pred_lb=[]
    correct = 0
    for data in loader:  # Iterate in batches over the training/test dataset.
        if data.x.shape[0]==0:
            continue
        data.edge_attr = data.edge_attr.float() 
        
        out = model(data.x, data.edge_index,data.edge_attr, data.batch)  
        pred = out.argmax(dim=1)  # Use the class with highest probability.
        
        ground_lb.append(data.y)
        pred_lb.append(pred)
        
        
    return (ground_lb,pred_lb)

In [19]:
data = test(test_loader)

In [23]:
gr = torch.concat(data[0])
pr = torch.concat(data[1])

In [24]:
confusion_matrix(gr,pr)

ValueError: Found input variables with inconsistent numbers of samples: [876, 760]