In [1]:
import torch
import torch.nn.functional as F
from torch_geometric.datasets import EllipticBitcoinDataset
from torch_geometric.nn import SAGEConv

In [2]:
# Enable torch to use mps
device = torch.device('mps' if torch.backends.mps.is_available() else 'cpu')
print(f'Using device: {device}')

Using device: mps


In [3]:
dataset = EllipticBitcoinDataset(root='data/Elliptic')

In [4]:
print(f'Number of graphs: {len(dataset)}')
print(f'Number of classes: {dataset.num_classes}')
print(f'Number of node features: {dataset.num_node_features}')
print(f'Number of edge features: {dataset.num_edge_features}')

Number of graphs: 1
Number of classes: 2
Number of node features: 165
Number of edge features: 0


In [5]:
data = dataset[0]
print(data)

Data(x=[203769, 165], edge_index=[2, 234355], y=[203769], train_mask=[203769], test_mask=[203769])


In [6]:
# Print x shape, edge_index shape, y shape
print(f'Node features shape: {data.x.shape}')
print(f'Edge index shape: {data.edge_index.shape}')
print(f'Labels shape: {data.y.shape}')
print(f'Number of training nodes: {data.train_mask.sum().item()}')
print(f'Number of test nodes: {data.test_mask.sum().item()}')
# NOTE: The graph has 203769 nodes and 234355 directed edges.

Node features shape: torch.Size([203769, 165])
Edge index shape: torch.Size([2, 234355])
Labels shape: torch.Size([203769])
Number of training nodes: 29894
Number of test nodes: 16670


In [7]:
class Net(torch.nn.Module):
    def __init__(self, hidden_channels=128, dropout=0.5, layer_kwargs=None):
        super(Net, self).__init__()
        self.conv1 = SAGEConv(dataset.num_node_features, hidden_channels, **(layer_kwargs or {}))
        self.conv2 = SAGEConv(hidden_channels, hidden_channels, **(layer_kwargs or {}))
        self.conv3 = SAGEConv(hidden_channels, dataset.num_classes, **(layer_kwargs or {}))
        self.dropout = dropout
        self.bn1 = torch.nn.BatchNorm1d(hidden_channels)
        self.bn2 = torch.nn.BatchNorm1d(hidden_channels)
    
    def forward(self, data):
        x, edge_index = data.x, data.edge_index
        
        x = self.conv1(x, edge_index)
        x = self.bn1(x)
        x = F.relu(x)
        x = F.dropout(x, p=self.dropout, training=self.training)
        
        x = self.conv2(x, edge_index)
        x = self.bn2(x)
        x = F.relu(x)
        x = F.dropout(x, p=self.dropout, training=self.training)
        
        x = self.conv3(x, edge_index)
        return F.log_softmax(x, dim=1)

data = dataset[0].to(device)
model = Net(hidden_channels=256, dropout=0.3, layer_kwargs={'aggr': 'mean', 'normalize': True}).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=0.005, weight_decay=1e-4)
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', patience=20, factor=0.5)

In [8]:
# Since the dataset is imbalanced, we use a weighted loss function
freq = data.y[data.train_mask].bincount()
weights = (1.0 / freq.float())
loss_fn = torch.nn.NLLLoss(weight=weights.to(device))

In [9]:
def train():
    model.train()
    optimizer.zero_grad()
    out = model(data)
    loss = loss_fn(out[data.train_mask], data.y[data.train_mask])
    loss.backward()
    optimizer.step()
    return loss.item()

def test():
    model.eval()
    out = model(data)
    pred = out.argmax(dim=1)
    accs, f1s = [], []
    for mask in [data.train_mask, data.test_mask]:
        # Compute accuracy
        true = data.y[mask]
        predicted = pred[mask]
        correct = predicted == true
        accs.append(int(correct.sum()) / int(mask.sum()))

        # Compute F1 score
        tp = ((predicted == 1) & (true == 1)).sum().item()
        tn = ((predicted == 0) & (true == 0)).sum().item()
        fp = ((predicted == 1) & (true == 0)).sum().item()
        fn = ((predicted == 0) & (true == 1)).sum().item()
        precision = tp / (tp + fp) if (tp + fp) > 0 else 0
        recall = tp / (tp + fn) if (tp + fn) > 0 else 0
        f1 = 2 * (precision * recall) / (precision + recall) if (precision + recall) > 0 else 0
        f1s.append(f1)
    return accs, f1s

In [10]:
for epoch in range(1, 801):
    loss = train()
    scheduler.step(loss)
    if epoch % 10 == 0:
        (train_acc, test_acc), (train_f1, test_f1) = test()
        print(f'Epoch: {epoch:03d}\tLoss: {loss:.4f}\tLR: {optimizer.param_groups[0]["lr"]:.6f}\t'
              f'Train Acc: {train_acc:.4f}\tTest Acc: {test_acc:.4f}\t'
              f'Train F1: {train_f1:.4f}\tTest F1: {test_f1:.4f}')

Epoch: 010	Loss: 0.3341	LR: 0.005000	Train Acc: 0.9466	Test Acc: 0.9023	Train F1: 0.7483	Test F1: 0.4609
Epoch: 020	Loss: 0.3046	LR: 0.005000	Train Acc: 0.9510	Test Acc: 0.9403	Train F1: 0.7477	Test F1: 0.4674
Epoch: 030	Loss: 0.2889	LR: 0.005000	Train Acc: 0.9612	Test Acc: 0.9446	Train F1: 0.8081	Test F1: 0.4643
Epoch: 040	Loss: 0.2788	LR: 0.005000	Train Acc: 0.9775	Test Acc: 0.9487	Train F1: 0.8999	Test F1: 0.5406
Epoch: 050	Loss: 0.2711	LR: 0.005000	Train Acc: 0.9815	Test Acc: 0.9332	Train F1: 0.9203	Test F1: 0.5198
Epoch: 060	Loss: 0.2678	LR: 0.005000	Train Acc: 0.9816	Test Acc: 0.9371	Train F1: 0.9232	Test F1: 0.5502
Epoch: 070	Loss: 0.2617	LR: 0.005000	Train Acc: 0.9820	Test Acc: 0.9254	Train F1: 0.9256	Test F1: 0.5028
Epoch: 080	Loss: 0.2619	LR: 0.005000	Train Acc: 0.9801	Test Acc: 0.9212	Train F1: 0.9183	Test F1: 0.5231
Epoch: 090	Loss: 0.2576	LR: 0.005000	Train Acc: 0.9791	Test Acc: 0.9306	Train F1: 0.9152	Test F1: 0.5602
Epoch: 100	Loss: 0.2579	LR: 0.005000	Train Acc: 0.9843	