# Graph Convolutional Network with GeometricFlux.jl

This example is adapted from `examples/gcn_with_static_graph.jl` in GeometricFlux.jl. It is the first example that I tried to run with this package.

In [3]:
# Activate the project environment located in the current directory, so that the
# `using` statements in the following cells are resolved against it.
# NOTE(review): this only activates the environment, it does not install anything;
# presumably `Pkg.instantiate()` has to be run once beforehand — confirm.
using Pkg
Pkg.activate(".")

[32m[1m  Activating[22m[39m project at `~/src_nobackup/julia_ml_tests.jl.git/graph_neural_networks`


In [4]:
using CUDA
using Flux
using Flux: onehotbatch, onecold
using Flux.Losses: logitcrossentropy
using Flux.Data: DataLoader
using GeometricFlux
using GeometricFlux.Datasets
using Graphs
using GraphSignals
using Parameters: @with_kw
using ProgressMeter: Progress, next!
using Statistics
using Random

In [22]:
# Hyperparameters and run settings used by the cells below.

# -- optimisation --
η = 0.01             # learning rate
λ = 5f-4             # regularization parameter (Float32 literal)
epochs = 200         # number of epochs
batch_size = 64      # batch size

# -- reproducibility / hardware --
seed = 0             # random seed
cuda = true          # use GPU

# -- model dimensions --
input_dim = 1433     # input dimension
hidden_dim = 16      # hidden dimension
target_dim = 7       # target dimension

# -- data --
dataset = Cora       # dataset to train on

# Last expression of the cell: evaluate to `nothing` instead of the last assignment.
nothing

In [21]:
using GeometricFlux.Datasets

# Build shuffled mini-batch loaders from the first graph stored in `dataset`.
#
# Arguments
#   dataset        indexable collection; only `dataset[1]` is read
#                  (`edge_index`, `num_nodes`, `node_data`)
#   batch_size     `batchsize` handed to both `DataLoader`s
#   train_repeats  number of copies of the data stacked for the training loader
#   test_repeats   number of copies of the data stacked for the test loader
#
# Returns `(train_loader, test_loader, fg, train_idx, test_idx)`, where `fg` is the
# `FeaturedGraph` wrapping the graph topology, `train_idx` is the dataset's
# `train_mask` and `test_idx` is its `val_mask`.
function load_data(dataset, batch_size, train_repeats=512, test_repeats=32)
    graph_data = dataset[1]

    # Rebuild the topology edge by edge from the (source, target) index lists.
    sources, targets = graph_data.edge_index
    g = Graphs.Graph(graph_data.num_nodes)
    foreach(zip(sources, targets)) do (i, j)
        Graphs.add_edge!(g, i, j)
    end

    node_data = graph_data.node_data
    X = node_data.features
    y = onehotbatch(node_data.targets, 1:7)   # class labels are taken to be 1:7
    train_idx = node_data.train_mask
    test_idx = node_data.val_mask

    # Stack `n` identical copies of a matrix along a third dimension.
    stack_copies(a, n) = repeat(a, outer=(1, 1, n))

    # (train_X, train_y) dim: (num_features, target_dim) × 2708 × train_repeats
    train_X, train_y = stack_copies(X, train_repeats), stack_copies(y, train_repeats)
    # (test_X, test_y) dim: (num_features, target_dim) × 2708 × test_repeats
    test_X, test_y = stack_copies(X, test_repeats), stack_copies(y, test_repeats)

    fg = FeaturedGraph(g)
    train_loader = DataLoader((train_X, train_y); batchsize=batch_size, shuffle=true)
    test_loader = DataLoader((test_X, test_y); batchsize=batch_size, shuffle=true)
    return train_loader, test_loader, fg, train_idx, test_idx
end




UndefVarError: UndefVarError: `Planetoid` not defined

In [19]:
# Seed the global random number generator with `seed` for reproducibility.
Random.seed!(seed)

# Choose the device-transfer function: `gpu` only when `cuda` is requested AND
# `CUDA.has_cuda()` returns true; `cpu` in every other case.
device = (cuda && CUDA.has_cuda()) ? gpu : cpu
@info (device === gpu ? "Training on GPU" : "Training on CPU")

# Last expression of the cell: evaluate to `nothing`.
nothing

┌ Info: Training on CPU
└ @ Main /home/verlaan/src_nobackup/machine_learning_tests.git/geoflux_ex1.ipynb:10


In [20]:
# Dataset 

# Build shuffled loaders of `(subgraph, labels)` samples through the `Planetoid()`
# accessor functions (`traindata`, `testdata`, `graphdata`, `train_indices`,
# `test_indices`).
#
# Arguments
#   dataset        dataset identifier forwarded to the accessors (the caller passes `:cora`)
#   batch_size     `batchsize` handed to both `DataLoader`s
#   train_repeats  number of identical training samples generated
#   test_repeats   number of identical test samples generated
#
# Returns `(train_loader, test_loader)`.
# NOTE(review): the recorded output of this cell reports `Planetoid` as undefined, so
# this variant presumably targets a different GeometricFlux version — confirm.
function load_data(dataset, batch_size, train_repeats=256, test_repeats=32)
    # (train_X, train_y) dim: (num_features, target_dim) × 140
    train_X, train_y = map(Matrix, traindata(Planetoid(), dataset))
    # (test_X, test_y) dim: (num_features, target_dim) × 1000
    test_X, test_y = map(Matrix, testdata(Planetoid(), dataset))

    g = graphdata(Planetoid(), dataset)
    train_idx = train_indices(Planetoid(), dataset)
    test_idx = test_indices(Planetoid(), dataset)

    # One sample = the graph restricted to `idx`, carrying features `X`, plus labels `y`.
    # A fresh FeaturedGraph is built for every sample.
    make_sample(X, idx, y) = (subgraph(FeaturedGraph(g; nf=X), idx), y)

    train_samples = map(_ -> make_sample(train_X, train_idx, train_y), 1:train_repeats)
    test_samples = map(_ -> make_sample(test_X, test_idx, test_y), 1:test_repeats)

    train_loader = DataLoader(
        Flux.batch(train_samples); batchsize=batch_size, shuffle=true
    )
    test_loader = DataLoader(
        Flux.batch(test_samples); batchsize=batch_size, shuffle=true
    )
    return train_loader, test_loader
end


# Define a zero-argument `Planetoid` method inside `GeometricFlux.Datasets` that
# returns `PlanetoidDataset()`, so that the `Planetoid()` calls in `load_data` resolve.
# NOTE(review): the recorded output of this cell is still
# "UndefVarError: `Planetoid` not defined", so this shim does not make the cell run
# as-is — confirm which GeometricFlux version provides this API.
GeometricFlux.Datasets.Planetoid() = PlanetoidDataset()

# Load Cora through the Planetoid accessors; `load_data` returns the two loaders.
train_loader, test_loader = load_data(:cora, batch_size)

UndefVarError: UndefVarError: `Planetoid` not defined

In [None]:
# Build the model: GCNConv (input_dim → hidden_dim, relu), a `GraphParallel` stage whose
# `node_layer` is Dropout(0.5), GCNConv (hidden_dim → target_dim), and finally
# `node_feature`. The assembled chain is passed through `device`.
model = device(
    Chain(
        GCNConv(input_dim => hidden_dim, relu),
        GraphParallel(; node_layer=Dropout(0.5)),
        GCNConv(hidden_dim => target_dim),
        node_feature,
    ),
)

In [None]:


## Loss: cross entropy with first layer L2 regularization 
# Sum of the squared magnitudes of all elements of `x` (used as the L2 penalty term).
function l2norm(x)
    return sum(abs2, x)
end
# Accumulated loss over `batch`, an iterable of `(x, y)` pairs.
# For every pair the logit cross entropy of `model(x)` against `y` is added, followed by
# `λ` times the summed `l2norm` of the parameters of the first layer (`model[1]`).
# Note that the penalty is therefore added once per sample, not once per batch.
function model_loss(model, λ, batch)
    total = 0.0f0
    for (x, y) in batch
        data_term = logitcrossentropy(model(x), y)
        penalty = λ * sum(l2norm, Flux.params(model[1]))
        total = total + data_term
        total = total + penalty
    end
    return total
end

# Mean accuracy over `batch`, a vector of `(x, y)` samples.
# For each sample the prediction `onecold(softmax(model(x)))` is compared element-wise
# with `onecold(y)` (both moved through `cpu` first); the per-sample hit rates are averaged.
function accuracy(model, batch::AbstractVector)
    per_sample = (
        mean(onecold(softmax(cpu(model(x)))) .== onecold(cpu(y))) for (x, y) in batch
    )
    return mean(per_sample)
end

# Mean accuracy over all batches of `loader`; each batch is passed through `device`
# before being scored by the batch-level `accuracy` method.
function accuracy(model, loader::DataLoader, device)
    return mean(accuracy(model, device(batch)) for batch in loader)
end


In [None]:


# Build shuffled mini-batch loaders from the first graph stored in `dataset`.
#
# Arguments
#   dataset        indexable collection; only `dataset[1]` is read
#                  (`edge_index`, `num_nodes`, `node_data`)
#   batch_size     `batchsize` handed to both `DataLoader`s
#   train_repeats  number of copies of the data stacked for the training loader
#   test_repeats   number of copies of the data stacked for the test loader
#
# Returns `(train_loader, test_loader, fg, train_idx, test_idx)`, where `fg` is the
# `FeaturedGraph` wrapping the graph topology, `train_idx` is the dataset's
# `train_mask` and `test_idx` is its `val_mask`.
function load_data(dataset, batch_size, train_repeats=512, test_repeats=32)
    graph_data = dataset[1]

    # Rebuild the topology edge by edge from the (source, target) index lists.
    sources, targets = graph_data.edge_index
    g = Graphs.Graph(graph_data.num_nodes)
    foreach(zip(sources, targets)) do (i, j)
        Graphs.add_edge!(g, i, j)
    end

    node_data = graph_data.node_data
    X = node_data.features
    y = onehotbatch(node_data.targets, 1:7)   # class labels are taken to be 1:7
    train_idx = node_data.train_mask
    test_idx = node_data.val_mask

    # Stack `n` identical copies of a matrix along a third dimension.
    stack_copies(a, n) = repeat(a, outer=(1, 1, n))

    # (train_X, train_y) dim: (num_features, target_dim) × 2708 × train_repeats
    train_X, train_y = stack_copies(X, train_repeats), stack_copies(y, train_repeats)
    # (test_X, test_y) dim: (num_features, target_dim) × 2708 × test_repeats
    test_X, test_y = stack_copies(X, test_repeats), stack_copies(y, test_repeats)

    fg = FeaturedGraph(g)
    train_loader = DataLoader((train_X, train_y); batchsize=batch_size, shuffle=true)
    test_loader = DataLoader((test_X, test_y); batchsize=batch_size, shuffle=true)
    return train_loader, test_loader, fg, train_idx, test_idx
end


## Loss: cross entropy with first layer L2 regularization 
# Sum of the squared magnitudes of all elements of `x` (used as the L2 penalty term).
function l2norm(x)
    return sum(abs2, x)
end

# Loss for one mini-batch: logit cross entropy between `model(X)` and `y`, both
# restricted to the entries selected by `idx` along the second dimension, plus `λ` times
# the summed `l2norm` of the parameters of the first layer (`model[1]`).
function model_loss(model, λ, X, y, idx)
    logits = model(X)
    data_term = logitcrossentropy(logits[:, idx, :], y[:, idx, :])
    penalty = λ * sum(l2norm, Flux.params(model[1]))
    return data_term + penalty
end

# Fraction of positions selected by `idx` (second dimension) at which the predicted
# class `onecold(softmax(model(X)))` equals the target class `onecold(y)`.
# Model output and targets are moved through `cpu` before indexing.
function accuracy(model, X::AbstractArray, y::AbstractArray, idx)
    probabilities = softmax(cpu(model(X))[:, idx, :])
    predicted = onecold(probabilities)
    expected = onecold(cpu(y)[:, idx, :])
    return mean(predicted .== expected)
end

# Mean of the per-batch accuracies over every `(X, y)` batch produced by `loader`;
# `X` and `y` are passed through `device` before scoring, `idx` is forwarded unchanged.
function accuracy(model, loader::DataLoader, device, idx)
    return mean(accuracy(model, device(X), device(y), idx) for (X, y) in loader)
end

# Hyperparameters consumed by `train`. `@with_kw` generates a keyword constructor, so
# every field can be overridden, e.g. `Args(epochs=10, cuda=false)`.
# The defaults mirror the values listed in the parameter cell of this notebook.
@with_kw mutable struct Args
    η::Float64 = 0.01       # learning rate
    λ::Float32 = 5f-4       # regularization parameter
    batch_size::Int = 64    # batch size
    epochs::Int = 200       # number of epochs
    seed::Int = 0           # random seed (only applied when > 0)
    cuda::Bool = true       # use GPU
    input_dim::Int = 1433   # input dimension
    hidden_dim::Int = 16    # hidden dimension
    target_dim::Int = 7     # target dimension
    dataset = Cora          # dataset to train on (called with no arguments in `train`)
end

# Train a two-layer GCN on a static graph.
#
# Keyword arguments are forwarded to `Args`, so any hyperparameter can be overridden
# (`train(epochs=10)`); unknown keywords are rejected by the `Args` constructor.
#
# Steps: optionally seed the RNG, pick `gpu`/`cpu`, load the data with `load_data`,
# build the model around the returned `FeaturedGraph`, then run `args.epochs` passes over
# the training loader with Adam, reporting loss and accuracies on a progress meter.
#
# Returns `(model, args)`: the trained model and the settings that were used.
function train(; kws...)
    # load hyperparameters
    args = Args(; kws...)
    args.seed > 0 && Random.seed!(args.seed)

    # GPU config
    if args.cuda && CUDA.has_cuda()
        device = gpu
        CUDA.allowscalar(false)
        @info "Training on GPU"
    else
        device = cpu
        @info "Training on CPU"
    end

    # load the dataset (Cora by default) and the graph shared by all samples
    train_loader, test_loader, fg, train_idx, test_idx =
        load_data(args.dataset(), args.batch_size)

    # build model
    model = Chain(
        WithGraph(fg, GCNConv(args.input_dim => args.hidden_dim, relu)),
        Dropout(0.5),
        WithGraph(fg, GCNConv(args.hidden_dim => args.target_dim)),
    ) |> device

    # Adam optimizer
    opt = Adam(args.η)

    # parameters
    ps = Flux.params(model)

    # The training mask does not change between steps: move it to the device once.
    device_idx = train_idx |> device

    # training
    @info "Start Training, total $(args.epochs) epochs"
    for epoch in 1:args.epochs
        @info "Epoch $(epoch)"
        progress = Progress(length(train_loader))

        for (X, y) in train_loader
            # Fresh names (instead of rebinding X, y) keep the closure below free of
            # reassigned captured variables.
            Xd, yd = X |> device, y |> device
            loss, back = Flux.pullback(ps) do
                model_loss(model, args.λ, Xd, yd, device_idx)
            end

            # Accuracies are measured with the parameters from before this step's update.
            # NOTE(review): both loaders are evaluated in full on every step, which
            # dominates the run time; kept as-is to preserve the reported values.
            train_acc = accuracy(model, train_loader, device, train_idx)
            test_acc = accuracy(model, test_loader, device, test_idx)

            grad = back(1f0)
            Flux.Optimise.update!(opt, ps, grad)

            # progress meter
            next!(progress; showvalues=[
                (:loss, loss),
                (:train_accuracy, train_acc),
                (:test_accuracy, test_acc)
            ])
        end
    end

    return model, args
end

# Run training without overriding any keyword and keep both returned values:
# the trained model and the settings object that `train` used.
model, args = train()