# Graph Convolutional Network with GeometricFlux.jl

This example is adapted from examples/gcm_with_Static_graph.jl in GeometricFlux.jl. It is the first example that I tried to run with this package.

## Application to CORA

The CORA dataset is a graph whose nodes are papers (2708 nodes) and whose edges are the references between them. For each node there is a vector of length 1433 that
indicates which words are present. For example, for node 1 the words 20  82  147  316  775  878  1195  1248  1275 are present. The task of the network is to classify each node into one of 7 classes, using the presence of the words and the reference links in the graph. The application seems to be taken from
[this paper](https://arxiv.org/pdf/1609.02907.pdf).

In [None]:
# Activate the project environment located in the current directory so that
# the packages loaded in the following cells resolve against it.
# NOTE(review): this only activates the environment; the dependencies are not
# installed here (e.g. via `Pkg.instantiate()`) — confirm the intended
# one-time setup step.
using Pkg
Pkg.activate(".")

In [None]:
using CUDA
using Flux
using Flux: onehotbatch, onecold
using Flux.Losses: logitcrossentropy
using Flux.Data: DataLoader
using GeometricFlux
using GeometricFlux.Datasets
using Graphs
using GraphSignals
using Parameters: @with_kw
using ProgressMeter: Progress, next!
using Statistics
using Random
using Plots, GraphRecipes

In [None]:
# Hyperparameters and run settings

η = 0.01                # learning rate (passed to the Adam optimizer)
λ = 5f-4                # L2 regularization weight used in the loss
batch_size = 32         # batch size of the data loaders
epochs = 2              # number of epochs (was 200)
seed = 0                # random seed; only applied when it is > 0
cuda = true             # use the GPU when CUDA reports that one is available
input_dim = 1433        # input dimension
hidden_dim = 16         # hidden dimension
target_dim = 7          # target dimension
dataset = Cora          # dataset to train on

nothing

In [None]:
# Seed the global RNG for reproducibility; a non-positive seed leaves it unseeded.
if seed > 0
    Random.seed!(seed)
end

# Pick the device mover: the GPU only when it was requested *and* CUDA is
# available, otherwise fall back to the CPU.
if !(cuda && CUDA.has_cuda())
    device = cpu
    @info "Training on CPU"
else
    device = gpu
    # Disallow scalar indexing of GPU arrays.
    CUDA.allowscalar(false)
    @info "Training on GPU"
end


In [None]:
# dsets=dataset()
# meta=dsets.metadata
# @show meta
# graph1=dsets.graphs[1]
# @show graph1

# features = graph1.node_data.features
# @show findall(x->(x>0.0),features[:,1])'


In [None]:
# Load data

"""
    load_data(dataset, batch_size, train_repeats=512, test_repeats=32; classes=1:7)

Build the graph and the mini-batch loaders from the first graph in `dataset`.

# Arguments
- `dataset`: indexable dataset; `dataset[1]` must provide `edge_index`,
  `num_nodes` and `node_data` (with `features`, `targets`, `train_mask` and
  `val_mask`).
- `batch_size`: batch size handed to both `DataLoader`s.
- `train_repeats`, `test_repeats`: how many times the full feature/label
  arrays are repeated along the third dimension for the train/test loaders.

# Keywords
- `classes`: label set used for the one-hot encoding of the targets. The
  default `1:7` matches the previously hard-coded value; pass another range
  for datasets with a different number of classes.

# Returns
`(train_loader, test_loader, fg, train_idx, test_idx)`, where `fg` is the
`FeaturedGraph` built from the edges, `train_idx` is the training mask and
`test_idx` is the validation mask of the node data.
"""
function load_data(dataset, batch_size, train_repeats=512, test_repeats=32; classes=1:7)
    graph_data = dataset[1]

    # Rebuild the graph from the (source, target) edge lists.
    s, t = graph_data.edge_index
    g = Graphs.Graph(graph_data.num_nodes)
    for (i, j) in zip(s, t)
        Graphs.add_edge!(g, i, j)
    end

    data = graph_data.node_data
    X, y = data.features, onehotbatch(data.targets, classes)
    train_idx, test_idx = data.train_mask, data.val_mask

    # MVL: copies of the entire dataset are made here, which seems a waste of
    # memory. Is this done to match the normal mini-batch cycle, or to create
    # a larger dataset for performance testing? The nodes that are actually
    # used are determined by the train_idx and test_idx masks, so this dataset
    # could be reduced to a fraction of its size, which would lower the memory
    # requirements and speed up training and testing.
    # (train_X, train_y) dim: (num_features, target_dim) × num_nodes × train_repeats
    train_X = repeat(X, outer=(1, 1, train_repeats))
    train_y = repeat(y, outer=(1, 1, train_repeats))
    # (test_X, test_y) dim: (num_features, target_dim) × num_nodes × test_repeats
    test_X = repeat(X, outer=(1, 1, test_repeats))
    test_y = repeat(y, outer=(1, 1, test_repeats))

    fg = FeaturedGraph(g)
    train_loader = DataLoader((train_X, train_y), batchsize=batch_size, shuffle=true)
    test_loader = DataLoader((test_X, test_y), batchsize=batch_size, shuffle=true)
    return train_loader, test_loader, fg, train_idx, test_idx
end

# Load Cora from the Planetoid dataset: instantiate the configured `dataset`
# and build the loaders, the featured graph and the train/test node masks.
train_loader, test_loader, fg, train_idx, test_idx = load_data(dataset(), batch_size)



In [None]:
# Look at the data. The values in the trailing comments are the outputs the
# author noted for this configuration.

@show fg # Undirected graph with (#V=2708, #E=5278)
@show train_loader

# First mini-batch of the training loader
(train_x, train_y) = first(train_loader)
@show size(train_x) # (1433, 2708, 32) = (input_dim, #V, batch_size)
@show size(train_y) # (7, 2708, 32) = (target_dim, #V, batch_size)
# First mini-batch of the test loader
(test_x, test_y) = first(test_loader)
@show size(test_x) # (1433, 2708, 32) = (input_dim, #V, batch_size)
@show size(test_y) # (7, 2708, 32) = (target_dim, #V, batch_size)

# Number of nodes selected by each mask
@show count(train_idx.>0) # 140
@show count(test_idx.>0) # 500

nothing

In [None]:
## Loss: cross entropy with first layer L2 regularization 
"""
    l2norm(x)

Return the sum of the squared magnitudes of the elements of `x`
(the squared L2 norm).
"""
function l2norm(x)
    return sum(abs2, x)
end

"""
    model_loss(model, λ, X, y, idx)

Return the logit cross-entropy between the model output and `y`, both
restricted to the nodes selected by `idx` along the second dimension, plus
`λ` times the summed squared parameters of the first layer `model[1]`.
"""
function model_loss(model, λ, X, y, idx)
    ŷ = model(X)
    # Data term: only the nodes selected by `idx` contribute.
    data_term = logitcrossentropy(ŷ[:, idx, :], y[:, idx, :])
    # L2 penalty on the parameters of the first layer only.
    penalty = λ * sum(l2norm, Flux.params(model[1]))
    return data_term + penalty
end

"""
    accuracy(model, X::AbstractArray, y::AbstractArray, idx)

Return the fraction of correct class predictions of `model` on `X`, compared
against the one-hot targets `y`, for the nodes selected by `idx` along the
second dimension. Outputs and targets are moved to the CPU before indexing.
"""
function accuracy(model, X::AbstractArray, y::AbstractArray, idx)
    scores = cpu(model(X))[:, idx, :]
    truth = cpu(y)[:, idx, :]
    return mean(onecold(softmax(scores)) .== onecold(truth))
end

"""
    accuracy(model, loader::DataLoader, device, idx)

Return the mean of the per-batch accuracies over all batches of `loader`.
Each batch is moved with `device` before being evaluated on the nodes
selected by `idx`.
"""
function accuracy(model, loader::DataLoader, device, idx)
    batch_scores = (accuracy(model, device(X), device(y), idx) for (X, y) in loader)
    return mean(batch_scores)
end

nothing

In [None]:
# Build the model: two graph convolutions over the fixed graph `fg` with
# dropout in between, moved to the selected device.
model = device(
    Chain(
        WithGraph(fg, GCNConv(input_dim => hidden_dim, relu)),  # input -> hidden, ReLU
        Dropout(0.5),
        WithGraph(fg, GCNConv(hidden_dim => target_dim)),       # hidden -> class scores
    )
)

In [None]:

# Adam optimizer with learning rate η
opt = Adam(η)

# Parameters of the model; these are handed to `Flux.pullback` and
# `Flux.Optimise.update!` in the training loop.
ps = Flux.params(model)

In [None]:
# Main loop for training

# The training mask is identical for every batch, so move it to the device
# once instead of on every iteration.
device_idx = train_idx |> device

# Number of optimizer steps taken so far.
train_steps = 0
@info "Start Training, total $(epochs) epochs"
for epoch in 1:epochs
    @info "Epoch $(epoch)"
    progress = Progress(length(train_loader))

    for (X, y) in train_loader
        X, y = X |> device, y |> device

        # Forward pass; `back` computes the gradients w.r.t. `ps`.
        loss, back = Flux.pullback(() -> model_loss(model, λ, X, y, device_idx), ps)

        # Accuracies are measured before the parameter update of this step.
        # NOTE: both calls iterate over the complete loaders, so this is done
        # in full for every single training batch.
        train_acc = accuracy(model, train_loader, device, train_idx)
        test_acc = accuracy(model, test_loader, device, test_idx)

        grad = back(1f0)
        Flux.Optimise.update!(opt, ps, grad)

        # progress meter
        next!(progress; showvalues=[
            (:loss, loss),
            (:train_accuracy, train_acc),
            (:test_accuracy, test_acc)
        ])

        # `global` makes the update of the top-level counter explicit, so the
        # loop also works when run as a script and not only in a notebook/REPL.
        global train_steps += 1
    end
end

#return model, args
