In [1]:
include("./src/structs.jl")
include("./src/scalar_operators.jl")
include("./src/broadcast_operators.jl")
include("./src/graph.jl")
include("./src/forward.jl")
include("./src/backward.jl")
include("./src/data_loader.jl")
include("./src/nn_model.jl")
include("./src/optimisers.jl")
include("./src/losses.jl")
include("./src/layers.jl")
nothing

In [23]:
using Printf, Random, BenchmarkTools
using JLD2
Random.seed!(0)
nothing

In [71]:
X_train = load("./data/imdb_dataset_prepared.jld2", "X_train")
y_train = load("./data/imdb_dataset_prepared.jld2", "y_train")

println("Data loaded successfully.")

X_train = Float32.(X_train[:,1:32])
y_train = Float32.(y_train[:,1:32])

println("X_train size: ", size(X_train))
println("y_train size: ", size(y_train))

nothing

Data loaded successfully.
X_train size: (17703, 32)
y_train size: (1, 32)


In [73]:
batch_size = 32

dataset = DataLoader((X_train, y_train), batchsize=batch_size, shuffle=false)
nothing

In [74]:
input_size = size(X_train, 1)

model = Chain(
    Dense((input_size => 32), ReLU, name="hidden_layer"),
    Dense((32 => 1), σ, name="output_layer")
)
nothing

In [75]:
function compute_accuracy(actual, pred)
    (actual[1] > 0.5f0) == (pred[1] > 0.5f0) ? 1.0f0 : 0.0f0
end
nothing

In [76]:
net = NeuralNetwork(model, Adam(), binary_cross_entropy, compute_accuracy)

NeuralNetwork(Chain{Tuple{Dense{typeof(ReLU)}, Dense{typeof(σ)}}}((Dense{typeof(ReLU)}(Variable(Float32[-0.016237168 0.013551528 … 0.0016512527 0.015773153; -0.0016466284 -0.014167843 … -0.017014904 -0.017305773; … ; 1.622564f-7 -0.010556478 … -0.0064730546 -0.000620361; -0.0010941322 0.0025816397 … -0.00806352 -0.0125347935], nothing, "hidden_layer"), nothing, Main.ReLU), Dense{typeof(σ)}(Variable(Float32[0.15018842 -0.28983086 … -0.41295993 0.27046475], nothing, "output_layer"), nothing, Main.σ))), Adam(0.001f0, (0.9f0, 0.999f0), 1.1920929f-7, IdDict{Any, Tuple{Any, Any, Vector{Float32}, Any}}()), Main.binary_cross_entropy, Main.compute_accuracy, Variable(Float32[0.0; 0.0; … ; 0.0; 0.0;;], nothing, "x_input"), Variable(Float32[0.0;;], nothing, "y_true"), BroadcastedOperator{typeof(σ)}((BroadcastedOperator{typeof(mul!)}((Variable(Float32[0.15018842 -0.28983086 … -0.41295993 0.27046475], nothing, "output_layer"), BroadcastedOperator{typeof(ReLU)}((BroadcastedOperator{typeof(mul!)}((Var

In [90]:
function gradient!(grads, net, x_batch, y_batch, batch_size)
    batch_loss = 0.0f0
    batch_acc = 0.0f0

    for i in 1:batch_size
        x_sample = x_batch[:, i:i]
        y_sample = y_batch[:, i:i]
        
        net.x_node.output .= x_sample
        net.y_node.output .= y_sample
        
        batch_loss += forward!(net.sorted_graph)
        batch_acc += net.accuracy(y_sample, net.y_pred_node.output)        

        backward!(net.sorted_graph)
        
        accumulate_gradients!(grads, net.params)
    end

    for i in 1:length(grads)
        grads[i] ./= batch_size
    end
    
    return (batch_loss / batch_size, batch_acc / batch_size) 
end

function accumulate_gradients!(grad_accumulator::Vector, params::Vector)
    for (i, param) in enumerate(params)
        grad_accumulator[i] .+= param.∇
    end
end

nothing

### init, gradient!, optimize! benchmark 

In [83]:
optimizer = Adam()
net = NeuralNetwork(model, optimizer, binary_cross_entropy, compute_accuracy)
grads = [zeros(size(p.output)) for p in net.params]

nn_init_res = @benchmark NeuralNetwork($model, $optimizer, $binary_cross_entropy, $compute_accuracy) evals=1

grads_init_res = @benchmark let params = $net.params
    [zeros(size(p.output)) for p in params]
end evals=1
nothing

In [84]:
println("NeuralNetwork time (median): ", median(nn_init_res.times) / 1e9, " seconds")
println("Memory allocated: ", nn_init_res.memory / 1e6, " MB")
println("Allocations: ", nn_init_res.allocs)

println("----------------------------------------------------------")

println("Gradient array init time (minimum): ", minimum(grads_init_res.times) / 1e9, " seconds")
println("Memory allocated: ", grads_init_res.memory / 1e6, " MB")
println("Allocations: ", grads_init_res.allocs)

NeuralNetwork time (median): 4.15325e-5 seconds
Memory allocated: 0.07604 MB
Allocations: 135
----------------------------------------------------------
Gradient array init time (minimum): 0.000239661 seconds
Memory allocated: 4.532568 MB
Allocations: 11


In [87]:
batch_size = dataset.batchsize
(x_batch, y_batch) = first(dataset)

grad_result = @benchmark gradient!($grads, $net, $x_batch, $y_batch, $batch_size) evals=1
opt_result = @benchmark optimize!($net.optimizer, $net.params, $grads) evals=1
nothing

In [89]:
# Example output for grad_result
println("Gradient! time (median): ", median(grad_result.times) / 1e9, " seconds")
println("Memory allocated: ", grad_result.memory / 1e6, " MB")
println("Allocations: ", grad_result.allocs)

println("----------------------------------------------------------")

# Example output for opt_result
println("Optimize! time (minimum): ", minimum(opt_result.times) / 1e9, " seconds")
println("Memory allocated: ", opt_result.memory / 1e6, " MB")
println("Allocations: ", opt_result.allocs)

Gradient! time (median): 0.242413703 seconds
Memory allocated: 302.021424 MB
Allocations: 18819
----------------------------------------------------------
Optimize! time (minimum): 0.006912547 seconds
Memory allocated: 0.001408 MB
Allocations: 38


### deeper gradient! benchmark

In [102]:
optimizer = Adam()
net = NeuralNetwork(model, optimizer, binary_cross_entropy, compute_accuracy)
nothing

In [100]:
x_sample = x_batch[:, 1:1]
y_sample = y_batch[:, 1:1]

net.x_node.output .= x_sample
net.y_node.output .= y_sample

forward_res = @benchmark forward!(net.sorted_graph) evals=1
accuracy_res = @benchmark net.accuracy(y_sample, net.y_pred_node.output) evals=1       

backward_res = @benchmark backward!(net.sorted_graph) evals=1

accumulate_res = @benchmark accumulate_gradients!(grads, net.params) evals=1

nothing

In [101]:
println("Forward! time (median): ", median(forward_res.times) / 1e9, " seconds")
println("Memory allocated: ", forward_res.memory / 1e6, " MB")
println("Allocations: ", forward_res.allocs)

println("----------------------------------------------------------")

println("Accuracy time (minimum): ", minimum(accuracy_res.times) / 1e9, " seconds")
println("Memory allocated: ", accuracy_res.memory / 1e6, " MB")
println("Allocations: ", accuracy_res.allocs)

println("----------------------------------------------------------")

println("Backward! time (median): ", median(backward_res.times) / 1e9, " seconds")
println("Memory allocated: ", backward_res.memory / 1e6, " MB")
println("Allocations: ", backward_res.allocs)

println("----------------------------------------------------------")

println("Accumulate gradients time (minimum): ", minimum(accumulate_res.times) / 1e9, " seconds")
println("Memory allocated: ", accumulate_res.memory / 1e6, " MB")
println("Allocations: ", accumulate_res.allocs)

Forward! time (median): 0.0004634 seconds
Memory allocated: 0.004352 MB
Allocations: 132
----------------------------------------------------------
Accuracy time (minimum): 1.65e-7 seconds
Memory allocated: 1.6e-5 MB
Allocations: 1
----------------------------------------------------------
Backward! time (median): 0.00684859 seconds
Memory allocated: 9.363232 MB
Allocations: 462
----------------------------------------------------------
Accumulate gradients time (minimum): 0.000764121 seconds
Memory allocated: 6.4e-5 MB
Allocations: 2
