In [3]:
include("./src/structs.jl")
include("./src/scalar_operators.jl")
include("./src/broadcast_operators.jl")
include("./src/graph.jl")
include("./src/forward.jl")
include("./src/backward.jl")
include("./src/data_loader.jl")
include("./src/nn_model.jl")
include("./src/optimisers.jl")
include("./src/losses.jl")
include("./src/layers.jl")
nothing

In [14]:
# Printf supplies the `@printf` macro used by the per-epoch report further down;
# Random supplies `Random.seed!`.
using Printf, Random
# JLD2 is loaded for reading the prepared `.jld2` dataset
# (presumably the source of `load` used in the data cell — confirm).
using JLD2
# Seed the default global RNG with a fixed value (0).
Random.seed!(0)
nothing

In [7]:
# Read all four arrays with a single `load` call so the JLD2 file is opened and
# parsed once instead of once per variable. Passing several names returns the
# values as a tuple in the same order as the names.
X_train, y_train, X_test, y_test = load(
    "./data/imdb_dataset_prepared.jld2", "X_train", "y_train", "X_test", "y_test"
)

println("Data loaded successfully.")
# Report the shape of every array before any element-type conversion.
for (label, arr) in (
    ("X_train", X_train), ("y_train", y_train), ("X_test", X_test), ("y_test", y_test)
)
    println(label, " size: ", size(arr))
end

# Convert every array to Float32 element-wise; shapes are unchanged.
X_train = Float32.(X_train)
y_train = Float32.(y_train)
X_test = Float32.(X_test)
y_test = Float32.(y_test)
nothing

Data loaded successfully.
X_train size: (17703, 8000)
y_train size: (1, 8000)
X_test size: (17703, 2000)
y_test size: (1, 2000)


In [8]:
# Mini-batch size shared by the training and the test loader.
batch_size = 32

# The training loader is created with shuffle=true, the test loader with shuffle=false.
dataset = DataLoader((X_train, y_train); batchsize=batch_size, shuffle=true)
testset = DataLoader((X_test, y_test); batchsize=batch_size, shuffle=false)
nothing

In [9]:
# Width of the first layer's input: the extent of X_train along its first dimension.
input_size = size(X_train, 1)

# Two Dense layers: input_size => 32 with ReLU, then 32 => 1 with σ.
model = Chain(
    Dense(input_size => 32, ReLU; name="hidden_layer"),
    Dense(32 => 1, σ; name="output_layer"),
)
nothing

In [23]:
"""
    compute_accuracy(actual, pred)

Return, as a `Float32`, the fraction of positions at which `actual` and `pred`
fall on the same side of the 0.5 decision threshold (a value is treated as the
positive class when it is strictly greater than `0.5f0`).

For one-element inputs (or plain scalars) the result is `1.0f0` on a match and
`0.0f0` otherwise. For inputs with several elements every position is taken
into account, not just the first one.

# Throws
- `DimensionMismatch` if `actual` and `pred` have different lengths.
- `ArgumentError` if the inputs are empty.
"""
function compute_accuracy(actual, pred)
    n = length(actual)
    n == length(pred) || throw(DimensionMismatch(
        "actual has $n elements but pred has $(length(pred))"))
    n > 0 || throw(ArgumentError("cannot compute accuracy of empty inputs"))

    threshold = 0.5f0
    # Count positions where label and prediction agree after thresholding.
    hits = count(zip(actual, pred)) do (a, p)
        (a > threshold) == (p > threshold)
    end
    return Float32(hits) / Float32(n)
end
nothing

In [17]:
# Bundle the model with its optimiser (default-constructed Adam), the loss
# function and the accuracy metric into one NeuralNetwork object.
net = NeuralNetwork(
    model,
    Adam(),
    binary_cross_entropy,
    compute_accuracy,
)

NeuralNetwork(Chain{Tuple{Dense{typeof(ReLU)}, Dense{typeof(σ)}}}((Dense{typeof(ReLU)}(Variable(Float32[0.0073064817 0.006928116 … 0.013018726 -0.044078223; 0.02365332 -0.006910294 … -0.022450773 0.045690056; … ; 0.01723829 -0.002930647 … -0.018296892 0.06107838; 0.007813497 -0.0046908823 … 0.0036040726 -0.065520555], nothing, "hidden_layer"), nothing, Main.ReLU), Dense{typeof(σ)}(Variable(Float32[0.70563793 -0.87184596 … -0.9922799 0.8233849], nothing, "output_layer"), nothing, Main.σ))), Adam(0.001f0, (0.9f0, 0.999f0), 1.1920929f-7, IdDict{Any, Tuple{Any, Any, Vector{Float32}, Any}}()), Main.binary_cross_entropy, Main.compute_accuracy, Variable(Float32[0.0; 0.0; … ; 0.0; 0.0;;], nothing, "x_input"), Variable(Float32[0.0;;], nothing, "y_true"), BroadcastedOperator{typeof(σ)}((BroadcastedOperator{typeof(mul!)}((Variable(Float32[0.70563793 -0.87184596 … -0.9922799 0.8233849], nothing, "output_layer"), BroadcastedOperator{typeof(ReLU)}((BroadcastedOperator{typeof(mul!)}((Variable(Float32

In [24]:
# Number of passes over the training loader.
epochs = 60
for epoch in 1:epochs
    # Only the training pass is timed; the evaluation below is not part of `t`.
    t = @elapsed begin
        train_loss, train_acc = train!(net, dataset)
    end

    # Evaluate on the test loader after each training pass.
    test_loss, test_acc = evaluate(net, testset)

    @printf(
        "Epoch %d/%d: Train Loss: %.4f, Train Acc: %.4f, Test Loss: %.4f, Test Acc: %.4f, Time: %.2fs\n",
        epoch, epochs, train_loss, train_acc, test_loss, test_acc, t
    )
end

Epoch 1/60: Train Loss: 0.0950, Train Acc: 0.9766, Test Loss: 0.3349, Test Acc: 0.8438, Time: 2.44s
Epoch 2/60: Train Loss: 0.1080, Train Acc: 0.9609, Test Loss: 0.3362, Test Acc: 0.8438, Time: 1.73s
Epoch 3/60: Train Loss: 0.0891, Train Acc: 0.9805, Test Loss: 0.3361, Test Acc: 0.8438, Time: 1.90s
Epoch 4/60: Train Loss: 0.1211, Train Acc: 0.9570, Test Loss: 0.3372, Test Acc: 0.8438, Time: 2.17s
Epoch 5/60: Train Loss: 0.1102, Train Acc: 0.9727, Test Loss: 0.3386, Test Acc: 0.8438, Time: 2.02s
Epoch 6/60: Train Loss: 0.0982, Train Acc: 0.9766, Test Loss: 0.3374, Test Acc: 0.8438, Time: 1.78s
Epoch 7/60: Train Loss: 0.0705, Train Acc: 0.9883, Test Loss: 0.3364, Test Acc: 0.8438, Time: 1.93s
Epoch 8/60: Train Loss: 0.0866, Train Acc: 0.9844, Test Loss: 0.3347, Test Acc: 0.8438, Time: 2.11s
Epoch 9/60: Train Loss: 0.0935, Train Acc: 0.9688, Test Loss: 0.3330, Test Acc: 0.8438, Time: 1.78s
Epoch 10/60: Train Loss: 0.0752, Train Acc: 0.9922, Test Loss: 0.3315, Test Acc: 0.8438, Time: 1.73s