In [3]:
include("./src/structs.jl")
include("./src/scalar_operators.jl")
include("./src/broadcast_operators.jl")
include("./src/graph.jl")
include("./src/forward.jl")
include("./src/backward.jl")
include("./src/data_loader.jl")
include("./src/nn_model.jl")
include("./src/optimisers.jl")
include("./src/losses.jl")
include("./src/layers.jl")
nothing

In [14]:
# Printf supplies `@printf` (used for the per-epoch training log).
# JLD2 is loaded for reading the prepared `.jld2` dataset file.
using Printf, Random
using JLD2
# Seed the global RNG with a fixed value so that random draws made after this
# cell runs are repeatable from one session to the next.
Random.seed!(0)
nothing

In [7]:
# Load the prepared IMDB train/test splits.
# All four arrays are requested in a single `load` call, which returns them as a
# tuple in the requested order. This opens and parses the JLD2 file once rather
# than once per array, and keeps the path in exactly one place.
data_path = "./data/imdb_dataset_prepared.jld2"
X_train, y_train, X_test, y_test = load(
    data_path, "X_train", "y_train", "X_test", "y_test"
)

println("Data loaded successfully.")
println("X_train size: ", size(X_train))
println("y_train size: ", size(y_train))
println("X_test size: ", size(X_test))
println("y_test size: ", size(y_test))

# Convert every array to Float32 element type (sizes are unchanged).
# NOTE(review): presumably this matches the Float32 parameters of the model
# built later in the notebook — confirm against the layer constructors.
X_train = Float32.(X_train)
y_train = Float32.(y_train)
X_test = Float32.(X_test)
y_test = Float32.(y_test)
nothing

Data loaded successfully.
X_train size: (17703, 8000)
y_train size: (1, 8000)
X_test size: (17703, 2000)
y_test size: (1, 2000)


In [8]:
# Batch size shared by the training and the test loader.
batch_size = 32

# Training loader: shuffling is requested.
dataset = DataLoader(
    (X_train, y_train);
    batchsize=batch_size,
    shuffle=true,
)

# Test loader: shuffling is disabled.
testset = DataLoader(
    (X_test, y_test);
    batchsize=batch_size,
    shuffle=false,
)
nothing

In [9]:
# Size of the first dimension of the training matrix; used as the width of the
# first layer's input.
input_size = first(size(X_train))

# Two Dense layers chained together: input_size -> 32 with ReLU, then 32 -> 1
# with σ on the output.
model = Chain(
    Dense(input_size => 32, ReLU; name="hidden_layer"),
    Dense(32 => 1, σ; name="output_layer"),
)
nothing

In [10]:
"""
    compute_accuracy(pred, actual)

Return `1.0f0` when the first element of `pred` and the first element of
`actual` fall on the same side of the `0.5f0` threshold, and `0.0f0` otherwise.

Only index `1` of each argument is inspected. A value counts as positive when it
is strictly greater than `0.5f0`, so exactly `0.5f0` is treated as negative.
"""
function compute_accuracy(pred, actual)
    predicted_positive = pred[1] > 0.5f0
    actual_positive = actual[1] > 0.5f0
    if predicted_positive == actual_positive
        return 1.0f0
    end
    return 0.0f0
end
nothing

In [11]:
# Bundle the model with an Adam optimiser (default settings), the binary
# cross-entropy loss and the accuracy metric defined above in this notebook.
net = NeuralNetwork(
    model,
    Adam(),
    binary_cross_entropy,
    compute_accuracy,
)

NeuralNetwork(Chain{Tuple{Dense{typeof(ReLU)}, Dense{typeof(σ)}}}((Dense{typeof(ReLU)}(Variable(Float32[-0.016237168 0.013551528 … 0.0016512527 0.015773153; -0.0016466284 -0.014167843 … -0.017014904 -0.017305773; … ; 1.622564f-7 -0.010556478 … -0.0064730546 -0.000620361; -0.0010941322 0.0025816397 … -0.00806352 -0.0125347935], nothing, "hidden_layer"), nothing, Main.ReLU), Dense{typeof(σ)}(Variable(Float32[0.15018842 -0.28983086 … -0.41295993 0.27046475], nothing, "output_layer"), nothing, Main.σ))), Adam(0.001f0, (0.9f0, 0.999f0), 1.1920929f-7, IdDict{Any, Tuple{Any, Any, Vector{Float32}, Any}}()), Main.binary_cross_entropy, Main.compute_accuracy, Variable(Float32[0.0; 0.0; … ; 0.0; 0.0;;], nothing, "x_input"), Variable(Float32[0.0;;], nothing, "y_true"), BroadcastedOperator{typeof(σ)}((BroadcastedOperator{typeof(mul!)}((Variable(Float32[0.15018842 -0.28983086 … -0.41295993 0.27046475], nothing, "output_layer"), BroadcastedOperator{typeof(ReLU)}((BroadcastedOperator{typeof(mul!)}((Var

In [15]:
# Run a fixed number of epochs, printing one summary line after each.
epochs = 60
for epoch = 1:epochs
    # Only the training pass is timed; evaluation runs outside the timer.
    elapsed = @elapsed begin
        train_loss, train_acc = train!(net, dataset)
    end

    test_loss, test_acc = evaluate(net, testset)

    @printf(
        "Epoch %d/%d: Train Loss: %.4f, Train Acc: %.4f, Test Loss: %.4f, Test Acc: %.4f, Time: %.2fs\n",
        epoch, epochs, train_loss, train_acc, test_loss, test_acc, elapsed
    )
end

Epoch 1/60: Train Loss: 0.6892, Train Acc: 0.6914, Test Loss: 0.6884, Test Acc: 0.7656, Time: 2.56s
Epoch 2/60: Train Loss: 0.6860, Train Acc: 0.7734, Test Loss: 0.6858, Test Acc: 0.7500, Time: 2.79s
Epoch 3/60: Train Loss: 0.6820, Train Acc: 0.7930, Test Loss: 0.6821, Test Acc: 0.7188, Time: 2.39s
Epoch 4/60: Train Loss: 0.6776, Train Acc: 0.7812, Test Loss: 0.6780, Test Acc: 0.7812, Time: 1.89s
Epoch 5/60: Train Loss: 0.6700, Train Acc: 0.8320, Test Loss: 0.6740, Test Acc: 0.7656, Time: 2.02s
Epoch 6/60: Train Loss: 0.6652, Train Acc: 0.7930, Test Loss: 0.6692, Test Acc: 0.7656, Time: 2.09s
Epoch 7/60: Train Loss: 0.6550, Train Acc: 0.8789, Test Loss: 0.6643, Test Acc: 0.7500, Time: 1.76s
Epoch 8/60: Train Loss: 0.6482, Train Acc: 0.8750, Test Loss: 0.6579, Test Acc: 0.7188, Time: 1.90s
Epoch 9/60: Train Loss: 0.6396, Train Acc: 0.8750, Test Loss: 0.6516, Test Acc: 0.7344, Time: 1.70s
Epoch 10/60: Train Loss: 0.6272, Train Acc: 0.8984, Test Loss: 0.6451, Test Acc: 0.7344, Time: 1.73s