# Lux Basics

Follows: [https://lux.csail.mit.edu/stable/tutorials/beginner/1_Basics](https://lux.csail.mit.edu/stable/tutorials/beginner/1_Basics)

In [None]:
using Lux
using Random
using Plots
using CUDA
using LuxCUDA
using ComponentArrays
using ForwardDiff
using Zygote # autodiff backend

In [11]:
# Bind `rng` before seeding it: the seeding call and every later cell refer to the
# default generator by this name, so it must exist on a fresh kernel.
rng = Random.default_rng()
Random.seed!(rng, 42) # Set the random seed for reproducibility
rng1 = rng # same generator object, under the name used for the comparison below
rng2 = Xoshiro(0)
# generate random data using rng
x = rand(rng1, 5)
@show x
y = rand(rng2, 5) # independently seeded generator (seed 0) gives different values
@show y

# replication of the random number generator to control the random seed
rng3 = Xoshiro(42)
rng4 = Lux.replicate(rng3)
x3 = rand(rng3, 5)
@show x3
x4 = rand(rng4, 5) # the replica starts from the same state, so it gives the same values
@show x4

x = [0.6293451231426089, 0.4503389405961936, 0.47740714343281776, 0.7031298490032014, 0.6733461456394962]
y = [0.4056994708920292, 0.06854582438651502, 0.8621408571954849, 0.08597086585842195, 0.6616126907308237]
x3 = [0.6293451231426089, 0.4503389405961936, 0.47740714343281776, 0.7031298490032014, 0.6733461456394962]
x4 = [0.6293451231426089, 0.4503389405961936, 0.47740714343281776, 0.7031298490032014, 0.6733461456394962]


5-element Vector{Float64}:
 0.6293451231426089
 0.4503389405961936
 0.47740714343281776
 0.7031298490032014
 0.6733461456394962

In [12]:
# Move a random matrix to the GPU, but only when LuxCUDA reports a usable device.
using LuxCUDA

if LuxCUDA.functional()
    # `cu` performs the host-to-device conversion of the freshly drawn 5×3 matrix.
    x_cu = rand(5, 3) |> cu
    @show x_cu
end

└ @ LuxCUDA /home/verlaan/.julia/packages/LuxCUDA/rqXwj/src/LuxCUDA.jl:20


In [None]:
# Automatic differentiation: compare the hand-written gradient with two AD backends.
function f(x)
    return x' * x / 2
end
∇f(x) = x  # analytical gradient of f
v = randn(rng, Float32, 4)

# `Zygote.gradient` returns a tuple with one entry per argument; `first` unwraps it.
println("Actual Gradient: $(∇f(v))")
println("Computed Gradient via Reverse Mode AD (Zygote): $(first(Zygote.gradient(f, v)))")
println("Computed Gradient via Forward Mode AD (ForwardDiff): $(ForwardDiff.gradient(f, v))")

Actual Gradient: Float32[-0.37426844, 1.1695118, 0.25684848, 0.20092508]
Computed Gradient via Reverse Mode AD (Zygote): Float32[-0.37426844, 1.1695118, 0.25684848, 0.20092508]
Computed Gradient via Forward Mode AD (ForwardDiff): Float32[-0.37426844, 1.1695118, 0.25684848, 0.20092508]


In [None]:
# Jacobian-vector and vector-Jacobian products without building the full Jacobian.
# g acts element-wise, so its Jacobian is the diagonal matrix diag(x).
g(x) = @. x * x / 2
x = randn(rng, Float32, 5)
w = ones(Float32, 5)

println("Vextor x: $(x)")
# Forward mode: J * w
jvp = jacobian_vector_product(g, AutoForwardDiff(), x, w)
println("JVP: $(jvp)")
# Reverse mode: wᵀ * J
vjp = vector_jacobian_product(g, AutoZygote(), x, w)
println("VJP: $(vjp)")

Vextor x: Float32[-0.95902896, 0.9548453, 0.3785684, -0.25736085, -1.1705533]
JVP: Float32[-0.95902896, 0.9548453, 0.3785684, -0.25736085, -1.1705533]
VJP: Float32[-0.95902896, 0.9548453, 0.3785684, -0.25736085, -1.1705533]


In [None]:
# Linear regression example with Lux
n_samples, x_dim, y_dim = 20, 10, 5
model = Dense(x_dim => y_dim)
ps, st = Lux.setup(rng, model)
@show ps, typeof(ps), st # named tuple of parameters and state
ps = ComponentArray(ps)
@show ps, typeof(ps)

# Synthetic data set: a known affine map (W, b) applied to random inputs, plus
# small Gaussian noise on the targets.
W = randn(rng, Float32, y_dim, x_dim)
b = randn(rng, Float32, y_dim)
x_samples = randn(rng, Float32, x_dim, n_samples)
y_samples = W * x_samples .+ b .+ 0.01f0 .* randn(rng, Float32, y_dim, n_samples)
println("x shape: $(size(x_samples)); y shape: $(size(y_samples))")

using Optimisers, Printf

lossfn = MSELoss()

# Reference value: the loss of the generating parameters against the noisy targets.
println("Loss Value with true parameters: $(lossfn(W * x_samples .+ b, y_samples))")

# Loss of the freshly initialised model; the model call returns (output, state).
y_model, _ = model(x_samples, ps, st)
println("Loss Value with initial parameters: $(lossfn(y_model, y_samples))")

"""
    train_model!(model, ps, st, opt, nepochs::Int;
                 data=(x_samples, y_samples), loss=lossfn, print_every::Int=1000)

Run `nepochs` full-batch training steps on `model` and return the trained
`(model, parameters, states)` triple taken from the final training state.

# Arguments
- `model`, `ps`, `st`: model, parameters and states used to build the `Training.TrainState`.
- `opt`: optimiser rule passed to the `Training.TrainState`.
- `nepochs`: number of calls to `Training.single_train_step!`.

# Keywords
- `data`: data passed to every training step. Defaults to the notebook globals
  `(x_samples, y_samples)`, which is what the function previously hard-coded.
- `loss`: objective passed to every training step. Defaults to the global `lossfn`.
- `print_every`: the loss is printed on the first iteration, then every `print_every`
  iterations, and always on the last one.

# Throws
- `ArgumentError` if `print_every` is not positive.
"""
function train_model!(
    model, ps, st, opt, nepochs::Int;
    data=(x_samples, y_samples), loss=lossfn, print_every::Int=1000
)
    print_every > 0 ||
        throw(ArgumentError("print_every must be positive, got $print_every"))
    tstate = Training.TrainState(model, ps, st, opt)
    for epoch in 1:nepochs
        # Only the loss value and the updated training state are needed here; the
        # first and third returned values are discarded.
        _, loss_value, _, tstate = Training.single_train_step!(
            AutoZygote(), loss, data, tstate
        )
        # Equivalent to `epoch % 1000 == 1` for the default interval, and it still
        # fires when the interval is 1.
        if (epoch - 1) % print_every == 0 || epoch == nepochs
            @printf "Loss Value after %6d iterations: %.8f\n" epoch loss_value
        end
    end
    return tstate.model, tstate.parameters, tstate.states
end

# Fit the model with plain gradient descent (step size 0.01) for 10000 iterations.
model, ps, st = train_model!(model, ps, st, Descent(0.01f0), 10000)

# Final training loss; it ends up lower than the loss of the true parameters.
println("Loss Value after training: $(lossfn(first(model(x_samples, ps, st)), y_samples))")

(ps, typeof(ps), st) = ((weight = Float32[0.1233335 0.42378408 0.062000398 -0.19484083 0.38523102 0.10262223 0.02096813 -0.42997092 0.3695187 -0.15907203; 0.42344913 0.40849015 -0.2285029 -0.50546885 0.4133596 0.09501148 0.20080848 -0.35760397 -0.023468288 -0.24797209; 0.34748197 0.5052108 0.038779575 -0.15346937 -0.19013004 -0.4029889 -0.16063868 -0.3044993 -0.18839604 0.13307981; 0.39337918 -0.38666537 0.11857732 0.21355523 0.43812287 -0.018205948 -0.4446723 0.25038174 0.26834545 0.47187915; -0.053774122 0.08671521 0.5185416 0.050246894 -0.31792536 0.124457 -0.32329604 0.44332144 -0.41103375 0.3870172], bias = Float32[0.22861098, -0.16324113, -0.080415174, 0.15626132, -0.17037798]), @NamedTuple{weight::Matrix{Float32}, bias::Vector{Float32}}, NamedTuple())
(ps, typeof(ps)) = ((weight = Float32[0.1233335 0.42378408 0.062000398 -0.19484083 0.38523102 0.10262223 0.02096813 -0.42997092 0.3695187 -0.15907203; 0.42344913 0.40849015 -0.2285029 -0.50546885 0.4133596 0.09501148 0.20080848 -0.

NamedTuple()