# Training

In [None]:
# installing dependencies, remove if already installed
using Pkg
deps = ["DataFrames", "CSV", "ScikitLearn", "Statistics", "Flux", "CUDA", "cuDNN"]
Pkg.add(deps)

[32m[1m   Resolving[22m[39m package versions...
[32m[1m   Installed[22m[39m CUDNN_jll ─ v9.0.0+1
[32m[1m   Installed[22m[39m cuDNN ───── v1.3.1
[32m[1m    Updating[22m[39m `~/.julia/environments/v1.10/Project.toml`
  [90m[02a925ec] [39m[92m+ cuDNN v1.3.1[39m
[32m[1m    Updating[22m[39m `~/.julia/environments/v1.10/Manifest.toml`
  [90m[02a925ec] [39m[92m+ cuDNN v1.3.1[39m
  [90m[62b44479] [39m[92m+ CUDNN_jll v9.0.0+1[39m
[32m[1mPrecompiling[22m[39m project...
[32m  ✓ [39m[90mCUDNN_jll[39m
[32m  ✓ [39mcuDNN
[32m  ✓ [39m[90mNNlib → NNlibCUDACUDNNExt[39m
[32m  ✓ [39m[90mFlux → FluxCUDAcuDNNExt[39m
  4 dependencies successfully precompiled in 13 seconds. 166 already precompiled.


In [1]:
using DataFrames, CSV, ScikitLearn, Statistics, Flux, CUDA

[32m[1mPrecompiling[22m[39m CUDA
[32m  ✓ [39mCUDA
  1 dependency successfully precompiled in 37 seconds. 70 already precompiled.
[32m[1mPrecompiling[22m[39m NNlibCUDAExt
[32m  ✓ [39m[90mNNlib → NNlibCUDAExt[39m
  1 dependency successfully precompiled in 6 seconds. 72 already precompiled.
[32m[1mPrecompiling[22m[39m FluxCUDAExt
[32m  ✓ [39m[90mCUDNN_jll[39m
[32m  ✓ [39m[90mCUDA → SpecialFunctionsExt[39m
[32m  ✓ [39mcuDNN
[32m  ✓ [39m[90mNNlib → NNlibCUDACUDNNExt[39m
[32m  ✓ [39m[90mFlux → FluxCUDAExt[39m
[32m  ✓ [39m[90mFlux → FluxCUDAcuDNNExt[39m
  6 dependencies successfully precompiled in 18 seconds. 141 already precompiled.


In [2]:
CUDA.runtime_version()

LoadError: CUDA error (code 35, UnknownMember)

In [3]:
CUDA.set_runtime_version!(v"12.0.140")

[36m[1m[ [22m[39m[36m[1mInfo: [22m[39mConfigure the active project to use CUDA 12.0; please re-start Julia for this to take effect.


In [4]:
CUDA.driver_version()

LoadError: UndefVarError: `libcuda` not defined

In [6]:
CUDA.system_driver_version()

In [7]:
@assert CUDA.functional(true) #check gpu support

LoadError: CUDA driver not found

In [8]:
CUDA.versioninfo()

LoadError: CUDA driver not found

In [4]:
# initialize datasets
train_df = DataFrame(CSV.File("../../data/train.csv"))
val_df = DataFrame(CSV.File("../../data/validation.csv"))
test_df = DataFrame(CSV.File("../../data/test.csv"))

Row,longitude,latitude,housing_median_age,total_rooms,total_bedrooms,population,households,median_income,ocean_proximity,median_house_value
Unnamed: 0_level_1,Float64,Float64,Float64,Float64,Float64?,Float64,Float64,Float64,String15,Float64
1,-118.36,34.06,39.0,2810.0,670.0,1109.0,624.0,3.25,<1H OCEAN,355000.0
2,-119.78,36.78,37.0,2185.0,455.0,1143.0,438.0,1.9784,INLAND,70700.0
3,-122.42,37.73,46.0,1819.0,411.0,1534.0,406.0,4.0132,NEAR BAY,229400.0
4,-122.28,37.81,52.0,340.0,97.0,200.0,87.0,1.5208,NEAR BAY,112500.0
5,-118.13,33.82,37.0,1530.0,290.0,711.0,283.0,5.1795,<1H OCEAN,225400.0
6,-118.16,34.15,17.0,821.0,163.0,229.0,164.0,7.3715,<1H OCEAN,263000.0
7,-120.44,34.91,12.0,3189.0,463.0,1200.0,442.0,5.299,<1H OCEAN,226800.0
8,-122.48,38.31,29.0,2375.0,560.0,1124.0,502.0,2.3276,<1H OCEAN,166200.0
9,-117.64,34.08,35.0,1254.0,241.0,729.0,253.0,3.495,INLAND,118000.0
10,-118.16,34.04,45.0,332.0,70.0,302.0,60.0,3.1895,<1H OCEAN,156300.0


In [5]:
#remove outliers values, q represents the quantile where the data outside them will be removed
function remove_outliers( df::DataFrame, feat::String, q=0.05 )
    Qi = quantile(df[:,feat], q)
    Qf = quantile(df[:,feat], 1-q)
    IQR = Qf-Qi

    Qi -= 1.5*IQR
    Qf += 1.5*IQR
    
    return df[ (df[:,feat] .> Qi) .& (df[:,feat] .< Qf) , : ]
end

function apply_feature_engineering(df::DataFrame, keep_outliers::Bool)
    df.total_bedrooms .= coalesce.(df.total_bedrooms, mean(skipmissing(df.total_bedrooms)))
    
    #encode ocean_proximity column
    custom_encoding = Dict("ISLAND" => 4, "NEAR OCEAN" => 3, "NEAR BAY" => 2, "<1H OCEAN" => 1, "INLAND" => 0)
    df.ocean_proximity_enc = get.(Ref(custom_encoding), df.ocean_proximity, missing)
    select!(df, Not(:ocean_proximity))
    
    df.rooms_per_bedroom = df.total_rooms ./ df.total_bedrooms
    df.rooms_per_household = df.total_rooms ./ df.households
    df.encoded_position = df.longitude + df.latitude
    df.population_per_bedrooms = df.population ./ df.total_bedrooms
    df.target = df.median_house_value
    select!(df, Not(:median_house_value))
    
    if !keep_outliers
        for name in names(df)
            df = remove_outliers(df, name, 0.05)
        end
    end
    
    return df
end

apply_feature_engineering (generic function with 1 method)

In [11]:
function rmse(y_true::AbstractVector, y_pred::AbstractVector)
    return sqrt(mean((y_pred .- y_true) .^ 2))
end

rmse (generic function with 1 method)

In [7]:
# split the dataset into features and target, specifically for flux we need to transpose the features matrix
function split_target(df::DataFrame)
    x = transpose(Matrix(select(df, Not(:target))))
    y = Vector(df.target)
    return x, y
end

split_target (generic function with 1 method)

In [42]:
function build_model(inputs::Int,
    layers::Vector{Int},
    layers_per_dropout::Int=0,
    dropout_rate::Float64=0.0,
    activation_func::Function=Flux.relu
)
    layer_vec = Vector{Any}()
    push!(layer_vec, Flux.Dense(inputs => layers[1], activation_func)) #add input layer

    count = 1
    for i in 2:length(layers)
        push!(layer_vec, Flux.Dense(layers[i-1] => layers[i], activation_func))
        count += 1
        if layers_per_dropout > 0 && count % layers_per_dropout == 0
            push!(layer_vec, Flux.Dropout(dropout_rate))
            count = 0
        end
    end
    push!(layer_vec, Flux.Dense(last(layers) => 1))
    model = Flux.Chain(layer_vec) |> gpu #move the modelt to the gpu
    return model
end

function simple_train_model(train_df::DataFrame,
    test_df::DataFrame,
    layers::Vector{Int},
    layers_per_dropout::Int=0,
    dropout_rate::Float64=0.0,
    activation_func::Function=Flux.relu,
    loss_func::Function=Flux.mse,
    optimizer=Flux.ADAM,
    patience::Int=20
)

    x_train, y_train = split_target(apply_feature_engineering(train_df, false))
    x_test, y_test = split_target(apply_feature_engineering(test_df, false)) |> gpu

    model = build_model(size(x_train, 1), layers, layers_per_dropout, dropout_rate, activation_func)
    opt = Flux.setup(optimizer(), model)
    y_train = reshape(y_train, 1, :) 
    y_test = reshape(y_test, 1, :)

    acc = let best_loss = Inf #early stopping callback
        () -> begin
            loss_func(model(x_test), y_test)
        end 
    end
    es = Flux.early_stopping(acc, patience, init_score = Inf)

    for epoch in 1:100
        Flux.train!(model, [(x_train, y_train)] |> gpu, opt) do m, x, y
            y_hat = m(x)
            loss_func(y_hat, y)
        end

        if es()
            println("Early stopping at epoch ", epoch)
            break
        end
    end

    println("done")
    return model
end

simple_train_model (generic function with 7 methods)

In [43]:
model = simple_train_model(train_df, val_df, [64, 32, 16], 2, 0.2)

┌ Info: The CUDA function is being called but CUDA.jl is not functional.
│ Defaulting back to the CPU. (No action is required if you want to run on the CPU).
└ @ FluxCUDAExt C:\Users\tcivi\.julia\packages\Flux\Wz6D4\ext\FluxCUDAExt\FluxCUDAExt.jl:35


done


Chain([
  Array(
    Dense(13 => 64, relu),              [90m# 896 parameters[39m
    Dense(64 => 32, relu),              [90m# 2_080 parameters[39m
    Dropout(0.2),
    Dense(32 => 16, relu),              [90m# 528 parameters[39m
    Dense(16 => 1),                     [90m# 17 parameters[39m
  ),
])[90m                  # Total: 8 arrays, [39m3_521 parameters, 14.230 KiB.

In [44]:
x_test, y_test = split_target(apply_feature_engineering(copy(test_df), true))

# Evaluate the model on the test data
predictions = model(x_test)

# Check the RMSE
_rmse = sqrt(mean((predictions .- y_test) .^ 2))
println("RMSE on test data: ", _rmse)

RMSE on test data: 126361.63818513002


In [45]:
_rmse

126361.63818513002