In [1]:
using Flux, Statistics, DelimitedFiles
using Flux: Params, gradient
using Flux.Optimise: update!
using DelimitedFiles, Statistics
using Parameters: @with_kw

┌ Info: Precompiling Flux [587475ba-b771-5e3f-ad9e-33799f191a9c]
└ @ Base loading.jl:1260


In [3]:
# This replicates the housing data example from the Knet.jl readme. Although we
# could have reused more of Flux (see the mnist example), the library's
# abstractions are very lightweight and don't force you into any particular
# strategy.

# Hyperparameters of the housing regression example.
# `@with_kw` (imported from Parameters) lets callers override any default below
# by keyword, e.g. `Hyperparams(lr = 0.05)`.
@with_kw mutable struct Hyperparams
    lr::Float64 = 0.1  # learning rate: step size applied to each gradient update
    split_ratio::Float64 = 0.1 # train/test split ratio: fraction of the data meant to be held out as test data
end

Hyperparams

In [4]:
# Load the housing dataset and return normalised train/test splits.
#
# `housing.data` is downloaded into the current working directory when it is not
# already present, then read with `readdlm`.
#
# Arguments:
#   args - any object with a `split_ratio` field: the fraction (between 0 and 1)
#          of the samples to hold out as test data.
#
# Returns `((x_train, y_train), (x_test, y_test))`. Every `x` has one row per
# feature (13 rows) and one column per sample; every `y` is a 1×N matrix of targets.
#
# Throws an `ArgumentError` when `split_ratio` lies outside `[0, 1]`.
function get_processed_data(args)
    split_ratio = args.split_ratio
    0 <= split_ratio <= 1 ||
        throw(ArgumentError("split_ratio must lie in [0, 1], got $(split_ratio)"))

    isfile("housing.data") ||
        download("https://raw.githubusercontent.com/MikeInnes/notebooks/master/housing.data",
            "housing.data")

    # `readdlm` yields one sample per row; transpose so that samples are columns.
    rawdata = readdlm("housing.data")'

    # Rows 1-13 are the features; the last row is our target -- the price of the house.
    x = rawdata[1:13, :]
    y = rawdata[14:14, :]

    # Normalise every feature (row) to zero mean and unit standard deviation.
    x = (x .- mean(x, dims = 2)) ./ std(x, dims = 2)

    # `split_ratio` is the *test* fraction, so the leading `1 - split_ratio` share
    # of the samples forms the training set and the remainder the test set.
    nsamples = size(x, 2)
    split_index = floor(Int, nsamples * (1 - split_ratio))

    x_train = x[:, 1:split_index]
    y_train = y[:, 1:split_index]
    x_test = x[:, split_index+1:nsamples]
    y_test = y[:, split_index+1:nsamples]

    train_data = (x_train, y_train)
    test_data = (x_test, y_test)

    return train_data, test_data
end

get_processed_data (generic function with 1 method)

In [5]:
# Parameters of the linear model evaluated by `predict` as `W*x .+ b`.
#
#   W - weight array that multiplies the input features
#   b - bias vector that is broadcast-added to `W*x`
#
# The field types are type parameters rather than the abstract `AbstractArray` /
# `AbstractVector`, so every instance stores concretely typed fields and code
# reading `m.W` / `m.b` can be specialised by the compiler. Construction is
# unchanged: `model(W, b)`. The struct stays mutable so fields can be reassigned.
mutable struct model{M<:AbstractArray,V<:AbstractVector}
    W::M
    b::V
end

In [6]:
# Evaluate the linear model `m` on the inputs `x`: multiply by the weights `m.W`
# and broadcast-add the bias `m.b`.
function predict(x, m)
    weights, bias = m.W, m.b
    return weights * x .+ bias
end

# Mean squared error: the sum of squared differences between the predictions `ŷ`
# and the targets `y`, divided by the number of columns of `y`.
function meansquarederror(ŷ, y)
    residual = ŷ .- y
    ncolumns = size(y, 2)
    return sum(residual .^ 2) / ncolumns
end
    

meansquarederror (generic function with 1 method)

In [None]:
# Train a linear regression model on the housing data with plain gradient descent.
#
# Keyword arguments are forwarded to `Hyperparams` (e.g. `train(lr = 0.05)`).
# Prints the training loss every 100 iterations and, at the end, the mean squared
# error on the test set. Returns `nothing`.
function train(; kws...)
    # Initialise the hyperparameters
    args = Hyperparams(; kws...)

    # Load the data
    (x_train, y_train), (x_test, y_test) = get_processed_data(args)

    # The model: a 1×13 weight matrix and a one-element bias vector
    m = model(randn(1, 13), [0.0])

    loss(x, y) = meansquarederror(predict(x, m), y)

    ## Training
    η = args.lr
    θ = params([m.W, m.b])

    for i in 1:500
        g = gradient(() -> loss(x_train, y_train), θ)
        for p in θ
            # Explicit in-place descent step. Spelling out `p .-= η .* g[p]` keeps
            # the update direction independent of the sign convention used by
            # Flux's two-argument `update!(x, x̄)`.
            p .-= η .* g[p]
        end
        if i % 100 == 0
            @show loss(x_train, y_train)
        end
    end

    # Report the mean squared error (not its square root) on the test set
    err = meansquarederror(predict(x_test, m), y_test)
    println(err)
    return nothing
end

In [None]:
# Switch to this file's directory so the relative "housing.data" path used by
# `get_processed_data` resolves next to the file, then train with the default
# hyperparameters.
cd(@__DIR__)
train()