In [1]:
using Flux, Statistics, DelimitedFiles
using Flux: params, gradient
using Flux.Optimise: update!, Descent
using Flux: mse, @epochs
using Statistics: mean

In [2]:
# Read the housing data
cd(@__DIR__)
isfile("HousingData/housing.data") ||
  download("https://raw.githubusercontent.com/MikeInnes/notebooks/master/housing.data",
           "HousingData/housing.data")

rawdata = readdlm("HousingData/housing.data")'

14×505 adjoint(::Matrix{Float64}) with eltype Float64:
   0.02731    0.02729    0.03237  …    0.06076    0.10959    0.04741
   0.0        0.0        0.0           0.0        0.0        0.0
   7.07       7.07       2.18         11.93      11.93      11.93
   0.0        0.0        0.0           0.0        0.0        0.0
   0.469      0.469      0.458         0.573      0.573      0.573
   6.421      7.185      6.998    …    6.976      6.794      6.03
  78.9       61.1       45.8          91.0       89.3       80.8
   4.9671     4.9671     6.0622        2.1675     2.3889     2.505
   2.0        2.0        3.0           1.0        1.0        1.0
 242.0      242.0      222.0         273.0      273.0      273.0
  17.8       17.8       18.7      …   21.0       21.0       21.0
 396.9      392.83     394.63        396.9      393.45     396.9
   9.14       4.03       2.94          5.64       6.48       7.88
  21.6       34.7       33.4          23.9       22.0       11.9

In [3]:
# The last feature is our target: the price of the house.
split_ratio = 0.1 # For the train test split

x = rawdata[1:13,:]
y = rawdata[14:14,:]

# Normalise the data
x = (x .- mean(x, dims = 2)) ./ std(x, dims = 2)

# Split into train and test sets
split_index = floor(Int,size(x,2)*split_ratio)
x_train = x[:,1:split_index]
y_train = y[:,1:split_index]
x_test = x[:,split_index+1:size(x,2)]
y_test = y[:,split_index+1:size(x,2)]

1×455 Matrix{Float64}:
 20.5  25.0  23.4  18.9  35.4  24.7  …  16.8  22.4  20.6  23.9  22.0  11.9

In [8]:
## FIRST WAY: Manually set up a least squares model
W = params(rand(1,13)/10)
b = params([0.1])

predict(x) = W*x .+ b
loss(x, y) = mse(predict(x), y)

loss (generic function with 1 method)

In [9]:
## TRAIN YOUR MODEL:
## ONE WAY: Code your own gradient descent
η = 0.1
θ = params([W, b])

for i = 1:100
  g = gradient(() -> loss(x_train, y_train), θ)
  for x in θ
    update!(x, -g[x]*η)
  end
  @show loss(x_train, y_train), loss(x_test,y_test)
end

# Report MSE on the test set
test_err = loss(x_test,y_test)
println(test_err)

LoadError: MethodError: in(::Zygote.Params, ::Zygote.IdSet{Any}) is ambiguous. Candidates:
  in(x, s::Zygote.IdSet) in Zygote at /Users/kalyani/.julia/packages/Zygote/RxTZu/src/tools/idset.jl:12
  in(x::Zygote.Params, args...; kwargs...) in Zygote at /Users/kalyani/.julia/packages/MacroTools/gME9C/src/examples/forward.jl:17
Possible fix, define
  in(::[0mZygote.Params, ::[0mZygote.IdSet)

In [None]:
## SECOND WAY: Train the model using Flux's inbuilt gradient descent optimizer
η = 0.1
θ = Params([W, b])

# Gradient descent optimiser with learning rate 0.5.
optimiser = Descent(η)

# Create iterator to train model over 100 epochs.
data_iterator = Iterators.repeated((x_train, y_train), 100)

# Call back
evalcb() = @show(loss(x_train, y_train))

# Report MSE on the test set
test_err = loss(x_test,y_test)
println(test_err)

println("Starting training.")
Flux.train!(loss, θ, data_iterator, optimiser, cb = evalcb)


In [None]:
## THIRD WAY: Use epochs to loop over data set multiple times
η = 0.3
θ = Params([W, b])

# Gradient descent optimiser with learning rate 0.5.
optimiser = Descent(η)

# Create iterator to train model over 100 epochs.
data_iterator = Iterators.repeated((x_train, y_train), 100)

# Call back
evalcb() = @show(loss(x_train, y_train))

println("Starting training")
@epochs 1 Flux.train!(loss, θ, data_iterator, optimiser, cb = evalcb)

In [None]:
# Report MSE on the test set
test_err = loss(x_test,y_test)
println("Test error = $(test_err)")