In [2]:
using Pkg;

In [3]:
# Activate the project environment found in the parent directory, so the
# packages used below are resolved against its Project.toml.
Pkg.activate("../")

"/home/dhughes/myr/glm/Project.toml"

In [4]:
# Print the packages (and versions) recorded in the currently active project.
Pkg.status()

[32m[1m    Status[22m[39m `~/myr/glm/Project.toml`
 [90m [4f1ea46c][39m[37m   AWSCore v0.6.0[39m
 [90m [0d499d91][39m[37m   AWSSDK v0.4.0[39m
 [90m [336ed68f][39m[37m   CSV v0.4.3[39m
 [90m [a93c6f00][39m[37m   DataFrames v0.17.1[39m
 [90m [587475ba][39m[92m + Flux v0.7.3[39m
 [90m [38e38edf][39m[37m   GLM v1.1.1[39m
 [90m [ce6b1742][39m[92m + RDatasets v0.6.1[39m
 [90m [f3b207a7][39m[37m   StatsPlots v0.10.2[39m


In [7]:
using CSV;
using Flux;
using RDatasets;
using AWSCore;
using AWSSDK.S3;
using StatsPlots;
using DataFrames;
using Flux.Tracker, Statistics, DelimitedFiles
using Flux.Tracker: Params, gradient, update!
using Flux: gpu

In [1]:
# Jupyter-only cosmetic tweak: emit an HTML <style> snippet that forces the
# notebook's `.container` element to 100% width, so cells span the full
# browser window.
display("text/html", "<style>.container { width:100% !important; }</style>")

In [8]:
# Work relative to the directory containing this notebook/script.
cd(@__DIR__)

# Fetch the dataset only when there is no local copy yet.
if !isfile("housing.data")
    download("https://raw.githubusercontent.com/MikeInnes/notebooks/master/housing.data",
             "housing.data")
end

# Read the whitespace-delimited table and flip it (lazily, via `adjoint`) so
# that each column is one sample and each row is one variable.
rawdata = adjoint(readdlm("housing.data"))

14×505 LinearAlgebra.Adjoint{Float64,Array{Float64,2}}:
   0.02731    0.02729    0.03237  …    0.06076    0.10959    0.04741
   0.0        0.0        0.0           0.0        0.0        0.0    
   7.07       7.07       2.18         11.93      11.93      11.93   
   0.0        0.0        0.0           0.0        0.0        0.0    
   0.469      0.469      0.458         0.573      0.573      0.573  
   6.421      7.185      6.998    …    6.976      6.794      6.03   
  78.9       61.1       45.8          91.0       89.3       80.8    
   4.9671     4.9671     6.0622        2.1675     2.3889     2.505  
   2.0        2.0        3.0           1.0        1.0        1.0    
 242.0      242.0      222.0         273.0      273.0      273.0    
  17.8       17.8       18.7      …   21.0       21.0       21.0    
 396.9      392.83     394.63        396.9      393.45     396.9    
   9.14       4.03       2.94          5.64       6.48       7.88   
  21.6       34.7       33.4          23.9     

In [19]:
# Rows 1-13 are the input features; the last row (14) is our target -- the
# price of the house. `14:14` keeps the target as a 1×N matrix rather than a
# vector.
x = gpu(rawdata[1:13, :])
y = gpu(rawdata[14:14, :])

1×505 Array{Float64,2}:
 21.6  34.7  33.4  36.2  28.7  22.9  …  16.8  22.4  20.6  23.9  22.0  11.9

In [20]:
# Normalise the data: standardise every feature (row) by subtracting its mean
# and dividing by its standard deviation, both taken across samples (dims = 2).
x = let μ = mean(x, dims = 2), σ = std(x, dims = 2)
    (x .- μ) ./ σ
end

13×505 Array{Float64,2}:
 -0.417416  -0.417418  -0.416828  …  -0.41353   -0.407858  -0.415081
 -0.486234  -0.486234  -0.486234     -0.486234  -0.486234  -0.486234
 -0.595732  -0.595732  -1.30899       0.11315    0.11315    0.11315 
 -0.272618  -0.272618  -0.272618     -0.272618  -0.272618  -0.272618
 -0.739098  -0.739098  -0.833934      0.15753    0.15753    0.15753 
  0.194741   1.28121    1.01528   …   0.983996   0.725177  -0.361293
  0.366208  -0.265527  -0.808535      0.795646   0.735312   0.433641
  0.556346   0.556346   1.0759       -0.771891  -0.66685   -0.611768
 -0.868939  -0.868939  -0.754097     -0.983782  -0.983782  -0.983782
 -0.987128  -0.987128  -1.10573      -0.803294  -0.803294  -0.803294
 -0.306024  -0.306024   0.110158  …   1.17373    1.17373    1.17373 
  0.441136   0.396591   0.416291      0.441136   0.403377   0.441136
 -0.494157  -1.20985   -1.36251      -0.984357  -0.866709  -0.670629

In [21]:
# The model: a single linear layer. `W` is a 1×13 weight matrix with small
# random initial values and `b` is a zero bias; both are wrapped with `param`
# so gradients can be tracked, then passed through `gpu`.

W = gpu(param(randn(1, 13) / 10))
b = gpu(param([0.0]))

# Linear prediction using the global parameters `W` and `b`.
function predict(x)
    return W*x .+ b
end

# Sum of squared residuals divided by the number of columns of `y`.
function meansquarederror(ŷ, y)
    residuals = ŷ .- y
    return sum(residuals.^2)/size(y, 2)
end

# Mean squared error of the model's predictions for inputs `x` against `y`.
function loss(x, y)
    return meansquarederror(predict(x), y)
end

loss (generic function with 1 method)

In [22]:
# Plain gradient descent: ten full-batch steps with step size η over the
# parameters collected in θ.
η = 0.1
θ = Params([W, b])

for epoch in 1:10
  # Gradients of the loss on the whole dataset with respect to every
  # parameter in θ.
  grads = gradient(θ) do
    loss(x, y)
  end
  # The parameter is named `p` so it cannot be confused with the data matrix `x`.
  for p in θ
    update!(p, -grads[p]*η)
  end
  @show loss(x, y)
end

loss(x, y) = 366.7771712469101 (tracked)
loss(x, y) = 241.6786942772334 (tracked)
loss(x, y) = 162.90324261548662 (tracked)
loss(x, y) = 112.69588372735977 (tracked)
loss(x, y) = 80.62653266183862 (tracked)
loss(x, y) = 60.11191125141034 (tracked)
loss(x, y) = 46.96721934334417 (tracked)
loss(x, y) = 38.52801177969454 (tracked)
loss(x, y) = 33.09606636798708 (tracked)
loss(x, y) = 29.588018136899038 (tracked)


In [23]:
# Sanity check: ratio of the model's prediction for the first sample (first
# column of `x`) to that sample's actual target value; a value near 1 means
# the prediction is close to the target.
predict(x[:,1]) / y[1]

Tracked 1-element Array{Float64,1}:
 1.0588200637999579