In [35]:
using MLDatasets
using MLUtils
using Statistics
using DataFrames

iris = MLDatasets.Iris()
iris

dataset Iris:
  metadata   =>    Dict{String, Any} with 4 entries
  features   =>    150×4 DataFrame
  targets    =>    150×1 DataFrame
  dataframe  =>    150×5 DataFrame

In [4]:
iris.features

Row,sepallength,sepalwidth,petallength,petalwidth
Unnamed: 0_level_1,Float64,Float64,Float64,Float64
1,5.1,3.5,1.4,0.2
2,4.9,3.0,1.4,0.2
3,4.7,3.2,1.3,0.2
4,4.6,3.1,1.5,0.2
5,5.0,3.6,1.4,0.2
6,5.4,3.9,1.7,0.4
7,4.6,3.4,1.4,0.3
8,5.0,3.4,1.5,0.2
9,4.4,2.9,1.4,0.2
10,4.9,3.1,1.5,0.1


In [7]:
X = iris.features[!, [:sepallength]] |> Matrix
y = Matrix(iris.features[!, [:sepalwidth]])
onOne = ones(150)
X1 = [onOne X]
size(X1)

(150, 2)

In [10]:
typeof(X1)

Matrix{Float64}[90m (alias for [39m[90mArray{Float64, 2}[39m[90m)[39m

In [12]:
fieldnames(typeof(X1))

(:ref, :size)

In [17]:
using Random
Random.rand(2,1)

2×1 Matrix{Float64}:
 0.41663796430399513
 0.4248323634729074

In [19]:
using Random: seed!
seed!(47)

C = rand(2,1)
maxiterations = 200
λ = 0.0005

for i in maxiterations
    tgrad = transpose(X1) * ((X1 * C) - y)
    C = C - λ * tgrad
end
C

2×1 Matrix{Float64}:
 0.30933769796094773
 0.5393529066764434

In [28]:
using Random: seed!
seed!(47)

C = rand(2,1)
maxiterations = 1000000
λ = 0.005

for t in maxiterations
    i = rand(1:150, 10)
    xi = X1[i, :]
    err = xi * C - y[i]
    tgrad = transpose(xi) * err
    C = C - λ * tgrad
end
C


2×1 Matrix{Float64}:
 0.3000343369993614
 0.5046707211493131

In [29]:
using Random: seed!
seed!(47)

C = rand(2,1)
maxiterations = 100000
λ = 0.0001

for t in maxiterations
    i = rand(1:150, 10)
    xi = X1[i, :]
    err = xi * C - y[i]
    tgrad = transpose(xi) * err
    C = C - λ * tgrad
end

C

2×1 Matrix{Float64}:
 0.2845106327949387
 0.4183986194409015

In [60]:

features = MLDatasets.BostonHousing().features
fnames = names(features)
correlations = DataFrame(cor(Matrix(features)), fnames)
insertcols!(correlations, 1, :Feature => fnames)

Row,Feature,CRIM,ZN,INDUS,CHAS,NOX,RM,AGE,DIS,RAD,TAX,PTRATIO,B,LSTAT
Unnamed: 0_level_1,String,Float64,Float64,Float64,Float64,Float64,Float64,Float64,Float64,Float64,Float64,Float64,Float64,Float64
1,CRIM,1.0,-0.200469,0.406583,-0.0558916,0.420972,-0.219247,0.352734,-0.37967,0.625505,0.582764,0.289946,-0.385064,0.455621
2,ZN,-0.200469,1.0,-0.533828,-0.0426967,-0.516604,0.311991,-0.569537,0.664408,-0.311948,-0.314563,-0.391679,0.17552,-0.412995
3,INDUS,0.406583,-0.533828,1.0,0.062938,0.763651,-0.391676,0.644779,-0.708027,0.595129,0.72076,0.383248,-0.356977,0.6038
4,CHAS,-0.0558916,-0.0426967,0.062938,1.0,0.0912028,0.0912512,0.0865178,-0.0991758,-0.00736824,-0.0355865,-0.121515,0.0487885,-0.0539293
5,NOX,0.420972,-0.516604,0.763651,0.0912028,1.0,-0.302188,0.73147,-0.76923,0.611441,0.668023,0.188933,-0.380051,0.590879
6,RM,-0.219247,0.311991,-0.391676,0.0912512,-0.302188,1.0,-0.240265,0.205246,-0.209847,-0.292048,-0.355501,0.128069,-0.613808
7,AGE,0.352734,-0.569537,0.644779,0.0865178,0.73147,-0.240265,1.0,-0.747881,0.456022,0.506456,0.261515,-0.273534,0.602339
8,DIS,-0.37967,0.664408,-0.708027,-0.0991758,-0.76923,0.205246,-0.747881,1.0,-0.494588,-0.534432,-0.232471,0.291512,-0.496996
9,RAD,0.625505,-0.311948,0.595129,-0.00736824,0.611441,-0.209847,0.456022,-0.494588,1.0,0.910228,0.464741,-0.444413,0.488676
10,TAX,0.582764,-0.314563,0.72076,-0.0355865,0.668023,-0.292048,0.506456,-0.534432,0.910228,1.0,0.460853,-0.441808,0.543993


In [61]:
names(MLDatasets.BostonHousing().features)

13-element Vector{String}:
 "CRIM"
 "ZN"
 "INDUS"
 "CHAS"
 "NOX"
 "RM"
 "AGE"
 "DIS"
 "RAD"
 "TAX"
 "PTRATIO"
 "B"
 "LSTAT"

In [45]:
using Flux

dataset = MLDatasets.BostonHousing()
x,y = MLDatasets.BostonHousing(as_df=false)[:]
x,y = Float32.(x), Float32.(y)

function loss(model, features, labels)
    ŷ = model(features)
    Flux.mse(ŷ, labels)
end

function train_model!(f_loss, model, features, labels)
    dLdm, _, _ = gradient(f_loss, model, features, labels)
    @. model.weight = model.weight - 0.000001 * dLdm.weight
    @. model.bias = model.bias - 0.000001 * dLdm.bias
end

for ((xtrain, ytrain),(xval, yval)) in MLUtils.kfolds(shuffleobs((x,y)), k=5)
    # data contains different features with different scales.
    # check the standard deviation using `std(xtrain)`
    # to check if data needs to be normalised.

    # μ = mean(xtrain)
    # σ = std(xtrain)
    # ε or ϵ =  convert(float(eltype(xtrain)), 1e-5)
    # check out the source code
    # Flux.normalise  `@. (xtrain - μ) / sqrt(σ² + ε^2)`
    # MLUtils.normalise `(xtrain .- μ) ./ (σ .+ ϵ)`
    
    xnorm = normalise(xtrain)

    xnval = normalise(xval)
    
    model = Dense(13 => 1)

    # run an infinite loop that breaks once change in loss is < δ
    loss_init = Inf;
    while true
        train_model!(loss, model, xnorm, ytrain)
        # intialize loss value
        if loss_init == Inf
            loss_init = loss(model, xnorm, ytrain)
            println("loss initialized at ", loss_init)
            continue
        end
        # convergence check: break if change in loss is <  (1 / 10³)
        if abs(loss_init - loss(model, xnorm, ytrain)) < 1e-4
            break
        else
            loss_init = loss(model, xnorm, ytrain)
        end
    end
    println(loss(model, xnorm, ytrain))
    println("validation score: ", loss(model, xnval, yval))
end




loss initialized at 610.4873


53.667603
validation score: 56.91724
loss initialized at 607.32996


51.755875
validation score: 64.8024
loss initialized at 583.4495
51.773983


validation score: 57.780956
loss initialized at 612.7194


53.40296
validation score: 37.19609
loss initialized at 598.2733


50.07984
validation score: 56.012726


