In [16]:
#load "../Data/Data.fsx"
#r "nuget: Plotly.NET"
#r "nuget: Plotly.NET.Interactive"

open Data
open Plotly.NET

### Load data and split it into a (training * validation) tuple ###

In [17]:
// Read the complete data set through the loader opened from the Data script.
let allData = loadData ()

// Partition into a (training, validation) pair. 0.7 is presumably the share of
// rows assigned to training -- confirm against splitData in Data.fsx.
let training, validation = splitData 0.7 allData

### Cost function (Ordinary Least Squares) ###

In [18]:
/// Cost of `model` on the observations `obs`: the square root of the sum of
/// squared differences between the model's prediction and the observed `Cnt`.
let cost (obs: Obs seq) (model: Model) =
    // Squared residual for a single observation.
    let squaredError (o: Obs) =
        let residual = model o - float o.Cnt
        residual ** 2.

    sqrt (Seq.sumBy squaredError obs)
    
/// Cost of a model measured on the training split.
let trainingCost : Model -> float = cost training

/// Cost of a model measured on the validation split.
let validationCost : Model -> float = cost validation

/// Cost of a model measured on every row of the data set.
let overallCost : Model -> float = cost allData.Rows

In [19]:
/// Straight-line model: predicts `theta0 + theta1 * Instant` for an observation.
/// Partially applying the theta pair yields a function from `Obs` to a prediction.
let model (theta0, theta1) (obs: Obs) =
    let x = float obs.Instant
    theta0 + theta1 * x

In [20]:
/// Mean absolute error of `model` over the validation split.
/// Array.averageBy raises if `validation` is empty.
let evaluateModel model =
    let absoluteError (o: Obs) = abs (model o - float o.Cnt)
    Array.averageBy absoluteError validation
    

In [21]:
/// One gradient-descent step on the squared error of a single observation.
/// `alpha` is the learning rate; returns the adjusted (theta0, theta1) pair.
let updateThetas (alpha: float) (theta0, theta1) (obs: Obs) =
    let x = float obs.Instant
    let y = float obs.Cnt
    // Prediction error of the current line at this observation.
    let residual = theta0 + theta1 * x - y
    // Shared factor of both partial derivatives of residual^2.
    let step = 2. * alpha
    (theta0 - step * residual, theta1 - step * x * residual)
    
/// One batch step: applies `updateThetas` to every observation starting from the
/// same (theta0, theta1) and returns the component-wise average of the results.
/// Raises (from Array.averageBy) when `data` is empty.
let batchUpdateThetas rate (theta0, theta1) (data: Obs seq) =
    // Materialise the per-observation updates once. Averaging a lazy Seq.map
    // twice would re-run updateThetas for every observation on each pass.
    let thetas =
        data
        |> Seq.map (updateThetas rate (theta0, theta1))
        |> Seq.toArray

    let theta0' = thetas |> Array.averageBy fst
    let theta1' = thetas |> Array.averageBy snd
    (theta0', theta1')
    
/// Single pass over the training split, applying `updateThetas` after each
/// observation and threading the updated theta pair into the next one.
let stochasticEstimation rate (theta0, theta1) =
    let step thetas obs = updateThetas rate thetas obs
    Seq.fold step (theta0, theta1) training
    
/// Runs `iters` batch updates over `data`, starting from thetas (0, 0), and
/// returns the final (theta0, theta1) pair.
/// A zero or negative `iters` performs no update and returns (0, 0).
let batchEstimation rate iters (data : Obs seq)=
    let rec search thetas remaining =
        // `<= 0` rather than `= 0`: a negative count would otherwise never hit
        // the base case and keep iterating until the counter wraps around.
        if remaining <= 0 then thetas
        else search (batchUpdateThetas rate thetas data) (remaining - 1)

    search (0., 0.) iters
    


### Optimal learning rate (alpha) estimation ###

In [22]:
/// Learning rate chosen by grid search: tries 0.1^1 .. 0.1^30, runs one
/// stochastic pass for each, and keeps the rate with the lowest training cost.
/// Seq.minBy raises if no candidate survives the filter.
let rate =
    [ for r in 1..30 do
        // Compute the candidate once and reuse it for both tuple fields.
        let alpha = pown 0.1 r
        let error = stochasticEstimation alpha (0., 0.) |> model |> trainingCost
        yield (alpha, error) ]
    // Float literal keeps the comparison float-to-float. NaN costs (diverged
    // runs) fail `> 0.` and are dropped before the minimum is taken.
    |> Seq.filter (fun (_, error) -> error > 0.)
    |> Seq.minBy snd
    |> fst

### Baseline model - always return the average count over all data ###

In [23]:
/// Baseline predictor: ignores the observation and always answers with the
/// mean `Cnt`.
/// NOTE(review): the mean is taken over allData.Rows, which also contains the
/// validation rows -- confirm whether it should be restricted to `training`.
let baselineModel =
    // Compute the mean once, not on every prediction.
    let meanCnt = allData.Rows |> Seq.averageBy (fun x -> float x.Cnt)
    fun (_: Obs) -> meanCnt

In [24]:
// Model 1: thetas from a single stochastic pass at the selected learning rate.
let thetas1 = (0., 0.) |> stochasticEstimation rate
let model1 = model thetas1

In [25]:
// Model 2: thetas from 1,000,000 batch iterations over the training split
// with a fixed learning rate of 0.1^6.
let thetas2 = batchEstimation (pown 0.1 6) 1_000_000 training
let model2 = model thetas2

In [26]:
// Report the validation error of each model alongside its fitted thetas.
printfn "Naive model average error: %f" (evaluateModel baselineModel)
printfn "Model 1 thetas: %A" thetas1
printfn "Model 1 average error: %f" (evaluateModel model1)
printfn "Model 2 thetas: %A" thetas2
printfn "Model 2 average error: %f" (evaluateModel model2)

Naive model average error: 1619.742275
Model 1 thetas: (0.08445985761, 9.643593597)
Model 1 average error: 1539.519699
Model 2 thetas: (995.8878196, 8.539658963)
Model 2 average error: 1211.250960


In [27]:
/// Line chart of `model`'s prediction at every observation of the training
/// split, with `Instant` on the x axis.
let createModelChart (model: Model) =
    let toPoint (obs: Obs) = float obs.Instant, model obs

    training
    |> Array.map toPoint
    |> Chart.Line
    
/// Scatter plot of the observed `Cnt` against `Instant` for every row.
let chart =
    seq { for obs in allData.Rows -> float obs.Instant, float obs.Cnt }
    |> Chart.Point
    
// One prediction line per model, all drawn over the training instants.
let baselineChart = baselineModel |> createModelChart

let model1Chart = model1 |> createModelChart

let model2Chart = model2 |> createModelChart

// Overlay the observed points and the three model lines in a single chart.
Chart.combine [ chart; baselineChart; model1Chart; model2Chart ]
