In [None]:
#i "nuget:https://www.myget.org/F/gregs-experimental-packages/api/v3/index.json"

#r "nuget:Deedle.DotNet.Interactive.Extension,0.1.0-alpha8"
#r "nuget:Deedle"
#r "nuget:FSharp.Charting"

In [None]:
open Deedle

/// A predictor that takes the `A` and `B` station names of a journey and
/// returns a float; `evaluate` compares that float against the SPEED column.
type Model = string -> string -> float

// Training observations. The rendered frame shows the columns
// TRANSPORT, TIME, A, B and SPEED.
let df =
    let trainingCsv = "/workspaces/transport tycoon/23/s02e03_train.csv"
    Frame.ReadCsv(trainingCsv)
df

Unnamed: 0_level_0,TRANSPORT,TIME,A,B,SPEED
Unnamed: 0_level_1,System.String,System.DateTime,System.String,System.String,System.Decimal
0,CARGO_15100,11/25/1859 10:16:40,Steamdrift,Rustport,74.09
1,CARGO_15101,11/26/1859 03:58:07,Rustport,Irondale,73.6
2,CARGO_15101,11/26/1859 13:02:07,Irondale,Leverstorm,69.37
3,CARGO_15102,11/26/1859 19:55:34,Leverstorm,Copperhold,82.58
4,CARGO_15102,11/27/1859 09:23:42,Copperhold,Cogburg,77.73
5,CARGO_15103,11/28/1859 02:30:18,Cogburg,Steamdrift,74.17
6,CARGO_15104,11/28/1859 19:45:01,Steamdrift,Cogburg,73.59
7,CARGO_15104,11/29/1859 08:41:30,Cogburg,Irondale,79.9
8,CARGO_15104,11/29/1859 14:59:00,Irondale,Gizbourne,83.6
9,CARGO_15105,11/29/1859 22:00:28,Gizbourne,Irondale,74.88


In [None]:
// Global avg model
// Baseline: ignores the journey entirely and always predicts the mean SPEED
// observed in the training frame.
//
// NOTE: despite its name, `avgTime` is an average of the SPEED column. The
// name is kept so that other cells referring to it keep working.
let avgTime =
    let speeds = df?SPEED
    // Divide by the number of values actually present (ValueCount), as the
    // per-journey average does, rather than by RowCount. Deedle's Sum()
    // skips missing values, so dividing by RowCount would bias the mean low
    // whenever SPEED has gaps. With no gaps the result is unchanged.
    speeds.Sum() / float speeds.ValueCount

/// Model that returns the global training mean for every (A, B) pair.
let model_GlobalAvg: Model = fun _ _ -> avgTime

In [None]:
/// Scores `model` against the test CSV.
///
/// Loads the test frame, then adds two columns to it:
///   * `prediction` - `model` applied to each row's `A` and `B` values;
///   * `error`      - the squared difference between `SPEED` and `prediction`.
/// Returns the augmented test frame. The CSV is re-read on every call.
let evaluate model =
    let testFrame = Frame.ReadCsv("/workspaces/transport tycoon/23/s02e03_test.csv")

    // One prediction per test row, keyed by the frame's row index.
    let predictions =
        testFrame.Rows
        |> Series.mapValues (fun row ->
            let stationA = row.GetAs<string>("A")
            let stationB = row.GetAs<string>("B")
            model stationA stationB)
    testFrame?prediction <- predictions

    // Must run after `prediction` has been attached: the rows are re-read
    // here so that `row?prediction` resolves.
    let squaredErrors =
        testFrame.Rows
        |> Series.mapValues (fun row -> (row?SPEED - row?prediction) ** 2.0)
    testFrame?error <- squaredErrors

    testFrame

let eval_GlobalAvg = model_GlobalAvg |> evaluate
eval_GlobalAvg

Unnamed: 0_level_0,TRANSPORT,TIME,A,B,SPEED,prediction,error
Unnamed: 0_level_1,System.String,System.DateTime,System.String,System.String,System.Decimal,System.Double,System.Double
0,CARGO_17490,12/13/1859 10:56:05,Gizbourne,Rustport,72.17,77.17352508178855,25.03526324408714
1,CARGO_17491,12/14/1859 03:18:00,Rustport,Gizbourne,74.55,77.17352508178855,6.882883854773655
2,CARGO_17492,12/14/1859 15:18:37,Gizbourne,Leverstorm,72.1,77.17352508178855,25.740656755537614
3,CARGO_17493,12/15/1859 00:56:21,Leverstorm,Irondale,65.32,77.17352508178855,140.5060568645905
4,CARGO_17494,12/15/1859 05:17:14,Irondale,Copperhold,79.35,77.17352508178855,4.737043069603494
5,CARGO_17495,12/15/1859 10:33:25,Copperhold,Irondale,65.47,77.17352508178855,136.97249934005382
6,CARGO_17495,12/15/1859 17:25:31,Irondale,Gizbourne,76.58,77.17352508178855,0.3522720227121121
7,CARGO_17496,12/16/1859 00:22:25,Gizbourne,Irondale,75.7,77.17352508178855,2.171276166659957
8,CARGO_17497,12/16/1859 06:23:09,Irondale,Gizbourne,87.49,77.17352508178855,106.42965473808574
9,CARGO_17498,12/16/1859 13:27:25,Gizbourne,Irondale,74.39,77.17352508178855,7.748011880945975


In [None]:
// Mean squared error of the global-average baseline: the total of the
// per-row squared errors divided by the number of test rows.
let mse =
    let squaredErrors = eval_GlobalAvg?error
    let rowCount = float eval_GlobalAvg.RowCount
    squaredErrors.Sum() / rowCount
$"MSE is {mse}"

MSE is 60.87872530180129

In [None]:
// Per-Journey avg model
// Re-key the training rows by journey. Each `groupRowsBy` puts its column's
// value in front of the existing row key, so after grouping by "A" and then
// by "B" the key reads (B value, (A value, original row index)).
let groupedByJourney: Frame<(string * (string * int)), string> =
    df |> Frame.groupRowsBy "A" |> Frame.groupRowsBy "B"

// Mean SPEED per journey, pivoted into a table. `Frame.ofValues` consumes
// (row key, column key, value) triples, so the resulting frame has one row
// per B station and one column per A station, as the rendered output shows.
// Cells for (A, B) pairs absent from the training data stay empty.
let journeyAvg =
    let meanSpeedPerJourney =
        groupedByJourney?SPEED
        |> Series.applyLevel
            (fun (stationB, (stationA, _)) -> (stationB, stationA))
            (fun speeds -> speeds.Sum() / (float speeds.ValueCount))
    meanSpeedPerJourney
    |> Series.map (fun (stationB, stationA) meanSpeed -> (stationB, stationA, meanSpeed))
    |> Series.values
    |> Frame.ofValues
journeyAvg

Unnamed: 0_level_0,Steamdrift,Irondale,Cogburg,Gizbourne,Rustport,Copperhold,Leverstorm
Unnamed: 0_level_1,System.Double,System.Double,System.Double,System.Double,System.Double,System.Double,System.Double
Rustport,81.26016666666665,81.29276470588238,78.18102040816328,77.93559322033896,,,
Cogburg,73.52897435897435,79.76871886120996,,,74.86824561403509,75.44949494949492,
Gizbourne,,81.81487179487183,,,79.50499999999998,,76.73847826086958
Leverstorm,,72.00929577464791,,72.40650000000001,,74.50417322834645,
Copperhold,,75.15352941176471,77.2961616161616,,,,75.25553719008262
Irondale,,,80.0817894736842,81.24081818181813,78.35572289156624,75.06359116022095,74.70814814814814
Steamdrift,,,73.68425992779785,,81.98290909090908,,


In [None]:
/// Per-journey model: predicts the mean training SPEED recorded for the
/// journey from `a` to `b`.
///
/// `journeyAvg` has one column per A station and one row per B station, so
/// the lookup is column `a`, then row `b`. The table is sparse: not every
/// (A, B) pair occurs in the training data. For a pair with no recorded
/// mean (unknown station or empty cell) the model falls back to the global
/// average `avgTime` instead of raising, so a single unseen journey in the
/// test set does not abort the whole evaluation.
let model_JourneyAvg: Model =
    fun a b ->
        let column = journeyAvg.TryGetColumn<float>(a, Lookup.Exact)
        if column.HasValue then
            let journeyMean = column.Value.TryGet(b)
            if journeyMean.HasValue then journeyMean.Value else avgTime
        else
            avgTime

let eval_JourneyAvg = evaluate model_JourneyAvg
eval_JourneyAvg

Unnamed: 0_level_0,TRANSPORT,TIME,A,B,SPEED,prediction,error
Unnamed: 0_level_1,System.String,System.DateTime,System.String,System.String,System.Decimal,System.Double,System.Double
0,CARGO_17490,12/13/1859 10:56:05,Gizbourne,Rustport,72.17,77.93559322033896,33.24206518241854
1,CARGO_17491,12/14/1859 03:18:00,Rustport,Gizbourne,74.55,79.50499999999998,24.55202499999984
2,CARGO_17492,12/14/1859 15:18:37,Gizbourne,Leverstorm,72.1,72.40650000000001,0.09394225000000857
3,CARGO_17493,12/15/1859 00:56:21,Leverstorm,Irondale,65.32,74.70814814814814,88.13732565157748
4,CARGO_17494,12/15/1859 05:17:14,Irondale,Copperhold,79.35,75.15352941176471,17.610365397923808
5,CARGO_17495,12/15/1859 10:33:25,Copperhold,Irondale,65.47,75.06359116022095,92.03699134946962
6,CARGO_17495,12/15/1859 17:25:31,Irondale,Gizbourne,76.58,81.81487179487183,27.40388270874468
7,CARGO_17496,12/16/1859 00:22:25,Gizbourne,Irondale,75.7,81.24081818181813,30.70066612396631
8,CARGO_17497,12/16/1859 06:23:09,Irondale,Gizbourne,87.49,81.81487179487183,32.207080144641175
9,CARGO_17498,12/16/1859 13:27:25,Gizbourne,Irondale,74.39,81.24081818181813,46.933709760329826


In [None]:
// Mean squared error of the per-journey model: the total of the per-row
// squared errors divided by the number of test rows.
let mse =
    let squaredErrors = eval_JourneyAvg?error
    let rowCount = float eval_JourneyAvg.RowCount
    squaredErrors.Sum() / rowCount
$"MSE is {mse}"

MSE is 52.551138978521806