In [16]:
using OnlineStats, StatsBase, MLDataPattern, ProgressMeter, Plots
gr(size = (600, 400), fmt = :png)

Plots.GRBackend()

In [2]:
struct TraceBounds
    low::Vector
    mid::Vector
    high::Vector
    label::String
    function TraceBounds(l, m, h, label)
        new(l, m, h, label)
    end
end

In [3]:
@recipe function f(o::TraceBounds)
    ribbon --> (o.low, o.high)  # relative to o.mid
    fillalpha --> .1
    label --> o.label
    o.mid
end

In [4]:
function run_sim(x, y, loss, penalty, algorithm, rate, nreps=100)
    out = zeros(size(x, 1), nreps)
    @showprogress for rep in 1:nreps
        x2, y2 = shuffleobs((x, y), obsdim=1)
        o = StatLearn(size(x, 2), loss, penalty, algorithm, rate)
        for (i, ob) in enumerate(zip(eachrow(x2), y2))
            fit!(o, ob)
            out[i, rep] = OnlineStats.objective(o, x, y)
        end
    end
    mid = mapslices(median, out, 2)
    low = mapslices(x -> quantile(x, .05), out, 2)
    high = mapslices(x -> quantile(x, .95), out, 2)
    plot(TraceBounds(vec(mid - low), vec(mid), vec(high - mid), string(rate)))
end

function compare_sim(x, y, loss, penalty, alg, rates, nreps=100; kw...)
    p = plot(run_sim(x,y,loss,penalty,alg,rates[1],nreps); 
        title = string(alg), kw...)
    for i in 2:length(rates)
        plot!(p,run_sim(x, y, loss, penalty, alg, rates[i], nreps))
    end
    p
end

LoadError: [91msyntax: "\" is not a unary operator[39m

# Million Song Dataset

In [17]:
x = readcsv("YearPredictionMSD.txt")
y = x[:, 1]
x[:, 1] = 1.0
z = zscore(x, 1)
n, p = size(x)

(515345, 91)

In [26]:
@time o = fit!(LinReg(), (z,y))

  3.459677 seconds (530.11 k allocations: 16.564 MiB)


LinReg: n=515345 | value=[1998.4, 5.31165, -2.90537, -1.53944, 0.0547346, -0.337125, -2.82963, -0.0982026, -0.803531, -0.745805  …  -0.026573, -0.097789, 0.167741, 0.0947171, 0.422414, 0.012291, 0.202668, -0.416373, -0.256174, -0.0356841]

In [28]:
yhat = predict(o, x)

515345-element Array{Float64,1}:
  3385.42
  4698.12
  3142.09
  4564.56
  3449.68
  3576.75
  4595.38
  4220.65
  3368.89
  5570.32
  6171.21
  4001.18
  7654.67
     ⋮   
  5472.9 
  3466.08
 11376.0 
  3420.91
  7351.56
  6342.17
  3555.33
  3846.3 
  3394.25
  3120.71
  5185.15
  3307.85

In [7]:
# n, p = 1000, 5
# x = randn(n, p)
# y = x * linspace(-1, 1, p) + randn(n)


# l = .5L2DistLoss()
# p = NoPenalty()
# lrs = LearningRate.(.5:.2:.9)

# plot(
#     compare_sim(x, y, l, p, SGD(),     lrs),
#     compare_sim(x, y, l, p, ADAGRAD(), lrs),
#     compare_sim(x, y, l, p, RMSPROP(), lrs),
#     compare_sim(x, y, l, p, ADAM(),    lrs),
#     compare_sim(x, y, l, p, ADAMAX(),  lrs),
#     compare_sim(x, y, l, p, MSPI(),    lrs);
#     ylim = (0, 1.5), legend=false
# )
    

LoadError: [91mUndefVarError: compare_sim not defined[39m