In [None]:
# Let's load some helpful packages
#  For dealing with tabular data
using DataFrames, DataFramesMeta
#  Interacting with the web
using JSON, Requests
#  and doing some proper statistics
using Distributions, GLM, StatsBase

# We're also going to create some helper functions to simplify the path structure
"""
    getdata(s, datekey=nothing, datestr="yyyy-mm-dd")

Read the file `s` into a `DataFrame` with `readtable`.

If `datekey` is given, the column `df[datekey]` is parsed element-wise with
`Dates.Date(d, datestr)` and stored back as a `DataArray{Date, 1}`. With the default
`datekey=nothing` the table is returned exactly as read.
"""
function getdata(s, datekey=nothing, datestr="yyyy-mm-dd")
    df = readtable(s)
    # `nothing` is a singleton: test identity (`!==`) rather than equality (`==`).
    if datekey !== nothing
        df[datekey] = convert(DataArray{Date, 1}, map(d->Dates.Date(d, datestr), df[datekey]))
    end
    return df
end

In [None]:
"""
    This function imputes missing auction values for the reference commodity products.

    Values may be missing for a number of reasons, including when the initial offer
    quantity is under-subscribed.
"""
function get_imputed_gdt()
    # Auction results; the :Date column is parsed as day/month/year.
    gdt = getdata("gdt_auctions.csv", :Date, "dd/mm/yyyy")

    # ---- BMP ----------------------------------------------------------------
    # Park the raw string column under a temporary name while :BMP is rebuilt.
    rename!(gdt, :BMP, :BMPold)
    # Rows with an observed price train a regression of BMP on AMF, SMP and WMP.
    bmp_known = @where(gdt, :BMPold .!= "n.a.")
    bmp_known[:BMPold] = convert(DataArray{Float64}, map(s->parse(Float64, s), bmp_known[:BMPold]))
    bmp_fit = fit(LinearModel, @formula(BMPold~AMF+SMP+WMP), bmp_known)

    # Rows recorded as "n.a." receive the fitted value; the rest keep their parsed price.
    bmp_unknown = @where(gdt, :BMPold .== "n.a.")
    bmp_is_missing = gdt[:BMPold] .== "n.a."
    bmp_is_present = gdt[:BMPold] .!= "n.a."
    gdt[:BMP] = 0.0
    gdt[:BMP][bmp_is_missing] = predict(bmp_fit, bmp_unknown)
    gdt[:BMP][bmp_is_present] = map(i->parse(Float64, i), gdt[:BMPold][bmp_is_present])

    # ---- BUT ----------------------------------------------------------------
    # Same procedure as for BMP, with the same three regressors.
    rename!(gdt, :BUT, :BUTold)
    but_known = @where(gdt, :BUTold .!= "n.a.")
    but_known[:BUTold] = convert(DataArray{Float64}, map(s->parse(Float64, s), but_known[:BUTold]))
    but_fit = fit(LinearModel, @formula(BUTold~AMF+SMP+WMP), but_known)

    but_unknown = @where(gdt, :BUTold .== "n.a.")
    but_is_missing = gdt[:BUTold] .== "n.a."
    but_is_present = gdt[:BUTold] .!= "n.a."
    gdt[:BUT] = 0.0
    gdt[:BUT][but_is_missing] = predict(but_fit, but_unknown)
    gdt[:BUT][but_is_present] = map(i->parse(Float64, i), gdt[:BUTold][but_is_present])

    # Drop the temporary raw columns before handing the table back.
    for rawcol in [:BUTold, :BMPold]
        delete!(gdt, rawcol)
    end
    return gdt
end

In [None]:
"""
    Calculate the product mix of the reference commodity products (RCP) in Fonterra's basket.
"""
function get_product_mix()
    # Returns a DataFrame with columns :month, :product and :weight.
    # NOTE: :month counts from the first month of the "Jun-Aug" period (months 1-3
    # carry the "Jun-Aug" weights, 10-12 the "Mar-May" weights); it is not the
    # calendar month.
    qprod = getdata("fonterra_production.csv")

    # Express each product's volume as a fraction of :Supply (:WMP -> :WMPp, ...).
    for key in [:WMP, :SMP, :BUT, :AMF, :BMP]
        qprod[Symbol("$(key)p")] = qprod[key] ./ qprod[:Supply]
    end

    # Translate the integer :Period column (used as an index into `periods`) to a label.
    periods = ["Jun-Aug", "Sep-Nov", "Dec-Feb", "Mar-May"]
    qprod[:sPeriod] = ""
    @byrow! qprod :sPeriod = periods[:Period]
    qprod[:SeasonPeriod] = map((s,p)->string(s, " ",p), qprod[:Season], qprod[:sPeriod])
    # Keep only the share columns and give them back the plain product names.
    qprod = @select(qprod, :Season, :sPeriod, :SeasonPeriod, :WMPp, :SMPp, :BUTp, :AMFp, :BMPp)
    names!(qprod, [:Season, :sPeriod, :SeasonPeriod, :WMP, :SMP, :BUT, :AMF, :BMP])

    # Long format, then the mean share of every (product, period) pair.
    production_data = melt(qprod, [:Season, :sPeriod, :SeasonPeriod])
    product_lambda = by(production_data, [:variable, :sPeriod]) do _df
            mean(_df[:value])
        end
    names!(product_lambda, [:product, :period, :weight])

    # Expand the four period weights to twelve months, three consecutive months per
    # period, in the same order as `periods`.
    product_mix = join(DataFrame(
        period = repeat(periods, inner=3),
        month  = 1:12
    ), product_lambda, on=[:period])
    delete!(product_mix, :period)
    return product_mix
end

In [None]:
"""
    Calculate the product-mix weighted spot price in each Trading period
"""
function getspotprice()
    # Returns a DataFrame with columns :Date, :TE and :spot, sorted by :Date, where
    # :spot is the sum over products of auction price times product-mix weight.
    gdt = get_imputed_gdt()
    # get_product_mix() keys its weights by month-of-season: months 1-3 carry the
    # "Jun-Aug" weights, ..., months 10-12 the "Mar-May" weights. Joining on the
    # calendar month would pair e.g. January auctions with the "Jun-Aug" mix, so
    # convert each auction date to its season month (June -> 1, ..., May -> 12).
    gdt[:month] = _season_month.(gdt[:Date])
    product_mix = get_product_mix()

    # One weight column per product. The loop below relies on the join keeping the
    # price columns under their own names (:AMF, ...) and exposing the clashing
    # weight columns with a `_1` suffix (:AMF_1, ...).
    weighted = join(gdt, unstack(product_mix, :product, :weight), on=[:month])
    weighted[:spot] = 0.0
    for product in [:AMF, :SMP, :WMP, :BMP, :BUT]
        weighted[:spot] += weighted[product] .* weighted[Symbol(product, :_1)]
    end
    sort!(weighted, cols=:Date)
    return weighted[:, [:Date, :TE, :spot]]
end

# Month of the June-May season that `date` falls in: June -> 1, ..., May -> 12.
_season_month(date) = mod(Dates.month(date) - 6, 12) + 1

In [None]:
"""
    get_sales_curve()

Build the sales curve from "fonterra_contracts.csv".

The cumulative figures of every season column are differenced row by row, scaled by
the season total, and averaged across seasons for each value of `:Month`. The first
month is dropped, the remaining values are normalised to sum to one, each is split
evenly into four consecutive entries, and the result is rounded to 5 decimal places.
"""
function get_sales_curve()
    contracts = getdata("fonterra_contracts.csv")
    # First column is the month; the others are one season each.
    names!(contracts, map(Symbol, ["Month", "2011/12", "2012/13", "2013/14", "2014/15", "2015/16"]))

    # Cumulative -> per-row increments. Walk upwards from the last row so that the
    # row above still holds its cumulative value when it is subtracted.
    nrows, ncols = size(contracts)
    for row in nrows:-1:2, col in 2:ncols
        contracts[row, col] -= contracts[row - 1, col]
    end

    # Long format (:Month, :variable, :value) with the season total attached as :x1.
    long = melt(contracts, :Month)
    season_totals = by(long, :variable) do _df
        sum(_df[:value])
    end
    long = join(long, season_totals, on=[:variable])
    long[:Proportion] = long[:value] ./ long[:x1]

    sales_data = @select(long, :Month, :Proportion, :variable)
    rename!(sales_data, :variable, :Season)

    # Mean proportion per month across seasons; the first month is discarded.
    months = unique(sales_data[:Month])
    monthly_mean = map(m->mean(@where(sales_data, :Month.==m)[:Proportion]), months)
    sales_curve = monthly_mean[2:end]
    sales_curve ./= sum(sales_curve)
    # Spread every monthly share evenly over four consecutive entries.
    sales_curve = repeat(sales_curve, inner=4) / 4
    sales_curve .= round.(sales_curve, 5)
    return sales_curve
end

# Persist the sales curve to "salescurve.json".
open(io -> write(io, JSON.json(get_sales_curve())), "salescurve.json", "w")
# Total of the rounded curve, shown as the cell result.
sum(get_sales_curve())

In [None]:
using Plots, StatPlots
# Short names for the Plots length units used for margins and font sizes.
const mm = Plots.mm
const pt = Plots.pt
# Two sizes of the "times" font: small for ticks and legend, large for titles and guides.
fntsm = Plots.font("times", 10.0pt)
fntlg = Plots.font("times", 12.0pt)
# Defaults picked up by every plot created after this cell.
default(
    titlefont     = fntlg,
    guidefont     = fntlg,
    tickfont      = fntsm,
    legendfont    = fntsm,
    size          = (800,600),
    left_margin   = 10mm,
    bottom_margin = 7.5mm,
    top_margin    = 0mm,
    right_margin  = 0mm,
)
# Select the GR backend.
gr()

In [None]:
"""
    generatemodel1()

Build "model one": a random walk whose weekly increments are resampled from the
historical changes between consecutive forecasts in "fonterra_forecasts.csv".

Side effects:
- reseeds the global RNG with `srand(845)`;
- saves the two-panel figure "modeloneprices.pdf";
- prints the mean of all pooled changes;
- writes "model_one_observations.json", "model_one_probabilities.json" and
  "model_one_futures.json".

Returns the tuple `(sampler, dist)`. `dist[wk]` holds the changes observed in week
`wk` (padded with zeros to at least 8 entries) and `sampler()` returns a 52-element
simulated path whose first element is 6.0.
"""
function generatemodel1()
    forecasts = getdata("fonterra_forecasts.csv", :Date)
    # Change between consecutive forecasts within a season, dated at the later forecast.
    changes = by(forecasts, :Season) do d
        DataFrame(
            change = d[:Forecast][2:end] - d[:Forecast][1:end-1],
            Date   = d[:Date][2:end],
        )
    end
    # Weeks (rounded to the nearest integer) between 1 June of :Season and the date
    # of the change, clamped to 1..52.
    changes[:week] = clamp.(round.(Int, Dates.value.(changes[:Date] - Dates.Date.(changes[:Season], 6, 1)) / 7), 1, 52)

    # Empirical distribution of changes for every week of the season.
    dist = Vector{Float64}[]
    for wk in 1:52
        push!(dist, Float64[])
        tmp = changes[changes[:week] .== wk, :]
        for chg in tmp[:change]
            push!(dist[wk], chg)
        end
        # Pad with "no change" observations so that every week has at least 8 entries.
        for i in length(dist[wk])+1:8
            push!(dist[wk], 0.0)
        end
    end

    # One sample path of cumulative changes; y[1] stays 0, so dist[1] is never drawn.
    function simulate(dist)
        y = zeros(52)
        for i in 2:52
            y[i] = y[i-1] + rand(dist[i])
        end
        y
    end
    plotsimulation!(dist; color="gray", alpha=0.5, w=1) = plot!(1:52, 6+simulate(dist),c=color,alpha=alpha, w=w)

    # Panel (b): 50 gray sample paths plus one highlighted in red, from a fixed seed.
    srand(845)
    plot(size=(750,500), left_margin=10mm, bottom_margin=10mm, top_margin=5mm)
    for i in 1:50
        plotsimulation!(dist)
    end
    plotsimulation!(dist, color="red", alpha=1, w=2)
    plot!(title="(b)")
    plt1 = plot!(ylabel="Forecast end-of-season milk price\n(\$/kg)", xlabel="Week of Year\n",legend=false, ylims=(3, 9))

    # Panel (a): the historical forecasts, one step line per season. `forecasts` has
    # not been modified since it was read, so it is reused here.
    plot(size=(750,500), left_margin=10mm, bottom_margin=10mm, right_margin=10mm, top_margin=5mm)
    plot!(forecasts, :Date, :Forecast, group=:Season, linetype=:step, w=2, c="#00467F")
    plot!(ylabel="Forecast end-of-season milk price\n(\$/kg)", xlabel="Date of forecast\n", ylims=(3, 9))
    # Mark a point and write its value 250 x-units (days) to the right of it.
    function annotateplot!(date, value)
        x = Int(Dates.value(date))
        annotate!([(x+250, value, "$(rpad(value, 4, "0"))")])
        scatter!([date], [value], c="#00467F")
    end
    # Annotate the last forecast of every season.
    by(forecasts, :Season) do d
        annotateplot!(d[:Date][end], d[:Forecast][end])
    end
    plot!(title="(a)", xticks=(Dates.Date.(2010:2:2017, 1,1), 2010:2:2017))
    plt2 = plot!(legend=false)

    plt3 = plot(plt2, plt1, layout=(1,2), size=(1000,375))
    savefig("modeloneprices.pdf")
    println("E[x] = $(mean(vcat(dist...)))")

    # Distinct values of `x` and their relative frequencies.
    function discretedistribution(x)
        observations = unique(x)
        probabilities = [sum(x .== o) / length(x) for o in observations]
        observations, probabilities
    end
    open("model_one_observations.json", "w") do io
        write(io, JSON.json([round.(discretedistribution(d)[1], 2) for d in dist]))
    end
    open("model_one_probabilities.json", "w") do io
        write(io, JSON.json([discretedistribution(d)[2] for d in dist]))
    end
    # For weeks 1..51: the mean of all changes pooled over the remaining weeks.
    open("model_one_futures.json", "w") do io
        write(io, JSON.json([mean(vcat(dist[i+1:end]...)) for i in 1:51]))
    end

    # Parenthesised to make explicit that a (function, data) tuple is returned.
    return (() -> 6.0 + simulate(dist)), dist
end
model1, dist = generatemodel1()

In [None]:
"""
    generatemodel2()

Build "model two": a first-order autoregression of the product-mix weighted spot
price, simulated by resampling the regression residuals.

Side effects: reseeds the global RNG with `srand(285)` and saves "spotprice.pdf"
(the historical series, its mean, 50 gray sample paths and one red path).

Returns a zero-argument function that simulates a 52-step path starting at 6.0.
"""
function generatemodel2()
    spot = getspotprice()
    # NOTE(review): presumably converts a USD/tonne price to NZD/kg (0.75 exchange
    # rate, 1000 kg per tonne) and deducts a 2.10 cost allowance - confirm.
    spot[:nzd] = spot[:spot] ./ 0.75 / 1000 - 2.10
    # Regress each price on the previous one (the first observation is not used).
    model = fit(LinearModel, @formula(y~x1), DataFrame(
                    y  = (spot[:nzd][3:end]),
                    x1 = (spot[:nzd][2:end-1]),
                )
            )
    # The residuals do not change after fitting, so compute them once rather than
    # on every simulated step.
    resid = GLM.residuals(model)

    # One-step-ahead prediction: intercept plus the coefficients applied to the most
    # recent values of `x`, newest first, ending at index `idx`.
    function estimate(model, x, idx)
        dot(coef(model), vcat(1, x[idx:-1:(idx - length(coef(model))+2)]))
    end

    # Simulate N steps starting from `x0`. With `islog` the recursion runs on
    # log(x0) and the path is exponentiated at the end. `spot` is not used here.
    function simulate!(spot, x0, model, N, islog=true)
        zz = zeros(N)
        if islog
            zz[1:length(x0)] .= log.(x0)
        else
            zz[1:length(x0)] .= x0
        end
        for i in (length(x0)+1):N
            zz[i] = estimate(model, zz, i-1) + rand(resid)
        end
        if islog
            zz .= exp.(zz)
        end

        zz
    end
    # Simulate one path, draw it faintly against the auction dates, and return it.
    function plottrace!(spot, x0, model, N, islog=true)
        zz = simulate!(spot, x0, model, N, islog)
        plot!(spot[:Date], zz, linealpha=0.2, color="gray")
        zz
    end

    plot(size=(750,500), left_margin=5mm, bottom_margin=10mm)
    plot!(spot, :Date, :nzd, legend=false, linetype=:step, w=2, color="#00467F")
    plot!(ylims=(0, 12), xlabel="Date of Global Dairy Trade Auction\n", ylabel="Spot Price\n(\$/kg)")
    # Dashed line at the historical mean.
    hline!([mean(spot[:nzd])], linestyle=:dash, w=2, color="#e65100")

    # Fixed seed so the figure is reproducible.
    srand(285)
    for i in 1:50
        plottrace!(spot, [6.3], model, size(spot, 1), false)
    end
    # One more path, drawn again in red on top of its gray trace.
    zz = plottrace!(spot, [6.3], model, size(spot, 1), false)
    plot!(spot[:Date], zz, linealpha=1, color="red", linetype=:step)
    plt = plot!(legend=false, ylims=(0, 12))
    savefig("spotprice.pdf")
    return () -> simulate!(spot, [6.0], model, 52, false)
end
model2 = generatemodel2();

In [None]:
# 1000 end-of-season prices from model one: the last element of each simulated path.
model_one_distribution = [model1()[end] for i in 1:1000]
# 1000 prices from model two: each simulated path weighted by the sales curve.
# The curve is loaded once; calling get_sales_curve() inside the comprehension would
# re-read and re-process the CSV on every one of the 1000 draws.
model_two_distribution = let sales_curve = get_sales_curve()
    [dot(model2(), sales_curve) for i in 1:1000]
end
plot(size=(750,500), left_margin=10mm, bottom_margin=10mm, top_margin=5mm)
density!(model_one_distribution, label="Model One", w=3, color="#00467F", alpha=1, fillalpha=0.75)
density!(model_two_distribution, label="Model Two", w=3, linestyle=:dot, c="#e65100", fillalpha=0.75, alpha=1)
# NOTE(review): hard-coded values, presumably the realised end-of-season prices - confirm source.
density!([6.1, 7.6, 6.08, 5.84, 8.40, 4.40, 3.90, 6.13], label="Actual", w=2,c="black")
plt2 = plot!(xlabel="End-of-season milk price \$/kg\n", ylabel="", yticks=false)
savefig("model_distributions.pdf")