In [None]:
using Plots, StatsPlots
using DataFrames
using CSV
using Statistics

In [None]:
# Read the combined cluster results into a DataFrame.
combpath = "../data/combined-cluster.csv"
df = DataFrame(CSV.File(combpath))

# Print the column names and the element type of every column as a quick
# schema check; the trailing `;` keeps the last value from being echoed.
@show names(df)
@show eltype.(eachcol(df));

In [None]:
# Flatten sweep data

# Parse a delimited list such as "[1.0,2.0]" into a Vector{Float64}.
# `chop` with `head=1` (and its default `tail=1`) strips one character from
# each end of the string; what remains is split on commas.
function strtovec(str)
    body = chop(str, head=1)
    return parse.(Float64, split(body, ","))
end

# Return the last `keep` entries of `vec`, with `rawlen` marking the end of the window.
tailslice(vec, rawlen, keep) = vec[(rawlen - keep + 1):rawlen]

df.lnsvec = strtovec.(df.lns)
df.misvec = strtovec.(df.mis)

# The metric code has a bug that leaves the first L elements of `mis` and `lns`
# undefined, so only the last L - 2 elements of each vector are kept.
# `rawlen` is measured on `lnsvec` and reused as-is for `misvec`.
df.sweeplen = df.L .- 2
df.rawlen = length.(df.lnsvec)
df.lnsvec = tailslice.(df.lnsvec, df.rawlen, df.sweeplen)
df.misvec = tailslice.(df.misvec, df.rawlen, df.sweeplen)

# Explode the df so there is only one mi and ln value per record: attach the
# list of sweep positions 1:sweeplen to each row, flatten on it, then pick the
# vector entry that belongs to each position.
df.sweepind = [collect(1:n) for n in df.sweeplen]
df = flatten(df, :sweepind)
df.ln = map(getindex, df.lnsvec, df.sweepind)
df.mi = map(getindex, df.misvec, df.sweepind)

# Drop the raw strings and the intermediate vector/length columns.
select!(df, Not([:lns, :mis, :lnsvec, :misvec, :rawlen]))

In [None]:
# Restrict the analysis to the L == 25 rows and aggregate every metric per
# parameter combination and `t`.
dfL = df[df.L .== 25, :]
params = [:T, :L, :inner, :noise, :outer]
metrics = [:trace, :svn, :mi, :ln]

# `df` has been exploded with `flatten` on `sweepind`, so the per-record values
# `trace` and `svn` are repeated once per sweep position. Means are unaffected
# by that repetition, but `std` (n - 1 normalisation) would count each record
# several times and understate the spread. Their std is therefore computed from
# one row per record (the `sweepind == 1` row). `mi` and `ln` differ per sweep
# position, so they are aggregated over all rows.
recordmetrics = [:trace, :svn]
sweepmetrics = [:mi, :ln]
recordstd(x, sweepind) = std(x[sweepind .== 1])

# Output columns keep the original names and order:
# <metric>_mean for all metrics, followed by <metric>_std for all metrics.
means = combine(
    groupby(dfL, [params..., :t]),
    metrics .=> mean,
    [[m, :sweepind] => recordstd => Symbol(m, :_std) for m in recordmetrics],
    sweepmetrics .=> std,
)
sort!(means, [:t, :L])

In [None]:
# Mean trace against `t`, one series per `inner` value, with `trace_std` drawn
# as a ribbon around each line. The legend title labels the grouping variable,
# matching the SvN plot.
@df means plot(
    :t,
    :trace_mean,
    group=:inner,
    ribbon=:trace_std,
    title="Trace",
    xlabel="T",
    ylabel="Trace",
    leg_title="Inner",
)

In [None]:
# Mean SvN against `t`, split into one series per `inner` value, with
# `svn_std` passed as the ribbon width.
@df means plot(
    :t,
    :svn_mean,
    group=:inner,
    ribbon=:svn_std,
    title="SvN",
    xlabel="T",
    ylabel="SvN",
    leg_title="Inner",
)

## Sweep data

In [None]:
# Aggregate every metric per parameter combination, `t` and sweep position, so
# each `sweepind` gets its own mean and std.
sweepmeans = combine(
    groupby(dfL, vcat(params, :t, :sweepind)),
    metrics .=> mean,
    metrics .=> std,
)
sort!(sweepmeans, [:t, :sweepind, :L])

In [None]:
# Keep only the sweep statistics at t == 8 for inner == 1, then plot the mean
# ln at each sweep position with `ln_std` passed as the ribbon.
data = filter([:t, :inner] => ((t, inner) -> t == 8 && inner == 1), sweepmeans)
@df data scatter(
    :sweepind,
    :ln_mean,
    ribbon=:ln_std,
    group=:inner,
)