In [1]:
using Plots
pyplot()
using ColoringNames
using CSVFiles
using DataFrames
pyplot()

Plots.PyPlotBackend()

In [2]:
# Suffix appended to every log directory name so separate runs do not share logs.
runnum = "8"

"""
    mklogdir(name)

Create (if needed) the directory `<pwd>/logs/dist/<name><runnum>` and return its path.
"""
function mklogdir(name)
    target = joinpath(pwd(), "logs", "dist", name * runnum)
    mkpath(target)
    return target
end

mklogdir (generic function with 1 method)

In [3]:
"""
    get_saveplot(mdlname)

Create the figure directory `demo/dist/<mdlname>` and return a two-argument
function `(mdl, colorname)` that renders `plot_query(mdl, colorname)` and saves
it as `<colorname>.png` inside that directory.
"""
function get_saveplot(mdlname)
    outdir = joinpath("demo", "dist", mdlname)
    mkpath(outdir)
    return (mdl, colorname) -> begin
        target = joinpath(outdir, colorname*".png")
        savefig(plot_query(mdl, colorname), target)
    end
end


"""
    qualitative_demo(mdl, do_oov=true; demofun=display∘plot_query)

Run `demofun(mdl, name)` over a fixed list of colour names, in a fixed order.

When `do_oov` is true, a second group of names ("Gray", "gray", "grey", "Green")
and finally "ish" are queried as well. The value of the last expression is
returned: the result of `demofun(mdl, "ish")` when `do_oov` is true, else `false`.

Alternative `demofun`s that are handy here: `(mdl, name) -> nothing` to skip
output, or `(mdl, name) -> savefig(plot_query(mdl, name), name)` to save figures.
"""
function qualitative_demo(mdl, do_oov=true; demofun=display∘plot_query)
    # Names that are always queried, grouped the way they are meant to be compared.
    always_queried = (
        "brownish green", "brown", "brownish", "greenish", "greenish brown", "green",
        "bluish grey", "greyish blue",
        "pink-purple", "purple-pink",
        "brown-orange", "orange-brown",
        "orange-yellow", "yellow-orange",
        "purple", "purplish", "black", "white",
        "grey",
    )
    for colorname in always_queried
        demofun(mdl, colorname)
    end

    # Only queried when do_oov is set; note that "grey" is deliberately kept
    # here even though it was already queried above.
    if do_oov
        for colorname in ("Gray", "gray", "grey", "Green")
            demofun(mdl, colorname)
        end
    end

    return do_oov && demofun(mdl, "ish")
end

# Empirical models always run the demo with do_oov=false, i.e. the second group
# of names and "ish" are skipped (presumably because a lookup-style model has no
# entry for names it never saw -- confirm against the model definition).
function qualitative_demo(mdl::TermToColorDistributionEmpirical; kwargs...)
    return qualitative_demo(mdl, false; kwargs...)
end

qualitative_demo (generic function with 3 methods)

In [4]:
# Shared inputs used by every evaluation below.
# `many_names` is passed as `keep_words`, and the resulting encoding `enc` is
# passed on to the dataset loader so both use the same encoding.
# NOTE(review): the loaders presumably come from ColoringNames -- confirm.
const many_names = load_color_nameset()
const word_embs, vocab, enc = load_text_embeddings(keep_words=many_names)
const full_data = load_munroe_data(dev_as_train=false, dev_as_test=false, encoding_ = enc)

# Two datasets derived from the full data; the trailing `;` keeps the notebook
# from echoing them.
const ord_data = order_relevant_dataset(full_data);
const extrapo_data = extrapolation_dataset(full_data);

In [5]:
"""
    create_res_df()

Return an empty results table with a `method` label column and one `Float64`
column per evaluation metric.
"""
function create_res_df()
    return DataFrame(
        method = String[],
        perp = Float64[],
        perp_hue = Float64[],
        perp_sat = Float64[],
        perp_val = Float64[],
        mse_to_distmode = Float64[],
        mse_to_distmean = Float64[],
    )
end

# One results table per evaluation setting; rows are appended by
# `perform_evaluation` and the tables are written to CSV after each run.
full_df = create_res_df()
extrapo_df = create_res_df()
ord_df = create_res_df()

"""
    perform_evaluation(modeltype, name, smoothed)

Train and evaluate one model configuration and record the results.

`name` gets a "-smoothed" suffix when `smoothed` is true; the suffixed name is
used for logging, the log directory, the figure directory and the `method`
column of the result tables.

Steps:
1. If `doextrapo(modeltype)` is true, train on `extrapo_data` and record the
   test result in `extrapo_df` as "<name> Extrapolating".
2. Train on `full_data`; record the test result in `full_df` and save the
   qualitative demo plots.
3. Evaluate that same model on `ord_data.test` (into `ord_df`) and on
   `extrapo_data.test` (into `extrapo_df` as "<name> Non-extrapolating").
4. Write all three tables to `results/raw/*.csv`.

Returns the model trained on `full_data`.
"""
function perform_evaluation(modeltype, name, smoothed)
    name *= smoothed ? "-smoothed" : ""
    info(name)

    # Build a fresh model sized to the given dataset and train it, logging to
    # a directory named after this configuration.
    function mdlfun(cldata)
        mdl = modeltype(enc, word_embs; n_steps=size(cldata.train.terms_padded,1))
        train!(mdl, cldata, Val{smoothed}(); log_dir = mklogdir(name))
        mdl
    end


    ###################
    # Extrapolation setting: trained on the extrapolation split itself.
    if doextrapo(modeltype)
        extrapo_mdl = mdlfun(extrapo_data)
        println()
        @show res_extrapo = evaluate(extrapo_mdl, extrapo_data.test)
        res_extrapo[:method] = name * " Extrapolating"
        push!(extrapo_df, res_extrapo)
    end


    ######
    # Full setting: this model is reused for all remaining evaluations.
    full_mdl = mdlfun(full_data)
    println()
    @show res_full = evaluate(full_mdl, full_data.test)
    res_full[:method] = name
    push!(full_df, res_full)

    qualitative_demo(full_mdl; demofun=get_saveplot(name))

    #######
    println()
    @show res_ord = evaluate(full_mdl, ord_data.test)
    res_ord[:method] = name
    push!(ord_df, res_ord)


    #######
    # Same test set as the extrapolation setting, but using the fully trained model.
    @show  res_nonextrapo = evaluate(full_mdl, extrapo_data.test)
    println()
    res_nonextrapo[:method] = name * " Non-extrapolating"
    push!(extrapo_df, res_nonextrapo)


    ####
    # Make sure the output directory exists before writing: without this a
    # completed training run is lost to a failed `save` on a fresh checkout.
    mkpath("results/raw")
    save("results/raw/dist_full.csv", full_df)
    save("results/raw/dist_extrapo.csv", extrapo_df)
    save("results/raw/dist_ord.csv", ord_df)

    return full_mdl
end

"""
    perform_evaluation(modeltype, name)

Run the full evaluation twice for `modeltype`: first unsmoothed, then smoothed.
Returns the model from the smoothed run.
"""
function perform_evaluation(modeltype, name)
    perform_evaluation(modeltype, name, false)
    smoothed_mdl = perform_evaluation(modeltype, name, true)
    return smoothed_mdl
end

# Whether the extrapolation experiment should be run for a model type.
# Every model type opts in by default ...
function doextrapo(modeltype)
    return true
end

# ... except the empirical model, which is excluded.
function doextrapo(::Type{TermToColorDistributionEmpirical})
    return false
end

doextrapo (generic function with 2 methods)

In [None]:
# (model type, display name) pairs to evaluate, in this order.
namedmodels = [
    (TermToColorDistributionEmpirical, "Direct"),
    (TermToColorDistributionSOWE, "SOWE"),
    (TermToColorDistributionCNN, "CNN"),
    (TermToColorDistributionRNN, "RNN"),    
]

# Run the unsmoothed and smoothed evaluations for every model; the trailing `;`
# keeps the notebook from echoing the loop's value.
for (modeltype, name) in namedmodels
    perform_evaluation(modeltype, name)
end;

[1m[36mINFO: [39m[22m[36mDirect
[39m


res_full = evaluate(full_mdl, full_data.test) = Dict{Symbol,Any}(Pair{Symbol,Any}(:perp, 2.75356e6),Pair{Symbol,Any}(:perp_val, 175.293),Pair{Symbol,Any}(:mse_to_distmean, 0.0663561),Pair{Symbol,Any}(:perp_sat, 230.097),Pair{Symbol,Any}(:mse_to_distmode, 0.134059),Pair{Symbol,Any}(:perp_hue, 68.2684))
(h_max, s_max, v_max) = (0.20392156862745098, 0.6705882352941176, 0.49411764705882355)
(h_max, s_max, v_max) = (0.08235294117647059, 1.0, 0.5176470588235295)
(h_max, s_max, v_max) = (0.07058823529411765, 0.6196078431372549, 0.6)
(h_max, s_max, v_max) = (0.4196078431372549, 0.6588235294117647, 0.7333333333333333)
(h_max, s_max, v_max) = (0.1450980392156863, 0.9333333333333333, 0.4235294117647059)
(h_max, s_max, v_max) = (0.3411764705882353, 1.0, 0.803921568627451)
(h_max, s_max, v_max) = (0.592156862745098, 0.24705882352941178, 0.5843137254901961)
(h_max, s_max, v_max) = (0.5882352941176471, 0.27058823529411763, 0.6823529411764706)
(h_max, s_max, v_max) = (0.8392156862745098, 0.5254901960

[1m[36mINFO: [39m[22m[36mDirect-smoothed
[39m


res_full = evaluate(full_mdl, full_data.test) = Dict{Symbol,Any}(Pair{Symbol,Any}(:perp, 1.19449e6),Pair{Symbol,Any}(:perp_val, 122.143),Pair{Symbol,Any}(:mse_to_distmean, 0.0663674),Pair{Symbol,Any}(:perp_sat, 163.754),Pair{Symbol,Any}(:mse_to_distmode, 0.109937),Pair{Symbol,Any}(:perp_hue, 59.7205))
(h_max, s_max, v_max) = (0.16862745098039217, 0.6470588235294118, 0.49411764705882355)
(h_max, s_max, v_max) = (0.08627450980392157, 0.7568627450980392, 0.5490196078431373)
(h_max, s_max, v_max) = (0.058823529411764705, 0.49411764705882355, 0.5843137254901961)
(h_max, s_max, v_max) = (0.41568627450980394, 0.6470588235294118, 0.7372549019607844)
(h_max, s_max, v_max) = (0.1450980392156863, 0.9215686274509803, 0.5098039215686274)
(h_max, s_max, v_max) = (0.33725490196078434, 0.996078431372549, 0.803921568627451)
(h_max, s_max, v_max) = (0.5803921568627451, 0.20784313725490197, 0.6980392156862745)
(h_max, s_max, v_max) = (0.5490196078431373, 0.29411764705882354, 0.6862745098039216)
(h_max, 

[1m[36mINFO: [39m[22m[36mSOWE
[39m2018-06-18 18:39:31.589094: I tensorflow/core/platform/cpu_feature_guard.cc:140] Your CPU supports instructions that this TensorFlow binary was not compiled to use: SSE4.1 SSE4.2


# Oracle


In [None]:
# Oracle baseline: load the data with the dev set used both as training and as
# test data, fit an unsmoothed empirical model on it, and evaluate on that same
# dev set.
# NOTE(review): the argument 256 is presumably the number of histogram bins --
# confirm against the TermToColorDistributionEmpirical constructor.
oracle_cldata = load_munroe_data(dev_as_train=true, dev_as_test=true)#, encoding_ = enc)
oracle_noml_unsmoothed =  TermToColorDistributionEmpirical(256)
train!(oracle_noml_unsmoothed, oracle_cldata, Val{false}(); remove_zeros_hack=false)
@show evaluate(oracle_noml_unsmoothed, oracle_cldata.dev)

In [None]:
# Save the qualitative demo plots for the oracle model under
# demo/dist/oracle_noml_unsmoothed.
qualitative_demo(oracle_noml_unsmoothed,
    demofun = get_saveplot("oracle_noml_unsmoothed"))

In [None]:
# Spot check: plot the oracle model's output for "black".
plot_query(oracle_noml_unsmoothed, "black")

In [None]:
# Inspect the order-relevant name pairs computed from the training split.
ColoringNames.order_relevant_name_pairs(full_data.train)

In [None]:
# Scratch arithmetic. NOTE(review): the meaning of 38 is not shown in this
# notebook -- possibly the number of pairs from the previous cell; confirm.
38*2

In [None]:
# Index of the first dev-set entry whose text is exactly "light blue-green".
ind = findfirst(full_data.dev.texts.=="light blue-green")

In [None]:
"""
    mdlfun(modeltype, cldata, smoothed; name=...)

Construct a `modeltype` model sized to `cldata`, train it on `cldata`
(smoothed or not, per `smoothed`), and return the trained model.

`name` selects the log directory (via `mklogdir`). It defaults to the model
type's name, with a "-smoothed" suffix when `smoothed` is true. It used to be
read from an undefined variable `name`, which made every call fail.
"""
function mdlfun(modeltype, cldata, smoothed;
        name = string(modeltype) * (smoothed ? "-smoothed" : ""))
    mdl = modeltype(enc, word_embs; n_steps=size(cldata.train.terms_padded, 1))
    train!(mdl, cldata, Val{smoothed}(); log_dir = mklogdir(name))
    return mdl
end

# Train an unsmoothed SOWE model on the full dataset for interactive inspection.
sowe = mdlfun(TermToColorDistributionSOWE, full_data, false)

In [None]:
        # Scratch cell: display two query plots.
        # NOTE(review): `mdl` is not defined at top level anywhere in this
        # notebook as shown -- bind it to a trained model before running.
        plot_query(mdl,  "greenish brown") |> display
        plot_query(mdl,  "green")|> display