In [1]:
%reload_ext iventure.magics
%ripl --seed 2 --plugins extensions.py

session_id: fsaad@probcomp-1.csail.mit.edu_2018-11-13T19:16:33.739477_A
Set seed of a new VentureScript RIPL to 2.00.
Loading plugin: extensions.py


In [2]:
venture_runtime = %get_ripl

In [3]:
import pandas as pd

In [4]:
%%venturescript
define linear_regression_outliers_program = (xs) ~> { 
    assume xs = ${xs};
    assume prob_outlier = 0.5;
    assume inlier_log_var ~ normal(0,2)  #params:1 ;
    assume outlier_log_var ~ normal(0,2) #params:2 ;
    assume slope ~ normal(0, 2)          #params:3 ;
    assume intercept ~ normal(0, 2)      #params:4 ;
    assume line = (x) -> { intercept + slope * x };
    assume is_outlier = mem((i) ~> {
        flip(prob_outlier) #outlier_status:integer(i)
    });
    assume y = mem((i) ~> {
        x = xs[i];
        variance = if (is_outlier(i)) { exp(inlier_log_var) } else { exp(outlier_log_var) };
        normal(line(x), sqrt(variance))
    })
};

In [5]:
%%venturescript
define extract_linear_regression_lightweight_trace = (trace) -> {
    run_in_trace(trace, sample(dict(
        ["slope", slope],
        ["intercept", intercept],         
        ["inlier_log_var", inlier_log_var],
        ["outlier_log_var", outlier_log_var],
        ["prob_outlier", prob_outlier]
    )))
};

In [6]:
%%venturescript
define make_symmetric_mh_update = (scope, proposal) -> {
    mh_correct(
        on_subproblem(
            scope,
            quote(all),
            symmetric_local_proposal(proposal)))
};

In [7]:
%%venturescript
define outlier_update = (i) -> mh_correct(
    on_subproblem(
        quote(outlier_status),
        integer(i),
        symmetric_local_proposal((outlier) -> not(outlier))));

In [8]:
def load_dataset(fname):
    train_df = pd.read_csv(fname)
    train_xs = train_df["xs"].tolist()
    train_ys = train_df["ys"].tolist()
    return [train_xs, train_ys]

from venture.lite.sp_help import deterministic_typed
import venture.lite.types as t
venture_runtime.bind_foreign_inference_sp('load_dataset',
    deterministic_typed(
        load_dataset,
        [t.StringType()], # input type signature
        t.HomogeneousListType(t.HomogeneousListType(t.NumberType()))))

In [10]:
%%venturescript
define run_custom_mcmc = (xs, ys, num_iters) -> {
    timer = start_timer();
    trace = new_trace();
    _ = run_in_trace(trace, {
    
        action(print("** num_iters:"));
        action(print(num_iters));
        
        // sample from prior
        linear_regression_outliers_program(xs);

        // observe dataset
        for_each_indexed(ys,
            (i, y) -> { observe y(${integer(i)}) = y; });

        // run markov chain
        repeat(num_iters, {
            gradient_ascent(minimal_subproblem(/?params/*), .000001, 10, 1);
            // lbfgs_optimize(minimal_subproblem(/?params/*));
            for_each(arange(size(xs)), outlier_update);
        })
    });
                 
    elapsed_ms = time_elapsed(timer);
    // return the lightweight trace
    dict(["trace", extract_linear_regression_lightweight_trace(trace)], ["elapsed", elapsed_ms])
};

In [11]:
%%venturescript
define do_experiment = () -> {
    train_dataset = load_dataset("../train.csv");
    xs = train_dataset[0];
    ys = train_dataset[1];
    num_steps_list = [integer(10)];
    num_replicates = 4;
    apply(dict, zip(num_steps_list, mapv((num_steps) -> {
        parallel_mapv((i) -> {
            run_custom_mcmc(xs, ys, num_steps)
        }, arange(num_replicates))
    }, num_steps_list)))
};

In [12]:
results = %venturescript do_experiment()

** num_iters:
** num_iters:
** num_iters:
** num_iters:
10
10
10
10


In [13]:
from collections import OrderedDict
def save_results(results):
    slopes = []
    intercepts = []
    prob_outliers = []
    inlier_log_vars = []
    outlier_log_vars = []
    elapsed = []
    num_steps_all = []
    for (num_steps, results) in results.iteritems():
        for result in results:
            num_steps_all.append(num_steps)
            trace = result["trace"]
            slopes.append(trace["slope"])
            intercepts.append(trace["intercept"])
            prob_outliers.append(trace["prob_outlier"])
            inlier_log_vars.append(trace["inlier_log_var"])
            outlier_log_vars.append(trace["outlier_log_var"])
            elapsed.append(result["elapsed"]);
    df = pd.DataFrame(OrderedDict([
        ("num_steps"       , num_steps_all),
        ("elapsed"         , elapsed),
        ("score"           , [0]*len(elapsed)),
        ("slope"           , slopes),
        ("intercept"       , intercepts),
        ("inlier_log_var"  , inlier_log_vars),
        ("outlier_log_var" , outlier_log_vars),
    ]))
    df.to_csv("venture_map.results.csv", index=False, header=False)

In [14]:
save_results(results)

In [15]:
cat venture_map.results.csv

10,326.843719959,0,0.469091577775,-0.968821142776,2.16164190788,0.536165667177
10,331.383980036,0,-0.723609892658,4.34657171555,-3.21393092166,0.181587218445
10,330.365732908,0,-1.55792146592,-1.2069884518,-1.46148300068,0.0815043763486
10,328.890064955,0,4.36127612351,-0.545213929504,1.89672323191,2.70028434867
