In [1]:
%reload_ext iventure.magics
%ripl --seed 2 --plugins extensions.py

session_id: fsaad@probcomp-1.csail.mit.edu_2018-11-13T14:21:41.162657_5
Set seed of a new VentureScript RIPL to 2.00.
Loading plugin: extensions.py


In [2]:
venture_runtime = %get_ripl

In [3]:
import pandas as pd

In [4]:
%%venturescript
define linear_regression_outliers_program = (xs) ~> { 
    assume xs = ${xs};
    assume prob_outlier = 0.5;
    assume inlier_log_var ~ normal(0,2)  #params:1 ;
    assume outlier_log_var ~ normal(0,2) #params:2 ;
    assume slope ~ normal(0, 2)          #params:3 ;
    assume intercept ~ normal(0, 2)      #params:4 ;
    assume line = (x) -> { intercept + slope * x };
    assume is_outlier = mem((i) ~> {
        flip(prob_outlier) #outlier_status:integer(i)
    });
    assume y = mem((i) ~> {
        x = xs[i];
        variance = if (is_outlier(i)) { exp(inlier_log_var) } else { exp(outlier_log_var) };
        normal(line(x), sqrt(variance))
    })
};

In [5]:
%%venturescript
define extract_linear_regression_lightweight_trace = (trace) -> {
    run_in_trace(trace, sample(dict(
        ["slope", slope],
        ["intercept", intercept],         
        ["inlier_log_var", inlier_log_var],
        ["outlier_log_var", outlier_log_var],
        ["prob_outlier", prob_outlier]
    )))
};

In [6]:
%%venturescript
define make_symmetric_mh_update = (scope, proposal) -> {
    mh_correct(
        on_subproblem(
            scope,
            quote(all),
            symmetric_local_proposal(proposal)))
};

In [7]:
%%venturescript
define outlier_update = (i) -> mh_correct(
    on_subproblem(
        quote(outlier_status),
        integer(i),
        symmetric_local_proposal((outlier) -> not(outlier))));

In [8]:
def load_dataset(fname):
    train_df = pd.read_csv(fname)
    train_xs = train_df["xs"].tolist()
    train_ys = train_df["ys"].tolist()
    return [train_xs, train_ys]

from venture.lite.sp_help import deterministic_typed
import venture.lite.types as t
venture_runtime.bind_foreign_inference_sp('load_dataset',
    deterministic_typed(
        load_dataset,
        [t.StringType()], # input type signature
        t.HomogeneousListType(t.HomogeneousListType(t.NumberType()))))

In [9]:
%%venturescript
define run_custom_mcmc = (xs, ys, num_iters) -> {
    timer = start_timer();
    trace = new_trace();
    _ = run_in_trace(trace, {
    
        action(print("** num_iters:"));
        action(print(num_iters));
        
        // sample from prior
        linear_regression_outliers_program(xs);

        // observe dataset
        for_each_indexed(ys,
            (i, y) -> { observe y(${integer(i)}) = y; });

        // run markov chain
        repeat(num_iters, {
            lbfgs_optimize(minimal_subproblem(/?params/*));
            for_each(arange(size(xs)), outlier_update);
        })
    });
                 
    elapsed_ms = time_elapsed(timer);
    // return the lightweight trace
    dict(["trace", extract_linear_regression_lightweight_trace(trace)], ["elapsed", elapsed_ms])
};

In [10]:
%%venturescript
define do_experiment = () -> {
    train_dataset = load_dataset("../train.csv");
    xs = train_dataset[0];
    ys = train_dataset[1];
    num_steps_list = [integer(10)];
    num_replicates = 4;
    apply(dict, zip(num_steps_list, mapv((num_steps) -> {
        parallel_mapv((i) -> {
            run_custom_mcmc(xs, ys, num_steps)
        }, arange(num_replicates))
    }, num_steps_list)))
};

In [11]:
results = %venturescript do_experiment()

** num_iters:
10
** num_iters:
** num_iters:
** num_iters:
10
10
10


In [12]:
def save_results(results):
    slopes = []
    intercepts = []
    prob_outliers = []
    inlier_log_vars = []
    outlier_log_vars = []
    elapsed = []
    num_steps_all = []
    for (num_steps, results) in results.iteritems():
        for result in results:
            num_steps_all.append(num_steps)
            trace = result["trace"]
            slopes.append(trace["slope"])
            intercepts.append(trace["intercept"])
            prob_outliers.append(trace["prob_outlier"])
            inlier_log_vars.append(trace["inlier_log_var"])
            outlier_log_vars.append(trace["outlier_log_var"])
            elapsed.append(result["elapsed"]);
    df = pd.DataFrame({
        "slope" : slopes,
        "intercept" : intercepts,
        "prob_outlier" : prob_outliers,
        "inlier_log_var" : inlier_log_vars,
        "outlier_log_var" : outlier_log_vars,
        "elapsed" : elapsed,
        "num_steps" : num_steps_all
    })
    df.to_csv("venture_map.results.csv")

In [13]:
save_results(results)

In [14]:
cat venture_map.results.csv

,elapsed,inlier_log_var,intercept,num_steps,outlier_log_var,prob_outlier,slope
0,597.510355949,3.32486978887,1.9906612877,10,-1.36926030655,0.5,-1.00713681416
1,582.223300934,-1.2245844344,2.00555531362,10,3.35312730604,0.5,-1.00379758199
2,587.207060099,3.33313491115,2.01840325275,10,-1.24626996078,0.5,-1.01731973846
3,598.618225098,3.33352879552,2.00644934445,10,-1.32267637257,0.5,-1.01331119676
