In [None]:
# (Re)load the iventure IPython extension; presumably this is what provides the
# %get_ripl and %venturescript magics used in the cells below -- confirm.
%reload_ext iventure.magics

In [None]:
# Keep the value returned by the %get_ripl magic as the notebook-wide runtime
# handle; every later bind_* call in this notebook goes through it.
venture_runtime = %get_ripl
# Load the plugin file 'extensions.py' (relative path; presumably resolved
# against the notebook's working directory -- confirm).
venture_runtime.load_plugin('extensions.py')

In [None]:
from venture.lite.psp import RandomPSP
import math

# Constant term of the normal log-density: log(sqrt(2 * pi)).
HALF_LOG2PI = math.log(2 * math.pi) / 2.0

def logDensityNormal(x, mu, sigma):
    """Return the log-density at ``x`` of a normal with mean ``mu`` and
    standard deviation ``sigma``.

    ``sigma`` must be positive; ``math.log`` raises ``ValueError`` otherwise.
    """
    residual = x - mu
    quadratic = 0.5 * residual * residual / (sigma * sigma)
    return - math.log(sigma) - HALF_LOG2PI - quadratic

class TwoNormalsOutputPSP(RandomPSP):
    """Random PSP for a two-component normal mixture with a shared mean.

    The operands are ``(mu, prob1, sigma1, sigma2)``: with probability
    ``prob1`` the value is drawn from a normal with standard deviation
    ``sigma1``, otherwise from one with standard deviation ``sigma2``.
    Both components are centred on ``mu``.
    """

    def simulateNumeric(self, params, np_rng):
        """Draw one value from the mixture.

        params -- the tuple ``(mu, prob1, sigma1, sigma2)``.
        np_rng -- random source providing ``uniform()`` and
                  ``normal(loc=..., scale=...)``.
        """
        (mu, prob1, sigma1, sigma2) = params
        # Choose the component first, then sample from it.
        if np_rng.uniform() < prob1:
            sigma = sigma1
        else:
            sigma = sigma2
        return np_rng.normal(loc=mu, scale=sigma)

    def simulate(self, args):
        """Sample using the operand values and the RNG carried by ``args``."""
        return self.simulateNumeric(args.operandValues(), args.np_prng())

    def logDensityNumeric(self, x, params):
        """Return the log of the mixture density at ``x``.

        params -- the tuple ``(mu, prob1, sigma1, sigma2)``.

        The two weighted component log-densities are combined with a
        max-shifted log-sum-exp so that very negative terms do not underflow.
        """
        (mu, prob1, sigma1, sigma2) = params
        # A component with zero weight contributes nothing to the mixture.
        # Evaluating it anyway would call math.log(0) / math.log1p(-1), which
        # raise ValueError, even though simulateNumeric handles these weights.
        if prob1 == 0:
            return logDensityNormal(x, mu, sigma2)
        if prob1 == 1:
            return logDensityNormal(x, mu, sigma1)
        l1 = math.log(prob1) + logDensityNormal(x, mu, sigma1)
        l2 = math.log1p(-prob1) + logDensityNormal(x, mu, sigma2)
        m = max(l1, l2)
        if m == float("-inf"):
            # Both components have zero density; l1 - m would be NaN.
            return m
        return m + math.log(math.exp(l1 - m) + math.exp(l2 - m))

    def logDensity(self, x, args):
        """Log-density of ``x`` under the operand values carried by ``args``."""
        return self.logDensityNumeric(x, args.operandValues())

In [None]:
from venture.lite.sp_help import typed_nr
import venture.lite.types as t
# Register the mixture PSP with the runtime under the name 'two_normals'.
# Declared operand types, in order: mu (number), prob1 (probability),
# sigma1 and sigma2 (positive); the declared result type is a number.
venture_runtime.bind_foreign_sp('two_normals', typed_nr(TwoNormalsOutputPSP(),
    [t.NumberType(), t.ProbabilityType(), t.PositiveType(), t.PositiveType()], t.NumberType()))

In [None]:
import pandas as pd

In [None]:
%%venturescript
// Logistic function: exp(x) / (1 + exp(x)).
// NOTE(review): the model program below assumes its own copy of sigmoid;
// whether this toplevel definition is used anywhere is not visible here.
define sigmoid = (x) -> (exp(x) / (1 + exp(x)));
// Model program: linear regression with outliers over the inputs xs.
// Latent choices: slope and intercept ~ normal(0, 2); two log-variances
// ~ normal(0, 1); prob_outlier is the sigmoid of a normal(0, 2) draw that is
// tagged logit_prob_outlier.
// The #name tags are the scopes that the *_update definitions later in this
// notebook address with quote(name).
// y(i) is memoized per index and drawn from two_normals centred on line(xs[i]).
// NOTE(review): TwoNormalsOutputPSP treats its second operand as the
// probability of using the THIRD operand's std, so prob_outlier here is the
// weight on inlier_std -- confirm that this naming is intended.
define linear_regression_outliers_program = (xs) ~> { 
    assume sigmoid = (x) -> (exp(x) / (1 + exp(x)));
    assume xs = ${xs};
    assume prob_outlier = sigmoid(
        normal(0, 2) #logit_prob_outlier
    );
    assume inlier_log_var ~ normal(0, 1) #inlier_log_var ;
    assume outlier_log_var ~ normal(0, 1) #outlier_log_var ;
    assume slope ~ normal(0, 2) #slope ;
    assume intercept ~ normal(0, 2) #intercept ;
    assume line = (x) -> { intercept + slope * x };
    // Standard deviations are derived from the log-variances.
    assume inlier_std = sqrt(exp(inlier_log_var));
    assume outlier_std = sqrt(exp(outlier_log_var));
    assume y = mem((i) -> {
        y_mean = line(xs[i]);
        two_normals(y_mean, prob_outlier, inlier_std, outlier_std)
    });
};

In [None]:
%%venturescript
// Read the five quantities of interest out of `trace` as a dict keyed by
// name, by running sample(dict(...)) inside that trace via run_in_trace.
// The keys are the ones save_results later reads from result["trace"].
define extract_linear_regression_lightweight_trace = (trace) -> {
    run_in_trace(trace, sample(dict(
        ["slope", slope],
        ["intercept", intercept],         
        ["inlier_log_var", inlier_log_var],
        ["outlier_log_var", outlier_log_var],
        ["prob_outlier", prob_outlier]
    )))
};

In [None]:
%%venturescript
// Build an inference action for `scope`: `proposal` is wrapped with
// symmetric_local_proposal, applied through on_subproblem to block `all` of
// that scope, and the result is passed to mh_correct.
// NOTE(review): `proposal` is presumably required to be a symmetric kernel
// (suggested by the wrapper's name) -- confirm against the plugin code.
define make_symmetric_mh_update = (scope, proposal) -> {
    mh_correct(
        on_subproblem(
            scope,
            quote(all),
            symmetric_local_proposal(proposal)))
};

In [None]:
%%venturescript
// Random-walk proposal: a normal draw centred on the current value v, with
// 0.5 as the second parameter of normal.
define rw = (v) -> normal(v, 0.5);
// One MH update per tagged latent; each quoted scope name matches a #tag in
// linear_regression_outliers_program.
define slope_update = make_symmetric_mh_update(quote(slope), rw);
define intercept_update = make_symmetric_mh_update(quote(intercept), rw);
define inlier_log_var_update = make_symmetric_mh_update(quote(inlier_log_var), rw);
define outlier_log_var_update = make_symmetric_mh_update(quote(outlier_log_var), rw);
define logit_prob_outlier_update = make_symmetric_mh_update(quote(logit_prob_outlier), rw);

In [None]:
def load_dataset(fname):
    """Read the CSV at ``fname`` and return ``[xs, ys]``.

    The file must contain columns named "xs" and "ys"; each is returned as a
    plain Python list, in that order.
    """
    frame = pd.read_csv(fname)
    return [frame[column].tolist() for column in ("xs", "ys")]

from venture.lite.sp_help import deterministic_typed
import venture.lite.types as t
# Register load_dataset with the runtime as the inference SP 'load_dataset'
# (the name do_experiment calls): one string in, a list of lists of numbers out.
venture_runtime.bind_foreign_inference_sp('load_dataset',
    deterministic_typed(
        load_dataset,
        [t.StringType()], # input type signature
        t.HomogeneousListType(t.HomogeneousListType(t.NumberType())))) # output type signature

In [None]:
%%venturescript
// Run one MCMC chain in a fresh trace and return its final latents together
// with the elapsed time.
//   xs, ys    : inputs and observed outputs; each ys entry is observed as y(i)
//   num_iters : number of sweeps; one sweep applies the five MH updates once,
//               in the order listed below
// Returns a dict with "trace" (output of
// extract_linear_regression_lightweight_trace) and "elapsed"
// (time_elapsed(timer) * 1000 -- milliseconds if time_elapsed reports
// seconds; confirm).
// The timer is started before the trace is created, so "elapsed" covers
// model setup and observation as well as the inference sweeps.
define run_custom_mcmc = (xs, ys, num_iters) -> {
    timer = start_timer();
    trace = new_trace();
    _ = run_in_trace(trace, {
        
        // presumably makes the foreign SP two_normals visible to the model
        // in this trace -- confirm
        model_import_foreign("two_normals");
        // prints the requested iteration count
        action(print(num_iters));
        
        // sample from prior
        linear_regression_outliers_program(xs);

        // observe dataset
        for_each_indexed(ys,
            (i, yi) -> { observe y(${integer(i)}) = yi; });

        // run markov chain
        repeat(num_iters, {
            slope_update;
            intercept_update;
            inlier_log_var_update;
            outlier_log_var_update;
            logit_prob_outlier_update
        })
    });
                 
    elapsed_ms = time_elapsed(timer) * 1000;
    // return the lightweight trace
    dict(["trace", extract_linear_regression_lightweight_trace(trace)], ["elapsed", elapsed_ms])
};

In [None]:
%%venturescript
// Run the whole experiment: load "../train.csv" (element 0 is xs, element 1
// is ys, matching what load_dataset returns), then for every entry of
// num_steps_list run num_replicates chains through parallel_mapv.
// Returns a dict mapping each num_steps value to the list of per-replicate
// results from run_custom_mcmc. The replicate index i is not used.
define do_experiment = () -> {
    train_dataset = load_dataset("../train.csv");
    xs = train_dataset[0];
    ys = train_dataset[1];
    num_steps_list = [integer(100)];
    num_replicates = 4;
    apply(dict, zip(num_steps_list, mapv((num_steps) -> {
        parallel_mapv((i) -> {            
            run_custom_mcmc(xs, ys, num_steps)
        }, arange(num_replicates))
    }, num_steps_list)))
};

In [None]:
# Evaluate do_experiment() through the %venturescript magic and keep the
# returned value; it is handed to save_results in a later cell.
results = %venturescript do_experiment()

In [None]:
def save_results(results, path="venture.results.csv"):
    """Flatten the experiment results into one table and write it as CSV.

    results -- mapping from a number of MCMC steps to a list of replicate
               results; each replicate is a mapping with "trace" (holding
               "slope", "intercept", "prob_outlier", "inlier_log_var" and
               "outlier_log_var") and "elapsed".
    path    -- destination CSV file; defaults to "venture.results.csv".

    One row is written per replicate, with the step count in "num_steps".
    """
    trace_fields = ("slope", "intercept", "prob_outlier",
                    "inlier_log_var", "outlier_log_var")
    columns = dict((field, []) for field in trace_fields)
    columns["elapsed"] = []
    columns["num_steps"] = []
    # items() exists on both Python 2 and Python 3 dicts (iteritems() does
    # not). The loop variables are named so that they do not shadow the
    # `results` parameter.
    for (num_steps, replicates) in results.items():
        for replicate in replicates:
            trace = replicate["trace"]
            for field in trace_fields:
                columns[field].append(trace[field])
            columns["elapsed"].append(replicate["elapsed"])
            columns["num_steps"].append(num_steps)
    pd.DataFrame(columns).to_csv(path)

In [None]:
# Write the collected experiment results to CSV.
save_results(results)