In [1]:
%reload_ext iventure.magics

session_id: fsaad@probcomp-1.csail.mit.edu_2019-04-10T14:38:53.644195_F


In [2]:
# Bind the object returned by the %get_ripl magic to `venture_runtime`, then
# load the plugin file extensions.py into it. The later cells use this object
# to register the foreign procedures written in Python.
venture_runtime = %get_ripl
venture_runtime.load_plugin('extensions.py')

In [3]:
from venture.lite.psp import RandomPSP
import math

# 0.5 * log(2 * pi): the constant term of the normal log-density.
HALF_LOG2PI = 0.5 * math.log(2 * math.pi)

def logDensityNormal(x, mu, sigma):
    """Return the log-density at x of a normal with mean mu and std sigma.

    Computes -log(sigma) - 0.5*log(2*pi) - (x - mu)^2 / (2 * sigma^2).
    math.log raises ValueError when sigma is not strictly positive.
    """
    residual = x - mu
    # Same operation order as 0.5 * r * r / (sigma * sigma), kept on purpose
    # so floating-point results do not change.
    scaled_square = 0.5 * residual * residual / (sigma * sigma)
    return - math.log(sigma) - HALF_LOG2PI - scaled_square

class TwoNormalsOutputPSP(RandomPSP):
    """Random PSP for a mixture of two normals that share one mean.

    The operands are unpacked as (mu, prob1, sigma1, sigma2): with
    probability prob1 the output is drawn with standard deviation sigma1,
    otherwise with standard deviation sigma2. Both components are centred
    at mu.
    """

    def simulateNumeric(self, params, np_rng):
        """Draw one value from the mixture using the numpy-style RNG np_rng."""
        (mu, prob1, sigma1, sigma2) = params
        # The uniform draw selects the component; the normal draw follows,
        # so the RNG is consumed in the same order as before.
        sigma = sigma1 if np_rng.uniform() < prob1 else sigma2
        return np_rng.normal(loc=mu, scale=sigma)

    def simulate(self, args):
        """Draw a value using the operands and RNG carried by args."""
        return self.simulateNumeric(args.operandValues(), args.np_prng())

    def logDensityNumeric(self, x, params):
        """Return the log-density of the mixture at x.

        Degenerate weights are handled explicitly: math.log(0) and
        math.log1p(-1) raise ValueError, while simulateNumeric happily
        accepts prob1 == 0 or prob1 == 1, so the density must as well.
        """
        (mu, prob1, sigma1, sigma2) = params
        # With prob1 <= 0 the first component is never selected by
        # simulateNumeric; with prob1 >= 1 it is always selected.
        if prob1 <= 0:
            return logDensityNormal(x, mu, sigma2)
        if prob1 >= 1:
            return logDensityNormal(x, mu, sigma1)
        l1 = math.log(prob1) + logDensityNormal(x, mu, sigma1)
        l2 = math.log1p(-prob1) + logDensityNormal(x, mu, sigma2)
        # log-sum-exp, shifted by the larger term for numerical stability.
        m = max(l1, l2)
        if m == float('-inf'):
            # Both terms are -inf; subtracting m would produce NaN.
            return m
        return m + math.log(math.exp(l1 - m) + math.exp(l2 - m))

    def logDensity(self, x, args):
        """Return the log-density of x under the operands carried by args."""
        return self.logDensityNumeric(x, args.operandValues())

In [4]:
from venture.lite.sp_help import typed_nr
import venture.lite.types as t
# Register the PSP with the runtime under the name 'two_normals'.
# Declared operand types, in order: number, probability, positive, positive
# (the PSP unpacks them as mu, prob1, sigma1, sigma2); the result is a number.
venture_runtime.bind_foreign_sp('two_normals', typed_nr(TwoNormalsOutputPSP(),
    [t.NumberType(), t.ProbabilityType(), t.PositiveType(), t.PositiveType()], t.NumberType()))

In [5]:
import pandas as pd

In [6]:
%%venturescript
// Logistic function exp(x) / (1 + exp(x)), defined at the top level.
// NOTE(review): no visible cell calls sigmoid; confirm it is still needed.
define sigmoid = (x) -> (exp(x) / (1 + exp(x)));
// Model program: linear regression y = intercept + slope * x whose noise is a
// two-component normal mixture (inlier / outlier) around the regression line.
// The four latents are tagged (#slope, #intercept, #inlier_log_var,
// #outlier_log_var) so inference can address them individually.
define linear_regression_outliers_program = (xs) ~> {
    // NOTE(review): sigmoid is assumed here but not used by any statement below.
    assume sigmoid = (x) -> (exp(x) / (1 + exp(x)));
    assume xs = ${xs};
    // Fixed mixture weight of the outlier component.
    assume prob_outlier = .5;
    assume inlier_log_var ~ normal(0, 1) #inlier_log_var ;
    assume outlier_log_var ~ normal(0, 1) #outlier_log_var ;
    assume slope ~ normal(0, 2) #slope ;
    assume intercept ~ normal(0, 2) #intercept ;
    assume line = (x) -> { intercept + slope * x };
    // Standard deviations recovered from the log-variances.
    assume inlier_std = sqrt(exp(inlier_log_var));
    assume outlier_std = sqrt(exp(outlier_log_var));
    assume y = mem((i) -> {
        y_mean = line(xs[i]);
        // two_normals(mu, prob1, sigma1, sigma2) uses sigma1 with probability
        // prob1, so inlier_std must be paired with the inlier probability
        // 1 - prob_outlier rather than with prob_outlier itself.
        two_normals(y_mean, 1 - prob_outlier, inlier_std, outlier_std)
    });
};

In [7]:
%%venturescript
// Build an inference action for one scope: the proposal function is wrapped by
// symmetric_local_proposal, applied via on_subproblem to the subproblem
// selected by (scope, quote(all)), and the result is passed to mh_correct.
define make_symmetric_mh_update = (scope, proposal) -> {
    mh_correct(
        on_subproblem(
            scope,
            quote(all),
            symmetric_local_proposal(proposal)))
};

In [8]:
%%venturescript
// Random-walk proposal: a normal draw centred at the current value v, with
// 0.5 as the second argument.
define rw = (v) -> normal(v, 0.5);
// One update per latent; each scope name matches a #tag used in
// linear_regression_outliers_program.
define slope_update = make_symmetric_mh_update(quote(slope), rw);
define intercept_update = make_symmetric_mh_update(quote(intercept), rw);
define inlier_log_var_update = make_symmetric_mh_update(quote(inlier_log_var), rw);
define outlier_log_var_update = make_symmetric_mh_update(quote(outlier_log_var), rw);

In [9]:
def load_dataset(fname):
    """Read the CSV at fname and return [xs, ys] as two plain Python lists.

    The file must contain columns named "xs" and "ys"; a missing column
    raises KeyError from pandas.
    """
    frame = pd.read_csv(fname)
    return [frame[column].tolist() for column in ("xs", "ys")]

from venture.lite.sp_help import deterministic_typed
import venture.lite.types as t
# Expose load_dataset to inference programs under the name 'load_dataset'.
# It is declared as a deterministic procedure taking one string and returning
# a list of lists of numbers (the [xs, ys] pair built by load_dataset).
venture_runtime.bind_foreign_inference_sp('load_dataset',
    deterministic_typed(
        load_dataset,
        [t.StringType()], # input type signature
        t.HomogeneousListType(t.HomogeneousListType(t.NumberType()))))

In [10]:
%%venturescript
// Run num_iters inference steps in a new trace: the model is instantiated on
// xs, every ys[i] is observed as y(i), and the result is a list with one
// [elapsed, sampled values] pair per step.
define run_custom_mcmc = (xs, ys, num_iters) -> {

    // One step: apply the four MH updates once, then sample the current
    // values of the latents (and prob_outlier) into a dict.
    run_inference_step = (i) -> {
        timer = start_timer();
        repeat(1, {
            slope_update;
            intercept_update;
            inlier_log_var_update;
            outlier_log_var_update;
        });
        result = run(sample(dict(
            ["slope", slope],
            ["intercept", intercept],         
            ["inlier_log_var", inlier_log_var],
            ["outlier_log_var", outlier_log_var],
            ["prob_outlier", prob_outlier])));
        // NOTE(review): the name suggests milliseconds, but the unit returned
        // by time_elapsed is not visible in this notebook; confirm.
        elapsed_ms = time_elapsed(timer);
        return([elapsed_ms, result])
    };
    
    trace = new_trace();
    results = run_in_trace(trace, {
        
        // make the foreign 'two_normals' procedure available to the model
        model_import_foreign("two_normals");
        // prints num_iters (the recorded run shows 100.0)
        action(print(num_iters));
        
        // sample from prior
        linear_regression_outliers_program(xs);

        // observe dataset
        for_each_indexed(ys,
            (i, yi) -> { observe y(${integer(i)}) = yi; });

        // run markov chain
        mapM(run_inference_step, arange(num_iters))
        // repeat(10, run_inference_step)
    });
    results
};

In [11]:
%%venturescript
// Load ../train.csv, then call run_custom_mcmc once per replicate through
// parallel_mapv. The result holds one list of per-step results per replicate.
define do_experiment = () -> {
    // load_dataset returns [xs, ys]
    train_dataset = load_dataset("../train.csv");
    xs = train_dataset[0];
    ys = train_dataset[1];
    // chain length and number of replicates are hard-coded here
    num_steps = 100;
    num_replicates = 1;
    parallel_mapv(
        (i) -> {run_custom_mcmc(xs, ys, num_steps)},
        arange(num_replicates))
};

In [12]:
results = %venturescript do_experiment()

100.0


In [13]:
results[0]

[[3.251002073287964,
  OrderedDict([('slope', 1.2423381478964441),
               ('intercept', -1.4984647096562276),
               ('inlier_log_var', -0.5831103769646205),
               ('outlier_log_var', -1.5088854422947795),
               ('prob_outlier', 0.5)])],
 [6.5285680294036865,
  OrderedDict([('slope', 1.2423381478964441),
               ('intercept', -1.4984647096562276),
               ('inlier_log_var', -0.5831103769646205),
               ('outlier_log_var', -1.5088854422947795),
               ('prob_outlier', 0.5)])],
 [8.489532947540283,
  OrderedDict([('slope', 0.9473409020791068),
               ('intercept', -0.49928889141384425),
               ('inlier_log_var', -0.5831103769646205),
               ('outlier_log_var', -0.7731806521577549),
               ('prob_outlier', 0.5)])],
 [11.208483934402466,
  OrderedDict([('slope', 0.9473409020791068),
               ('intercept', -0.49928889141384425),
               ('inlier_log_var', 0.10181344034797513),
      

In [14]:
import numpy as np

In [15]:
results[0]

[[3.251002073287964,
  OrderedDict([('slope', 1.2423381478964441),
               ('intercept', -1.4984647096562276),
               ('inlier_log_var', -0.5831103769646205),
               ('outlier_log_var', -1.5088854422947795),
               ('prob_outlier', 0.5)])],
 [6.5285680294036865,
  OrderedDict([('slope', 1.2423381478964441),
               ('intercept', -1.4984647096562276),
               ('inlier_log_var', -0.5831103769646205),
               ('outlier_log_var', -1.5088854422947795),
               ('prob_outlier', 0.5)])],
 [8.489532947540283,
  OrderedDict([('slope', 0.9473409020791068),
               ('intercept', -0.49928889141384425),
               ('inlier_log_var', -0.5831103769646205),
               ('outlier_log_var', -0.7731806521577549),
               ('prob_outlier', 0.5)])],
 [11.208483934402466,
  OrderedDict([('slope', 0.9473409020791068),
               ('intercept', -0.49928889141384425),
               ('inlier_log_var', 0.10181344034797513),
      

In [16]:
# Write the chain statistics of every replicate to a CSV file, one row per
# inference step. The 'score' column is always written as 0.
fname = 'venture_mh.results.csv'
header = ['num_steps', 'runtime', 'score', 'slope',
    'intercept', 'inlier_log_var', 'outlier_log_var']
row_format = '%d,%1.10f,0,%1.10f,%1.10f,%1.10f,%1.10f\n'

# Open the file once instead of reopening it in append mode per replicate.
with open(fname, 'w') as f:
    f.write(','.join(header))
    f.write('\n')
    for result in results:
        # Each entry of `result` is an [elapsed, stats] pair.
        # NOTE(review): the elapsed values shown for results[0] increase
        # monotonically (3.25, 6.53, 8.49, ...). If time_elapsed already
        # reports time since a common start, this cumsum double-counts the
        # runtime; confirm before relying on the 'runtime' column.
        runtimes = np.cumsum([row[0] for row in result])
        stats = [row[1] for row in result]
        # Distinct loop names: the scalar no longer shadows the array it is
        # being drawn from. Step numbers start at 1.
        for num_iters, (runtime, row) in enumerate(zip(runtimes, stats), 1):
            f.write(row_format % (
                num_iters, runtime,
                row['slope'],
                row['intercept'],
                row['inlier_log_var'],
                row['outlier_log_var']
            ))

In [17]:
cat venture_mh.results.csv

num_steps,runtime,score,slope,intercept,inlier_log_var,outlier_log_var
1,3.2510020733,0,1.2423381479,-1.4984647097,-0.5831103770,-1.5088854423
2,9.7795701027,0,1.2423381479,-1.4984647097,-0.5831103770,-1.5088854423
3,18.2691030502,0,0.9473409021,-0.4992888914,-0.5831103770,-0.7731806522
4,29.4775869846,0,0.9473409021,-0.4992888914,0.1018134403,-0.4936792770
5,43.5688450336,0,0.9473409021,-0.0371101269,0.1018134403,-0.4936792770
6,59.9677278996,0,0.9473409021,0.1026517909,0.1329970920,-0.1351399810
7,78.5865778923,0,0.5679937942,0.4408880776,0.5606223946,-0.1351399810
8,100.0458519459,0,0.5679937942,0.4408880776,0.8458835790,-0.1351399810
9,123.7524278164,0,0.3120443587,1.0131030122,1.3814271044,-0.1351399810
10,150.0510716438,0,0.3120443587,1.3122869301,1.3814271044,-0.1351399810
11,178.3030955791,0,0.0529981538,1.4775720680,1.3814271044,-0.1461916335
12,209.0237724781,0,-0.2111921857,1.4775720680,1.4004382293,0.5246472984
13,241.8161923885,0,-0.2111921857,2.1812569203,1.8