In [1]:
library(itecv) # should be loaded first
library(magrittr)
library(foreach)
library(doFuture)
library(zeallot)
library(tidyverse)

# Set the `repr.plot.width` / `repr.plot.height` options to `x` and `y`.
# Returns whatever `options()` returns (the previous values, invisibly).
plotsize <- function(x, y) {
    options(repr.plot.width = x, repr.plot.height = y)
}

# Register doFuture as the foreach backend, so `%dopar%` loops are executed
# through whatever future plan is active.
registerDoFuture()
# Select the `cluster` plan with two workers for subsequent futures.
plan(cluster, workers = 2)

: package ‘doFuture’ was built under R version 3.2.5Loading required package: future
Loading required package: iterators
: package ‘iterators’ was built under R version 3.2.5Loading required package: parallel

Attaching package: ‘zeallot’

The following objects are masked from ‘package:future’:

    %->%, %<-%

── Attaching packages ─────────────────────────────────────── tidyverse 1.2.1 ──
✔ ggplot2 2.2.1     ✔ purrr   0.2.4
✔ tibble  1.4.2     ✔ dplyr   0.7.4
✔ tidyr   0.7.2     ✔ stringr 1.2.0
✔ readr   1.1.1     ✔ forcats 0.2.0
: package ‘forcats’ was built under R version 3.2.5── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ purrr::accumulate() masks foreach::accumulate()
✖ tidyr::extract()    masks magrittr::extract()
✖ dplyr::filter()     masks stats::filter()
✖ dplyr::lag()        masks stats::lag()
✖ purrr::set_names()  masks magrittr::set_names()
✖ purrr::when()       masks foreach::when()


In [6]:
# Model specifications, one entry per learner. Each entry carries:
#   tune_grid  - data frame of hyper-parameter combinations (expand.grid)
#   extra_args - list of additional fixed arguments for that learner
# NOTE(review): presumably consumed by the itecv fitting helpers as
# caret-style tuning grids - confirm against the package.
model_specs <- list(
    gbm = list(
        tune_grid = expand.grid(
            n.trees = seq(1, 501, 40),
            interaction.depth = 3,
            shrinkage = 0.1,
            n.minobsinnode = 3),
        extra_args = list(
            # Spell out FALSE: `F` is an ordinary variable and can be reassigned.
            verbose = FALSE,
            bag.fraction = 0.5)),
    glmnet = list(
        tune_grid = expand.grid(
            alpha = c(0, 0.5, 1),
            lambda = exp(seq(-5, 2, 0.4))),
        extra_args = list()))

# Named list of metric bundles; the names identify each metric.
# NOTE(review): the `bundle_*` objects are not defined in this file -
# presumably exported by itecv; confirm.
# Element order is kept exactly as before.
metrics <- list(
    mse = bundle_mse, wmse = bundle_wmse,
    r_objective = bundle_r_objective,
    match_mse = bundle_match_mse, trans_mse = bundle_trans_mse,
    match_mdc = bundle_match_mdc, trans_mdc = bundle_trans_mdc,
    ip_value = bundle_ip_value, dml_value = bundle_dml_value,
    gain = bundle_gain,
#     c_benefit = bundle_c_benefit,
    qini = bundle_qini,
    random = bundle_random
)

# Replicate identifiers for the simulation.
replicates <- 1:10

# Tag every data-generating process with its position in the list.
# The ids come from seq_along() rather than a hard-coded 1:16, so the
# numbering follows however many DGPs powers_DGPs() returns.
DGPs <- powers_DGPs() %>%
    map2(seq_along(.), function(dgp, id) {
        dgp$id <- id
        dgp
    })

# Destructure the three index sets returned by make_indices().
# NOTE(review): presumably train / validation / test row indices of 100
# observations each - confirm against make_indices().
c(itrain, ival, itest) %<-% make_indices(100, 100, 100)

In [7]:
# Run one simulation for a single (replicate, DGP) pair.
#
# Draws a dataset with one row per index in itrain/ival/itest, fits the
# estimators on `itrain`, computes the validation metrics on `ival` and the
# test metrics on `itest`, and joins the two tables by "model".
# The result gains a `replicate` column and a `dgp` column (from `DGP$id`).
run_sim <- function(replicate, DGP, model_specs, metrics, itrain, ival, itest) {
    n_obs <- length(c(itrain, ival, itest))
    sim_data <- create_data(DGP, n_obs)

    fitted <- estimate_val_test(sim_data, itrain, model_specs)
    aux_bundle <- learn_validation_auxiliaries(
        sim_data, ival, model_specs,
        randomized = DGP$randomized
    )

    val_scores <- estimate_val_metrics(fitted, aux_bundle, metrics, ival)
    test_scores <- calc_test_metrics(sim_data, fitted, itest)

    joined <- inner_join(val_scores, test_scores, by = "model")
    mutate(joined, replicate = replicate, dgp = DGP$id)
}

In [18]:
# Run run_sim() for every (replicate, DGP) combination through the registered
# foreach backend and row-bind the per-run tables into one data frame.
# The result is bound to `results` *before* writing, so a failed write cannot
# discard the finished simulation, and so the analysis cells further down
# (which read `results`) have it available in the same session.
results <- foreach(replicate = replicates,
        .export = c("model_specs", "metrics", "itrain", "ival", "itest", "run_sim"),
        .combine = bind_rows,
        .packages = (.packages())) %:%
    foreach(DGP = DGPs,
            .export = c("model_specs", "metrics", "itrain", "ival", "itest", "run_sim"),
            .combine = bind_rows,
            .packages = (.packages())) %dopar% {
        run_sim(replicate, DGP, model_specs, metrics, itrain, ival, itest)
    }

# Ensure the output directory exists; write_csv() does not create it.
dir.create("data", showWarnings = FALSE, recursive = TRUE)
write_csv(results, "data/experimental_results.csv")

----

----

----

In [25]:
library(gridExtra)
plotsize(20, 15)

# Average every remaining column over replicates within each (dgp, model)
# pair. summarize_all() only peels off the last grouping level, so the result
# would stay grouped by `dgp`; ungroup() hands a plain tibble to the later
# select / gather / join / split steps.
averaged_results <- results %>%
    group_by(dgp, model) %>%
    select(-replicate) %>%
    summarize_all(mean) %>%
    ungroup()

In [32]:
# Split the averaged table in two: the `value` / `tmse` columns go to
# test_metrics, every other metric column goes to val_metrics.
# Both keep the `dgp` and `model` key columns.
test_metrics <- select(averaged_results, dgp, model, value, tmse)
val_metrics <- select(averaged_results, -value, -tmse)

In [43]:
# Reshape both metric tables to long format, keyed by (model, dgp).
ttm <- gather(test_metrics, tt_metric, tt_figure, -model, -dgp)
vm <- gather(val_metrics, v_metric, v_figure, -model, -dgp)

# Pair every test metric with every validation metric for each model, break
# the model label into learner / method / params, and build one scatter plot
# per DGP, faceted by validation metric (rows) and test metric (columns).
plots <- ttm %>%
    inner_join(vm, by = c("model", "dgp")) %>%
    separate(model, c("learner", "model"), sep = "\\$") %>%
    separate(model, c("method", "params"), sep = "@") %>%
    { split(., .$dgp) } %>%
    imap(function(panel, label) {
        ggplot(panel,
               aes(x = tt_figure, y = v_figure, color = method, shape = learner)) +
            geom_point() +
            facet_grid(v_metric ~ tt_metric, scales = "free") +
            ggtitle(str_c('DGP: ', label))
    })

# Lay all per-DGP plots out in a single row.
grid.arrange(grobs = plots, nrow = 1)