In [1]:
library(itecv) # should be loaded first
library(magrittr)
library(foreach)
library(doFuture)
library(zeallot)
library(tidyverse)
library(rlearner)

# Set the width and height options used for plots shown in the notebook
# (`repr.plot.width` / `repr.plot.height`). Returns what `options()` returns.
plotsize <- function(x, y) {
  options(repr.plot.width = x, repr.plot.height = y)
}

# Register doFuture as the foreach backend, so the %dopar% loop used later in
# this notebook is executed through futures.
registerDoFuture()
# Evaluate futures on a cluster of 4 workers.
plan(cluster, workers = 4)

Loading required package: future
Loading required package: iterators
Loading required package: parallel

Attaching package: ‘zeallot’

The following objects are masked from ‘package:future’:

    %->%, %<-%

── Attaching packages ─────────────────────────────────────── tidyverse 1.2.1 ──
✔ ggplot2 2.2.1     ✔ purrr   0.2.5
✔ tibble  1.4.2     ✔ dplyr   0.7.6
✔ tidyr   0.8.1     ✔ stringr 1.3.1
✔ readr   1.1.1     ✔ forcats 0.3.0
“package ‘dplyr’ was built under R version 3.5.1”── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ purrr::accumulate() masks foreach::accumulate()
✖ tidyr::extract()    masks magrittr::extract()
✖ dplyr::filter()     masks stats::filter()
✖ dplyr::lag()        masks stats::lag()
✖ purrr::set_names()  masks magrittr::set_names()
✖ purrr::when()       masks foreach::when()
“replacing previous import ‘magrittr::extract’ by ‘tidyr::extract’ when loading ‘rlearner’”
Attaching package: ‘rlearner’

The following object is masked from ‘

In [44]:
# Per-model configuration. Each entry carries a `tune_grid` data frame with
# one row per hyperparameter combination, plus a list of `extra_args`
# (presumably forwarded to the model fit; confirm against the consumers).
model_specs <- list(
  gbm = list(
    tune_grid = expand.grid(
      n.trees = seq(from = 1, to = 501, by = 40),
      interaction.depth = 3,
      shrinkage = 0.1,
      n.minobsinnode = 3
    ),
    extra_args = list(verbose = FALSE, bag.fraction = 0.5)
  ),
  glmnet = list(
    tune_grid = expand.grid(
      alpha = c(0, 0.5, 1),
      lambda = exp(seq(from = -5, to = 2, by = 0.4))
    ),
    extra_args = list()
  )
)

# Metrics to evaluate, keyed by the name each is reported under.
# Entries that are commented out are currently disabled.
metrics <- list(
  mse         = bundle_mse,
  wmse        = bundle_wmse,
  # dr_mse    = bundle_dr_mse,
  r_objective = bundle_r_objective,
  match_mse   = bundle_match_mse,
  trans_mse   = bundle_trans_mse,
  # match_mdc = bundle_match_mdc,
  # trans_mdc = bundle_trans_mdc,
  ip_value    = bundle_ip_value,
  dml_value   = bundle_dml_value,
  # gain      = bundle_gain,
  # c_benefit = bundle_c_benefit,
  # qini      = bundle_qini,
  random      = bundle_random
)

# Replicate identifiers to run for every DGP.
replicates <- 1:2

# Take the first two DGPs returned by powers_DGPs() and store each one's
# position (1 or 2) in its `id` field.
DGPs <- pmap(
  list(powers_DGPs()[1:2], 1:2),
  function(dgp, id) {
    dgp$id <- id
    dgp
  }
)

In [75]:
# Open the help page for unlist().
?unlist

In [76]:
# Unpack the three components returned by make_indices() into itrain, ival and
# itest using zeallot's destructuring assignment.
# NOTE(review): the arguments are presumably the train / validation / test
# set sizes -- confirm against make_indices().
c(itrain, ival, itest) %<-% make_indices(150, 50, 100)
#' Build k-fold cross-validation index sets from pooled indices.
#'
#' The training and validation indices are concatenated, shuffled, and dealt
#' round-robin into `n_folds` groups. Each group serves once as the validation
#' fold, with the remaining groups forming the matching training set.
#'
#' @param itrain Vector of training indices.
#' @param ival Vector of validation indices.
#' @param n_folds Number of folds; a single whole number >= 1.
#' @return An unnamed list of length `n_folds`. Element `k` is a list with
#'   `train` (every pooled index outside fold `k`) and `val` (the pooled
#'   indices in fold `k`).
cv_indices <- function(itrain, ival, n_folds) {
  stopifnot(is.numeric(n_folds), length(n_folds) == 1L, n_folds >= 1)

  pooled <- c(itrain, ival)
  n_pooled <- length(pooled)

  # Shuffle by position: sample(x) on a single number x would draw from 1:x
  # instead of returning x itself.
  shuffled <- pooled[sample.int(n_pooled)]

  # length.out keeps the fold labels exactly as long as the data, so split()
  # no longer warns when n_pooled is not a multiple of n_folds. Fixing the
  # factor levels keeps one (possibly empty) group per fold.
  fold_ids <- seq_len(n_folds)
  fold_of <- factor(rep(fold_ids, length.out = n_pooled), levels = fold_ids)
  splits <- split(shuffled, fold_of)

  # Returned visibly (the original ended on an assignment, which made the
  # result invisible).
  lapply(fold_ids, function(fold) {
    list(
      train = unlist(splits[-fold], use.names = FALSE),
      val = splits[[fold]]
    )
  })
}
# Five-fold cross-validation index sets over the pooled train + validation
# indices.
icv <- cv_indices(itrain, ival, n_folds = 5)

In [80]:
# Display the training indices.
itrain

In [79]:
# Display the validation indices.
ival

In [77]:
# Display the cross-validation index sets.
icv

In [55]:
# Scratch version of the fold split: pool the training and validation
# indices, shuffle them, and deal them round-robin into n_folds groups.
n_folds <- 5
train_total <- c(itrain, ival)
n_train <- length(train_total)
# length.out keeps the fold labels exactly as long as the data, so split()
# does not warn when n_train is not a multiple of n_folds. The explicit
# levels keep one group per fold.
splits <- sample(train_total) %>%
  split(factor(
    rep(seq_len(n_folds), length.out = n_train),
    levels = seq_len(n_folds)
  ))

In [72]:
# Number of validation indices.
length(ival)

In [70]:
# Number of pooled train + validation indices.
length(train_total)

In [68]:
# Number of indices left once the first split is dropped.
length(unlist(splits[-1]))

In [7]:
library(rsample)

Loading required package: broom

Attaching package: ‘rsample’

The following object is masked from ‘package:tidyr’:

    fill



In [39]:
# Pool the training and validation indices and fix the fold count.
n_folds <- 5
train_total <- c(itrain, ival)
n_train <- length(train_total)

In [42]:
# Shuffle the pooled indices, group them by whether their position exceeds
# n_train / n_folds, and report the size of each resulting group.
sample(train_total) %>%
  split(1:n_train > n_train / n_folds) %>%
  map(length)

In [8]:
# Generate one dataset from the first DGP, sized by the combined number of
# train, validation and test indices.
DGP <- DGPs[[1]]
data <- create_data(DGP, length(itrain) + length(ival) + length(itest))

In [3]:
# Learners to compare, keyed by the name each is reported under.
learners <- list(
  rlearner = rlearner_cv,
  slearner = slearner_cv,
  tlearner = tlearner_cv
)

In [4]:
#' Run one simulation replicate for one data-generating process.
#'
#' Creates a dataset sized to the combined train / validation / test indices,
#' then produces two result tables from it: validation metrics joined to test
#' metrics per model, and test metrics for the learners. Both tables are
#' tagged with the replicate and the DGP id.
#'
#' @param replicate Replicate identifier, stored in the `replicate` column.
#' @param DGP Data-generating process; `DGP$id` and `DGP$randomized` are read.
#' @param model_specs Model specifications handed to the estimation helpers.
#' @param learners Learners handed to `estimate_learner_test()`.
#' @param metrics Metrics handed to `estimate_val_metrics()`.
#' @param itrain,ival,itest Training, validation and test indices.
#' @return An unnamed two-element list: the selection results, then the
#'   learner results.
run_sim <- function(replicate, DGP, model_specs, learners, metrics, itrain, ival, itest) {
  # Stamp a result table with the replicate and DGP that produced it.
  tag_run <- function(tbl) {
    mutate(tbl, replicate = replicate, dgp = DGP$id)
  }

  n_obs <- length(c(itrain, ival, itest))
  data <- create_data(DGP, n_obs)

  # Estimates from the training indices, scored on validation and test.
  estimates <- estimate_val_test(data, itrain, model_specs)
  val_bundle <- learn_validation_auxiliaries(
    data, ival, model_specs,
    randomized = DGP$randomized
  )
  val_metrics <- estimate_val_metrics(estimates, val_bundle, metrics, ival)
  test_metrics <- calc_test_metrics(data, estimates, itest)

  # Learner estimates, scored on the test indices.
  learner_estimates <- estimate_learner_test(
    data, learners, model_specs, itrain, ival, itest
  )
  learner_result <- tag_run(calc_test_metrics(data, learner_estimates, itest))

  selection_result <- tag_run(
    inner_join(val_metrics, test_metrics, by = "model")
  )

  list(selection_result, learner_result)
}

In [5]:
# Time the whole simulation grid.
start_time <- Sys.time()

# Call run_sim() for every replicate x DGP pair via %dopar%. The outer loop
# concatenates the inner loops' results with c(); transpose() then regroups
# the two-element run_sim() outputs into selection results and learner
# results, and each group is row-bound into one table.
struct <- foreach(
  replicate = replicates,
  .combine = c,
  .export = c(
    "model_specs", "learners", "metrics",
    "itrain", "ival", "itest", "run_sim"
  ),
  .packages = (.packages())
) %:%
  foreach(
    DGP = DGPs,
    # .combine = list,
    .export = c(
      "model_specs", "learners", "metrics",
      "itrain", "ival", "itest", "run_sim"
    ),
    .packages = (.packages())
  ) %dopar% {
    run_sim(replicate, DGP, model_specs, learners, metrics, itrain, ival, itest)
  } %>%
  transpose() %>%
  map(bind_rows)

# Write the two result tables to disk.
write_csv(struct[[1]], "data/experimental_results_selection.csv")
write_csv(struct[[2]], "data/experimental_results_learner.csv")

end_time <- Sys.time()
end_time - start_time

Time difference of 2.593124 mins