In [33]:
library(plyr)
library(magrittr)
library(distr)
library(distrEx)
library(gbm)
library(Matching)
library(caret)
library(tidyverse)
source("R/simulation_scenarios.R")
source("R/hte_caret.R")
source("R/eval_hte_cv.R")

In [15]:
# Container for the data-generating process; its components are attached below.
DGP <- list()
# NOTE(review): n is not referenced in any of the visible cells.
n <- 5

# Covariate distributions: two distr normal objects centred at -1 and 1
# (standard deviation left at the constructor's default).
X1 <- Norm(mean = -1)
X2 <- Norm(mean = 1)
DGP$X <- list(X1, X2)

# Treatment-assignment distribution given the covariates.
#
# x: numeric covariate vector; only x[1] and x[2] are read.
# w: not used in the body; the other DGP components take the same (x, w)
#    argument pair.
# Returns the object built by Binom() with success probability
# inverse-logit(x[1] + x[2]); size is left at Binom's default.
DGP$f_W_x <- function(x, w) {
    logit_p <- x[1] + x[2]
    # plogis() is the inverse logit. The hand-written exp(l) / (1 + exp(l))
    # evaluates to Inf / Inf = NaN once exp(l) overflows; plogis() returns 1.
    p <- plogis(logit_p)
    Binom(prob = p)
}

# Event-time distribution given covariates x and treatment indicator w.
#
# x: numeric covariate vector; only x[1] and x[2] are read.
# w: treatment flag, used directly as the `if` condition.
# Returns the object built by Weibull(); scale and shape both depend on w.
DGP$f_Y_xw <- function(x, w) {
    # Pick the Weibull parameters for the arm first, then build the
    # distribution once.
    if (w) {
        wb_scale <- abs(x[1] + x[2]) + 0.7
        wb_shape <- 1.2
    } else {
        wb_scale <- abs(x[1]) + abs(x[2])
        wb_shape <- 1.5
    }
    Weibull(scale = wb_scale, shape = wb_shape)
}

# Censoring-time distribution.
#
# x, w: accepted but not read, so the result is the same for every
#       covariate value and treatment arm.
# Returns the object built by Weibull() with shape 1.4 and scale 4.
DGP$f_C_xw <- function(x, w) {
    Weibull(shape = 1.4, scale = 4)
}

In [4]:
# Simulate from the DGP. Later cells read `data`, `cv_index` and `test_index`
# from the result.
# NOTE(review): the meaning of the positional arguments 500, 500, 3 is defined
# by setup_data() in the sourced scripts -- confirm there.
datas <- setup_data(DGP, 500, 500, 3)

: package ‘bindrcpp’ was built under R version 3.2.5

In [35]:
# Model specifications handed to get_estimates(): a single gbm entry whose
# tuning grid varies only the number of trees (1, 21, ..., 501).
models <- list(
    gbm_spec = list(
        method = "gbm",
        tune_grid = expand.grid(
            n.trees = seq(from = 1, to = 501, by = 20),
            interaction.depth = 3,
            shrinkage = 0.2,
            n.minobsinnode = 3
        )
    )
)
# data, cv_index and test_index are looked up inside `datas`; `models` comes
# from the calling environment.
herp <- with(datas, get_estimates(data, models, cv_index, test_index))

In [36]:
# Display the test_estimates component of the get_estimates() result.
herp[["test_estimates"]]

Unnamed: 0,subject,model,treatment,time,event,est_effect,est_rel_risk,fold
1,757,gbm~1~3~0.2~3,FALSE,3.64551128602541,TRUE,-0.198501977898678,0.334351235464197,training
2,757,gbm~21~3~0.2~3,FALSE,3.64551128602541,TRUE,-0.588397241341944,1.20812243337055,training
3,757,gbm~41~3~0.2~3,FALSE,3.64551128602541,TRUE,-1.34816099797716,0.755788785051184,training
4,757,gbm~61~3~0.2~3,FALSE,3.64551128602541,TRUE,-1.37082644491564,-0.71842793972595,training
5,757,gbm~81~3~0.2~3,FALSE,3.64551128602541,TRUE,0.0894794572852811,-0.647924156756333,training
6,757,gbm~101~3~0.2~3,FALSE,3.64551128602541,TRUE,-1.00388136285239,-1.64904424345648,training
7,757,gbm~121~3~0.2~3,FALSE,3.64551128602541,TRUE,0.542031688925178,-2.30039818283477,training
8,757,gbm~141~3~0.2~3,FALSE,3.64551128602541,TRUE,-0.277445785945214,-2.99412764411653,training
9,757,gbm~161~3~0.2~3,FALSE,3.64551128602541,TRUE,1.46063056538441,-4.19128097557351,training
10,757,gbm~181~3~0.2~3,FALSE,3.64551128602541,TRUE,1.94505796465253,-4.64381059524355,training


In [17]:
# Stand-alone copies of the model settings, used below to step through the
# fitting code by hand.
method <- "gbm"
# 26 candidate settings: n.trees = 1, 21, ..., 501; all other parameters fixed.
tune_grid <- expand.grid(
    n.trees = seq(from = 1, to = 501, by = 20),
    interaction.depth = 3,
    shrinkage = 0.2,
    n.minobsinnode = 5
)

In [18]:
# Pull out the simulated data and the first cross-validation index set so the
# per-fold steps can be run interactively.
data <- datas[["data"]]
fold <- datas[["cv_index"]][[1]]
# NOTE(review): fold_name is not referenced in any of the visible cells.
fold_name <- "derp"

In [34]:
# Rows selected by `fold` form the training part; every remaining row is the
# validation part (tagged "test" in sample_type).
training_data <- mutate(data[fold, ], sample_type = "training")
validation_data <- mutate(data[-fold, ], sample_type = "test")

# One entry per treatment value: that arm's training rows are combined with
# the shared validation rows by prep_fold_data().
# Per the original note, each entry is a (data, fold) pair keyed by treatment.
fold_data <- map(
    split(training_data, training_data$treatment),
    function(arm_rows) prep_fold_data(arm_rows, validation_data)
)
# Fit the model separately for each treatment arm.
# Per the original note, fit_model() returns the big matrix with all test-set
# predictions for that treatment.
predictions <- map(
    fold_data,
    function(arm) fit_model(arm$data, arm$index, method, tune_grid)
)

In [32]:
# Display the per-treatment list built by prep_fold_data().
# NOTE(review): the vapply "values must be length 1" error recorded below is
# raised while this nested list is being displayed -- presumably by the
# notebook's rendering of the list rather than by the data itself; confirm.
fold_data

ERROR: Error in vapply(seq_along(mapped), function(i) {: values must be length 1,
 but FUN(X[[1]]) result is length 0


Unnamed: 0,subject,event,time,treatment,covariate_1,covariate_2,set,sample_type,rowIndex
1,5,TRUE,4.04424681292024,FALSE,0.376040590986811,1.58436095939754,training,training,1
2,10,TRUE,2.99855025711998,FALSE,-2.63286814120055,0.22437745452403,training,training,2
3,11,FALSE,2.35046332446662,FALSE,-1.34799998513766,1.57253376605466,training,training,3
4,13,TRUE,1.0419201101671,FALSE,-0.576488829949579,0.810331337898358,training,training,4
5,18,FALSE,1.31614521711864,FALSE,-1.18961557159428,-1.31363159760378,training,training,5
6,19,TRUE,2.20596603138287,FALSE,-1.62706120910569,0.78888048072309,training,training,6
7,22,TRUE,0.515610611530156,FALSE,-1.28794801280286,0.387047487197486,training,training,7
8,25,TRUE,0.398526711202522,FALSE,0.690258759770618,-0.311249090354522,training,training,8
9,27,TRUE,2.13815671799964,FALSE,-0.798259097827028,0.270122404620965,training,training,9
10,29,FALSE,1.11407548556755,FALSE,-3.16636556031551,2.0170843519346,training,training,10

Unnamed: 0,subject,event,time,treatment,covariate_1,covariate_2,set,sample_type,rowIndex
1,1,TRUE,2.53774846152179,TRUE,-1.61255850956139,0.409515365670377,training,training,1
2,2,FALSE,2.71827733704043,TRUE,-0.503103726247373,1.34486559569834,training,training,2
3,3,FALSE,1.65890380980767,TRUE,-1.13596103393759,1.7990236440354,training,training,3
4,4,FALSE,2.0591459947251,TRUE,0.416616244293821,1.41176103105198,training,training,4
5,9,FALSE,2.15491469635735,TRUE,-1.72185518784172,3.26157321408187,training,training,5
6,12,TRUE,0.292790411575288,TRUE,1.0160244855512,0.6195473237259,training,training,6
7,16,FALSE,0.595982566091632,TRUE,1.51094488438347,0.864465810356077,training,training,7
8,17,TRUE,1.0761518550204,TRUE,-0.0229728398475311,2.30194395347172,training,training,8
9,20,TRUE,0.120281979019321,TRUE,-1.23771675995501,1.20385038551004,training,training,9
10,21,TRUE,0.107999949807999,TRUE,-2.46066579166752,1.69544374503499,training,training,10


In [30]:
# Fit on the first treatment arm only, using that arm's own data/index pair.
# NOTE(review): the recorded run of this cell failed with
# "invalid subscript type 'list'", whereas the call in the next cell with
# datas$data and datas$cv_index[[1]] succeeded. Presumably
# fold_data[[1]]$index was a list rather than an index vector at that point;
# verify against prep_fold_data().
herp = fit_model(fold_data[[1]]$data, fold_data[[1]]$index, method, tune_grid)

ERROR: Error in xj[i]: invalid subscript type 'list'


In [31]:
# Fit on the full simulated data with the first cross-validation index set.
# This overwrites the earlier `herp`.
herp <- fit_model(datas$data, datas$cv_index[[1]], method, tune_grid)

In [9]:
# Show the last six rows of the fit_model() result.
tail(herp, n = 6L)

Unnamed: 0,rowIndex,interaction.depth,shrinkage,n.minobsinnode,n.trees,pred
17363,995,3,0.2,5,501,0.02446425
17364,996,3,0.2,5,501,-1.730547
17365,997,3,0.2,5,501,-0.7397968
17366,998,3,0.2,5,501,-1.922014
17367,999,3,0.2,5,501,0.06534832
17368,1000,3,0.2,5,501,-0.6591557
