This example code is copy-pasted from the [readme file of sl3](https://github.com/tlverse/sl3) and the [readme file of condensier](https://github.com/osofr/condensier). None of this code is mine; it is all copied.

Note: many of the libraries that sl3 suggests, and that this demo uses, are not available in the default Anaconda repository. With the exception of `keras` and `kerasR`, every one of those missing libraries is available in the `krinsman` channel, which is also where the `sl3` package itself is found.

In [1]:
library(sl3)

In [2]:
library(magrittr)
library(dplyr)
library(data.table)
library(origami)
library(SuperLearner)
library(glmnet)
library(xgboost)
library(Rsolnp)


Attaching package: ‘dplyr’

The following objects are masked from ‘package:stats’:

    filter, lag

The following objects are masked from ‘package:base’:

    intersect, setdiff, setequal, union


Attaching package: ‘data.table’

The following objects are masked from ‘package:dplyr’:

    between, first, last

origami: Generalized Cross-Validation Framework
Version: 1.0.0
Loading required package: nnls
Super Learner
Version: 2.0-23
Package created on 2018-03-09

Loading required package: Matrix
Loading required package: foreach
Loaded glmnet 2.0-13


Attaching package: ‘xgboost’

The following object is masked from ‘package:dplyr’:

    slice



In [3]:
# Load the `cpp` example data, drop the rows whose `haz` outcome is missing,
# and replace every remaining NA in any column with 0.
#
# `funs()` is deprecated as of dplyr 0.8.0, so the replacement is passed to
# `mutate_all()` as a plain function, which older and newer dplyr both accept.
data(cpp)
cpp <- cpp %>%
  dplyr::filter(!is.na(haz)) %>%
  mutate_all(function(column) replace(column, is.na(column), 0))

In [4]:
# Build a task object from the covariates of interest and the outcome `haz`.
covars <- c(
  "apgar1", "apgar5", "parity", "gagebrth", "mage", "meducyrs", "sexn"
)
task <- sl3_Task$new(cpp, covariates = covars, outcome = "haz")


In [5]:
# Learners used by the stack below:
#   - glm_learner:       a GLM learner with default settings
#   - slscreener:        the SuperLearner "screen.glmnet" screener
#   - SL.glmnet_learner: the SuperLearner "SL.glmnet" wrapper
#   - screen_and_glm:    a pipeline chaining the screener and the GLM learner
glm_learner <- Lrnr_glm$new()
slscreener <- Lrnr_pkg_SuperLearner_screener$new("screen.glmnet")
SL.glmnet_learner <- Lrnr_pkg_SuperLearner$new(SL_wrapper = "SL.glmnet")
screen_and_glm <- Pipeline$new(slscreener, glm_learner)


In [6]:
# Combine the three learners (the screener pipeline included) into a single
# stack, then train that stack on the task.
learner_stack <- Stack$new(
  SL.glmnet_learner,
  glm_learner,
  screen_and_glm
)
stack_fit <- learner_stack$train(task)

In [7]:
# Predictions of the trained stack (one column per learner); display the
# first six rows.
preds <- stack_fit$predict()
head(preds, n = 6L)

Lrnr_pkg_SuperLearner_SL.glmnet,Lrnr_glm_TRUE,Lrnr_pkg_SuperLearner_screener_screen.glmnet___Lrnr_glm_TRUE
0.35673069,0.36298498,0.36228209
0.35673069,0.36298498,0.36228209
0.25045165,0.25993072,0.25870995
0.25045165,0.25993072,0.25870995
0.25045165,0.25993072,0.25870995
0.03938581,0.05680264,0.05600958


In [8]:
library(simcausal)

In [9]:
# Data-generating DAG:
#   W1, W2, W3 - "rbern" nodes with prob 0.5, 0.3 and 0.3
#   sA.mu      - constant node equal to 0.98 * W1 + 0.58 * W2 + 0.33 * W3
#   sA         - "rnorm" node with mean sA.mu and sd 1
D <- DAG.empty() +
  node("W1", distr = "rbern", prob = 0.5) +
  node("W2", distr = "rbern", prob = 0.3) +
  node("W3", distr = "rbern", prob = 0.3) +
  node(
    "sA.mu",
    distr = "rconst",
    const = (0.98 * W1 + 0.58 * W2 + 0.33 * W3)
  ) +
  node("sA", distr = "rnorm", mean = sA.mu, sd = 1)
D <- set.DAG(D, n.test = 10)

...automatically assigning order attribute to some nodes...
node W1, order:1
node W2, order:2
node W3, order:3
node sA.mu, order:4
node sA, order:5


In [10]:
# Simulate an observed dataset of n = 10000 rows from the DAG `D`; the fixed
# `rndseed` keeps the draw reproducible across runs.
datO <- sim(D, n = 10000, rndseed = 12345)

simulating observed dataset from the DAG object


In [11]:
library("condensier")

condensier
The condensier package is still in beta testing. Interpret results with caution.


In [12]:
# Fit the density of sA given W1, W2 and W3 on the simulated data, using
# 20 bins with the "equal.mass" binning method and a speedglmR6 estimator
# for the bins.
dens_fit <- fit_density(
  X = c("W1", "W2", "W3"),
  Y = "sA",
  input_data = datO,
  nbins = 20,
  bin_method = "equal.mass",
  bin_estimator = speedglmR6$new()
)

In [13]:
# Take the first five rows of the simulated data and pass them, together
# with the fitted density, to predict_probability() and sample_value().
# NOTE(review): `with = FALSE` is data.table subsetting syntax, so this line
# assumes `datO` is a data.table at this point - confirm.
newdata <- datO[1:5, c("W1", "W2", "W3", "sA"), with = FALSE]
preds <- predict_probability(dens_fit, newdata)

sampledY <- sample_value(dens_fit, newdata)

# Refit with explicit intervals for sA: -4 to 4 in steps of 0.1.
dens_fit <- fit_density(
  X = c("W1", "W2", "W3"),
  Y = "sA",
  input_data = datO,
  bin_estimator = speedglmR6$new(),
  intrvls = list(sA = seq(-4, 4, by = 0.1))
)

# Same intervals again, this time with `pool = TRUE`.
dens_fit <- fit_density(
  X = c("W1", "W2", "W3"),
  Y = "sA",
  input_data = datO,
  bin_estimator = speedglmR6$new(),
  intrvls = list(sA = seq(-4, 4, by = 0.1)),
  pool = TRUE
)

In [14]:
# Wrap the simulated data in an sl3 task (covariates W1-W3, outcome sA) and
# define a condensier learner: 10 bins, "equal.len" binning, pool = TRUE,
# with an xgboost learner (5 rounds, "reg:logistic") as the bin estimator.
task <- sl3_Task$new(
  datO,
  covariates = c("W1", "W2", "W3"),
  outcome = "sA"
)
lrn <- Lrnr_condensier$new(
  nbins = 10,
  bin_method = "equal.len",
  pool = TRUE,
  bin_estimator = Lrnr_xgboost$new(nrounds = 5, objective = "reg:logistic")
)

# Train the learner on the full task.
trained_lrn <- lrn$train(task)

# Build a task from the first five rows and predict on it; the trailing bare
# `pred_probs` displays the result in the notebook.
newdata <- datO[1:5, c("W1", "W2", "W3", "sA")]
new_task <- sl3_Task$new(
  newdata,
  covariates = c("W1", "W2", "W3"),
  outcome = "sA"
)
pred_probs <- trained_lrn$predict(new_task)
pred_probs

likelihood
0.06301719
0.05660773
0.06301719
0.05085018
0.07809542


In [15]:
# Train the existing learner `lrn` on `task` again, then predict on a task
# built from the first five rows of the simulated data. The trailing bare
# `pred_probs` displays the result in the notebook.
trained_lrn <- lrn$train(task)

newdata <- datO[1:5, c("W1", "W2", "W3", "sA")]
new_task <- sl3_Task$new(
  newdata,
  covariates = c("W1", "W2", "W3"),
  outcome = "sA"
)
pred_probs <- trained_lrn$predict(new_task)
pred_probs

likelihood
0.06301719
0.05660773
0.06301719
0.05085018
0.07809542


In [16]:
# Three condensier learners that differ in the number of bins, the binning
# method and the bin estimator; all of them set pool = TRUE.
lrn1 <- Lrnr_condensier$new(
  nbins = 25,
  bin_method = "equal.len",
  pool = TRUE,
  bin_estimator = Lrnr_glm_fast$new(family = "binomial")
)
lrn2 <- Lrnr_condensier$new(
  nbins = 20,
  bin_method = "equal.mass",
  pool = TRUE,
  bin_estimator = Lrnr_xgboost$new(nrounds = 50, objective = "reg:logistic")
)
lrn3 <- Lrnr_condensier$new(
  nbins = 35,
  bin_method = "equal.len",
  pool = TRUE,
  bin_estimator = Lrnr_xgboost$new(nrounds = 50, objective = "reg:logistic")
)

In [17]:
# Condensier learner (35 bins, "equal.len", pool = TRUE) whose bin estimator
# is itself an Lrnr_sl: a fast binomial GLM and an xgboost learner combined
# by a GLM metalearner. The learner is then trained on `task`.
lrn <- Lrnr_condensier$new(
  nbins = 35,
  bin_method = "equal.len",
  pool = TRUE,
  bin_estimator = Lrnr_sl$new(
    learners = list(
      Lrnr_glm_fast$new(family = "binomial"),
      Lrnr_xgboost$new(nrounds = 50, objective = "reg:logistic")
    ),
    metalearner = Lrnr_glm$new()
  )
)
binSL_fit <- lrn$train(task)