## Example setup

In [48]:
library(shifttx)

In [49]:
# Example based on the data-generating mechanism presented in the simulation
set.seed(489327)
n <- 100
W <- data.frame(W1 = runif(n), W2 = rbinom(n, 1, 0.7))
A <- rpois(n, lambda = exp(3 + .3 * log(W$W1) - 0.2 * exp(W$W1) * W$W2))
Y <- rbinom(n, 1, plogis(-1 + 0.05 * A - 0.02 * A * W$W2 + 0.2 * A * tan(W$W1^2)
                         - 0.02 * W$W1 * W$W2 + 0.1 * A * W$W1 * W$W2))

# functional forms for g and Q
fitA.0 <- glm(A ~ I(log(W1)) + I(exp(W1)):W2, family = poisson,
              data = data.frame(A, W))
fitY.0 <- glm(Y ~ A + A:W2 + A:I(tan(W1^2)) + W1:W2 + A:W1:W2,
              family = binomial, data = data.frame(A, W))

gn.0  <- function(A = A, W = W) {
  dpois(A, lambda = predict(fitA.0, newdata = W, type = "response"))
}

Qn.0 <- function(A = A, W = W) {
  predict(fitY.0, newdata = data.frame(A, W, row.names = NULL),
          type = "response")
}

## Original implementation (vdL & Diáz, 2012)

In [50]:
n_iter <- 3
tmle_out_iter <- as.matrix(replicate(2, rnorm(n_iter)))

In [51]:
for (j in seq_len(n_iter)) {
    tmle_shift_new <- tmle_shift(Y = Y, A = A, W = W, Qn = Qn.0, gn = gn.0,
                                 delta = 2, tol = 1e-4, iter_max = n_iter,
                                 A_val = seq(1, 60, 1))
    tmle_out_iter[j, ] <- tmle_shift_new[1:2]
}

In [63]:
tmle_shift_new[1:2]

In [64]:
tmle_out_iter

0,1
0.5724689,0.003109476
0.5724689,0.003109476
0.5724689,0.003109476


## 1-TMLE implementation (vdL & Diáz, 2017)

In [58]:
est_eqn2 <- function(eps, QnAW, Qn, H1, gn0d, EQnd, D2, prev_sum, Y, A, W,
                     delta) {

  sum((Y - (QnAW + eps * H1)) * H1 + (Qn(A + delta, W) - EQnd) -
      rowSums(D2 * exp(eps * D2 + prev_sum) * gn0d) /
      rowSums(exp(eps * D2 + prev_sum) * gn0d))
}

In [59]:
f_iter2 <- function(Qn, gn, gn0d = NULL, prev_sum = 0, first = FALSE, h_int,
                    Y, A, W, delta, A_val) {

  # numerical integrals and equation (7)
  Qnd <- t(sapply(seq_len(nrow(W)), function(i) Qn(A_val + delta, W[i, ])))
  gnd <- t(sapply(seq_len(nrow(W)), function(i) gn(A_val, W[i, ])))
  gnd <- gnd / rowSums(gnd)
  if (first) gn0d <- gnd

  # Riemann sum for integral defining estimate of Qn
  EQnd <- rowSums(Qnd * gnd) * h_int
  D2 <- Qnd - EQnd
  QnAW <- Qn(A, W)
  H1 <- gn(A - delta, W) / gn(A, W)

  # equation (8)
  est_eqn_min  <- stats::uniroot(est_eqn2, c(-1, 1),  Y = Y, A = A, W = W,
                                 delta = delta, QnAW = QnAW, Qn = Qn, H1 = H1,
                                 gn0d = gn0d, EQnd = EQnd, D2 = D2,
                                 prev_sum = prev_sum)
  eps <- est_eqn_min$root

  # updated values
  #if (first) {  # get an initial estimate during the first run
  #    gn_ini_est <- function(a, w) exp(eps * Qn(a + delta, w)) * gn(a, w)
  #} 
  Qn_new   <- function(a, w) Qn(a, w) + eps * gn(a - delta, w) / gn(a, w)
  prev_sum <- prev_sum + eps * D2
  return(list(Qn = Qn_new,  # we wish to update Qn just once
              #if (first) {
              #    gn = gn_ini_est
              #} else {
                  gn = gn,
              #},  # we don't want to update gn -- not necessary
              prev_sum = prev_sum,
              eps = eps,
              gn0d = gn0d)  # the initial estimate of gn
        )
}

In [68]:
tmle1_shift <- function(Y, A, W,
                        Qn, gn,
                        delta, A_val) {

  # interval partition length, A_val assumed equally spaced
  n_A_val <- length(A_val)
  h_int <- (A_val[n_A_val] - A_val[1]) / (n_A_val - 1)

  # inputs are estimator functions of Q and g
  # first run returns initial estimators
  # subsequent runs return their updated valeus
  ini_out <- f_iter2(Qn = Qn,  # functional form provided as input
                     gn = gn,  # functional form provided as input
                     gn0d = NULL,
                     prev_sum = 0,
                     first = TRUE,
                     h_int = h_int,
                     W = W, A = A, A_val = A_val, Y = Y, delta = delta)
  gn0d <- ini_out$gn0d

  # update step: 1-TMLE requires we only do this for Qn
  new_out <- f_iter2(Qn = ini_out$Qn,  # initial estimate obtained from last run
                     gn = ini_out$gn,  # same functional form as input; NOT updated in this run
                     gn0d = gn0d,  # initial estimate for gn; NOT to be updated
                     prev_sum = ini_out$prev_sum,
                     first = FALSE,
                     h_int = h_int,
                     W = W, A = A, A_val = A_val, Y = Y, delta = delta)
    

  Qnd <- t(sapply(seq_len(nrow(W)), function(i) ini_out$Qn(A_val + delta, W[i, ])))
  #gnd <- t(sapply(seq_len(nrow(W)), function(i) new_out$gn(A_val, W[i, ])))
  #gnd <- gnd / rowSums(gnd)

  # this is an integral computed by Riemann sum
  Qbar_n_star <- rowSums(Qnd * gn0d) * h_int

  # plug in tmle
  psi_hat <- mean(Qbar_n_star)

  # influence curve of tmle
  IC <- (Y - ini_out$Qn(A, W)) * ini_out$gn(A - delta, W) / ini_out$gn(A, W) +
    ini_out$Qn(A + delta, W) - psi_hat
  var_hat <- stats::var(IC) / length(Y)
      
  # construct output object in S3 style
  out <- list(psi_hat = psi_hat, var_hat = var_hat, ic = as.vector(IC))
  class(out) <- "shift_1tmle"
  return(out)
}

In [69]:
tmle1_new <- tmle1_shift(Y = Y, A = A, W = W,
                         Qn = Qn.0, gn = gn.0,
                         delta = 2, A_val = seq(1, 60, 1))

In [70]:
tmle1_new$psi
tmle1_new$var

## Check 1-TMLE is similar to iterative procedure

What? Are you serious? That was all...

In [65]:
# old one -- using new_out and ini_out
all.equal(tmle1_new$psi_hat, tmle_shift_new[1])

In [71]:
# new one -- using only ini_out
all.equal(tmle1_new$psi_hat, tmle_shift_new[1])

In [66]:
# old one -- using new_out and ini_out
all.equal(tmle1_new$var_hat, tmle_shift_new[2])

In [72]:
# new one -- using only ini_out
all.equal(tmle1_new$var_hat, tmle_shift_new[2])

Maybe the influence curve estimates are different?

In [67]:
all.equal(tmle1_new$ic, tmle_shift_new[3:102])