# Implementation of algorithm from Diaz and van der Laan

In [47]:
library(condensier)

In [48]:
# NOTE: the workspace is intentionally not wiped with `rm(list = ls())`; that
# call only removes global objects (not loaded packages or options), so
# restart the R session/kernel when a genuinely clean state is needed.

# fix the RNG seed so the simulated data below are reproducible
set.seed(429153)
# simulation size: number of observations and number of covariates (columns
# of W)
n_obs <- 100
n_w <- 3

In [49]:
# toy data set for sketching the tmle-shift procedure
# covariates: n_w columns, each holding n_obs standard normal draws
W <- replicate(n_w, rnorm(n_obs))
# treatment: row-wise sum of a deterministic transform of the covariates
A <- rowSums(W + cos(exp(W)))
# outcome: deterministic function of the treatment alone
Y <- sin(A)
# observed data O = (W, A, Y) as a data frame with columns W1..Wk, A, Y
O <- setNames(as.data.frame(cbind(W, A, Y)),
              c(paste0("W", seq_len(n_w)), "A", "Y"))
head(O)

W1,W2,W3,A,Y
1.3357904,-1.2142,0.7023165,0.5581855,0.529648
0.5852906,1.040991,-0.7216398,0.6135768,0.5757955
1.3420567,-1.848015,-1.0266263,-0.3826661,-0.3733951
0.73432,-1.726326,-0.5078887,-0.1823722,-0.181363
-0.1484268,-1.520334,1.2997251,0.3935576,0.3834764
1.7497752,1.465439,-0.608678,3.951361,-0.7241274


## function for treatment shift $d(a,w)$

In [59]:
# Shift treatment values according to the intervention d(a, w).
#
# Arguments:
#   a      numeric vector of treatment values to shift
#   w      covariates; accepted for interface compatibility, currently unused
#   delta  numeric magnitude of the shift; it is subtracted from `a`
#   type   kind of shift; only "additive" is implemented
#
# Returns: numeric vector `a - delta`, the same length as `a`.
# Raises:  an error if `type` is anything other than "additive".
tx_shift <- function(a, w = NULL, delta, type = "additive") {
    # fail with an explicit message instead of an "object not found" error
    if (!identical(type, "additive")) {
        stop("unsupported shift type: only \"additive\" is implemented",
             call. = FALSE)
    }
    # shift the values supplied through `a`, not a global `A`, which would
    # silently ignore the caller's data
    shift_a <- a - delta
    return(shift_a)
}

## function for estimating $g_n$

In [60]:
# Estimate the conditional treatment density g_n(A | W) and evaluate it at
# the observed and at the shifted treatment values.
#
# Arguments:
#   A      observed treatment values
#   W      covariates with a column dimension; columns are renamed W1, W2, ...
#   delta  shift handed to tx_shift(); defaults to 0
#   ...    further arguments forwarded to fit_density()
#
# Returns: data frame with columns "A_is_a" (density at the observed
#          treatment) and "A_is_a_minus_delta" (density at the shifted one).
est_g <- function(A, W, delta = 0, ...) {
    # covariate labels, used both as column names and as predictor names
    w_names <- paste0("W", seq_len(ncol(W)))

    # gather treatment and covariates in one data set
    obs_data <- as.data.frame(cbind(A, W))
    colnames(obs_data) <- c("A", w_names)

    # estimate the density of A given W with condensier
    g_fit <- fit_density(X = w_names, Y = "A", input_data = obs_data, ...)

    # density evaluated at the treatment as observed
    g_obs <- predict_probability(model_fit = g_fit, newdata = obs_data)

    # density evaluated after replacing the treatment by its shifted version
    shifted_data <- obs_data
    shifted_data$A <- tx_shift(a = shifted_data$A, delta = delta)
    g_shifted <- predict_probability(model_fit = g_fit,
                                     newdata = shifted_data)

    # two-column result: observed scenario first, shifted scenario second
    out <- as.data.frame(cbind(g_obs, g_shifted))
    names(out) <- c("A_is_a", "A_is_a_minus_delta")
    rownames(out) <- NULL
    return(out)
}

testing function for estimating $g_n$

In [61]:
# try est_g on the simulated data with a shift of 0.5; nbins, bin_method and
# bin_estimator are passed through `...` to fit_density()
test_est_g <- est_g(A = A, W = W, delta = 0.5,
                    nbins = 20, bin_method = "equal.mass",
                    bin_estimator = speedglmR6$new())

In [62]:
# display the estimated densities (observed vs. shifted treatment)
test_est_g

A_is_a,A_is_a_minus_delta
2.66389320,3.903669e-02
2.33267068,1.366437e-02
0.23852178,2.145538e-03
10.70006302,1.911927e-03
0.71274721,2.059011e-02
0.53354568,5.335457e-01
0.84669762,3.648089e-01
0.13929790,1.392979e-01
0.72804553,1.744508e+00
2.52839389,4.308739e-01


## function for estimating $Q_n$

In [63]:
# Fit the outcome regression Q_n(A, W) and predict the outcome at the
# observed and at the shifted treatment values.
#
# Arguments:
#   Y         outcome values
#   A         observed treatment values
#   W         covariates with a column dimension; columns are renamed W1, ...
#   delta     shift handed to tx_shift(); defaults to 0
#   reg_form  regression formula given as a string; defaults to "Y ~ ."
#
# Returns: data frame with columns "A_is_a" (prediction at the observed
#          treatment) and "A_is_a_minus_delta" (prediction at the shifted one).
#
# NOTE(review): the accompanying write-up describes the second column as
# Q_n(A + delta, W), whereas this code evaluates at the value returned by
# tx_shift() (documented here as a - delta) -- confirm the intended direction.
est_Q <- function(Y, A, W, delta = 0, reg_form = "Y ~ .") {
    # gather outcome, treatment and covariates in one data set
    obs_data <- as.data.frame(cbind(Y, A, W))
    colnames(obs_data) <- c("Y", "A", paste0("W", seq_len(ncol(W))))

    # outcome regression via glm (default family, i.e. gaussian)
    q_formula <- as.formula(reg_form)
    q_fit <- glm(q_formula, data = obs_data)

    # predicted outcome at the treatment as observed
    q_obs <- predict(q_fit, newdata = obs_data)

    # predicted outcome after replacing the treatment by its shifted version
    shifted_data <- obs_data
    shifted_data$A <- tx_shift(a = shifted_data$A, delta = delta)
    q_shifted <- predict(q_fit, newdata = shifted_data)

    # two-column result: observed scenario first, shifted scenario second
    out <- as.data.frame(cbind(q_obs, q_shifted))
    names(out) <- c("A_is_a", "A_is_a_minus_delta")
    rownames(out) <- NULL
    return(out)
}

In [64]:
# try est_Q on the simulated data with a shift of 0.5 and the default formula
test_est_Q <- est_Q(Y = Y, A = A, W = W, delta = 0.5)

In [65]:
# display the predicted outcomes (observed vs. shifted treatment)
test_est_Q

A_is_a,A_is_a_minus_delta
0.63787423,0.67753580
0.41505643,0.45471800
0.27471154,0.31437311
0.27402811,0.31368969
0.48222757,0.52188914
0.42741966,0.46708123
0.75424940,0.79391097
0.53164333,0.57130490
-0.06117959,-0.02151802
0.16190625,0.20156783


## function for estimating $H_n$

In [69]:
# Compute the auxiliary covariate H_n as the ratio of the two density
# estimates held in `g_n`.
#
# Arguments:
#   g_n  two-column matrix or data frame; column 1 is the denominator and
#        column 2 the numerator of the ratio
#   a    currently unused
#   w    currently unused
#
# Returns: vector of ratios g_n[, 2] / g_n[, 1], one per row of `g_n`.
est_h <- function(g_n, a = NULL, w = NULL) {
    # TODO: compute upper and lower limits for treatment, then combine the
    # ratio with the shift indicators, along the lines of
    #   h_n <- ind_a * ratio_g + ind_a_delta
    # The case without support everywhere (where the indicators kick in)
    # is ignored for now, so H_n is just the plain density ratio.
    g_numer <- g_n[, 2]
    g_denom <- g_n[, 1]
    h_n <- g_numer / g_denom

    return(h_n)
}

In [70]:
# try est_h on the density estimates produced by est_g above
test_est_h <- est_h(g_n = test_est_g)

In [71]:
# display the resulting density ratios
test_est_h

## function for fluctuation procedure

In [None]:
# Fit the fluctuation (targeting) regression
#   logit(Q_eps) = logit(Qn) + epsilon * Hn
# by logistic regression without an intercept.
#
# Arguments:
#   Y   outcome vector, suitable for a binomial glm
#   Qn  numeric vector of initial outcome predictions in (0, 1)
#   Hn  numeric vector holding the auxiliary covariate
#
# Returns: the fitted `glm` object; its single coefficient (named "Hn") is
#          the estimate of epsilon.
est_fluc <- function(Y, Qn, Hn) {
    # logit of the initial predictions; enters the model with coefficient 1
    logitQn_AW <- qlogis(Qn)

    # Hn must sit OUTSIDE offset(): inside it the model would have no free
    # parameter and epsilon would never be estimated
    mod_fluc <- glm(Y ~ -1 + offset(logitQn_AW) + Hn,
                    family = "binomial")

    # return the fit model object
    out <- mod_fluc
    return(out)
}

---

# Anatomy of the shift-Tx package

The algorithm is based on @diaz2017stochastic.

## Starting Assumptions

1. Start with a simple additive shift -- i.e., $d(a,w) = a + \delta$ if $a <
    u(w) - \delta$ or $d(a,w) = a$ if $a \geq u(w) - \delta$.
2. The additive shift will have _support everywhere_ -- i.e., $a < u(w) -
    \delta$ is true everywhere.
3. The data structure that we know and love $O = (W,A,Y)$.

## Functions Needed

* estimate $g_n(W)$
* estimate $Q_n(A, W)$
* estimate auxiliary covariate $H_n(A_i, W_i)$
* fluctuation procedure
* 1-TMLE procedure
* EIF procedure

## Estimate $g_n(W)$

* _input_: W, a
* _output_: a 2-column matrix, with columns for $g_n(A_i - \delta \mid W_i)$ and
    $g_n(A_i \mid W_i)$
* in the inputs, $a$ denotes the observed treatment values; the additive shift
    applied to them is $\delta$
* use the __fit_density__ function from Oleg's __condensier__ package, need to
    use the __predict_probability__ function twice: once for $A_i - \delta$ and
    once for $A_i$

## Estimate $Q_n(A, W)$

* _input_: W, a
* _output_: a 2-column matrix, with columns for $\bar{Q}_n(A_i, W_i)$ and
    $\bar{Q}_n(A_i + \delta, W_i)$

## Estimate $H_n(A_i, W_i)$

* _input_: matrix output produced by $g_n(w)$
* _output_: vector (possibly shifted) of the form described in the eqn below
* $H(a,w) = I(a < u(w)) \frac{g_0(a - \delta \mid w)}{g_0(a \mid w)} + I(a
    \geq u(w) - \delta)$
* By our assumption (2) above -- that we have _support everywhere_ -- we reduce
    the above formulation
* That is, we assume that $I(a < u(w)) = 1$ and $I(a \geq u(w) - \delta) = 0$
* Thus the form of the covariate reduces simply to $H(a,w) = \frac{g_0(a -
    \delta \mid w)}{g_0(a \mid w)}$

## Fluctuation Procedure

* _input_: matrix output from $Q_n(a,w)$, vector output of $H_n$, vector Y
* _output_: model fit object produced from a call to `glm` or `SuperLearner`
* We have the fluctuation model: $\text{logit}(\bar{Q}_{\epsilon, n}(a,w)) =
    \text{logit}(\bar{Q}_n(a,w)) + \epsilon \cdot H_n(a,w)$
* Note that the first term on the RHS of the above equation is one of the
    columns generated as output by the function to estimate $Q_n(A,W)$
* this could be fit with R code like the following `glm(Y ~ -1 +
    offset(logitQn_AW) + Hn_AW, family = "binomial")`, from which we may extract
    the coefficient on `Hn_AW`, which is $\epsilon_n$ from the above (note that
    `Hn_AW` must stay outside of `offset()`, otherwise no coefficient is fit)

## 1-TMLE Procedure

* _input_: model fit object produced by the fluctuation procedure above, matrix
    produced by procedure to estimate $Q_n(A,W)$
* _output_: numeric scalar for the mean of $\bar{Q}^*_n$
* note that we have $\psi_n = \frac{1}{n} \sum_{i=1}^n \bar{Q}_n^*(d(A_i, W_i),
    W_i)$
* we obtain $\bar{Q}_n^*$ by calling the appropriate method of predict on the
    shifted data -- i.e., `predict(fit, newdata = data.frame(Qn_dAW), type =
    "response")` (note that use of 'response' performs the `expit()` transform).
* compute the $\psi_n$ as the mean of the vector produced by calling `predict`
    on the fit object, as described above

## EIF Procedure

* _input_: matrix produced by $Q^*$: a 2-column matrix, with columns for
    $\bar{Q}_n(A_i, W_i)$ and $\bar{Q}_n(A_i + \delta, W_i)$
* _output_: scalar, the variance of the efficient influence function
* note that we have the _efficient influence function_ (EIF): $D(P)(o) =
    H(a,w)(y - \bar{Q}(a,w)) + \bar{Q}(d(a,w), w) - \psi(P)$
* to compute the EIF from the above, we may set up a function like the following
    `eif <- function(Y, H, Qn_AW, Qn_dAW, Psi)`, which can then compute $\psi$
    by calling 1-TMLE (alternatively, the mean of the vector `Qn_dAW`) and then
    using the formula above
* compute $\sigma^2_n = \frac{1}{n} \sum_{i=1}^n EIF_i^2$, that is, simply call
    mean on the square of the vector produced by the above