In [238]:
library(boot)

In [226]:
Ndata <- 100

delta <- ifelse(runif(Ndata) < 0.4, 0, 1)

x.1 <- rchisq(Ndata, df = 4)
x.2 <- rchisq(Ndata, df = 2)

x <- ifelse(delta == 1, x.1, x.2)

beta <- c(-1, -1, 1/2)

lambda.hat <- exp(beta[1] + beta[2]*delta + beta[3]*x)

generateY <- function(lambda.i) rpois(1, lambda = lambda.i)

y <- sapply(lambda, generateY)

data.Pois <- data.frame("N" = y, lambda.hat = lambda, factor = delta, measure = x)
glm.output <- glm(N ~ factor + measure, data=data.Pois, family="poisson")


In [229]:
# function that generates random draw for data of specified length
random_draw <- function(ndata) {
  gen_y <- data.frame()
  n = ndata
  delta <- rbinom(n,1,prob=0.6)
  beta <- c(-1, -1, 1/2)
  x.1 <- rchisq(n, df = 4)
  x.2 <- rchisq(n, df = 2)
  x <- ifelse(delta == 1, x.1, x.2)
  lambda.hat <- exp(beta[1] + beta[2]*delta + beta[3]*x)
  df <- as.data.frame(cbind(lambda, delta, x))
  Y <- rpois(ndata, lambda = lambda.hat)
  df <- as.data.frame(cbind(Y, df))
  df
}

In [236]:
set.seed(10)
pois.data = random_draw(1000)
head(pois.data)

Y,lambda,delta,x
2,1.1714813,1,5.507764
1,0.3522979,1,1.56553
0,0.8125355,1,2.027902
1,0.6091049,0,1.007985
13,0.739087,1,8.585706
6,3.4228659,1,6.806052


**Delta-Method**

In [190]:
# function that conducts a GLM estimate for each draw and constructs confidence intervals for parameters and transformation
estimate <- function(data){
    # # run GLM function (linear predictor)
    df = as.data.frame(data)
    glm.output <- glm(Y ~ delta + x, data = df, family="poisson")
    beta.hat <- coef(glm.output)
    sigma.hat <- vcov(glm.output) (how much variability do we have on the parameter)
    fitted.values <- glm.output$fitted.values, 
    linear.predictors <- glm.output$linear.predictors
    
    # link function tells us how linear predictor relates to the average value of y
    log of the average outcome is giving us the GLM,
    how is the mean related to linear predictor (inverse of log is an exponential) - multiplicative model of the components
    
    # # construct confidence intervals for parameters (Y as a function of the GLM coefficients)
    # coefficients are expressed as log-odds (link fxn of poisson)
    derivative of transformationa nd then apply var/covar matrix to construct confidence interval 
    
    # # construct confidence intervals for transformation (Y as a function of the GLM coefficients (which have variance and covariance))
    # MLE (FOC)
    vector = c(beta.hat[1], beta.hat[2])
    #Numerical derivative function for delta method
    numerical.derivative<- function(estimation.fxn, FOC) {
    nn <- length(x)
    dx <- rep(NaN, nn)
    for (i in 1:nn) {
        eps <- rep(0,nn)
        eps[i] <- 1e-6
        dx[i] <- (fun(x+eps)-fun(x-eps))/(2e-6)
        }
    return(dx)}
    
    # Take SOC for var/cvar matrix
    vb <- vcov(glm.output)
    
    
}

estimate(pois.data)

ERROR: Error in parse(text = x, srcfile = src): <text>:7:40: unexpected symbol
6:     beta.hat <- coef(glm.output)
7:     sigma.hat <- vcov(glm.output) (how much
                                          ^


**Bootstrap Method**

In [223]:
# # Functions for boostrap fxn

# Return the probability of a 1 or 0 when x1=1, and  x2=4
prob.pois.0 <-  function( beta ) {
    dpois(0,lambda=exp(beta[1]+beta[2]+4*beta[3])) + dpois(1,lambda=exp(beta[1]+beta[2]+4*beta[3]))
} 

# defining function to generate data and return parameter coefficients
pois.bootstrap <- function(formula, data, indices) {
  d <- data[indices,]
  pois.fit <- glm(formula, data=d, family="poisson")
  return(coef(pois.fit))
}

# defining function to generate data and return transformation output
pois.bootstrap.transformation <- function(formula, data, indices) {
  d <- data[indices,]
  pois.fit <- glm(formula, data=d, family="poisson")
  prob.pois.0 <-  function( beta ) {
    dpois(0,lambda=exp(beta[1]+beta[2]+4*beta[3])) + dpois(1,lambda=exp(beta[1]+beta[2]+4*beta[3]))
    } 
  return(prob.pois.0(coef(pois.fit)))
}

In [240]:
# # Bootstrap function
bs_method_ <- function(data, n){
# # parameters
bootstrap.results_p <- boot(statistic=pois.bootstrap, data=data, R = n, formula=Y~delta+x)
bs.distribution <- bootstrap.results_p$t

bootstrap.out <-c("int_est"=mean(bs.distribution[,1]),
                   "int_se"= sd(bs.distribution[,1]),
                   "int_lowconf97.5"=quantile(bs.distribution[,1],0.025,names=FALSE), 
                   "int_upconf97.5"=quantile(bs.distribution[,1],0.975,names=FALSE),
                    "int_lowconf95"=quantile(bs.distribution[,1],0.05,names=FALSE),
                    "int_upconf95"=quantile(bs.distribution[,1],0.95,names=FALSE),
                    "int_lowconf90"=quantile(bs.distribution[,1],0.1,names=FALSE),
                    "int_upconf90"=quantile(bs.distribution[,1],0.1,names=FALSE),
                    # B0
                    "b0_est"=mean(bs.distribution[,2]),
                    "b0_se"= sd(bs.distribution[,2]),
                    "b0_lowconf97.5"=quantile(bs.distribution[,2],0.025,names=FALSE), 
                    "b0_upconf97.5"=quantile(bs.distribution[,2],0.975,names=FALSE),
                    "b0_lowconf95"=quantile(bs.distribution[,2],0.05,names=FALSE),
                    "b0_upconf95"=quantile(bs.distribution[,2],0.95,names=FALSE),
                    "b0_lowconf90"=quantile(bs.distribution[,2],0.1,names=FALSE),
                    "b0_upconf90"=quantile(bs.distribution[,2],0.1,names=FALSE),
                    # B1
                    "b1_est"=mean(bs.distribution[,3]),
                    "b1_se"= sd(bs.distribution[,3]),
                    "b1_lowconf97.5"=quantile(bs.distribution[,3],0.025,names=FALSE), 
                    "b1_upconf97.5"=quantile(bs.distribution[,3],0.975,names=FALSE),
                    "b1_lowconf95"=quantile(bs.distribution[,3],0.05,names=FALSE),
                    "b1_upconf95"=quantile(bs.distribution[,3],0.95,names=FALSE),
                    "b1_lowconf90"=quantile(bs.distribution[,3],0.1,names=FALSE),
                    "b1_upconf90"=quantile(bs.distribution[,3],0.1,names=FALSE))
bootstrap.results.df <- as.data.frame(matrix(bootstrap.out, nrow=3, ncol=8, byrow=TRUE))
colnames(bootstrap.results.df) <- c("estimate", "se", "2.5", "97.5", "5", "95","10","90")
rownames(bootstrap.results.df) <- c("intercept", "b0", "b1")

# # transformation
bootstrap.results_t <- boot(statistic=pois.bootstrap.transformation, data=data, R = n, formula=Y~delta+x)
bs.transformation.distribution <- bootstrap.results_t$t
    
bootstrap.out.transformation <- c(mean(bs.transformation.distribution),
                    sd(bs.transformation.distribution),
                    quantile(bs.transformation.distribution,0.025,names=FALSE), 
                    quantile(bs.transformation.distribution,0.975,names=FALSE),
                    quantile(bs.transformation.distribution,0.05,names=FALSE), 
                    quantile(bs.transformation.distribution,0.95,names=FALSE),
                    quantile(bs.transformation.distribution,0.10,names=FALSE), 
                    quantile(bs.transformation.distribution,0.90,names=FALSE))
bootstrap.results.df <- rbind(bootstrap.results.df, bootstrap.out.transformation)
rownames(bootstrap.results.df) <- c("intercept", "b0", "b1", "transformation")
bootstrap.results.df
}

In [241]:
bs_method_(pois.data, 1000)

Unnamed: 0,estimate,se,2.5,97.5,5,95,10,90
intercept,-1.006262,0.044629503,-1.0881297,-0.9083598,-1.0760499,-0.9323413,-1.0608699,-1.0608699
b0,-1.03758,0.040068426,-1.1285373,-0.9714247,-1.1140475,-0.9802946,-1.0931305,-1.0931305
b1,0.5031088,0.004259117,0.4950487,0.5115779,0.4965461,0.5103697,0.4977836,0.4977836
transformation,0.7464756,0.011303477,0.7249114,0.7701013,0.7276842,0.7657843,0.7322286,0.7613845


**Compare to Asymptotic Normal** 

In [None]:
round(
    cbind(simulation=simulation.out,delta.method=delta.method.out,bootstrap=bootstrap.out),4)

In [None]:
# define the function for parameter transformation (probability y<2)
prob.pois.0 <-  function( beta ) {
    dpois(0,lambda=exp(beta[1]+beta[2]+4*beta[3])) + dpois(1,lambda=exp(beta[1]+beta[2]+4*beta[3]))
} 

exp(beta[1] + beta[2]*delta + beta[3]*x)

pois.bootstrap <- function(formula, data, indices) {
  d <- data[indices,]
  pois.fit <- glm(formula, data=d,family="poisson")
  return(coef(pois.fit))
}

  # Return the probability of a 0 when x1=1, and  x2=-2
  return( prob.pois.0(coef(pois.fit)) ) 

bs.results.prob.0 <- boot(data=data.Pois, statistic=pois.bs,
   R=1000, formula=y~x1+x2)

bs.distribution <- bs.results.prob.0$t

base + geom_histogram(aes(x=bs.distribution,y = ..density..),binwidth=0.005,color=Pitt.Blue,fill=Pitt.Gold,size=2)+xlab("Outcome Prob")+ylab("Density")

bootstrap.out <-c("estimate"=mean(bs.distribution),
                   "std.err"= sd(bs.distribution),
                   "lower.conf"=quantile(bs.distribution,0.025,names=FALSE), # I set names FALSE here to stop quantile names being appended
                   "upper.conf"=quantile(bs.distribution,0.975,names=FALSE))
bootstrap.out

round(
    cbind(simulation=simulation.out,delta.method=delta.method.out,bootstrap=bootstrap.out),4)

boot.ci(bs.results,type="bca",index=3) #  x2

bs.Sigma <- cov( bs.results$t) # get the covariance matrix
rownames(bs.Sigma) <- c("(Intercept)","x1","x2")
colnames(bs.Sigma) <- c("(Intercept)","x1","x2")
bs.Sigma # Matrix

Sigma.hat # Matrix

sqrt(diag(bs.Sigma)) # Std. Errors for each parameter in isolation bootstrap
sqrt(diag(Sigma.hat)) # Std. Errors for each parameter in isolation delta method