In [116]:
# Simulate a small Poisson-regression data set.
# The rate follows log(lambda) = beta[1] + beta[2] * delta + beta[3] * x,
# where delta is a binary indicator and x is a chi-squared covariate whose
# degrees of freedom depend on delta.
Ndata <- 50

# Binary indicator: 0 when the uniform draw is below 0.4, otherwise 1.
delta <- ifelse(runif(Ndata) < 0.4, 0, 1)

# Both candidate covariates are drawn for every row; the one matching delta
# is selected below.
x.1 <- rchisq(Ndata, df = 4)
x.2 <- rchisq(Ndata, df = 2)

x <- ifelse(delta == 1, x.1, x.2)

# Coefficients: intercept, indicator effect, covariate slope.
beta <- c(-1, -1, 1/2)

lambda <- exp(beta[1] + beta[2] * delta + beta[3] * x)

# Draw one Poisson count per supplied rate.
# rpois() is vectorised over lambda, so a whole vector of rates is handled in
# a single call; a scalar rate still yields exactly one draw.
generateY <- function(lambda.i) rpois(length(lambda.i), lambda = lambda.i)

y <- generateY(lambda)

# `delta == 1` is already logical, so no ifelse(..., TRUE, FALSE) is needed.
data.Pois <- data.frame(N = y, factor = delta == 1, measure = x)
data.Pois

N,factor,measure
0,False,1.72066833
0,False,0.25211339
0,False,0.47122734
52,True,12.02323659
1,False,3.55898577
0,False,0.98498295
0,False,0.83715215
5,True,6.26425166
1,True,4.0453312
1,True,3.35237419


In [110]:
library(stats)

In [117]:
# Fit a Poisson regression (default log link) of the simulated counts on the
# indicator and the continuous covariate, then show the coefficient table.
glm.output <- glm(
  formula = N ~ factor + measure,
  family = "poisson",
  data = data.Pois
)
summary(glm.output)


Call:
glm(formula = N ~ factor + measure, family = "poisson", data = data.Pois)

Deviance Residuals: 
    Min       1Q   Median       3Q      Max  
-1.5333  -0.9476  -0.3869   0.4293   1.9262  

Coefficients:
            Estimate Std. Error z value Pr(>|z|)    
(Intercept) -1.00335    0.20215  -4.963 6.93e-07 ***
factorTRUE  -0.83087    0.24258  -3.425 0.000614 ***
measure      0.48579    0.01709  28.426  < 2e-16 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

(Dispersion parameter for poisson family taken to be 1)

    Null deviance: 1676.883  on 49  degrees of freedom
Residual deviance:   39.601  on 47  degrees of freedom
AIC: 135.01

Number of Fisher Scoring iterations: 5


In [118]:
# Generate one random data set of the requested length.
#
# Arguments:
#   ndata  Number of rows to simulate (a single non-negative number).
#   beta   Length-3 numeric vector: intercept, coefficient on delta,
#          coefficient on x. Defaults to the values used throughout this
#          notebook.
#
# Returns a data frame with ndata rows and four columns, in this order:
#   lambda  Poisson rate, exp(beta[1] + beta[2] * delta + beta[3] * x)
#   delta   0/1 indicator (0 when the uniform draw is below 0.4)
#   x       chi-squared covariate: df = 4 when delta == 1, df = 2 otherwise
#   y       Poisson count drawn with rate lambda
random_draw <- function(ndata, beta = c(-1, -1, 1/2)) {
  stopifnot(is.numeric(ndata), length(ndata) == 1, ndata >= 0)
  stopifnot(is.numeric(beta), length(beta) == 3)

  n <- ndata
  delta <- as.numeric(runif(n) >= 0.4)
  # Both candidates are drawn for every row; the one matching delta is kept.
  x.1 <- rchisq(n, df = 4)
  x.2 <- rchisq(n, df = 2)
  x <- ifelse(delta == 1, x.1, x.2)
  lambda <- exp(beta[1] + beta[2] * delta + beta[3] * x)

  # rpois() is vectorised over lambda, so all counts come from one call
  # instead of growing a data frame row by row. Naming the column explicitly
  # keeps the schema stable; rbind() onto an empty data frame derived the
  # name from the first drawn value.
  data.frame(lambda = lambda, delta = delta, x = x, y = rpois(n, lambda = lambda))
}
random_draw(50)

lambda,delta,x,X2L
0.8532447,0,1.68258225,2
8.3580212,1,8.2464434,10
0.4729393,1,2.50242348,0
0.5465607,1,2.79178025,1
0.83179,0,1.63164934,0
0.3804864,0,0.06739054,0
0.9057888,1,3.80210183,2
0.4137977,0,0.23524385,0
0.88309,0,1.75134357,0
0.4256815,0,0.29187231,0


In [122]:
# Simulate one data set of n rows with random_draw() and fit a Poisson GLM of
# the drawn counts on the indicator and the covariate. Returns the
# summary.glm object of that fit (coefficient table, deviances, AIC).
# Columns of the draw are picked by position: 2 = delta, 3 = x, 4 = count.
estimate <- function(n) {
    draw <- as.data.frame(random_draw(n))
    data.pois <- data.frame(
        outcome = draw[[4]],
        factor = draw[[2]] == 1,
        measure = draw[[3]]
    )
    glm.output <- glm(
        formula = outcome ~ factor + measure,
        family = "poisson",
        data = data.pois
    )
    summary(glm.output)
}


In [125]:
# Fit the model on a single simulated sample of 1000 observations.
estimate(n = 1000)


Call:
glm(formula = outcome ~ factor + measure, family = "poisson", 
    data = data.pois)

Deviance Residuals: 
    Min       1Q   Median       3Q      Max  
-2.8138  -0.9559  -0.4219   0.5904   2.6988  

Coefficients:
             Estimate Std. Error z value Pr(>|z|)    
(Intercept) -1.024126   0.040722  -25.15   <2e-16 ***
factorTRUE  -0.936643   0.049936  -18.76   <2e-16 ***
measure      0.496601   0.003151  157.62   <2e-16 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

(Dispersion parameter for poisson family taken to be 1)

    Null deviance: 29171.23  on 999  degrees of freedom
Residual deviance:   995.14  on 997  degrees of freedom
AIC: 2607.3

Number of Fisher Scoring iterations: 5
