# Fitting Various Models in R

In [1]:
library(data.table)
library(MASS)
library(VGAM)

# Read the whitespace-delimited file into a data.frame, then wrap it as a
# data.table so the grouped `[ , j, by = ]` aggregations used below work.
ichs.data <- data.table(read.table(file = "ichs.txt"))

# Preview the first rows to check the columns were parsed as expected.
head(x = ichs.data)

Loading required package: stats4
Loading required package: splines


id,gender,height,cosv,sinv,xero,baseage,age,infect
121013,0,-3,-1,0,0,31,31,0
121013,0,-3,0,-1,0,31,34,0
121013,0,-2,1,0,0,31,37,0
121013,0,-2,0,1,0,31,40,0
121013,0,-2,-1,0,0,31,43,1
121013,0,-3,0,-1,0,31,46,0


In [2]:
# Intercept-only logistic regression on the raw rows: every row of
# `ichs.data` is its own 0/1 observation of `infect`. The quasibinomial
# family makes glm() estimate a dispersion parameter instead of fixing it at 1.
bernoulli.model <- glm(
  formula = infect ~ 1,
  family = quasibinomial,
  data = ichs.data
)
summary(bernoulli.model)


Call:
glm(formula = infect ~ 1, family = quasibinomial, data = ichs.data)

Deviance Residuals: 
    Min       1Q   Median       3Q      Max  
-0.4322  -0.4322  -0.4322  -0.4322   2.1987  

Coefficients:
            Estimate Std. Error t value Pr(>|t|)    
(Intercept)  -2.3239     0.1013  -22.93   <2e-16 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

(Dispersion parameter for quasibinomial family taken to be 1.000834)

    Null deviance: 721.45  on 1199  degrees of freedom
Residual deviance: 721.45  on 1199  degrees of freedom
AIC: NA

Number of Fisher Scoring iterations: 5


In [3]:
# Intercept-only quasibinomial model on data aggregated per `id`:
# `n` is the number of rows for that id and `infect` the count of rows with
# infect == 1, so the response is cbind(successes, failures).
binomial.model <- glm(
  formula = cbind(infect, n - infect) ~ 1,
  family = quasibinomial,
  data = ichs.data[
    ,
    list(n = length(infect), infect = sum(infect)),
    by = .(id)
  ]
)
summary(binomial.model)


Call:
glm(formula = cbind(infect, n - infect) ~ 1, family = quasibinomial, 
    data = ichs.data[, list(n = length(infect), infect = sum(infect)), 
        by = .(id)])

Deviance Residuals: 
    Min       1Q   Median       3Q      Max  
-1.0587  -1.0587  -0.7486   0.6014   4.3975  

Coefficients:
            Estimate Std. Error t value Pr(>|t|)    
(Intercept)  -2.3239     0.1175  -19.77   <2e-16 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

(Dispersion parameter for quasibinomial family taken to be 1.346287)

    Null deviance: 312.47  on 274  degrees of freedom
Residual deviance: 312.47  on 274  degrees of freedom
AIC: NA

Number of Fisher Scoring iterations: 5


In [4]:
# Intercept-only beta-binomial fit on the same per-`id` aggregation
# (n = rows per id, infect = number of infected rows). The model has two
# linear predictors, one for mu and one for rho.
beta.binomial.model <- vglm(
  formula = cbind(infect, n - infect) ~ 1,
  family = betabinomial,
  data = ichs.data[
    ,
    list(n = length(infect), infect = sum(infect)),
    by = .(id)
  ]
)
summary(beta.binomial.model)

"22 diagonal elements of the working weights variable 'wz' have been replaced by 1.819e-12"


Call:
vglm(formula = cbind(infect, n - infect) ~ 1, family = betabinomial, 
    data = ichs.data[, list(n = length(infect), infect = sum(infect)), 
        by = .(id)])


Pearson residuals:
               Min         1Q  Median     3Q    Max
logit(mu)  -0.7131 -7.131e-01 -0.5279 0.8009  4.098
logit(rho) -1.2916  2.847e-11  0.1347 0.2801 11.956

Coefficients: 
              Estimate Std. Error z value Pr(>|z|)    
(Intercept):1  -2.3117     0.1159  -19.95  < 2e-16 ***
(Intercept):2  -2.4126     0.4188   -5.76  8.4e-09 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Number of linear predictors:  2 

Names of linear predictors: logit(mu), logit(rho)

Log-likelihood: -221.2778 on 548 degrees of freedom

Number of iterations: 3 

'(Intercept):2'

In [5]:
# Predicted infection probability (response scale) for each per-`id` row.
#
# The prediction data must be passed as `newdata`. The original call used
# `data =`, which is not the prediction-data argument, so the table was not
# used as prediction input (it most likely fell into `...`) and the call
# returned the in-sample fitted values. NOTE(review): confirm against the
# installed VGAM version that `newdata` with type = "response" is supported
# for this family.
predict(
  beta.binomial.model,
  newdata = ichs.data[
    ,
    list(n = length(infect), infect = sum(infect)),
    by = .(id)
  ],
  type = "response"
)

In [6]:
# 95% Wald interval for the second intercept on the logit(rho) scale:
# estimate -2.4126 and standard error 0.4188 are copied (rounded) from the
# intercept-only beta-binomial summary; interval = estimate -/+ z * SE.
-2.4126 + c(-1, 1) * 0.4188 * qnorm(0.975)

In [7]:
# Inverse logit of the first intercept (-2.3117, rounded from the
# intercept-only beta-binomial summary): mu = 1 / (1 + exp(-eta)).
mu <- 1 / (exp(2.3117) + 1)

In [8]:
# Inverse logit of the second intercept (-2.4126, rounded from the
# intercept-only beta-binomial summary): rho = 1 / (1 + exp(-eta)).
rho <- 1 / (exp(2.4126) + 1)

In [9]:
# First beta shape parameter implied by (mu, rho).
# Presumably uses mu = alpha / (alpha + beta) and rho = 1 / (1 + alpha + beta),
# which gives alpha = mu * (1 - rho) / rho -- confirm against the VGAM
# betabinomial documentation.
(1 - rho) * mu / rho

In [10]:
# Second beta shape parameter implied by (mu, rho).
# Presumably uses mu = alpha / (alpha + beta) and rho = 1 / (1 + alpha + beta),
# which gives beta = (1 - mu) * (1 - rho) / rho -- confirm against the VGAM
# betabinomial documentation.
(1 - rho) * (1 - mu) / rho

In [11]:
# Quasibinomial model with baseline age as a covariate, fitted on the
# per-`id` aggregation. `baseage` is collapsed with mean() so each id
# contributes one value. Note: this reassigns `binomial.model`, replacing the
# intercept-only fit created earlier.
binomial.model <- glm(
  formula = cbind(infect, n - infect) ~ 1 + baseage,
  family = quasibinomial,
  data = ichs.data[
    ,
    list(n = length(infect), infect = sum(infect), baseage = mean(baseage)),
    by = .(id)
  ]
)
summary(binomial.model)


Call:
glm(formula = cbind(infect, n - infect) ~ 1 + baseage, family = quasibinomial, 
    data = ichs.data[, list(n = length(infect), infect = sum(infect), 
        baseage = mean(baseage)), by = .(id)])

Deviance Residuals: 
    Min       1Q   Median       3Q      Max  
-1.4390  -0.8773  -0.5940   0.3292   4.1245  

Coefficients:
             Estimate Std. Error t value Pr(>|t|)    
(Intercept) -2.549413   0.135822 -18.770  < 2e-16 ***
baseage     -0.027499   0.006375  -4.314 2.25e-05 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

(Dispersion parameter for quasibinomial family taken to be 1.196207)

    Null deviance: 312.47  on 274  degrees of freedom
Residual deviance: 287.74  on 273  degrees of freedom
AIC: NA

Number of Fisher Scoring iterations: 5


In [12]:
# Beta-binomial model with baseline age as a covariate, fitted on the
# per-`id` aggregation (n = rows per id, infect = infected rows,
# baseage = mean baseline age within the id). Note: this reassigns
# `beta.binomial.model`, replacing the intercept-only fit created earlier.
beta.binomial.model <- vglm(
  formula = cbind(infect, n - infect) ~ 1 + baseage,
  family = betabinomial,
  data = ichs.data[
    ,
    list(n = length(infect), infect = sum(infect), baseage = mean(baseage)),
    by = .(id)
  ]
)
summary(beta.binomial.model)

"Some weight matrices have negative eigenvalues. They will be assigned NAs"


Call:
vglm(formula = cbind(infect, n - infect) ~ 1 + baseage, family = betabinomial, 
    data = ichs.data[, list(n = length(infect), infect = sum(infect), 
        baseage = mean(baseage)), by = .(id)])


Coefficients: 
               Estimate Std. Error z value Pr(>|z|)    
(Intercept):1 -2.520914   0.135098 -18.660  < 2e-16 ***
(Intercept):2 -2.707920   0.507098  -5.340 9.29e-08 ***
baseage       -0.026456   0.006302  -4.198 2.69e-05 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Number of linear predictors:  2 

Names of linear predictors: logit(mu), logit(rho)

Log-likelihood: -211.5263 on 547 degrees of freedom

Number of iterations: 6 

'(Intercept):1', '(Intercept):2'