# INLA: Create models and assess predictions

In [35]:
#library(rstanarm)
library(INLA)
library(brinla)
library(data.table)
library(ggplot2)
options(repr.plot.width=3, repr.plot.height=3)

In [36]:
# read data
df = fread('../data/le_cov_tr.csv')
nrow(df)

In [37]:
print(names(df))

 [1] "county"               "gender"               "income_q"            
 [4] "le"                   "county_name"          "population"          
 [7] "statename"            "stateabbrv"           "density"             
[10] "gini"                 "relative_mob"         "absolute_mob"        
[13] "segregation_income"   "segregation_race"     "income"              
[16] "poverty"              "middle_class"         "mig_inflow"          
[19] "mig_outflow"          "foreign"              "religion"            
[22] "crime_rate"           "uninsured"            "labor_force"         
[25] "unemployment"         "pct_black"            "pct_hispanic"        
[28] "obesity"              "smoking"              "exercise"            
[31] "house_value"          "college"              "medicare_expenses"   
[34] "local_gov_exp"        "male"                 "q2"                  
[37] "q3"                   "q4"                   "log_population"      
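[40] "log_crime_rate"       "log_poverty"          "log_mig_inflow"      
[43] "log_mig_outflow"      "log_foreign"          "log_pct_black"       
[46] "log_pct_hispanic"     "log_house_value"      "log_local_gov_exp"   
[49] "log_unemployment"     "log_income"           "z_gini"              
[52] "z_relative_mob"       "z_absolute_mob"       "z_middle_class"      
[55] "z_segregation_income" "z_religion"           "z_labor_force"       
[58] "z_uninsured"          "z_medicare_expenses"  "z_college"           
[61] "z_obesity"            "z_smoking"            "z_exercise"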

In [38]:
# Integer ids for state and income quartile: .GRP is data.table's group
# counter, so ids follow the order in which each group first appears.
# Both columns are used as index variables in the f() terms of the models.
df[, state := .GRP, by = statename]
df[, income_qr := .GRP, by = income_q]

In [39]:
table(df[, .(income_qr, income_q)]) # ok, right!

         income_q
income_qr   Q1   Q2   Q3   Q4
        1 3100    0    0    0
        2    0 3102    0    0
        3    0    0 3098    0
        4    0    0    0 3100

# INLA Models (using PC prior)

In [40]:
#names(dat)

In [41]:
# create auxiliary variables
# Copies of the state / county / income-quartile ids, so that each f() term
# in a model formula can be given an index column of its own.
# The models in this notebook use cty, q_mob and q_gini; the state_* and
# cty_mob / cty_gini copies are not referenced by any formula shown here
# (presumably kept for state- or county-level slopes -- TODO confirm).
df[, state_mob := state]
df[, state_gini := state]
df[, cty := county]
df[, cty_mob := county]
df[, cty_gini := county]
df[, q_mob := income_qr]
df[, q_gini := income_qr]

In [42]:
# One table per gender; every model below is fitted separately to each.
female = df[gender=='F']
male = df[gender=='M']

# Baseline model

### Male

# OLS fit of the baseline fixed effects (men). Only its residual SD is used,
# to set the scale of the prior defined below.
lmod <- lm(le ~ z_relative_mob  + z_gini + log_population + log_income, male)

# pc prior
# Penalised-complexity prior for the precision of whichever term receives it
# through `hyper`: param = c(u, alpha) encodes P(sd > u) = alpha, here with
# u = 3 * residual SD and alpha = 0.01.
sdres <- sd(residuals(lmod))
pcprior <- list(prec = list(prior="pc.prec", param = c(3*sdres,0.01)))

In [44]:
# Baseline model, men: four fixed effects plus iid random intercepts for
# state, county (cty) and income quartile (income_qr), and quartile-specific
# iid random slopes: q_mob weighted by z_relative_mob, q_gini weighted by
# z_gini. All five f() terms use the PC prior `pcprior`.
formula = le ~ z_relative_mob  + z_gini + log_population + log_income + 
       f(state, model = "iid", hyper = pcprior) + 
       f(cty, model = "iid", hyper = pcprior) + 
       f(income_qr, model = "iid", hyper = pcprior) + 
       f(q_mob, z_relative_mob , model = "iid", hyper = pcprior) + 
       f(q_gini, z_gini , model = "iid", hyper = pcprior)

# config = TRUE stores what inla.posterior.sample() needs when this fit is
# passed to simulate_predictions(); dic / waic / cpo request fit criteria.
m1 = inla(formula, family = "gaussian", data = male,
#           control.predictor=list(compute = TRUE),
          control.compute = list(config = TRUE, dic = TRUE,
                                 waic = TRUE, cpo = TRUE), 
#           control.inla = list(strategy ="gaussian"),
          verbose = TRUE)

In [45]:
m1$summary.fixed

Unnamed: 0,mean,sd,0.025quant,0.5quant,0.975quant,mode,kld
(Intercept),81.2476318,2.07555635,77.0603137,81.2475557,85.43674696,81.2477717,1.795559e-10
z_relative_mob,-0.3053746,0.13462126,-0.5660218,-0.305365,-0.04328471,-0.3051921,3.718951e-05
z_gini,0.2243529,0.18743681,-0.1420261,0.2243442,0.59125172,0.2244549,2.838629e-05
log_population,-0.228293,0.02795351,-0.2831673,-0.2283007,-0.17342846,-0.228314,2.442026e-14
log_income,1.232039,0.13380876,0.969245,1.232044,1.49456007,1.2320662,2.790317e-14


In [46]:
bri.hyperpar.summary(m1)

Unnamed: 0,mean,sd,q0.025,q0.5,q0.975,mode
SD for the Gaussian observations,1.1317247,0.01211908,1.10658445,1.1323237,1.15398,1.1347625
SD for state,0.6942886,0.08535555,0.55672067,0.6830516,0.8895159,0.6548753
SD for cty,0.5370075,0.03005964,0.47129057,0.5403433,0.5859431,0.5551366
SD for income_qr,3.8519119,1.32871292,1.90993057,3.6248208,7.0737405,3.2183385
SD for q_mob,0.2205882,0.13244463,0.07811555,0.1834379,0.576016,0.1349803
SD for q_gini,0.2977208,0.17325169,0.10889687,0.2496264,0.7618955,0.1857306


### Female

In [47]:
# OLS fit of the baseline fixed effects (women). Only its residual SD is
# used, to set the scale of the prior defined below.
lmod <- lm(le ~ z_relative_mob  + z_gini + log_population + log_income, female)

# pc prior
# Penalised-complexity prior for the precision of whichever term receives it
# through `hyper`: param = c(u, alpha) encodes P(sd > u) = alpha, here with
# u = 3 * residual SD and alpha = 0.01. Overwrites the men's `pcprior`.
sdres <- sd(residuals(lmod))
pcprior <- list(prec = list(prior="pc.prec", param = c(3*sdres,0.01)))

In [48]:
# Baseline model, women: same specification as the men's baseline model.
# Four fixed effects, iid random intercepts for state, county and income
# quartile, and quartile-specific iid random slopes for mobility (q_mob) and
# inequality (q_gini), all with the PC prior `pcprior`.
formula = le ~ z_relative_mob  + z_gini + log_population + log_income + 
       f(state, model = "iid", hyper = pcprior) + 
       f(cty, model = "iid", hyper = pcprior) + 
       f(income_qr, model = "iid", hyper = pcprior) + 
       f(q_mob, z_relative_mob , model = "iid", hyper = pcprior) + 
       f(q_gini, z_gini , model = "iid", hyper = pcprior)


# config = TRUE stores what inla.posterior.sample() needs when this fit is
# passed to simulate_predictions(); dic / waic / cpo request fit criteria.
f1 = inla(formula, family = "gaussian", data = female,
#           control.predictor=list(compute = TRUE),
          control.compute = list(config = TRUE, dic = TRUE,
                                 waic = TRUE, cpo = TRUE), 
#           control.inla = list(strategy ="gaussian"),
          verbose = TRUE)

In [49]:
f1$summary.fixed

Unnamed: 0,mean,sd,0.025quant,0.5quant,0.975quant,mode,kld
(Intercept),84.9474985,1.30870501,82.3049016,84.947431,87.5913489,84.9475011,5.603111e-09
z_relative_mob,-0.222883,0.06356958,-0.3470683,-0.2228987,-0.09854405,-0.2228937,5.322257e-06
z_gini,0.1439463,0.15918393,-0.1717756,0.1439453,0.45946306,0.1440033,1.9797e-05
log_population,-0.2148667,0.02759201,-0.269044,-0.2148683,-0.16073085,-0.2148693,1.822861e-14
log_income,1.098802,0.13204315,0.8395284,1.098796,1.35786746,1.0987951,2.547056e-14


In [50]:
bri.hyperpar.summary(f1)

Unnamed: 0,mean,sd,q0.025,q0.5,q0.975,mode
SD for the Gaussian observations,1.15963278,0.01191264,1.13689746,1.15934386,1.183652,1.1585153
SD for state,0.5469012,0.07004732,0.43149425,0.53858846,0.7051523,0.51870145
SD for cty,0.52466497,0.02203057,0.48181655,0.52455596,0.5682688,0.52498093
SD for income_qr,2.54320251,0.94900487,1.26164343,2.34395349,4.9316836,2.00805761
SD for q_mob,0.08795318,0.05720506,0.02551899,0.07236143,0.2409428,0.05153076
SD for q_gini,0.27765261,0.15396563,0.10137775,0.23704051,0.6855605,0.18042773


## First differences (simulation, baseline)

In [51]:
# setorder() sorts `male` in place (descending le), so the table keeps this
# order afterwards. top_male holds the first 11 rows, i.e. the highest le.
setorder(male, -le)
top_male = male[, .(county, state, le)][1:11]

In [52]:
# Re-sorts `male` in place, now ascending in le; this is the row order the
# table has from here on. bottom_male holds the 11 rows with the lowest le
# and, unlike top_male, also keeps income_q.
setorder(male, le)
bottom_male = male[, .(county, state, le, income_q)][1:11]

In [53]:
# Simulate predicted values on a contrast grid from posterior draws of an
# INLA model.
#
# For every joint posterior draw the linear predictor is evaluated on the rows
# of `data` using the fixed effects plus the income-quartile-specific random
# slope of `contrast` (the draws of the `random` term). The state, county and
# quartile random intercepts are not added.
#
# Arguments:
#   model     fitted inla object; it must have been run with
#             control.compute = list(config = TRUE) so that it can be sampled.
#   data      data.table with one column per fixed-effect covariate and
#             exactly 8 rows: quartiles 1-4 at contrast 0, then quartiles 1-4
#             at contrast 1. Extra columns are ignored.
#   nsim      number of posterior draws.
#   contrast  name of the fixed effect whose slope varies by quartile.
#   random    name of the f() term that holds the quartile-specific slopes.
#
# Returns a long data.table with columns q (1-4), <contrast> (0 / 1; set from
# the row layout above, not read from `data`), variable (draw id V1..Vnsim)
# and value (prediction).
simulate_predictions <- function(model, data, nsim = 1000, contrast='z_relative_mob', 
                               random = 'q_mob') {

    n_quartiles <- 4L
    contrast_values <- c(0.0, 1.0)
    N <- nrow(data)

    # validate before the (slow) posterior sampling instead of failing after it
    if (N != n_quartiles * length(contrast_values)) {
        stop("`data` must have ", n_quartiles * length(contrast_values),
             " rows (quartiles 1-4 at contrast 0, then at contrast 1), not ", N,
             call. = FALSE)
    }
    coef_names <- rownames(model$summary.fixed)
    if (!contrast %in% coef_names) {
        stop("`contrast` ('", contrast, "') is not a fixed effect of `model`",
             call. = FALSE)
    }

    # design matrix, with columns put in the order of the model's fixed effects
    rhs <- formula(paste0('~ ', paste0(names(data), collapse = ' + ')))
    X <- model.matrix(rhs, data = data)
    missing_cols <- setdiff(coef_names, colnames(X))
    if (length(missing_cols) > 0) {
        stop("`data` lacks columns for fixed effects: ",
             paste(missing_cols, collapse = ", "), call. = FALSE)
    }
    X <- X[, coef_names, drop = FALSE]
    x_contrast <- X[, contrast]

    # simulate posterior distribution
    draws <- inla.posterior.sample(n = nsim, result = model)

    # Positions inside the latent vector, taken from the first draw and reused:
    # every draw is a sample of the same latent field. Fixed effects are what
    # remains after dropping the linear predictor and the random-effect terms,
    # and are assumed to appear in the order of model$summary.fixed.
    latent_names <- rownames(draws[[1]]$latent)
    is_fixed <- !grepl("^Pred|^state|^cty|^obs|^income_qr|q_mob|q_gini", latent_names)
    is_slope <- grepl(random, latent_names)
    if (sum(is_fixed) != length(coef_names)) {
        stop("could not isolate the fixed effects in the posterior sample",
             call. = FALSE)
    }
    if (sum(is_slope) != n_quartiles) {
        stop("expected ", n_quartiles, " latent values for '", random,
             "', found ", sum(is_slope), call. = FALSE)
    }

    Ysim <- matrix(nrow = N, ncol = nsim)
    for (i in seq_len(nsim)) {
        latent <- draws[[i]]$latent
        betas <- latent[is_fixed]
        slopes <- latent[is_slope]
        # X %*% betas plus the quartile slope for the contrast column; equals
        # the row-wise product with a per-row coefficient matrix
        Ysim[, i] <- drop(X %*% betas) +
            x_contrast * rep(slopes, times = length(contrast_values))
    }

    out <- data.table(Ysim)
    out[, q := rep(seq_len(n_quartiles), times = length(contrast_values))]
    out[, (contrast) := rep(contrast_values, each = n_quartiles)]
    melt(out, id.vars = c('q', contrast))
}

# First differences of simulated predictions, per group and simulation draw.
#
# Arguments (all but the first are column names given as strings):
#   simulated_data      long data.table, e.g. from simulate_predictions().
#   value_variable      column holding the predicted values.
#   constrast_variable  column holding the contrast value. When supplied and
#                       present, rows are ordered by it so each difference is
#                       always higher-contrast minus lower-contrast; otherwise
#                       the existing row order is used.
#   simulation_index    column identifying the simulation draw.
#   group_variable      column identifying the group (income quartile).
#
# Returns a data.table with columns sim, q (the group value) and diff, with
# the groups stacked in order of first appearance.
first_difference <- function(simulated_data, value_variable, constrast_variable, 
                            simulation_index, group_variable) {
    groups <- simulated_data[, unique(get(group_variable))]
    # the argument used to be ignored, so tolerate callers that omit it
    sort_by_contrast <- !missing(constrast_variable) &&
        constrast_variable %in% names(simulated_data)

    # collect one table per group and bind once, instead of growing with rbind
    pieces <- vector("list", length(groups))
    k <- 0L
    for (g in groups) {
        k <- k + 1L
        rows <- simulated_data[get(group_variable) == g]
        if (sort_by_contrast) {
            # order() is stable, so draws keep their relative order
            rows <- rows[order(rows[[constrast_variable]])]
        }
        pieces[[k]] <- rows[, .(q = g, diff = diff(get(value_variable))),
                            by = .(sim = get(simulation_index))]
    }
    rbindlist(pieces, use.names = TRUE)
}

# Between-group comparison of first differences.
#
# For every pair of groups (g1, g2) this computes, draw by draw,
#   [value(g1, contrast = 1) - value(g1, contrast = 0)] -
#   [value(g2, contrast = 1) - value(g2, contrast = 0)].
#
# Arguments:
#   data      long data.table with a `value` column, e.g. the output of
#             simulate_predictions(); rows of different groups must be in the
#             same draw order.
#   contrast  name of the contrast column (values 0 and 1).
#   group     name of the group column; all pairs of its sorted unique values
#             are compared (1-2, 1-3, ..., 3-4 for quartiles 1-4).
#   model     label written to the `model` column of the result.
#
# Returns a data.table with columns type (the contrast name), contrast (the
# pair label "g1-g2"), model and values.
first_difference_between_groups <- function(data, contrast = 'z_gini', group = 'q', model = 'm1') {
    group_ids <- data[[group]]
    at_one <- data[[contrast]] == 1
    at_zero <- data[[contrast]] == 0
    values <- data[["value"]]

    # first difference within one group; which() drops NA comparisons
    effect_in <- function(g) {
        values[which(group_ids == g & at_one)] - values[which(group_ids == g & at_zero)]
    }

    group_levels <- sort(unique(group_ids))
    if (length(group_levels) < 2) {
        return(rbindlist(list()))
    }
    # index pairs in lexicographic order, one pair per column
    pairs <- utils::combn(seq_along(group_levels), 2)
    pieces <- lapply(seq_len(ncol(pairs)), function(k) {
        first <- group_levels[pairs[1, k]]
        second <- group_levels[pairs[2, k]]
        data.table(type = contrast, contrast = paste0(first, '-', second), model = model,
                   values = effect_in(first) - effect_in(second))
    })
    rbindlist(pieces)
}

### Income mobility 

In [54]:
# Baseline prediction grid for the mobility contrast: rows 1-4 have
# z_relative_mob = 0, rows 5-8 have z_relative_mob = 1; other covariates at 0.
nrep <- 4 * 2 # 4 quartiles for 2 contrast values
relative_mob_pred_data <- data.table(
    z_relative_mob = rep(c(0, 1), each = 4),
    z_gini         = numeric(nrep),
    log_population = numeric(nrep),
    log_income     = numeric(nrep)
)

In [55]:
# Baseline model, men: simulated predictions over the mobility grid.
sim_mob_male <- simulate_predictions(
    model = m1, data = relative_mob_pred_data, nsim = 2000,
    contrast = "z_relative_mob", random = "q_mob"
)

In [56]:
# First difference (mobility 1 vs 0) per quartile and draw; saved to disk.
sim_mob_male_m1 <- first_difference(
    simulated_data = sim_mob_male, value_variable = "value",
    constrast_variable = "z_relative_mob", simulation_index = "variable",
    group_variable = "q"
)
fwrite(x = sim_mob_male_m1, file = "../data/sim_mob_male_m1.csv")

In [57]:
# Baseline model, women: simulated predictions over the mobility grid.
sim_mob_female <- simulate_predictions(
    model = f1, data = relative_mob_pred_data, nsim = 2000,
    contrast = "z_relative_mob", random = "q_mob"
)

In [58]:
# First difference (mobility 1 vs 0) per quartile and draw; saved to disk.
sim_mob_female_m1 <- first_difference(
    simulated_data = sim_mob_female, value_variable = "value",
    constrast_variable = "z_relative_mob", simulation_index = "variable",
    group_variable = "q"
)
fwrite(x = sim_mob_female_m1, file = "../data/sim_mob_female_m1.csv")

### Inequality


In [59]:
# Baseline prediction grid for the inequality contrast: rows 1-4 have
# z_gini = 0, rows 5-8 have z_gini = 1; other covariates at 0.
nrep <- 4 * 2 # 4 quartiles for 2 contrast values
gini_pred_data <- data.table(
    z_relative_mob = numeric(nrep),
    z_gini         = rep(c(0, 1), each = 4),
    log_population = numeric(nrep),
    log_income     = numeric(nrep)
)

In [60]:
# Baseline model, men: simulated predictions over the Gini grid.
sim_gini_male <- simulate_predictions(
    model = m1, data = gini_pred_data, nsim = 2000,
    contrast = "z_gini", random = "q_gini"
)

In [61]:
# First difference (Gini 1 vs 0) per quartile and draw; saved to disk.
sim_gini_male_m1 <- first_difference(
    simulated_data = sim_gini_male, value_variable = "value",
    constrast_variable = "z_gini", simulation_index = "variable",
    group_variable = "q"
)
fwrite(x = sim_gini_male_m1, file = "../data/sim_gini_male_m1.csv")

In [62]:
# Baseline model, women: simulated predictions over the Gini grid.
sim_gini_female <- simulate_predictions(
    model = f1, data = gini_pred_data, nsim = 2000,
    contrast = "z_gini", random = "q_gini"
)

In [63]:
# First difference (Gini 1 vs 0) per quartile and draw; saved to disk.
sim_gini_female_m1 <- first_difference(
    simulated_data = sim_gini_female, value_variable = "value",
    constrast_variable = "z_gini", simulation_index = "variable",
    group_variable = "q"
)
fwrite(x = sim_gini_female_m1, file = "../data/sim_gini_female_m1.csv")

# Adjusting for contextual variables

### Male

In [103]:
# OLS fit of the contextual-variables fixed effects (men). Only its residual
# SD is used, to set the scale of the prior defined below.
lmod <- lm(le ~ z_relative_mob  + z_gini + log_population + log_income + 
           log_crime_rate + log_poverty + log_mig_inflow + log_mig_outflow + 
           log_foreign + log_pct_black + log_pct_hispanic + log_house_value + 
           log_local_gov_exp + log_unemployment + z_segregation_income + z_religion + 
           z_labor_force + z_college + z_middle_class + z_uninsured + z_medicare_expenses, male)

# pc prior
# Penalised-complexity prior for the precision of whichever term receives it
# through `hyper`: param = c(u, alpha) encodes P(sd > u) = alpha, here with
# u = 3 * residual SD and alpha = 0.01.
sdres <- sd(residuals(lmod))
pcprior <- list(prec = list(prior="pc.prec", param = c(3*sdres,0.01)))

In [104]:
# Contextual-variables model, men: the baseline fixed effects plus 17 county
# covariates, with the same random-effect structure: iid intercepts for
# state, county and income quartile, and quartile-specific iid slopes for
# mobility (q_mob) and inequality (q_gini), all with the PC prior `pcprior`.
formula = le ~ z_relative_mob  + z_gini + log_population + log_income +
       log_crime_rate + log_poverty + log_mig_inflow + log_mig_outflow + 
       log_foreign + log_pct_black + log_pct_hispanic + log_house_value + 
       log_local_gov_exp + log_unemployment + z_segregation_income + z_religion + 
       z_labor_force + z_college + z_middle_class + z_uninsured + z_medicare_expenses +
       f(state, model = "iid", hyper = pcprior) + 
       f(cty, model = "iid", hyper = pcprior) + 
       f(income_qr, model = "iid", hyper = pcprior) + 
       f(q_mob, z_relative_mob , model = "iid", hyper = pcprior) + 
       f(q_gini, z_gini , model = "iid", hyper = pcprior)


# config = TRUE stores what inla.posterior.sample() needs when this fit is
# passed to simulate_predictions(); dic / waic / cpo request fit criteria.
# Unlike the baseline fits, control.predictor compute = TRUE is switched on.
m2 = inla(formula, family = "gaussian", data = male,
          control.predictor=list(compute = TRUE),
          control.compute = list(config = TRUE, dic = TRUE,
                                 waic = TRUE, cpo = TRUE),
          verbose = TRUE)

In [105]:
bri.hyperpar.summary(m2)

Unnamed: 0,mean,sd,q0.025,q0.5,q0.975,mode
SD for the Gaussian observations,1.1444495,0.01003046,1.12358195,1.1449688,1.1628025,1.1470537
SD for state,0.5016463,0.06959488,0.36880004,0.5012601,0.6400222,0.5048931
SD for cty,0.3741101,0.02426422,0.32694229,0.3740604,0.4220598,0.3750955
SD for income_qr,3.9120935,1.54969946,1.90137015,3.5613673,7.871652,2.9873358
SD for q_mob,0.2236224,0.13519896,0.07647995,0.1862049,0.585817,0.1370398
SD for q_gini,0.2666925,0.12728236,0.10949935,0.2360154,0.5972853,0.1893758


In [106]:
m2$summary.fixed

Unnamed: 0,mean,sd,0.025quant,0.5quant,0.975quant,mode,kld
(Intercept),81.142913675,1.90025927,77.33579277,81.14278405,84.948228267,81.142561866,1.275072e-10
z_relative_mob,-0.013120886,0.12076097,-0.2561829,-0.013140151,0.229278433,-0.013238792,6.736475e-05
z_gini,0.063129463,0.15366097,-0.25436726,0.063145842,0.380049395,0.063077528,5.703824e-07
log_population,-0.080224649,0.0369143,-0.15272209,-0.08022125,-0.007812378,-0.080210384,1.686774e-14
log_income,-0.203730729,0.24302523,-0.68088856,-0.203752098,0.27310326,-0.203773947,1.915028e-14
log_crime_rate,0.004243439,0.03098729,-0.05660284,0.004244329,0.065028945,0.004248802,1.815426e-15
log_poverty,-0.00418892,0.13024552,-0.26003198,-0.004160552,0.251262571,-0.004089671,1.978716e-14
log_mig_inflow,0.056221614,0.08924059,-0.11898356,0.056209829,0.231328952,0.05619326,2.031207e-14
log_mig_outflow,-0.340854218,0.10955921,-0.55598986,-0.340854958,-0.125913622,-0.340846721,1.965492e-14
log_foreign,-0.025198862,0.0532481,-0.12992005,-0.025144881,0.079128266,-0.025030129,1.508513e-14


#### Variables

```
[1] "county"               "gender"               "income_q"            
 [4] "le"                   "county_name"          "population"          
 [7] "statename"            "stateabbrv"           "density"             
[10] "gini"                 "relative_mob"         "absolute_mob"        
[13] "segregation_income"   "segregation_race"     "income"              
[16] "poverty"              "middle_class"         "mig_inflow"          
[19] "mig_outflow"          "foreign"              "religion"            
[22] "crime_rate"           "uninsured"            "labor_force"         
[25] "unemployment"         "pct_black"            "pct_hispanic"        
[28] "obesity"              "smoking"              "exercise"            
[31] "house_value"          "college"              "medicare_expenses"   
[34] "local_gov_exp"        "male"                 "q2"                  
[37] "q3"                   "q4"                   "log_population"      
[40] "log_crime_rate"       "log_poverty"          "log_mig_inflow"      
[43] "log_mig_outflow"      "log_foreign"          "log_pct_black"       
[46] "log_pct_hispanic"     "log_house_value"      "log_local_gov_exp"   
[49] "log_unemployment"     "log_income"           "z_gini"              
[52] "z_relative_mob"       "z_absolute_mob"       "z_middle_class"      
[55] "z_segregation_income" "z_religion"           "z_labor_force"       
[58] "z_uninsured"          "z_medicare_expenses"  "z_college"           
[61] "z_obesity"            "z_smoking"            "z_exercise"
```

In [107]:
# Prediction grids for the contextual-variables models.
# Rows 1-4 hold the contrast covariate at 0 and rows 5-8 hold it at 1; every
# other covariate is held at 0.
nrep <- 4 * 2 # 4 quartiles for 2 contrast values
relative_mob_pred <- data.table(
    z_relative_mob       = rep(c(0, 1), each = 4),
    z_gini               = numeric(nrep),
    log_population       = numeric(nrep),
    log_income           = numeric(nrep),
    log_crime_rate       = numeric(nrep),
    log_poverty          = numeric(nrep),
    log_mig_inflow       = numeric(nrep),
    log_mig_outflow      = numeric(nrep),
    log_foreign          = numeric(nrep),
    log_pct_black        = numeric(nrep),
    log_pct_hispanic     = numeric(nrep),
    log_house_value      = numeric(nrep),
    log_local_gov_exp    = numeric(nrep),
    log_unemployment     = numeric(nrep),
    z_segregation_income = numeric(nrep),
    z_religion           = numeric(nrep),
    z_labor_force        = numeric(nrep),
    z_college            = numeric(nrep),
    z_middle_class       = numeric(nrep),
    z_uninsured          = numeric(nrep),
    z_medicare_expenses  = numeric(nrep)
)

# Same columns in the same order, with the contrast moved from mobility to Gini.
gini_pred <- copy(relative_mob_pred)[, `:=`(z_relative_mob = numeric(nrep),
                                            z_gini = rep(c(0, 1), each = 4))]

In [108]:
# Contextual model, men: simulate over the mobility grid, then take the
# first difference (mobility 1 vs 0) per quartile and draw, and save it.
sim_mob_male <- simulate_predictions(
    model = m2, data = relative_mob_pred, nsim = 2000,
    contrast = "z_relative_mob", random = "q_mob"
)

sim_mob_male_m2 <- first_difference(
    simulated_data = sim_mob_male, value_variable = "value",
    constrast_variable = "z_relative_mob", simulation_index = "variable",
    group_variable = "q"
)
fwrite(x = sim_mob_male_m2, file = "../data/sim_mob_male_m2.csv")

In [109]:
# Contextual model, men: simulate over the Gini grid, then take the first
# difference (Gini 1 vs 0) per quartile and draw, and save it.
sim_gini_male <- simulate_predictions(
    model = m2, data = gini_pred, nsim = 2000,
    contrast = "z_gini", random = "q_gini"
)

sim_gini_male_m2 <- first_difference(
    simulated_data = sim_gini_male, value_variable = "value",
    constrast_variable = "z_gini", simulation_index = "variable",
    group_variable = "q"
)
fwrite(x = sim_gini_male_m2, file = "../data/sim_gini_male_m2.csv")

### Female

In [110]:
# OLS fit of the contextual-variables fixed effects (women). Only its
# residual SD is used, to set the scale of the prior defined below.
lmod <- lm(le ~ z_relative_mob  + z_gini + log_population + log_income + 
           log_crime_rate + log_poverty + log_mig_inflow + log_mig_outflow + 
           log_foreign + log_pct_black + log_pct_hispanic + log_house_value + 
           log_local_gov_exp + log_unemployment + z_segregation_income + z_religion + 
           z_labor_force + z_college + z_middle_class + z_uninsured + z_medicare_expenses, female)

# pc prior
# Penalised-complexity prior for the precision of whichever term receives it
# through `hyper`: param = c(u, alpha) encodes P(sd > u) = alpha, here with
# u = 3 * residual SD and alpha = 0.01.
sdres <- sd(residuals(lmod))
pcprior <- list(prec = list(prior="pc.prec", param = c(3*sdres,0.01)))

In [111]:
# Contextual-variables model, women: the baseline fixed effects plus 17
# county covariates, with iid intercepts for state, county and income
# quartile and quartile-specific iid slopes for mobility (q_mob) and
# inequality (q_gini), all with the PC prior `pcprior`.
formula = le ~ z_relative_mob  + z_gini + log_population + log_income +
       log_crime_rate + log_poverty + log_mig_inflow + log_mig_outflow + 
       log_foreign + log_pct_black + log_pct_hispanic + log_house_value + 
       log_local_gov_exp + log_unemployment + z_segregation_income + z_religion + 
       z_labor_force + z_college + z_middle_class + z_uninsured + z_medicare_expenses + 
       f(state, model = "iid", hyper = pcprior) + 
       f(cty, model = "iid", hyper = pcprior) + 
       f(income_qr, model = "iid", hyper = pcprior) + 
       f(q_mob, z_relative_mob , model = "iid", hyper = pcprior) + 
       f(q_gini, z_gini , model = "iid", hyper = pcprior)


# config = TRUE stores what inla.posterior.sample() needs when this fit is
# passed to simulate_predictions(); dic / waic / cpo request fit criteria.
f2 = inla(formula, family = "gaussian", data = female,
          control.predictor=list(compute = TRUE),
          control.compute = list(config = TRUE, dic = TRUE,
                                 waic = TRUE, cpo = TRUE),
          verbose = TRUE)

In [112]:
f2$summary.fixed

Unnamed: 0,mean,sd,0.025quant,0.5quant,0.975quant,mode,kld
(Intercept),84.879309381,1.32672097,82.2528708,84.879216073,87.501537818,84.879216388,2.058968e-09
z_relative_mob,0.009286034,0.06594163,-0.11604545,0.009277622,0.134741795,0.009271338,1.450574e-06
z_gini,0.083124493,0.1492652,-0.19772027,0.083096981,0.363768508,0.083123068,0.0001452596
log_population,-0.061529878,0.03810786,-0.1363495,-0.061534115,0.01324381,-0.061539282,3.016216e-14
log_income,-0.442710101,0.25259357,-0.93871538,-0.442713247,0.052851796,-0.442698315,2.838482e-14
log_crime_rate,0.089334097,0.03206673,0.02637812,0.089332285,0.152242506,0.089331395,4.217556e-16
log_poverty,-0.316929607,0.13418503,-0.58041363,-0.316933966,-0.053666005,-0.316931256,2.793959e-14
log_mig_inflow,0.001710091,0.09258113,-0.18008595,0.001708603,0.183345388,0.001713326,2.874231e-14
log_mig_outflow,-0.180206031,0.11389338,-0.40382787,-0.180216011,0.043263665,-0.180226084,2.995582e-14
log_foreign,-0.099859635,0.05491421,-0.20779284,-0.099827743,0.007793093,-0.099762159,4.141839e-14


In [113]:
bri.hyperpar.summary(f2)

Unnamed: 0,mean,sd,q0.025,q0.5,q0.975,mode
SD for the Gaussian observations,1.1519235,0.02345283,1.09828021,1.15550759,1.1865981,1.17228996
SD for state,0.46902888,0.06080145,0.36398179,0.46372022,0.6022787,0.45208074
SD for cty,0.40707538,0.03280718,0.3555077,0.40234844,0.4824804,0.38808734
SD for income_qr,2.64564791,0.83611003,1.50884148,2.46974064,4.7401439,2.15020627
SD for q_mob,0.09955766,0.05732662,0.03498131,0.08420696,0.2519821,0.06327253
SD for q_gini,0.29646177,0.17592683,0.11358945,0.24546396,0.7713704,0.17913003


In [114]:
# Contextual model, women: simulate over the mobility grid, then take the
# first difference (mobility 1 vs 0) per quartile and draw, and save it.
sim_mob_female <- simulate_predictions(
    model = f2, data = relative_mob_pred, nsim = 2000,
    contrast = "z_relative_mob", random = "q_mob"
)

sim_mob_female_m2 <- first_difference(
    simulated_data = sim_mob_female, value_variable = "value",
    constrast_variable = "z_relative_mob", simulation_index = "variable",
    group_variable = "q"
)
fwrite(x = sim_mob_female_m2, file = "../data/sim_mob_female_m2.csv")

In [76]:
# Contextual model, women: simulate over the Gini grid, then take the first
# difference (Gini 1 vs 0) per quartile and draw, and save it.
sim_gini_female <- simulate_predictions(
    model = f2, data = gini_pred, nsim = 2000,
    contrast = "z_gini", random = "q_gini"
)

sim_gini_female_m2 <- first_difference(
    simulated_data = sim_gini_female, value_variable = "value",
    constrast_variable = "z_gini", simulation_index = "variable",
    group_variable = "q"
)
fwrite(x = sim_gini_female_m2, file = "../data/sim_gini_female_m2.csv")

# Adjusting for Health Behaviors

### Male

In [None]:
# OLS fit of the health-behaviour fixed effects (men). Only its residual SD
# is used, to set the scale of the prior defined below.
lmod <- lm(le ~ z_relative_mob  + z_gini + log_population + log_income + 
           z_smoking + z_exercise +  z_obesity, male)

# pc prior
# Penalised-complexity prior for the precision of whichever term receives it
# through `hyper`: param = c(u, alpha) encodes P(sd > u) = alpha, here with
# u = 3 * residual SD and alpha = 0.01.
sdres <- sd(residuals(lmod))
pcprior <- list(prec = list(prior="pc.prec", param = c(3*sdres,0.01)))

In [91]:
# Health-behaviour model, men: baseline fixed effects plus smoking, exercise
# and obesity, with iid intercepts for state, county and income quartile and
# quartile-specific iid slopes for mobility (q_mob) and inequality (q_gini),
# all with the PC prior `pcprior`.
formula = le ~ z_relative_mob  + z_gini + log_population + log_income + 
           z_smoking + z_exercise +  z_obesity + 
       f(state, model = "iid", hyper = pcprior) + 
       f(cty, model = "iid", hyper = pcprior) + 
       f(income_qr, model = "iid", hyper = pcprior) + 
       f(q_mob, z_relative_mob , model = "iid", hyper = pcprior) + 
       f(q_gini, z_gini , model = "iid", hyper = pcprior)


# config = TRUE stores what inla.posterior.sample() needs when this fit is
# passed to simulate_predictions(); dic / waic / cpo request fit criteria.
m3 = inla(formula, family = "gaussian", data = male,
          control.predictor=list(compute = TRUE),
          control.compute = list(config = TRUE, dic = TRUE,
                                 waic = TRUE, cpo = TRUE),
          verbose = TRUE)

In [92]:
m3$summary.fixed

Unnamed: 0,mean,sd,0.025quant,0.5quant,0.975quant,mode,kld
(Intercept),81.23418597,1.58082445,78.04725524,81.23413977,84.42187,81.23421639,3.055245e-10
z_relative_mob,-0.27776599,0.11695839,-0.51259765,-0.27773024,-0.0445671,-0.2777899,2.100317e-05
z_gini,0.20885515,0.15583118,-0.11125623,0.20881596,0.5288205,0.20872397,3.177029e-07
log_population,-0.23141664,0.02725623,-0.28491586,-0.23142482,-0.1779217,-0.23143878,2.101557e-14
log_income,1.14529116,0.13086912,0.88842041,1.14525124,1.402144,1.14518159,1.6206e-14
z_smoking,-0.10975354,0.02032315,-0.14966383,-0.10975147,-0.06989132,-0.10974559,0.0
z_exercise,0.11994164,0.02564266,0.06960379,0.11993784,0.1702544,0.11993238,1.978633e-15
z_obesity,-0.03514575,0.01787669,-0.0702537,-0.03514324,-8.390304e-05,-0.03513665,1.017788e-15


In [93]:
bri.hyperpar.summary(m3)

Unnamed: 0,mean,sd,q0.025,q0.5,q0.975,mode
SD for the Gaussian observations,1.1341092,0.01184189,1.1115048,1.1338235,1.1579835,1.1329999
SD for state,0.6581705,0.07951981,0.50648,0.6574437,0.8171191,0.6604503
SD for cty,0.5149371,0.02234541,0.4730489,0.5141515,0.5607561,0.5123794
SD for income_qr,3.1385574,1.03069619,1.7204022,2.9279201,5.7097992,2.5539885
SD for q_mob,0.1966822,0.11522246,0.0720993,0.16444,0.505793,0.1218954
SD for q_gini,0.2706458,0.13113738,0.1058137,0.2400851,0.6090776,0.1928002


In [94]:
# Prediction grids for the health-behaviour models. These reuse the names
# relative_mob_pred / gini_pred, so they replace any grids defined earlier.
# Rows 1-4 hold the contrast covariate at 0 and rows 5-8 hold it at 1; every
# other covariate is held at 0.
nrep <- 4 * 2 # 4 quartiles for 2 contrast values
relative_mob_pred <- data.table(
    z_relative_mob = rep(c(0, 1), each = 4),
    z_gini         = numeric(nrep),
    log_population = numeric(nrep),
    log_income     = numeric(nrep),
    z_smoking      = numeric(nrep),
    z_exercise     = numeric(nrep),
    z_obesity      = numeric(nrep)
)

# Same columns in the same order, with the contrast moved from mobility to Gini.
gini_pred <- copy(relative_mob_pred)[, `:=`(z_relative_mob = numeric(nrep),
                                            z_gini = rep(c(0, 1), each = 4))]

In [102]:
# Health-behaviour model, men: simulate over the mobility grid, then take
# the first difference (mobility 1 vs 0) per quartile and draw, and save it.
sim_mob_male <- simulate_predictions(
    model = m3, data = relative_mob_pred, nsim = 2000,
    contrast = "z_relative_mob", random = "q_mob"
)

sim_mob_male_m3 <- first_difference(
    simulated_data = sim_mob_male, value_variable = "value",
    constrast_variable = "z_relative_mob", simulation_index = "variable",
    group_variable = "q"
)
fwrite(x = sim_mob_male_m3, file = "../data/sim_mob_male_m3.csv")

### Female

In [96]:
# OLS fit of the health-behaviour fixed effects (women). Only its residual
# SD is used, to set the scale of the prior defined below.
lmod <- lm(le ~ z_relative_mob  + z_gini + log_population + log_income + 
           z_smoking + z_exercise +  z_obesity, female)

# pc prior
# Penalised-complexity prior for the precision of whichever term receives it
# through `hyper`: param = c(u, alpha) encodes P(sd > u) = alpha, here with
# u = 3 * residual SD and alpha = 0.01.
sdres <- sd(residuals(lmod))
pcprior <- list(prec = list(prior="pc.prec", param = c(3*sdres,0.01)))

In [97]:
# Health-behaviour model, women: baseline fixed effects plus smoking,
# exercise and obesity, with iid intercepts for state, county and income
# quartile and quartile-specific iid slopes for mobility (q_mob) and
# inequality (q_gini), all with the PC prior `pcprior`.
formula = le ~ z_relative_mob  + z_gini + log_population + log_income + 
           z_smoking + z_exercise +  z_obesity + 
       f(state, model = "iid", hyper = pcprior) + 
       f(cty, model = "iid", hyper = pcprior) + 
       f(income_qr, model = "iid", hyper = pcprior) + 
       f(q_mob, z_relative_mob , model = "iid", hyper = pcprior) + 
       f(q_gini, z_gini , model = "iid", hyper = pcprior)


# config = TRUE stores what inla.posterior.sample() needs when this fit is
# passed to simulate_predictions(); dic / waic / cpo request fit criteria.
f3 = inla(formula, family = "gaussian", data = female,
          control.predictor=list(compute = TRUE),
          control.compute = list(config = TRUE, dic = TRUE,
                                 waic = TRUE, cpo = TRUE),
          verbose = TRUE)

In [98]:
f3$summary.fixed

Unnamed: 0,mean,sd,0.025quant,0.5quant,0.975quant,mode,kld
(Intercept),84.941948393,1.18256365,82.55396155,84.941874221,87.33126621,84.941913539,5.154034e-11
z_relative_mob,-0.204854997,0.06145614,-0.3312342,-0.204858313,-0.07844124,-0.204859556,2.219569e-10
z_gini,0.133334701,0.15015657,-0.17939378,0.133327359,0.44586931,0.133345887,6.604594e-09
log_population,-0.21217821,0.0274209,-0.2660141,-0.212181726,-0.1583729,-0.212186492,1.845681e-14
log_income,1.061500761,0.13160812,0.80316158,1.061467352,1.31978472,1.061411171,1.922941e-14
z_smoking,-0.099819595,0.02074491,-0.14055807,-0.099817519,-0.05912998,-0.099811572,1.007737e-15
z_exercise,0.064483144,0.02611443,0.01321517,0.064480607,0.11571812,0.064477725,6.359297e-16
z_obesity,-0.003385475,0.01819595,-0.03911763,-0.003383883,0.03230509,-0.003379128,9.875022e-16


In [99]:
bri.hyperpar.summary(f3)

Unnamed: 0,mean,sd,q0.025,q0.5,q0.975,mode
SD for the Gaussian observations,1.16135061,0.01206989,1.13738257,1.16145699,1.1847574,1.16204278
SD for state,0.53100441,0.06531424,0.41334135,0.52728795,0.6694737,0.52030663
SD for cty,0.51038552,0.02245737,0.46778814,0.50980918,0.555945,0.50867789
SD for income_qr,2.23573564,0.76306481,1.11818115,2.10582284,4.0842425,1.87294939
SD for q_mob,0.08633105,0.04663242,0.02689292,0.07605174,0.2055061,0.05833831
SD for q_gini,0.24804205,0.1409115,0.08273996,0.21228244,0.6186579,0.16055006


In [101]:
# Health-behaviour model, women: simulate over the mobility grid, then take
# the first difference (mobility 1 vs 0) per quartile and draw, and save it.
sim_mob_female <- simulate_predictions(
    model = f3, data = relative_mob_pred, nsim = 2000,
    contrast = "z_relative_mob", random = "q_mob"
)

sim_mob_female_m3 <- first_difference(
    simulated_data = sim_mob_female, value_variable = "value",
    constrast_variable = "z_relative_mob", simulation_index = "variable",
    group_variable = "q"
)
fwrite(x = sim_mob_female_m3, file = "../data/sim_mob_female_m3.csv")

# Create tables

In [130]:
library(texreg)
source('../functions/extract_inla.R')

In [133]:
# Column labels: one Women / Men pair per model specification.
cmodels <- rep(c("Women", "Men"), times = 3)

# Column order: baseline (f1, m1), health behaviours (f3, m3), contextual
# variables (f2, m2).
models <- list(f1, m1, f3, m3, f2, m2)

# Coefficients kept in the table, in display order, with their labels.
# Entries 1-4 are fixed effects, entries 5-10 are random-effect SDs.
cnames <- list(
    "(Intercept)" = "Constant",
    "z_relative_mob" = "Standardized Income mobility (Rank-Rank Slope)",
    "z_gini" = "Standardized Gini",
    "log_income" = "Log income (centered)",
    "sd for state" = "SD states",
    "sd for cty" = "SD counties",
    "sd for income_qr" = "SD income quartiles",
    "sd for q_mob" = "SD mobility by income quartile",
    "sd for q_gini" = "SD Gini by income quartile",
    "sd for the Gaussian observations" = "SD observations"
)

# screenreg(models)
texreg(
    models,
    include.dic = TRUE,
    include.waic = TRUE,
    ci.test = FALSE,
    float.pos = "htp",
    caption = "Life Expectancy (40) Models",
    booktabs = TRUE,
    use.packages = FALSE,
    dcolumn = TRUE,
    caption.above = TRUE,
    scalebox = 0.65,
    label = "inla_models",
    # sideways = TRUE,
    digits = 2,
    custom.model.names = cmodels,
    custom.coef.map = cnames,
    groups = list("Random Effects" = 5:10),
    custom.note = "Note: Selected coefficients (mean of marginal posterior distribution). 95\\% credibility intervals.",
    file = "tables/inla_models.tex"
)

The table was written to the file 'tables/inla_models.tex'.

