# INLA: Create models and assess predictions

Bayesian models using INLA and simulations to assess the magnitude of the effects. 

- Data are aggregated LE (40) per income quartile (12000 records)
- Analysis by gender
- Random intercept effects: state, county and income quartile
- Varying slopes: income mobility and gini
- I use PC priors, pending sensitivity analysis of priors (e.g., try default, gamma, half-cauchy)

Simulations assume that all the fixed and random effects are on their average, except for the varying slope examined. More details on the file 'myFunctions.R'

In [1]:
#library(rstanarm)
library(INLA)
library(brinla)
library(data.table)
library(ggplot2)
options(repr.plot.width=3, repr.plot.height=3)

Loading required package: sp
Loading required package: Matrix
This is INLA_17.05.31 built 2017-05-31 13:53:28 UTC.
See www.r-inla.org/contact-us for how to get help.


In [2]:
# read data
df = fread('../data/le_cov_sel.csv')
nrow(df)

In [3]:
print(names(df))

 [1] "le"                   "z_relative_mob"       "z_gini"              
 [4] "county"               "gender"               "income_q"            
 [7] "county_name"          "stateabbrv"           "statename"           
[10] "log_population"       "log_income"           "z_segregation_income"
[13] "log_unemployment"     "z_uninsured"          "z_medicare_expenses" 
[16] "log_crime_rate"       "log_pct_black"        "log_pct_hispanic"    
[19] "z_obesity"            "z_smoking"            "z_exercise"          


In [4]:
df[, state := .GRP, by = statename]
df[, income_qr := .GRP, by = income_q]

In [5]:
table(df[, .(income_qr, income_q)]) # ok, right!

         income_q
income_qr   Q1   Q2   Q3   Q4
        1 3000    0    0    0
        2    0 3000    0    0
        3    0    0 3000    0
        4    0    0    0 3000

# INLA Models (using PC prior)

In [6]:
# create auxiliary variables
df[, state_mob := state]
df[, state_gini := state]
df[, cty := county]
df[, cty_mob := county]
df[, cty_gini := county]
df[, q_mob := income_qr]
df[, q_gini := income_qr]
df[, q_exercise := income_qr]

In [7]:
female = df[gender=='F']
male = df[gender=='M']

# Baseline model

### Male

In [17]:
lmod <- lm(le ~ z_relative_mob  + z_gini + log_population + log_income, male)

# pc prior
sdres <- sd(residuals(lmod))
pcprior <- list(prec = list(prior="pc.prec", param = c(3*sdres,0.01)))

In [18]:
formula = le ~ z_relative_mob  + z_gini + log_population + log_income + 
       f(state, model = "iid", hyper = pcprior) + 
       f(cty, model = "iid", hyper = pcprior) + 
       f(income_qr, model = "iid", hyper = pcprior) + 
       f(q_mob, z_relative_mob , model = "iid", hyper = pcprior) + 
       f(q_gini, z_gini , model = "iid", hyper = pcprior)

m1 = inla(formula, family = "gaussian", data = male,
#           control.predictor=list(compute = TRUE),
          control.compute = list(config = TRUE, dic = TRUE,
                                 waic = TRUE, cpo = TRUE), 
#           control.inla = list(strategy ="gaussian"),
          verbose = TRUE)

In [19]:
m1$summary.fixed

Unnamed: 0,mean,sd,0.025quant,0.5quant,0.975quant,mode,kld
(Intercept),41.2589651,2.05299252,37.1039991,41.2588719,45.41701698,41.2590213,1.575412e-09
z_relative_mob,-0.3150019,0.13643464,-0.5801898,-0.3150137,-0.04910738,-0.3149391,0.000105071
z_gini,0.2266449,0.17894734,-0.1280964,0.2266376,0.58109867,0.2266876,2.173887e-05
log_population,-0.2349964,0.02863033,-0.2912053,-0.2350006,-0.17881605,-0.2350069,2.539561e-14
log_income,1.2430655,0.13820062,0.971677,1.2430669,1.5141946,1.2430814,1.453216e-14


In [20]:
bri.hyperpar.summary(m1)

Unnamed: 0,mean,sd,q0.025,q0.5,q0.975,mode
SD for the Gaussian observations,1.1421686,0.01194267,1.11888916,1.1420846,1.1657778,1.1419665
SD for state,0.6973378,0.08282876,0.55812824,0.6884946,0.8821627,0.6674442
SD for cty,0.5397478,0.02207483,0.49650306,0.5397732,0.5831084,0.5406697
SD for income_qr,3.9955272,1.40169639,1.98038655,3.743677,7.4238823,3.2994403
SD for q_mob,0.2306179,0.14516673,0.08028984,0.1886313,0.6227559,0.1359646
SD for q_gini,0.3002662,0.17112233,0.1075542,0.2543624,0.7555401,0.1915429


### Female

In [21]:
lmod <- lm(le ~ z_relative_mob  + z_gini + log_population + log_income, female)

# pc prior
sdres <- sd(residuals(lmod))
pcprior <- list(prec = list(prior="pc.prec", param = c(3*sdres,0.01)))

In [22]:
formula = le ~ z_relative_mob  + z_gini + log_population + log_income + 
       f(state, model = "iid", hyper = pcprior) + 
       f(cty, model = "iid", hyper = pcprior) + 
       f(income_qr, model = "iid", hyper = pcprior) + 
       f(q_mob, z_relative_mob , model = "iid", hyper = pcprior) + 
       f(q_gini, z_gini , model = "iid", hyper = pcprior)


f1 = inla(formula, family = "gaussian", data = female,
#           control.predictor=list(compute = TRUE),
          control.compute = list(config = TRUE, dic = TRUE,
                                 waic = TRUE, cpo = TRUE), 
#           control.inla = list(strategy ="gaussian"),
          verbose = TRUE)

In [24]:
f1$summary.fixed

Unnamed: 0,mean,sd,0.025quant,0.5quant,0.975quant,mode,kld
(Intercept),44.9434468,1.33778755,42.2406314,44.9433777,47.647732,44.9434538,4.255462e-09
z_relative_mob,-0.2378796,0.06061195,-0.3607283,-0.2378844,-0.114817,-0.237863,4.127452e-07
z_gini,0.1492133,0.15398984,-0.1613323,0.1491843,0.459966,0.1492025,2.621297e-05
log_population,-0.2201635,0.02836916,-0.2758678,-0.220165,-0.164503,-0.2201656,2.370991e-14
log_income,1.1173786,0.13690957,0.8485442,1.1173734,1.385991,1.1173744,1.480753e-14


In [25]:
bri.hyperpar.summary(f1)

Unnamed: 0,mean,sd,q0.025,q0.5,q0.975,mode
SD for the Gaussian observations,1.1665489,0.01219703,1.14298546,1.16637305,1.1908692,1.1659446
SD for state,0.5553239,0.07493855,0.43616036,0.54491658,0.7281627,0.51937442
SD for cty,0.5211425,0.02295948,0.47812336,0.52032821,0.5682408,0.51846579
SD for income_qr,2.6045668,0.93873537,1.28953963,2.42356952,4.9297044,2.1107876
SD for q_mob,0.0954917,0.05888751,0.02959636,0.07977246,0.2521277,0.05824551
SD for q_gini,0.2842144,0.1744879,0.10151493,0.23410995,0.7546125,0.17014908


## First differences (simulation, baseline)

### Income mobility 

In [2]:
# load functions
source('functions/simulation_INLA.R')
source('functions/first_difference_INLA.R')

In [32]:
nrep = 4 * 2 # 4 quartiles for 2 contrast values
relative_mob_pred_data = data.table(
    z_relative_mob = rep(c(0.0, 1.0), times = 1 , each = 4),
    z_gini = rep(0, nrep),
    log_population = rep(0, nrep), 
    log_income = rep(0, nrep))

In [33]:
sim_mob_male = simulate_predictions(model=m1, data=relative_mob_pred_data, 
                                           nsim=2000, 
                                           contrast='z_relative_mob', 
                                           random='q_mob')

In [35]:
sim_mob_male_m1 = first_difference(sim_mob_male, 'value', 'z_relative_mob', 'variable', 'q')
fwrite(sim_mob_male_m1, file='../data/sim_mob_male_m1.csv')

In [36]:
sim_mob_female = simulate_predictions(model=f1, data=relative_mob_pred_data, 
                                           nsim=2000, 
                                           contrast='z_relative_mob', 
                                           random='q_mob')

In [37]:
sim_mob_female_m1 = first_difference(sim_mob_female, 'value', 'z_relative_mob', 'variable', 'q')
fwrite(sim_mob_female_m1, file='../data/sim_mob_female_m1.csv')

### Inequality


In [None]:
# nrep = 4 * 2 # 4 quartiles for 2 contrast values
# gini_pred_data = data.table(
#     z_relative_mob = rep(0, nrep),
#     z_gini = rep(c(0.0, 1.0), times = 1 , each = 4),
#     log_population = rep(0, nrep),     
#     log_income = rep(0, nrep)
# )

In [None]:
# sim_gini_male_m1 = first_difference(sim_gini_male, 'value', 'z_gini', 'variable', 'q')
# fwrite(sim_gini_male_m1, file='../data/sim_gini_male_m1.csv')

In [None]:
# sim_gini_female = simulate_predictions(model=f1, data=gini_pred_data, 
#                                            nsim=2000, 
#                                            contrast='z_gini', 
#                                            random='q_gini')

In [None]:
# sim_gini_female_m1 = first_difference(sim_gini_female, 'value', 'z_gini', 'variable', 'q')
# fwrite(sim_gini_female_m1, file='../data/sim_gini_female_m1.csv')

# Adjusting for contextual variables

### Male

In [163]:
# -Demographic: % Black, Hispanic (since Chetty uses this to race adjust)
# -Social: Crime rate, segregation
# -Economic: Gini, possibly unemployment
# -Health Access: uninsured, medicare expenses

In [46]:
lmod <- lm(le ~ z_relative_mob  + z_gini + log_population + log_income + 
           log_crime_rate + z_segregation_income +  log_pct_black + log_pct_hispanic + 
           log_unemployment +  z_uninsured + z_medicare_expenses, male)

# pc prior
sdres <- sd(residuals(lmod))
pcprior <- list(prec = list(prior="pc.prec", param = c(3*sdres,0.01)))

In [47]:
formula = le ~ z_relative_mob  + z_gini + log_population + log_income +
       log_crime_rate + z_segregation_income +  log_pct_black + log_pct_hispanic + 
       log_unemployment +  z_uninsured + z_medicare_expenses +
       f(state, model = "iid", hyper = pcprior) + 
       f(cty, model = "iid", hyper = pcprior) + 
       f(income_qr, model = "iid", hyper = pcprior) + 
       f(q_mob, z_relative_mob , model = "iid", hyper = pcprior) + 
       f(q_gini, z_gini , model = "iid", hyper = pcprior)


m2 = inla(formula, family = "gaussian", data = male,
          control.predictor=list(compute = TRUE),
          control.compute = list(config = TRUE, dic = TRUE,
                                 waic = TRUE, cpo = TRUE),
          verbose = TRUE)

In [49]:
m2$summary.fixed

Unnamed: 0,mean,sd,0.025quant,0.5quant,0.975quant,mode,kld
(Intercept),41.17177628,1.96543263,37.19756395,41.17167635,45.147638645,41.17167103,1.954105e-08
z_relative_mob,-0.18825628,0.14174075,-0.46608106,-0.18826621,0.088607907,-0.18834797,4.850234e-05
z_gini,0.19967509,0.17477988,-0.14507077,0.19965688,0.544131118,0.19966752,5.265731e-05
log_population,-0.08469662,0.03904244,-0.16131284,-0.08471544,-0.008046953,-0.08474916,2.503681e-14
log_income,0.70624723,0.15582462,0.40017048,0.70627097,1.01190446,0.70632988,3.029172e-14
log_crime_rate,-0.12856014,0.04464301,-0.21617814,-0.12857771,-0.040925966,-0.12860844,3.133471e-14
z_segregation_income,0.06153453,0.03676805,-0.01067728,0.06153704,0.13366473,0.06154499,2.919244e-14
log_pct_black,-0.09850717,0.02181736,-0.14135656,-0.09850565,-0.055706635,-0.09850113,2.27775e-14
log_pct_hispanic,-0.19160179,0.03058674,-0.25167627,-0.19159871,-0.13160007,-0.19158962,3.152192e-14
log_unemployment,-0.4487913,0.105735,-0.65638138,-0.44880855,-0.24129771,-0.44883272,1.924041e-14


In [51]:
bri.hyperpar.summary(m2)

Unnamed: 0,mean,sd,q0.025,q0.5,q0.975,mode
SD for the Gaussian observations,1.1449524,0.01358673,1.1160136,1.1459727,1.1689096,1.1500779
SD for state,0.5758316,0.07658046,0.4288521,0.575707,0.7272624,0.5805005
SD for cty,0.462098,0.0305621,0.4135226,0.4578793,0.5317989,0.4446299
SD for income_qr,3.9365649,1.47692154,2.0326288,3.596579,7.7131483,3.0242132
SD for q_mob,0.2274117,0.13878178,0.0791064,0.1882953,0.6003293,0.1377324
SD for q_gini,0.3033538,0.18049481,0.1117002,0.2520238,0.7887868,0.1852826


In [52]:
nrep = 4 * 2 # 4 quartiles for 2 contrast values
relative_mob_pred = data.table(
    z_relative_mob       = rep(c(0.0, 1.0), times = 1 , each = 4),
    z_gini               = rep(0, nrep),
    log_population       = rep(0, nrep), 
    log_income           = rep(0, nrep),
    log_crime_rate       = rep(0, nrep),
#     log_poverty          = rep(0, nrep),
#     log_mig_inflow       = rep(0, nrep),
#     log_mig_outflow      = rep(0, nrep),
#     log_foreign          = rep(0, nrep),
    log_pct_black        = rep(0, nrep),
    log_pct_hispanic     = rep(0, nrep),
#     log_house_value      = rep(0, nrep),
#     log_local_gov_exp    = rep(0, nrep),
    log_unemployment     = rep(0, nrep),
    z_segregation_income = rep(0, nrep),
#     z_religion           = rep(0, nrep),
#     z_labor_force        = rep(0, nrep),
#     z_college            = rep(0, nrep),
#     z_middle_class       = rep(0, nrep),
    z_uninsured          = rep(0, nrep), 
    z_medicare_expenses  = rep(0, nrep))

gini_pred = data.table(
    z_relative_mob        = rep(0, nrep),
    z_gini               = rep(c(0.0, 1.0), times = 1 , each = 4),
    log_population       = rep(0, nrep), 
    log_income           = rep(0, nrep),
    log_crime_rate       = rep(0, nrep),
#     log_poverty          = rep(0, nrep),
#     log_mig_inflow       = rep(0, nrep),
#     log_mig_outflow      = rep(0, nrep),
#     log_foreign          = rep(0, nrep),
    log_pct_black        = rep(0, nrep),
    log_pct_hispanic     = rep(0, nrep),
#     log_house_value      = rep(0, nrep),
#     log_local_gov_exp    = rep(0, nrep),
    log_unemployment     = rep(0, nrep),
    z_segregation_income = rep(0, nrep),
#     z_religion           = rep(0, nrep),
#     z_labor_force        = rep(0, nrep),
#     z_college            = rep(0, nrep),
#     z_middle_class       = rep(0, nrep),
    z_uninsured          = rep(0, nrep), 
    z_medicare_expenses  = rep(0, nrep))

In [53]:
# simulate
sim_mob_male = simulate_predictions(model=m2, data=relative_mob_pred, 
                                           nsim=2000, 
                                           contrast='z_relative_mob', 
                                           random='q_mob')

sim_mob_male_m2 = first_difference(sim_mob_male, 'value', 'z_relative_mob', 'variable', 'q')
fwrite(sim_mob_male_m2, file='../data/sim_mob_male_m2.csv')

In [169]:
# sim_gini_male = simulate_predictions(model=m2, data=gini_pred, 
#                                            nsim=2000, 
#                                            contrast='z_gini', 
#                                            random='q_gini')

# sim_gini_male_m2 = first_difference(sim_gini_male, 'value', 'z_gini', 'variable', 'q')
# fwrite(sim_gini_male_m2, file='../data/sim_gini_male_m2.csv')

### Female

In [54]:
lmod <- lm(le ~ z_relative_mob  + z_gini + log_population + log_income + 
       log_crime_rate + z_segregation_income +  log_pct_black + log_pct_hispanic + 
       log_unemployment +  z_uninsured + z_medicare_expenses, female)

# pc prior
sdres <- sd(residuals(lmod))
pcprior <- list(prec = list(prior="pc.prec", param = c(3*sdres,0.01)))

In [55]:
formula = le ~ z_relative_mob  + z_gini + log_population + log_income +
       log_crime_rate + z_segregation_income +  log_pct_black + log_pct_hispanic + 
       log_unemployment +  z_uninsured + z_medicare_expenses +
       f(state, model = "iid", hyper = pcprior) + 
       f(cty, model = "iid", hyper = pcprior) + 
       f(income_qr, model = "iid", hyper = pcprior) + 
       f(q_mob, z_relative_mob , model = "iid", hyper = pcprior) + 
       f(q_gini, z_gini , model = "iid", hyper = pcprior)


f2 = inla(formula, family = "gaussian", data = female,
          control.predictor=list(compute = TRUE),
          control.compute = list(config = TRUE, dic = TRUE,
                                 waic = TRUE, cpo = TRUE),
          verbose = TRUE)

In [56]:
f2$summary.fixed

Unnamed: 0,mean,sd,0.025quant,0.5quant,0.975quant,mode,kld
(Intercept),44.87198595,1.34057633,42.167755741,44.87197239,47.57725411,44.87217376,2.705002e-09
z_relative_mob,-0.11957199,0.07415679,-0.260276235,-0.11957961,0.02115938,-0.1195709,3.700395e-05
z_gini,0.13621546,0.15451528,-0.168238166,0.13619781,0.44094574,0.13625293,4.616079e-05
log_population,-0.05069792,0.03888614,-0.127057474,-0.0506982,0.0255925,-0.05069517,1.778166e-14
log_income,0.67428993,0.15572853,0.368517498,0.67427921,0.97983696,0.67427031,1.773967e-14
log_crime_rate,-0.13229834,0.04464526,-0.219961018,-0.1323007,-0.04470385,-0.13230145,1.697126e-14
z_segregation_income,0.03396511,0.03693499,-0.038561348,0.03396417,0.10642908,0.03396511,1.748465e-14
log_pct_black,-0.09885776,0.02158764,-0.141256476,-0.0988554,-0.05651148,-0.09884875,1.675068e-14
log_pct_hispanic,-0.20388972,0.03035986,-0.263479839,-0.20389944,-0.14430141,-0.20391656,2.070252e-14
log_unemployment,-0.28353962,0.10432303,-0.488419356,-0.28353308,-0.07888639,-0.2835109,1.88084e-14


In [57]:
bri.hyperpar.summary(f2)

Unnamed: 0,mean,sd,q0.025,q0.5,q0.975,mode
SD for the Gaussian observations,1.1660684,0.01221475,1.14259712,1.16583889,1.1905465,1.16519772
SD for state,0.49329658,0.07190585,0.35396009,0.49395426,0.6330445,0.5011107
SD for cty,0.4498999,0.0236037,0.40398977,0.4498193,0.4965773,0.45051671
SD for income_qr,2.6177606,0.93674461,1.27854278,2.44723626,4.914653,2.1479201
SD for q_mob,0.09691228,0.07198012,0.02358339,0.07628144,0.2912862,0.05052299
SD for q_gini,0.2651322,0.15737406,0.09861895,0.22023864,0.6888345,0.16183819


In [58]:
# simulate
sim_mob_female = simulate_predictions(model=f2, data=relative_mob_pred, 
                                           nsim=2000, 
                                           contrast='z_relative_mob', 
                                           random='q_mob')

sim_mob_female_m2 = first_difference(sim_mob_female, 'value', 'z_relative_mob', 'variable', 'q')
fwrite(sim_mob_female_m2, file='../data/sim_mob_female_m2.csv')

In [175]:
# sim_gini_female = simulate_predictions(model=f2, data=gini_pred, 
#                                            nsim=2000, 
#                                            contrast='z_gini', 
#                                            random='q_gini')

# sim_gini_female_m2 = first_difference(sim_gini_female, 'value', 'z_gini', 'variable', 'q')
# fwrite(sim_gini_female_m2, file='../data/sim_gini_female_m2.csv')

# Adjusting for Health Behaviors

### Male

In [108]:
lmod <- lm(le ~ z_relative_mob  + z_gini + log_population + log_income + 
           z_exercise, male)

# pc prior
sdres <- sd(residuals(lmod))
pcprior <- list(prec = list(prior="pc.prec", param = c(3*sdres,0.01)))

In [109]:
formula = le ~ z_relative_mob  + z_gini + log_population + log_income + 
           z_exercise + 
       f(state, model = "iid", hyper = pcprior) + 
       f(cty, model = "iid", hyper = pcprior) + 
       f(income_qr, model = "iid", hyper = pcprior) + 
       f(q_mob, z_relative_mob , model = "iid", hyper = pcprior) + 
       f(q_gini, z_gini , model = "iid", hyper = pcprior) 


m3 = inla(formula, family = "gaussian", data = male,
          control.predictor=list(compute = TRUE),
          control.compute = list(config = TRUE, dic = TRUE,
                                 waic = TRUE, cpo = TRUE),
          verbose = TRUE)

In [110]:
m3$summary.fixed

Unnamed: 0,mean,sd,0.025quant,0.5quant,0.975quant,mode,kld
(Intercept),41.2247523,1.74535773,37.70177191,41.2247136,44.74970201,41.2249927,6.181287e-09
z_relative_mob,-0.3044884,0.12945054,-0.56594882,-0.3044931,-0.04291919,-0.304436,9.853095e-06
z_gini,0.2202385,0.15379686,-0.09706622,0.2202241,0.53769284,0.2202371,2.24713e-06
log_population,-0.2413516,0.02835417,-0.29701359,-0.2413575,-0.18570897,-0.2413669,2.373499e-14
log_income,1.1888699,0.13706463,0.91974994,1.1888585,1.45780333,1.1888473,1.181923e-14
z_exercise,0.1467661,0.08889015,-0.03321964,0.1465892,0.32725692,0.1463446,0.0005330427


In [111]:
bri.hyperpar.summary(m3)

Unnamed: 0,mean,sd,q0.025,q0.5,q0.975,mode
SD for the Gaussian observations,1.1412809,0.01200473,1.11815142,1.1410814,1.1652787,1.14054834
SD for state,0.6698884,0.07898783,0.53380762,0.6627456,0.8433178,0.64648545
SD for cty,0.5284034,0.02222154,0.48508176,0.5283387,0.5722761,0.52892102
SD for income_qr,3.481991,1.17934425,1.87821414,3.235334,6.4389483,2.80261496
SD for q_mob,0.2060098,0.11091733,0.07393552,0.1782169,0.4968479,0.13765937
SD for q_gini,0.2765909,0.15289286,0.10182517,0.2361601,0.6820126,0.17990047
SD for q_exercise,0.1361903,0.10198931,0.02986891,0.1076872,0.410187,0.06905563


In [63]:
nrep = 4 * 2 # 4 quartiles for 2 contrast values
relative_mob_pred = data.table(
    z_relative_mob       = rep(c(0.0, 1.0), times = 1 , each = 4),
    z_gini               = rep(0, nrep),
    log_population       = rep(0, nrep), 
    log_income           = rep(0, nrep),
    z_smoking            = rep(0, nrep),
    z_exercise           = rep(0, nrep),
    z_obesity            = rep(0, nrep))

gini_pred = data.table(
    z_relative_mob        = rep(0, nrep),
    z_gini               = rep(c(0.0, 1.0), times = 1 , each = 4),
    log_population       = rep(0, nrep), 
    log_income           = rep(0, nrep),
    z_smoking            = rep(0, nrep),
    z_exercise           = rep(0, nrep),
    z_obesity            = rep(0, nrep))

In [64]:
sim_mob_male = simulate_predictions(model=m3, data=relative_mob_pred, 
                                           nsim=2000, 
                                           contrast='z_relative_mob', 
                                           random='q_mob')

sim_mob_male_m3 = first_difference(sim_mob_male, 'value', 'z_relative_mob', 'variable', 'q')
fwrite(sim_mob_male_m3, file='../data/sim_mob_male_m3.csv')

### Female

In [65]:
lmod <- lm(le ~ z_relative_mob  + z_gini + log_population + log_income + 
           z_smoking + z_exercise +  z_obesity, female)

# pc prior
sdres <- sd(residuals(lmod))
pcprior <- list(prec = list(prior="pc.prec", param = c(3*sdres,0.01)))

In [66]:
formula = le ~ z_relative_mob  + z_gini + log_population + log_income + 
           z_smoking + z_exercise +  z_obesity + 
       f(state, model = "iid", hyper = pcprior) + 
       f(cty, model = "iid", hyper = pcprior) + 
       f(income_qr, model = "iid", hyper = pcprior) + 
       f(q_mob, z_relative_mob , model = "iid", hyper = pcprior) + 
       f(q_gini, z_gini , model = "iid", hyper = pcprior)


f3 = inla(formula, family = "gaussian", data = female,
          control.predictor=list(compute = TRUE),
          control.compute = list(config = TRUE, dic = TRUE,
                                 waic = TRUE, cpo = TRUE),
          verbose = TRUE)

In [67]:
f3$summary.fixed

Unnamed: 0,mean,sd,0.025quant,0.5quant,0.975quant,mode,kld
(Intercept),44.9384249779,1.16899478,42.582647791,44.9383536174,47.29495127,44.9383819359,1.170564e-09
z_relative_mob,-0.220259406,0.06856275,-0.354721445,-0.2202626363,-0.08589992,-0.2202589712,1.002378e-05
z_gini,0.1391023923,0.15230063,-0.164469799,0.1390944538,0.44209344,0.1391048883,1.465377e-05
log_population,-0.2169687746,0.02828578,-0.272503126,-0.2169722899,-0.16146657,-0.2169769521,2.384991e-14
log_income,1.0821147368,0.13683743,0.813506314,1.0820812483,1.35065891,1.0820255024,1.778777e-14
z_smoking,-0.0965417518,0.02115614,-0.138086985,-0.0965399035,-0.05504488,-0.0965344,0.0
z_exercise,0.0609349481,0.02644237,0.009022572,0.0609325875,0.11281276,0.0609300946,1.550634e-15
z_obesity,-0.0004560196,0.01848253,-0.036750755,-0.0004544763,0.03579686,-0.0004498083,1.269852e-15


In [68]:
bri.hyperpar.summary(f3)

Unnamed: 0,mean,sd,q0.025,q0.5,q0.975,mode
SD for the Gaussian observations,1.16693271,0.01228498,1.14260481,1.16701153,1.1908262,1.16750817
SD for state,0.54299622,0.0679356,0.41878951,0.53995528,0.6850381,0.53532667
SD for cty,0.5095473,0.02312496,0.46425115,0.50958628,0.5549591,0.51065915
SD for income_qr,2.29255227,0.78186569,1.18882161,2.14343917,4.2230035,1.88211109
SD for q_mob,0.09585516,0.06469326,0.02399433,0.07873741,0.2677155,0.05392408
SD for q_gini,0.26306451,0.14838388,0.09740357,0.22279065,0.6586638,0.16803049


In [186]:
sim_mob_female = simulate_predictions(model=f3, data=relative_mob_pred, 
                                           nsim=2000, 
                                           contrast='z_relative_mob', 
                                           random='q_mob')

sim_mob_female_m3 = first_difference(sim_mob_female, 'value', 'z_relative_mob', 'variable', 'q')
fwrite(sim_mob_female_m3, file='../data/sim_mob_female_m3.csv')

# Create tables

In [112]:
library(texreg)
source('extract_inla.R')

Version:  1.36.23
Date:     2017-03-03
Author:   Philip Leifeld (University of Glasgow)

Please cite the JSS article in your publications -- see citation("texreg").


In [113]:
cmodels <- rep(c('Women', 'Men'), 2)

models <- list(f1, m1, f2, m2)

cnames <- list("(Intercept)" = 'Constant',
               'z_relative_mob' = 'Standardized Income mobility (Rank-Rank Slope)',
               'z_gini' = 'Standardized Gini',
               'log_income' = 'Log income (centered)',
               "sd for state" = "SD states",
               "sd for cty" = "SD counties",
               "sd for income_qr" = "SD income quartiles",
               "sd for q_mob" = "SD mobility by income quartile",
               "sd for q_gini" = "SD Gini by income quartile",
               "sd for the Gaussian observations" = "SD observations")

# screenreg(models)
texreg(models,
            include.dic = TRUE, include.waic = TRUE,
            ci.test = FALSE,
            float.pos = "htp",
            caption = "Life Expectancy (40) Models",
            booktabs = TRUE,
            use.packages = FALSE,
            dcolumn = TRUE,
            caption.above = TRUE,
            scalebox = 0.65,
            label = "inla_models",
            # sideways = TRUE,
            digits = 2,
            custom.model.names = cmodels,
            custom.coef.map = cnames,
            groups = list("Random Effects" = c(5:10)),
            custom.note = "Note: Selected coefficients (mean of marginal posterior distribution). 95\\% credibility intervals.",
             file = "tables/inla_models.tex")

The table was written to the file 'tables/inla_models.tex'.



# Change in counties at the bottom

# Spatial adjustments

In [22]:
library(USAboundaries)
library(maptools)
library(spdep)
library(sf)

ERROR: Error in library(sf): there is no package called ‘sf’


In [16]:
allcounties <- us_counties("2000-07-01")

In [18]:
length(unique(male$county)) / length(allcounties$fips) # 47%, ok, that's a bit problematic

In [12]:
male[, fips := sprintf("%05d", male$county)]

In [71]:
df_fips = unique(male$fips)

In [72]:
length(df_fips)

In [75]:
table(df_fips %in% allcounties$fips) # all of them!


TRUE 
1500 

In [76]:
mycounties <- allcounties[allcounties$fips %in% df_fips,]

In [89]:
county_data <- attr(mycounties, "data")
all(county_data$fips %in% df_fips)

In [19]:
# creating county layout using INLA

In [20]:
as(allcounties, 'Spatial')

ERROR: Error in as(allcounties, "Spatial"): no method or default for coercing “sf” to “Spatial”


In [19]:
temp <- poly2nb(allcounties)

ERROR: Error: extends(class(pl), "SpatialPolygons") is not TRUE


In [95]:
nb2INLA('mcounty.graph', temp)
county.adj <- 'mcounty.graph'

ERROR: Error: extends(class(pl), "SpatialPolygons") is not TRUE


In [90]:
H <- inla.read.graph(filename = county.adj)
image(inla.graph2matrix(H), xlab="",ylab="")

In [90]:
cc <- fortify(mycounties)

In [None]:
str(mycounties)

In [84]:
ccd <- cbind(cc, mycounties@data[cc$id,])

In [None]:
str(mycounties)

In [None]:
# plot(mycounties)
length(mycounties$fips)
ncounties <- length(unique(dat$fips))

In [None]:
unique(male$fips)[!(unique(male$fips) %in% fips)]

In [None]:
cdata <- attr(mycounties, "data")
all(as.character(cdata$fips) %in% unique(dat$fips))
all(unique(dat$fips) %in% as.character(cdata$fips))

unique(dat$fips)[!unique(dat$fips) %in% mycounties$fips]

cc <- fortify(mycounties)
ccd <- cbind(cc, mycounties@data[cc$id,])

# plot(mycounties)
length(mycounties$fips)
ncounties <- length(unique(dat$fips))