# Cause of death analysis

In [35]:
library(haven)
library(sdazar)
library(data.table)
library(ggplot2)
library(stringr)
library(brms)
# Leave two cores free for the rest of the system, but never request fewer than
# one: detectCores() can return 1, 2 or NA, which would otherwise produce a
# non-positive or missing mc.cores value.
options(mc.cores = max(1L, parallel::detectCores() - 2L, na.rm = TRUE))
# Default plot size for notebook output.
options(repr.plot.width = 5, repr.plot.height = 3)

source('functions.R')
seed = 121911112018

In [36]:
cd = data.table(read_dta('../data/cause_contribution_cum_organized_final.dta'))

In [37]:
le = fread('../data/featured_LE_data.csv')

In [38]:
# Country names used as factor labels for ctry (18 entries, alphabetical order).
country_labels <- c(
    "Argentina", "Brazil", "Chile",
    "Colombia", "Costa_Rica", "Cuba",
    "Dominican_Republic", "Ecuador", "El_Salvador",
    "Guatemala", "Honduras", "Mexico",
    "Nicaragua", "Panama", "Paraguay",
    "Peru", "Uruguay", "Venezuela"
)

cd[, ctry := factor(ctry, labels=country_labels)]

In [39]:
length(country_labels)

In [40]:
# Period indicator: years before 1970 versus 1970 onwards
# (cut-off follows the criteria proposed by Alberto).
cd[year < 1970, gyear := '<1970']
cd[year >= 1970, gyear := '>=1970']
# Country-by-period identifier, e.g. "Chile.<1970"; used as the grouping factor in the models.
cd[, ctry_year := paste(ctry, gyear, sep = '.')]

In [41]:
# colnames(test)

# Return TRUE for each element of x that starts with "cause" (case-insensitive).
checktext = function (x) {
    grepl('^cause', x, ignore.case = TRUE)
}

# Names of the cause-of-death columns in cd (those starting with "cause").
# checktext() wraps grepl(), which is already vectorized, so it is applied to the
# whole name vector at once; this avoids sapply(), whose return type changes
# (to a list) when the input is empty.
causes <- colnames(cd)[checktext(colnames(cd))]
print(causes)

 [1] "cause_1"  "cause_5"  "cause_10" "cause_14" "cause_18" "cause_29"
 [7] "cause_30" "cause_31" "cause_32" "cause_33"


In [42]:
# Left-join the covariates from le onto the cause-of-death table, keeping every
# row of cd. merge() for data.tables takes `by`, not `on`: the previous `on=`
# argument was swallowed by `...` and the join keys were only right because the
# default falls back to the columns the two tables share.
df <- merge(cd,
            le[, .(ctry, year, igdp_log, ilit_log, iurban_log, iwater_log, isewage_log, ielec_log)],
            by = c('ctry', 'year'), all.x = TRUE)

In [43]:
# no missing data
# Print the result of prop_miss() on the merged table; the recorded output shows
# 0 for every column. prop_miss() is not defined in this file -- presumably it
# comes from sdazar or functions.R.
print(prop_miss(df))

       year        ctry     cause_1     cause_5    cause_10    cause_14 
          0           0           0           0           0           0 
   cause_18    cause_29    cause_30    cause_31    cause_32    cause_33 
          0           0           0           0           0           0 
 LE_initial       gyear   ctry_year    igdp_log    ilit_log  iurban_log 
          0           0           0           0           0           0 
 iwater_log isewage_log   ielec_log 
          0           0           0 


In [44]:
df[, zyear := scale(year, center=TRUE, scale=TRUE)]

## Models for cause of death contribution (no time)

In [45]:
# normal(0, 5) prior for every coefficient of class "b"; passed to each brm() call below.
prior <- set_prior("normal(0, 5)", class = "b")

In [46]:
table(df$gyear)


 <1970 >=1970 
   178    670 

In [47]:
# ctry values of the rows observed in 2004 and in 1996, respectively.
# NOTE(review): neither object is used in the visible part of the notebook.
a <- df[year == 2004, ctry]
b <- df[year == 1996, ctry]

In [48]:
selected_labels = df[year==2004, ctry]

In [49]:
print(selected_labels)

 [1] "Argentina"          "Brazil"             "Chile"             
 [4] "Colombia"           "Costa_Rica"         "Cuba"              
 [7] "Dominican_Republic" "Ecuador"            "El_Salvador"       
[10] "Guatemala"          "Mexico"             "Nicaragua"         
[13] "Panama"             "Paraguay"           "Peru"              
[16] "Uruguay"            "Venezuela"         


In [50]:
# loop by cause
# full model
#
# For every cause-of-death column: fit a brms model of that column on the six
# covariates with a varying intercept and igdp_log slope by ctry_year, pass the
# fitted model to compute_shifts() for the selected countries (1966 vs 2004),
# and write the result to output/shift_<cause>_full.csv.
# compute_shifts() is not defined in this file -- presumably from functions.R.

# Make sure the output folder exists before any (slow) model is fitted, so a
# missing directory cannot discard a finished fit at write time.
dir.create('output', showWarnings = FALSE, recursive = TRUE)

# The loop variable is named `cause` rather than `c` to avoid masking base::c().
for (cause in causes) {

    print(paste0('::::: Modeling ', cause))

    # Formula is built as text because the response column changes per iteration.
    f <- paste0(cause, '~ 1 + igdp_log + ilit_log + iurban_log + iwater_log + isewage_log + ielec_log + 
                        (igdp_log|ctry_year)')
    m <- brm(formula = f,
             data = df,
             iter = 2000,
             chains = 2,
             seed = seed,
             prior = prior,
             control = list(adapt_delta = 0.90, max_treedepth = 15))

    # model_pred names the gyear group assigned to each comparison year.
    est_shifts <- compute_shifts(models = list(m),
                                 data = df,
                                 transform = FALSE,
                                 obs_var = cause,
                                 countries = selected_labels,
                                 years = c(1966, 2004),
                                 model_pred = list('1966' = '<1970', '2004' = '>=1970')
                                 )

    fwrite(est_shifts, file = paste0('output/shift_', cause, '_full.csv'))

    # Drop the fitted model and its estimates before the next iteration.
    remove(m, est_shifts)
}


[1] "::::: Modeling cause_1"


Compiling the C++ model
Start sampling
“NAs introduced by coercion to integer range”

[1] "::::: Modeling cause_5"


Compiling the C++ model
Start sampling
“NAs introduced by coercion to integer range”

[1] "::::: Modeling cause_10"


Compiling the C++ model
Start sampling
“NAs introduced by coercion to integer range”

In [28]:
# loop by cause
# full model
#
# Same procedure as the base model loop, with LE_initial and zyear added to the
# covariates: fit one brms model per cause-of-death column, pass it to
# compute_shifts() for the selected countries (1966 vs 2004), and write the
# result to output/shift_<cause>_full_leinitial_year.csv.
# compute_shifts() is not defined in this file -- presumably from functions.R.

# Make sure the output folder exists before any (slow) model is fitted, so a
# missing directory cannot discard a finished fit at write time.
dir.create('output', showWarnings = FALSE, recursive = TRUE)

# The loop variable is named `cause` rather than `c` to avoid masking base::c().
for (cause in causes) {

    print(paste0('::::: Modeling ', cause))

    # Formula is built as text because the response column changes per iteration.
    f <- paste0(cause, '~ 1 + igdp_log + ilit_log + iurban_log + iwater_log + isewage_log + ielec_log + 
                    LE_initial + zyear + (igdp_log|ctry_year)')
    m <- brm(formula = f,
             data = df,
             iter = 2000,
             chains = 2,
             seed = seed,
             prior = prior,
             control = list(adapt_delta = 0.90, max_treedepth = 15))

    # model_pred names the gyear group assigned to each comparison year.
    est_shifts <- compute_shifts(models = list(m),
                                 data = df,
                                 transform = FALSE,
                                 obs_var = cause,
                                 countries = selected_labels,
                                 years = c(1966, 2004),
                                 model_pred = list('1966' = '<1970', '2004' = '>=1970')
                                 )

    fwrite(est_shifts, file = paste0('output/shift_', cause, '_full_leinitial_year.csv'))

    # Drop the fitted model and its estimates before the next iteration.
    remove(m, est_shifts)
}


[1] "::::: Modeling cause_1"


Compiling the C++ model
Start sampling
“NAs introduced by coercion to integer range”

[1] "::::: Modeling cause_5"


Compiling the C++ model
Start sampling
“NAs introduced by coercion to integer range”

[1] "::::: Modeling cause_10"


Compiling the C++ model
Start sampling
“NAs introduced by coercion to integer range”

[1] "::::: Modeling cause_14"


Compiling the C++ model
Start sampling
“NAs introduced by coercion to integer range”

[1] "::::: Modeling cause_18"


Compiling the C++ model
Start sampling
“NAs introduced by coercion to integer range”

[1] "::::: Modeling cause_29"


Compiling the C++ model
Start sampling
“NAs introduced by coercion to integer range”

[1] "::::: Modeling cause_30"


Compiling the C++ model
Start sampling
“NAs introduced by coercion to integer range”

[1] "::::: Modeling cause_31"


Compiling the C++ model
Start sampling
“NAs introduced by coercion to integer range”

[1] "::::: Modeling cause_32"


Compiling the C++ model
Start sampling
“NAs introduced by coercion to integer range”

[1] "::::: Modeling cause_33"


Compiling the C++ model
Start sampling
“NAs introduced by coercion to integer range”