# Smoking prevalence

- Parameter variation experiment `VerificationSmoking` in AnyLogic

In [188]:
library(data.table)
library(xtable)
source("../src/utils.R")

In [189]:
# Load the three output tables of the smoking verification run.
# m: mortality file
m <- fread("../output/data/mortality-verification-smoking.csv")
# p: parameters file
p <- fread("../output/data/parameters-verification-smoking.csv")
# e: environment file
e <- fread("../output/data/environment-verification-smoking.csv")

In [190]:
# Parameter(s) of interest; p is sorted by them so the rebuilt iteration
# index follows increasing parameter values.
parameters <- c("smoking_rank_slope_exp_coeff")
setorderv(p, cols = parameters)

# Rebuild the indexes: niteration numbers each distinct parameter value,
# nreplicate numbers the rows within each niteration.
p[, niteration := .GRP, by = parameters]
p[, nreplicate := seq_len(.N), by = niteration]

# Lookup table mapping the original (iteration, replicate) ids to the new
# indexes, together with the parameter values.
np <- p[
    ,
    c("iteration", "replicate", "niteration", "nreplicate", parameters),
    with = FALSE
]

In [191]:
# environment data

# Expand each array-valued column into five numbered columns with
# extractColumns() (presumably defined in ../src/utils.R -- not shown here).
e <- extractColumns(e, "prop_income_group", paste0("income", 1:5))
e <- extractColumns(e, "le_income_group", paste0("le", 1:5))
e <- extractColumns(e, "smoking_income_group", paste0("smoking", 1:5))

# Attach the rebuilt indexes and parameter values, then sort by parameter.
e <- merge(e, np, by = c("iteration", "replicate"))
setorderv(e, cols = parameters)

In [192]:

# create tables with distributions

# Reference distribution read from the NHIS 2019 file.
nhis <- readRDS("../output/data/smoking_dist_nhis2019.rds")

# Long format: one row per (niteration, nreplicate) and smoking<digit> column.
s <- melt(
    e,
    id.vars = c("niteration", "nreplicate"),
    measure.vars = patterns("^smoking[0-9]"),
    variable.name = "income_group",
    value.name = "smoking_prop"
)


In [193]:
# Average population in each scenario (niteration 1 = no-treatment,
# niteration 2 = treatment, as labelled in the printed strings).
print(paste("Population no-treatment", round(e[niteration == 1, mean(population)], 0)))
print(paste("Population treatment", round(e[niteration == 2, mean(population)], 0)))

# No-treatment scenario: mean smoking proportion per income group.
cf <- s[niteration == 1, .(smoking_prop_cf = mean(smoking_prop)), by = income_group]
# Keep only the digit of "smoking<k>" as a numeric group id.
cf[, income_group := as.numeric(gsub("[a-z]+", "", income_group))]
# Extra row coded 9: mean of the `smokers` column over the same scenario.
cf <- rbind(
    cf,
    data.table(income_group = 9, smoking_prop_cf = e[niteration == 1, mean(smokers)])
)

# Treatment scenario: same steps on niteration 2.
trt <- s[niteration == 2, .(smoking_prop_trt = mean(smoking_prop)), by = income_group]
trt[, income_group := as.numeric(gsub("[a-z]+", "", income_group))]
trt <- rbind(
    trt,
    data.table(income_group = 9, smoking_prop_trt = e[niteration == 2, mean(smokers)])
)


[1] "Population no-treatment 8154"
[1] "Population treatment 8156"


In [194]:
# Full outer join of the NHIS reference and both simulated scenarios on
# income_group, folded pairwise over the list.
tab <- Reduce(
    function(left, right) merge(left, right, all = TRUE, by = "income_group"),
    list(nhis, cf, trt)
)
# Replace every column name with its presentation label.
setnames(
    tab,
    old = names(tab),
    new = c("Income quintile", "NHIS 2019", "MIA non-treatment", "MIA treatment")
)

# latex table
print(
    xtable(tab, caption = "Proportion smoking by income quintile"),
    include.rownames = FALSE,
    caption.placement = "top",
    table.placement = "htp"
)

% latex table generated in R 4.1.0 by xtable 1.8-4 package
% Sun Jan 16 16:10:09 2022
\begin{table}[htp]
\centering
\caption{Proportion smoking by income quintile} 
\begin{tabular}{rrrr}
  \hline
Income quintile & NHIS 2019 & MIA non-treatment & MIA treatment \\ 
  \hline
1.00 & 0.29 & 0.22 & 0.30 \\ 
  2.00 & 0.22 & 0.16 & 0.22 \\ 
  3.00 & 0.16 & 0.11 & 0.16 \\ 
  4.00 & 0.11 & 0.08 & 0.11 \\ 
  5.00 & 0.05 & 0.03 & 0.05 \\ 
  9.00 & 0.17 & 0.12 & 0.16 \\ 
   \hline
\end{tabular}
\end{table}


In [185]:
# le by smoking
# Mean `age` in the mortality table (presumably age at death -- confirm),
# restricted to rows with age > 40, by smoker status and income group.
l <- m[age > 40, .(le = mean(age)), by = .(smoker, income_group)]
setorder(l, income_group, smoker)

# Same restriction, by income group only; displayed below.
sl <- m[age > 40, .(le = mean(age)), by = income_group]
setorder(sl, income_group)
sl

income_group,le
<int>,<dbl>
1,75.84344
2,77.29613
3,78.69346
4,79.9048
5,81.89833


In [173]:
# Attach the rebuilt indexes (niteration, nreplicate) and parameter values
# to the mortality table.
m <- merge(x = m, y = np, by = c("iteration", "replicate"))


In [187]:
# Mean age per scenario: all rows, rows with age > 50, rows with age < 30.
m[, .(le = mean(age)), by = niteration]
m[age > 50, .(le = mean(age)), by = niteration]
m[age < 30, .(le = mean(age)), by = niteration]



niteration,le
<int>,<dbl>
1,76.54328
2,75.98216


niteration,le
<int>,<dbl>
1,80.23798
2,79.81572


niteration,le
<int>,<dbl>
1,14.96036
2,14.72121


In [182]:
# Mean age of the first smoker group minus that of the second one.
# Groups come back in order of first appearance in m, so the sign of this
# difference depends on the row order of m.
with(m[, .(le = mean(age)), by = .(smoker)], le[1] - le[2])

# exploring some values
# Gap in mean le5 between niteration 1 and niteration 2.
a <- e[niteration == 1, mean(le5)]
b <- e[niteration == 2, mean(le5)]
a - b

# Same gap for le1.
a <- e[niteration == 1, mean(le1)]
b <- e[niteration == 2, mean(le1)]
a - b

# Income measures

In [212]:
# Income test output of the model.
inc <- fread("../models/MobHealthRecycling/output/testing/income-1-1.csv")

# The income columns are bracketed, comma-separated strings: drop the
# square brackets first.
inc[, `:=`(
    kid_income = gsub("\\[|\\]", "", kid_income),
    parent_income = gsub("\\[|\\]", "", parent_income)
)]

# Split the strings into numeric values, one row per value, within each
# (model_time, county) group.
sinc <- inc[
    ,
    .(
        kid = as.numeric(unlist(strsplit(kid_income, ','))),
        parent = as.numeric(unlist(strsplit(parent_income, ',')))
    ),
    by = .(model_time, county)
]

# Bring back the measures stored in the file for the same keys.
sinc <- merge(
    sinc,
    inc[, .(model_time, county, rank_slope, rank_absolute, rank_correlation, cohort_size)],
    by = c("county", "model_time")
)

In [213]:
# Display the first rows of the long income table as a sanity check.
head(sinc)

county,model_time,kid,parent,rank_slope,rank_absolute,rank_correlation,cohort_size
<int>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
19,169,180000,66300,0.3047226,0.4260768,0.3047053,154
19,169,4030,33000,0.3047226,0.4260768,0.3047053,154
19,169,200600,100400,0.3047226,0.4260768,0.3047053,154
19,169,102000,38900,0.3047226,0.4260768,0.3047053,154
19,169,47700,269700,0.3047226,0.4260768,0.3047053,154
19,169,167000,120000,0.3047226,0.4260768,0.3047053,154


In [215]:
# Rank child and parent incomes within each model_time using perc.rank()
# (presumably a percentile-rank helper from ../src/utils.R -- not shown).
# NOTE(review): sinc was built per (model_time, county) but ranks are grouped
# by model_time only; confirm there is a single county per model_time.
sinc[, `:=`(
    kid_rank = perc.rank(kid),
    parent_rank = perc.rank(parent)
), by = .(model_time)]

#' Mobility measure from a linear fit of child values on parent values
#'
#' Fits `lm(kid_income ~ parent_income)` and returns either the slope or
#' the fitted value at a chosen parent position.
#'
#' @param kid_income Numeric vector used as the response.
#' @param parent_income Numeric vector used as the predictor.
#' @param relative If `TRUE` (default) return the slope; otherwise return
#'   `intercept + percentile * slope`.
#' @param percentile Parent position at which the fitted value is evaluated
#'   when `relative = FALSE`. Defaults to 0.25, the value that used to be
#'   hard-coded; it must be expressed on the same scale as `parent_income`.
#' @return A length-one numeric vector; it keeps the coefficient name
#'   assigned by `coef()`.
reg <- function(kid_income, parent_income, relative = TRUE, percentile = 0.25) {
    stopifnot(is.numeric(percentile), length(percentile) == 1)
    fit <- lm(kid_income ~ parent_income)
    # Named `coefs` rather than `c` so base::c() is not masked locally.
    coefs <- coef(fit)
    if (relative) {
        return(coefs[2])
    }
    coefs[1] + percentile * coefs[2]
}

# Recompute the mobility measures within each model_time:
#   spearman - Spearman correlation of the raw incomes
#   cor_rank - Pearson correlation of the ranks
#   im / am  - slope and fitted value at 0.25 from reg() on the ranks
#   order    - row counter within the group
# NOTE(review): grouping is by model_time only although sinc was built per
# (model_time, county); confirm there is a single county per model_time.
sinc[, `:=`(
    spearman = cor(kid, parent, method = "spearman"),
    cor_rank = cor(kid_rank, parent_rank),
    im = reg(kid_rank, parent_rank, TRUE),
    am = reg(kid_rank, parent_rank, FALSE),
    order = seq_len(.N)
), by = model_time]

# The measures are constant within a group, so keep its first row only.
sinc <- sinc[order == 1L]


In [216]:
# Distribution of the differences between the measures stored in the file
# and the ones recomputed above.
sinc[, summary(rank_slope - im)]
sinc[, summary(rank_absolute - am)]
sinc[, summary(rank_correlation - spearman)]
sinc[, summary(rank_correlation - cor_rank)]

      Min.    1st Qu.     Median       Mean    3rd Qu.       Max. 
-0.0013498 -0.0009692 -0.0005325 -0.0004825 -0.0002743  0.0013390 

      Min.    1st Qu.     Median       Mean    3rd Qu.       Max. 
-0.0005836 -0.0001081  0.0001567  0.0001576  0.0003648  0.0007455 

                   Min.                 1st Qu.                  Median 
-0.00000000000000033307 -0.00000000000000011102  0.00000000000000000000 
                   Mean                 3rd Qu.                    Max. 
-0.00000000000000001883  0.00000000000000006245  0.00000000000000027756 

      Min.    1st Qu.     Median       Mean    3rd Qu.       Max. 
-0.0008707 -0.0006046 -0.0003101 -0.0001794  0.0001855  0.0011307 

In [217]:
# Correlation between each stored measure and its recomputed counterpart.
sinc[, cor(rank_slope, im)]
sinc[, cor(rank_absolute, am)]
sinc[, cor(rank_correlation, spearman)]
sinc[, cor(rank_correlation, cor_rank)]