# Testing

In [205]:
# Penalise scientific notation heavily so numbers print in fixed notation.
options(scipen = 100)
# Provides fread() and the data.table `[` / `:=` syntax used in the cells below.
library(data.table)

# functions

# table(): base::table() that reports NA counts whenever NAs are present.
# `useNA` is a default rather than a fixed argument, so a caller can still
# pass e.g. useNA = "no" without triggering a duplicate-argument error.
table = function(..., useNA = 'ifany') base::table(..., useNA = useNA)

# cor(): stats::cor() restricted to complete observations by default.
# `use` can be overridden by the caller for the same reason as `useNA` above.
cor = function(..., use = "complete.obs") stats::cor(..., use = use)

# perc.rank(): rank of each element (tied average ranks truncated to an
# integer) divided by the length of `x`. Returns a vector as long as `x`.
perc.rank = function(x) trunc(rank(x)) / length(x)

In [206]:
# read data files
# Every table comes from the same output directory and shares the "-1-1.csv"
# suffix, so both are stated once instead of being repeated in each path.
# NOTE(review): "-1-1" presumably identifies a specific run/replicate - confirm.
testing_dir = "../models/MobHealthRecycling/output/testing"
read_testing = function(name) fread(file.path(testing_dir, paste0(name, "-1-1.csv")))

# NOTE: `c` shares its name with base::c(); calls such as c(...) still work
# because R skips non-function bindings when looking up a function.
p = read_testing("parameters")
m = read_testing("mortality")
c = read_testing("county")
e = read_testing("environment")
mi = read_testing("migration")
ag = read_testing("agents")

# Testing exposures

In [207]:
# Take the third distinct agent id as the subject of the exposure checks.
sid = unique(ag[["id"]])[3]
print(sid)

[1] 48871


In [208]:
# Keep the selected agent's rows and expand the two bracketed, comma-separated
# probability strings into numeric columns i1..i5 and t1..t5.
t = ag[id == sid, .(id, age, prob_income_group, prob_transition, time_exposure, final_transition_probs)]

# Strip the square brackets from both string columns in a single pass.
t[, c("prob_income_group", "prob_transition") := lapply(.SD, function(s) gsub("\\[|\\]", "", s)),
    .SDcols = c("prob_income_group", "prob_transition")]

# Split on commas and convert each piece to numeric as it is assigned.
vars = c(paste0("i", 1:5), paste0("t", 1:5))
t[, (vars[1:5]) := lapply(tstrsplit(prob_income_group, ",", fixed = TRUE), as.numeric)]
t[, (vars[6:10]) := lapply(tstrsplit(prob_transition, ",", fixed = TRUE), as.numeric)]

In [209]:
# Parse the final transition probabilities stored on the agent's last row.
# The value is extracted as a plain character vector first, so gsub() is not
# handed a whole data.table and an empty `t` yields numeric(0) instead of NA.
check = as.numeric(unlist(strsplit(gsub("\\[|\\]", "", t[.N, final_transition_probs]), ",", fixed = TRUE)))
sum(check)
check

In [211]:
# t[, check1 := apply(.SD, 1, mean), .SDcols = c("i2", "t2")]
# For each of the five groups, blend the i* and t* columns with weight `w`,
# take the time_exposure-weighted mean over rows with positive exposure, and
# print how far that is from the matching entry of `check`.
w = 0.0  # weight on the i* column; the t* column gets 1 - w
for (i in 1:5) {
    t[, check1 := w * get(paste0("i", i)) + (1 - w) * get(paste0("t", i))]
    print(paste0(
        "Difference group ", i, ": ",
        with(t[time_exposure > 0], weighted.mean(check1, time_exposure)) - check[i]
    ))
}

[1] "Difference group 1: 0"
[1] "Difference group 2: 0"
[1] "Difference group 3: 0"
[1] "Difference group 4: 0"
[1] "Difference group 5: 0"


# Income measures

In [212]:
# Load the income output and strip the square brackets from the kid/parent
# income strings.
inc = fread("../models/MobHealthRecycling/output/testing/income-1-1.csv")
inc[, c("kid_income", "parent_income") := lapply(.SD, function(s) gsub("\\[|\\]", "", s)),
    .SDcols = c("kid_income", "parent_income")]

# Unpack the comma-separated strings into numeric rows, keeping kid and parent
# values side by side within each (model_time, county) group.
sinc = inc[, lapply(.SD, function(s) as.numeric(unlist(strsplit(s, ',')))),
    by = .(model_time, county), .SDcols = c("kid_income", "parent_income")]
setnames(sinc, c("kid_income", "parent_income"), c("kid", "parent"))

# Attach the statistics stored in the income file to every unpacked row.
sinc = merge(
    sinc,
    inc[, c("model_time", "county", "rank_slope", "rank_absolute", "rank_correlation", "cohort_size"), with = FALSE],
    by = c("county", "model_time")
)

In [213]:
# Preview the first six unpacked rows.
head(sinc, n = 6L)

county,model_time,kid,parent,rank_slope,rank_absolute,rank_correlation,cohort_size
<int>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
19,169,180000,66300,0.3047226,0.4260768,0.3047053,154
19,169,4030,33000,0.3047226,0.4260768,0.3047053,154
19,169,200600,100400,0.3047226,0.4260768,0.3047053,154
19,169,102000,38900,0.3047226,0.4260768,0.3047053,154
19,169,47700,269700,0.3047226,0.4260768,0.3047053,154
19,169,167000,120000,0.3047226,0.4260768,0.3047053,154


In [215]:
# The statistics stored in the income file are attached per (county,
# model_time), so everything recomputed here is grouped by that same key.
# Grouping by model_time alone would pool several counties whenever more than
# one county reports at the same time step.
sinc[, kid_rank := perc.rank(kid), by = .(county, model_time)]
sinc[, parent_rank := perc.rank(parent), by = .(county, model_time)]

# reg(): regress kid_income on parent_income with lm().
#   kid_income, parent_income  numeric vectors of equal length
#   relative    TRUE  -> return the slope
#               FALSE -> return intercept + percentile * slope, i.e. the fitted
#                        value at parent_income == percentile
#   percentile  evaluation point used when relative is FALSE (default 0.25)
# Local names avoid `m` and `c`, which are data tables loaded earlier in this
# notebook (and `c` is also base::c).
reg = function(kid_income, parent_income, relative = TRUE, percentile = 0.25) {
    fit = lm(kid_income ~ parent_income)
    coefs = coef(fit)
    if (relative) {
        return(coefs[2])
    }
    coefs[1] + percentile * coefs[2]
}

sinc[, spearman := cor(kid, parent, method = "spearman"), by = .(county, model_time)]
sinc[, cor_rank := cor(kid_rank, parent_rank), by = .(county, model_time)]
sinc[, im := reg(kid_rank, parent_rank, TRUE), by = .(county, model_time)]
sinc[, am := reg(kid_rank, parent_rank, FALSE), by = .(county, model_time)]

# Every row of a group now carries identical statistics; keep the first one.
sinc[, order := seq_len(.N), by = .(county, model_time)]
sinc = sinc[order == 1]


In [216]:
# Distribution of the gaps between the statistics stored in the income file
# and the values recomputed in this notebook.
with(sinc, summary(rank_slope - im))
with(sinc, summary(rank_absolute - am))
with(sinc, summary(rank_correlation - spearman))
with(sinc, summary(rank_correlation - cor_rank))

      Min.    1st Qu.     Median       Mean    3rd Qu.       Max. 
-0.0013498 -0.0009692 -0.0005325 -0.0004825 -0.0002743  0.0013390 

      Min.    1st Qu.     Median       Mean    3rd Qu.       Max. 
-0.0005836 -0.0001081  0.0001567  0.0001576  0.0003648  0.0007455 

                   Min.                 1st Qu.                  Median 
-0.00000000000000033307 -0.00000000000000011102  0.00000000000000000000 
                   Mean                 3rd Qu.                    Max. 
-0.00000000000000001883  0.00000000000000006245  0.00000000000000027756 

      Min.    1st Qu.     Median       Mean    3rd Qu.       Max. 
-0.0008707 -0.0006046 -0.0003101 -0.0001794  0.0001855  0.0011307 

In [217]:
# Correlation between each stored statistic and its recomputed counterpart.
sinc[, cor(rank_slope, im)]
sinc[, cor(rank_absolute, am)]
sinc[, cor(rank_correlation, spearman)]
sinc[, cor(rank_correlation, cor_rank)]