In [16]:
library(dplyr)
library(stringr)
library(readr)
library(tidyr)
library(lfe)
library(ggplot2)
library(glmnet)
library(aod)
library(xgboost)
source("../utils/labels_and_colors.r")
library(doMC)
registerDoMC(20)

In [17]:
# Root directory of the project data; the weather and SCI paths below are
# built from it.
fp <- '/nfs/sloanlab004/projects/covid_mobility_proj/data/'

# Main panel, read with readRDS() even though the file has an .RData extension.
panel <- readRDS(
    '/nfs/sloanlab004/projects/covid_mobility_proj/data/PROCESSED_DATA/data_pre_residualization.RData'
)

# Object whose absolute values are compared against a threshold in
# gen_thres_data().
weather.corr <- readRDS(paste0(fp, 'PROCESSED_DATA/weather_cor.RDS'))

# Tab-separated county-to-county table; all three columns are parsed as
# doubles ('ddd').
sci <- read_delim(
    paste0(fp, 'fb_social_connectedness/sci_county/county_county_data.tsv'),
    delim = '\t',
    col_types = 'ddd'
)

In [18]:
# Drop the columns matching the five patterns below, then rename the remaining
# outcome / alter columns so that they carry a source tag (fb / sg) and, for
# the alter columns, a '_full' suffix.
# NOTE(review): 'alter_fg_mcbgv_full' looks like a typo for
# 'alter_sg_mcbgv_full'; the same spelling is selected in gen_thres_data(),
# so it is kept unchanged here.
panel <- panel %>%
    select(
        -matches('_X_EgoSH'),
        -matches('_Xsh'),
        -matches('shiftshare'),
        -matches('alter_prcp'),
        -matches('alter_tmax')
    ) %>%
    rename(
        ash_fb_rnstu            = ash_rnstu,
        ash_sg_nhd              = ash_nhd,
        log_sg_mcbgv            = log_mcbgv,
        alter_sg_nhd_full       = alter_nhd,
        alter_fb_btvrc_full     = alter_btvrc,
        alter_fb_rnstu_full     = alter_rnstu,
        alter_fg_mcbgv_full     = alter_mcbgv,
        ash_alter_sg_nhd_full   = ash_alter_nhd,
        ash_alter_fb_rnstu_full = ash_alter_rnstu,
        log_alter_sg_mcbgv_full = log_alter_mcbgv
    )

In [19]:
# NOTE(review): bare symbol evaluated on its own. In the visible code this name
# only exists as a column of `panel` (created by the rename above), not as a
# standalone object -- presumably left over from interactive inspection; confirm
# whether `panel$ash_alter_sg_nhd_full` was intended.
ash_alter_sg_nhd_full

In [20]:
# Restores the objects stored in sciWM.RData into the current environment.
# NOTE(review): the names of the restored objects are not visible here --
# presumably a precomputed SCI weight matrix; confirm against the script that
# writes this file.
load('/nfs/sloanlab004/projects/covid_mobility_proj/data/PROCESSED_DATA/sciWM.RData')

In [21]:
# Lookup tables derived from the panel.
# `dates` is also read as a global by the weightedAlters* functions.
fips       <- distinct(select(panel, county_fips))
dates      <- distinct(select(panel, ds))
population <- distinct(select(panel, county_fips, n))

# Keep only SCI pairs whose two endpoints both appear in the panel, attach the
# friend county's `n`, and compute the pair weight w = scaled_sci * n, with
# self-pairs (user_county == fr_county) set to 0.
sci <- sci %>%
    inner_join(fips, by = c('user_county' = 'county_fips')) %>%
    inner_join(fips, by = c('fr_county' = 'county_fips')) %>%
    left_join(population, by = c('fr_county' = 'county_fips')) %>%
    mutate(w = ifelse(user_county == fr_county, 0 , scaled_sci * n))

In [22]:
# Columns for which weighted "alter" versions are built: the four fb_/sg_
# columns, then prcp10..prcp20 and tmax02..tmax20 (two-digit, zero-padded
# suffixes).
vars_to_alter <- c(
    'fb_btvrc', 'fb_rnstu', 'sg_mcbgv', 'sg_nhd',
    sprintf('prcp%02d', 10:20),
    sprintf('tmax%02d', 2:20)
)

In [23]:
# Builds one "alter" covariate column: for every date, the wm-weighted
# combination of `colname` across counties.
#
# Args:
#   df:      long panel with columns ds, county_fips and `colname`.
#   wm:      numeric weight matrix. tcrossprod(txn_data, wm) followed by the
#            column-name assignment requires it to be square with one row and
#            one column per county.
#            NOTE(review): presumably ordered like the county columns that
#            spread() produces -- confirm.
#   colname: name (string) of the column of df to aggregate.
#   t:       optional tag; when supplied the output column is named
#            'alter_<colname>.t<t>' instead of 'alter_<colname>'.
#
# Returns:
#   A one-column data frame whose rows are sorted by (ds, county_fips). The key
#   columns are dropped, so the caller has to attach it by position to data in
#   that same order.
weightedAlters.orginal <- function(df, wm, colname, t=NULL) {
    # Wide table: one row per ds, one column per county.
    wide <- df %>%
        select(ds, county_fips) %>%
        mutate(var = df[[colname]]) %>%
        spread(key = county_fips, value = var) %>%
        ungroup()

    txn_data <- wide %>%
        select(-ds) %>%
        as.matrix()

    outMatrix <- tcrossprod(txn_data, wm)
    colnames(outMatrix) <- colnames(txn_data)

    # Label the rows with the dates of the wide table itself. The global
    # `dates` is in order of first appearance in the panel, which need not be
    # the row order of the spread table, so using it could mislabel rows.
    out_df <- data.frame(ds = wide$ds, outMatrix) %>%
        gather(key = 'county_fips', value = 'value', -ds) %>%
        # data.frame() prefixes the numeric county names with one character;
        # drop it before converting back to integer
        mutate(county_fips = as.integer(str_sub(county_fips, 2, -1))) %>%
        arrange(ds, county_fips) %>%
        select(-ds, -county_fips)

    suffix <- if (is.null(t)) '' else str_c('.t', t)
    colnames(out_df)[1] <- str_c('alter_', colname, suffix)
    return(out_df)
}

In [33]:
# Builds two "alter" covariate columns for `colname`: one using the weight
# matrix nrnM and one using rnM.
#
# Args:
#   df:      long panel with columns ds, county_fips and `colname`.
#   nrnM:    weight matrix for the '_nrn' column.
#   rnM:     weight matrix for the '_rn' column.
#            Both must be square with one row and one column per county
#            (required by tcrossprod() plus the column-name assignment).
#            NOTE(review): presumably ordered like the county columns that
#            spread() produces -- confirm.
#   colname: name (string) of the column of df to aggregate.
#
# Returns:
#   A two-column data frame ('alter_<colname>_nrn', 'alter_<colname>_rn') whose
#   rows are sorted by (ds, county_fips). The key columns are dropped, so the
#   caller has to attach it by position to data in that same order.
weightedAlters <- function(df, nrnM, rnM, colname) {
    # Wide table: one row per ds, one column per county.
    wide <- df %>%
        select(ds, county_fips) %>%
        mutate(var = df[[colname]]) %>%
        spread(key = county_fips, value = var) %>%
        ungroup()

    txn_data <- wide %>%
        select(-ds) %>%
        as.matrix()

    # Weighted wide matrix -> single long column sorted by (ds, county_fips).
    to_long_column <- function(weights, suffix) {
        weighted <- tcrossprod(txn_data, weights)
        colnames(weighted) <- colnames(txn_data)

        # Label the rows with the dates of the wide table itself. The global
        # `dates` is in order of first appearance in the panel, which need not
        # be the row order of the spread table.
        long <- data.frame(ds = wide$ds, weighted) %>%
            gather(key = 'county_fips', value = 'value', -ds) %>%
            # data.frame() prefixes the numeric county names with one
            # character; drop it before converting back to integer
            mutate(county_fips = as.integer(str_sub(county_fips, 2, -1))) %>%
            arrange(ds, county_fips) %>%
            select(-ds, -county_fips)

        colnames(long)[1] <- str_c('alter_', colname, suffix)
        long
    }

    return(bind_cols(to_long_column(nrnM, '_nrn'), to_long_column(rnM, '_rn')))
}

In [42]:
# Builds the regression data set for one weather-correlation threshold.
#
# County pairs whose absolute entry in `weather.corr` is below `thres` get a
# selector of 1, all others 0. Two weight matrices are derived from `sci`:
#   * thresWMrn:  scaled_sci * n * selector, normalised within user_county
#                 after the selection.
#   * thresWMnrn: scaled_sci * n with self-pairs set to 0, normalised within
#                 user_county over all pairs, then multiplied by the selector.
# weightedAlters() is run with both matrices for every name in `vars_to_alter`
# and the resulting columns are attached to a subset of `panel`.
#
# Reads the globals weather.corr, sci, panel and vars_to_alter.
#
# NOTE(review): the selector is attached to `sci`, and the alter columns to
# `panel`, by position (bind_cols). This assumes `sci` holds exactly the county
# pairs of `weather.corr`, and that `panel` is ordered by (ds, county_fips) --
# confirm.
#
# Args:
#   thres: numeric threshold on the absolute weather correlation.
#
# Returns:
#   The selected panel columns plus the alter_*_nrn / alter_*_rn columns and
#   their log / asinh transforms.
gen_thres_data <- function(thres) {
    selector <- as.data.frame(abs(weather.corr) < thres) %>%
        mutate(user_county = colnames(weather.corr)) %>%
        gather(key = 'fr_county', value = 'selector', -user_county) %>%
        mutate(user_county = as.numeric(user_county),
               fr_county = as.numeric(fr_county),
               selector = as.numeric(selector)) %>%
        arrange(user_county, fr_county) %>%
        select(selector)

    # Sorted pair table with the selector attached; shared by both matrices.
    sci_selected <- sci %>%
        arrange(user_county, fr_county) %>%
        bind_cols(selector)

    # Long (user_county, fr_county, w) table -> matrix with one row per
    # user_county and one column per fr_county.
    as_weight_matrix <- function(pair_weights) {
        pair_weights %>%
            select(user_county, fr_county, w) %>%
            spread(key = fr_county, value = w) %>%
            ungroup() %>%
            select(-user_county) %>%
            as.matrix()
    }

    thresWMrn <- sci_selected %>%
        mutate(w = scaled_sci * n * selector) %>%
        group_by(user_county) %>%
        mutate(w = w/sum(w)) %>%
        as_weight_matrix()

    thresWMnrn <- sci_selected %>%
        mutate(w = ifelse(user_county == fr_county, 0, scaled_sci * n)) %>%
        group_by(user_county) %>%
        mutate(w = w/sum(w),
               w = w * selector) %>%
        as_weight_matrix()

    alters <- foreach(colname = vars_to_alter, .combine = cbind) %dopar%
        weightedAlters(panel, thresWMnrn, thresWMrn, colname)

    panel %>%
        select(ds,
               county_fips,
               stay_home,
               ban_gmr,
               alter_sh,
               alter_bgmr,
               geo_alter_sh,
               geo_alter_bgmr,
               ash_sg_nhd,
               ash_fb_rnstu,
               log_sg_mcbgv,
               alter_sg_nhd_full,
               alter_fb_btvrc_full,
               alter_fb_rnstu_full,
               alter_fg_mcbgv_full,
               ash_alter_sg_nhd_full,
               ash_alter_fb_rnstu_full,
               log_alter_sg_mcbgv_full,
               all_of(vars_to_alter),
               state_abbv,
               n) %>%
        bind_cols(alters) %>%
        mutate(log_alter_sg_mcbgv_rn = log(alter_sg_mcbgv_rn),
               ash_alter_sg_nhd_rn   = asinh(alter_sg_nhd_rn),
               ash_alter_fb_rnstu_rn = asinh(alter_fb_rnstu_rn),
               log_alter_sg_mcbgv_nrn = log(alter_sg_mcbgv_nrn),
               ash_alter_sg_nhd_nrn   = asinh(alter_sg_nhd_nrn),
               ash_alter_fb_rnstu_nrn = asinh(alter_fb_rnstu_nrn))
}

In [86]:
# Fits one instrumental-variable felm() model and returns the coefficients of
# the policy terms and of the instrumented endogenous variable.
#
# The model regresses DVar on the weather controls (prcp10..prcp20,
# tmax02..tmax20) and six policy terms, with county_fips and ds fixed effects,
# standard errors clustered on state_abbv, and observations weighted by df$n.
# The endogenous variable is '<EndogVar>_<type>'; it is instrumented by the
# alter weather columns with suffix '_rn' when type is 'rn' and '_nrn' when
# type is 'nrn' or 'full'.
#
# Args:
#   df:       data frame holding all variables named above.
#   DVar:     name (string) of the dependent variable.
#   EndogVar: stem (string) of the endogenous variable, without type suffix.
#   type:     one of 'rn', 'nrn', 'full'.
#   class:    'liml' fits with kclass = 'liml'; any other value fits the
#             default felm() IV estimator and is reported as '2SLS'.
#   thres:    threshold value, copied into the output for bookkeeping.
#
# Returns:
#   A data frame with columns thres, coef, se, type, var, class, DV and one row
#   per reported coefficient.
run_model <- function(df, DVar, EndogVar, type, class, thres) {
    if (!isTRUE(type %in% c('rn', 'nrn', 'full'))) {
        stop("`type` must be one of 'rn', 'nrn' or 'full'", call. = FALSE)
    }

    weather <- c(sprintf('prcp%02d', 10:20), sprintf('tmax%02d', 2:20))
    policy  <- c('ban_gmr', 'alter_bgmr', 'geo_alter_bgmr',
                 'stay_home', 'alter_sh', 'geo_alter_sh')

    # The 'full' endogenous variable uses the non-renormalised instruments.
    inst_suffix <- if (type == 'rn') '_rn' else '_nrn'
    instruments <- str_c('alter_', weather, inst_suffix)

    # as.formula() and felm() are called directly in this frame so that the
    # formula environment and the `weights = df$n` expression resolve here.
    form <- as.formula(
        str_c(DVar, ' ~ ', str_c(c(weather, policy), collapse = ' + '),
              ' | county_fips + ds | (',
              EndogVar, '_', type, ' ~ ', str_c(instruments, collapse = ' + '),
              ') | state_abbv')
    )

    if (class == 'liml') {
        fit <- felm(form, df, weights = df$n, kclass = 'liml')
        class_label <- 'LIML'
    } else {
        fit <- felm(form, df, weights = df$n)
        class_label <- '2SLS'
    }

    # Skip the weather controls; keep the policy terms plus the row that
    # follows them (rows 31:37 with the covariates above), which is presumably
    # the fitted endogenous variable.
    keep <- length(weather) + seq_len(length(policy) + 1L)
    estimates <- summary(fit)$coefficients[keep, 1:2]

    out <- data.frame(thres = thres, estimates, type = type) %>%
        mutate(var = rownames(.), class = class_label, DV = DVar)
    colnames(out) <- c('thres', 'coef', 'se', 'type', 'var', 'class', 'DV')
    return(out)
}

In [89]:
# Runs every model specification for one threshold.
#
# Builds the data set with gen_thres_data(thres), then calls run_model() for
# each row of `varlist`, each type ('rn', 'nrn', 'full') and each estimator
# ('liml', '2sls'), row-binding the results.
#
# Args:
#   thres:   threshold passed to gen_thres_data() and recorded in the output.
#   varlist: data frame whose first column holds the dependent-variable names
#            and whose second column holds the endogenous-variable stems.
#
# Returns:
#   The row-bound run_model() outputs.
run_model_and_extract_coef <- function(thres, varlist) {
    print(thres)  # progress indicator
    df <- gen_thres_data(thres)
    # seq_len() instead of 1:nrow() so an empty varlist does not iterate over
    # c(1, 0); the estimator iterator is not called `class` so it no longer
    # masks base::class inside the loop body.
    out <- foreach(i = seq_len(nrow(varlist)), .combine = rbind) %:%
        foreach(type = c('rn', 'nrn', 'full'), .combine = rbind) %:%
        foreach(estimator = c('liml', '2sls'), .combine = rbind) %dopar%
        run_model(df, varlist[[i, 1]], varlist[[i, 2]], type, estimator, thres)
    return(out)
}

In [90]:
# Dependent variable / endogenous-variable stem pairs, one model family per
# row. run_model() appends '_rn', '_nrn' or '_full' to EndogVar.
varlist <- data.frame(
    DVar     = c('log_sg_mcbgv',       'ash_fb_rnstu',       'ash_sg_nhd',       'fb_btvrc'),
    EndogVar = c('log_alter_sg_mcbgv', 'ash_alter_fb_rnstu', 'ash_alter_sg_nhd', 'alter_fb_btvrc'),
    # FALSE rather than F: F is an ordinary variable that can be reassigned
    stringsAsFactors = FALSE
)

In [91]:
# Threshold sweep from 0.5 to 1 in steps of 0.05, run sequentially (%do%);
# the parallelism is inside run_model_and_extract_coef().
# A later cell reassigns `results` with a sweep that starts at 0.05.
results <- foreach(
    t = seq(0.5, 1, by = .05),
    .combine = rbind
) %do% run_model_and_extract_coef(t, varlist)

[1] 0.5
[1] 0.55
[1] 0.6
[1] 0.65
[1] 0.7
[1] 0.75
[1] 0.8
[1] 0.85
[1] 0.9
[1] 0.95
[1] 1


In [112]:
# Threshold sweep from 0.05 to 1 in steps of 0.05, run sequentially (%do%);
# the parallelism is inside run_model_and_extract_coef().
results <- foreach(
    t = seq(0.05, 1, by = .05),
    .combine = rbind
) %do% run_model_and_extract_coef(t, varlist)

[1] 0.05
[1] 0.1
[1] 0.15
[1] 0.2
[1] 0.25
[1] 0.3
[1] 0.35
[1] 0.4
[1] 0.45
[1] 0.5
[1] 0.55
[1] 0.6
[1] 0.65
[1] 0.7
[1] 0.75
[1] 0.8
[1] 0.85
[1] 0.9
[1] 0.95
[1] 1


In [125]:
# Display order of the coefficient names once the type suffix is removed.
levels <- c('ban_gmr', 'alter_bgmr', 'geo_alter_bgmr', 'stay_home', 'alter_sh', 'geo_alter_sh',
            'alter_fb_btvrc', 'ash_alter_fb_rnstu', 'ash_alter_sg_nhd', 'log_alter_sg_mcbgv')

# Normalise the coefficient row names: drop backticks and the '(fit)' marker,
# then strip the trailing type suffix so the three types share one name.
results <- results %>%
    mutate(var = str_replace_all(var, '`', ''),
           var = str_replace_all(var, '\\(fit\\)', ''),
           # Anchored to the end of the name: an unanchored '_rn' also matches
           # inside 'fb_rnstu' and turned 'ash_alter_fb_rnstu' into
           # 'ash_alter_fbstu'.
           var = str_replace_all(var, '_(nrn|rn|full)$', ''),
           var = factor(var, levels = levels))

In [188]:
options(repr.plot.width=12, repr.plot.height=10)
# Legend labels for `type`, in the order rn, nrn, full.
labels <- c('Renormalized', 'Non-renormalized', 'Full Endogenous Variable')

# Coefficient paths (with 1.96 * se bands) across thresholds for DV fb_btvrc,
# one facet row per coefficient and one facet column per estimator.
p <- results %>%
    filter(DV == 'fb_btvrc') %>%
    ggplot(aes(x = thres, y = coef, color = type, fill = type)) +
    geom_ribbon(aes(ymin = coef - 1.96 * se, ymax = coef + 1.96 * se), alpha = .2, color = NA) +
    # a thinner geom_line(size = 1) used to be drawn first and was fully
    # covered by this one
    geom_line(size = 1.5) +
    # NOTE(review): constants inside aes() draw one segment per data row, so
    # the overlapping segments likely cancel out alpha = .5.
    geom_segment(aes(x=.05, xend=1, y=0, yend=0), linetype = 2, size = .5, color = 'black', alpha = .5) +
    facet_grid(var~class, scales = 'free') +
    # `labels` is positional. Pinning `limits` keeps each label on its own
    # type; when `type` is a character column the default order is
    # alphabetical (full, nrn, rn) and the labels would be misassigned.
    scale_color_viridis_d(option = 'D', limits = c('rn', 'nrn', 'full'), labels = labels) +
    scale_fill_viridis_d(option = 'D', limits = c('rn', 'nrn', 'full'), labels = labels) +
    ylab('Estimate') +
    xlab('Absolute Correlation Threshold') +
    ggtitle('Weather Threshold Sensitivity: BTVRC (FB)') +
    scale_x_continuous(expand = c(0,0), breaks = seq(0.05, 1, .05)) +
    theme(panel.spacing.x=unit(.75, "lines"),
          axis.text.x = element_text(angle = 90, hjust = 1, vjust = .5),
          #strip.text.x = element_blank(),
          #strip.text.y = element_text(size=16),
          panel.background = element_blank(),
          axis.line = element_line(colour = "black"),
          #strip.placement.y='top',
          #strip.background=element_blank(),
          strip.text.x.top = element_text(size=16),
          legend.position="bottom",
          plot.title = element_text(hjust = .5))

# Arguments named explicitly; the original call relied on `file` partially
# matching `filename` and on the plot landing in the second position.
ggsave(filename = '~/covid_mobility/paper_code/output/weather_threshold_btvrc.pdf',
       plot = p, scale = 1.25, width = 6.5, height = 8)

In [189]:
# Coefficient paths (with 1.96 * se bands) across thresholds for DV
# ash_fb_rnstu, one facet row per coefficient and one facet column per
# estimator. Uses the global `labels` vector (order: rn, nrn, full).
p <- results %>%
    filter(DV == 'ash_fb_rnstu') %>%
    ggplot(aes(x = thres, y = coef, color = type, fill = type)) +
    geom_ribbon(aes(ymin = coef - 1.96 * se, ymax = coef + 1.96 * se), alpha = .2, color = NA) +
    # a thinner geom_line(size = 1) used to be drawn first and was fully
    # covered by this one
    geom_line(size = 1.5) +
    # NOTE(review): constants inside aes() draw one segment per data row, so
    # the overlapping segments likely cancel out alpha = .5.
    geom_segment(aes(x=.05, xend=1, y=0, yend=0), linetype = 2, size = .5, color = 'black', alpha = .5) +
    facet_grid(var~class, scales = 'free') +
    # `labels` is positional. Pinning `limits` keeps each label on its own
    # type; when `type` is a character column the default order is
    # alphabetical (full, nrn, rn) and the labels would be misassigned.
    scale_color_viridis_d(option = 'D', limits = c('rn', 'nrn', 'full'), labels = labels) +
    scale_fill_viridis_d(option = 'D', limits = c('rn', 'nrn', 'full'), labels = labels) +
    ylab('Estimate') +
    xlab('Absolute Correlation Threshold') +
    ggtitle('Weather Threshold Sensitivity: ASINH RNSTU (FB)') +
    scale_x_continuous(expand = c(0,0), breaks = seq(0.05, 1, .05)) +
    theme(panel.spacing.x=unit(.75, "lines"),
          axis.text.x = element_text(angle = 90, hjust = 1, vjust = .5),
          #strip.text.x = element_blank(),
          #strip.text.y = element_text(size=16),
          panel.background = element_blank(),
          axis.line = element_line(colour = "black"),
          #strip.placement.y='top',
          #strip.background=element_blank(),
          strip.text.x.top = element_text(size=16),
          legend.position="bottom",
          plot.title = element_text(hjust = .5))

# Arguments named explicitly; the original call relied on `file` partially
# matching `filename` and on the plot landing in the second position.
ggsave(filename = '~/covid_mobility/paper_code/output/weather_threshold_rnstu.pdf',
       plot = p, scale = 1.25, width = 6.5, height = 8)

In [190]:
# Coefficient paths (with 1.96 * se bands) across thresholds for DV
# ash_sg_nhd, one facet row per coefficient and one facet column per
# estimator. Uses the global `labels` vector (order: rn, nrn, full).
p <- results %>%
    filter(DV == 'ash_sg_nhd') %>%
    ggplot(aes(x = thres, y = coef, color = type, fill = type)) +
    geom_ribbon(aes(ymin = coef - 1.96 * se, ymax = coef + 1.96 * se), alpha = .2, color = NA) +
    # a thinner geom_line(size = 1) used to be drawn first and was fully
    # covered by this one
    geom_line(size = 1.5) +
    # NOTE(review): constants inside aes() draw one segment per data row, so
    # the overlapping segments likely cancel out alpha = .5.
    geom_segment(aes(x=.05, xend=1, y=0, yend=0), linetype = 2, size = .5, color = 'black', alpha = .5) +
    facet_grid(var~class, scales = 'free') +
    # `labels` is positional. Pinning `limits` keeps each label on its own
    # type; when `type` is a character column the default order is
    # alphabetical (full, nrn, rn) and the labels would be misassigned.
    scale_color_viridis_d(option = 'D', limits = c('rn', 'nrn', 'full'), labels = labels) +
    scale_fill_viridis_d(option = 'D', limits = c('rn', 'nrn', 'full'), labels = labels) +
    ylab('Estimate') +
    xlab('Absolute Correlation Threshold') +
    ggtitle('Weather Threshold Sensitivity: ASINH NHD (SG)') +
    scale_x_continuous(expand = c(0,0), breaks = seq(0.05, 1, .05)) +
    theme(panel.spacing.x=unit(.75, "lines"),
          axis.text.x = element_text(angle = 90, hjust = 1, vjust = .5),
          #strip.text.x = element_blank(),
          #strip.text.y = element_text(size=16),
          panel.background = element_blank(),
          axis.line = element_line(colour = "black"),
          #strip.placement.y='top',
          #strip.background=element_blank(),
          strip.text.x.top = element_text(size=16),
          legend.position="bottom",
          plot.title = element_text(hjust = .5))

# Arguments named explicitly; the original call relied on `file` partially
# matching `filename` and on the plot landing in the second position.
ggsave(filename = '~/covid_mobility/paper_code/output/weather_threshold_nhd.pdf',
       plot = p, scale = 1.25, width = 6.5, height = 8)

In [191]:
# Coefficient paths (with 1.96 * se bands) across thresholds for DV
# log_sg_mcbgv, one facet row per coefficient and one facet column per
# estimator. Uses the global `labels` vector (order: rn, nrn, full).
p <- results %>%
    filter(DV == 'log_sg_mcbgv') %>%
    ggplot(aes(x = thres, y = coef, color = type, fill = type)) +
    geom_ribbon(aes(ymin = coef - 1.96 * se, ymax = coef + 1.96 * se), alpha = .2, color = NA) +
    # a thinner geom_line(size = 1) used to be drawn first and was fully
    # covered by this one
    geom_line(size = 1.5) +
    # NOTE(review): constants inside aes() draw one segment per data row, so
    # the overlapping segments likely cancel out alpha = .5.
    geom_segment(aes(x=.05, xend=1, y=0, yend=0), linetype = 2, size = .5, color = 'black', alpha = .5) +
    facet_grid(var~class, scales = 'free') +
    # `labels` is positional. Pinning `limits` keeps each label on its own
    # type; when `type` is a character column the default order is
    # alphabetical (full, nrn, rn) and the labels would be misassigned.
    scale_color_viridis_d(option = 'D', limits = c('rn', 'nrn', 'full'), labels = labels) +
    scale_fill_viridis_d(option = 'D', limits = c('rn', 'nrn', 'full'), labels = labels) +
    ylab('Estimate') +
    xlab('Absolute Correlation Threshold') +
    ggtitle('Weather Threshold Sensitivity: LOG MCBGV (SG)') +
    scale_x_continuous(expand = c(0,0), breaks = seq(0.05, 1, .05)) +
    theme(panel.spacing.x=unit(.75, "lines"),
          axis.text.x = element_text(angle = 90, hjust = 1, vjust = .5),
          #strip.text.x = element_blank(),
          #strip.text.y = element_text(size=16),
          panel.background = element_blank(),
          axis.line = element_line(colour = "black"),
          #strip.placement.y='top',
          #strip.background=element_blank(),
          strip.text.x.top = element_text(size=16),
          legend.position="bottom",
          plot.title = element_text(hjust = .5))

# Arguments named explicitly; the original call relied on `file` partially
# matching `filename` and on the plot landing in the second position.
ggsave(filename = '~/covid_mobility/paper_code/output/weather_threshold_mcbgv.pdf',
       plot = p, scale = 1.25, width = 6.5, height = 8)

In [192]:
# Replaces the global `panel` with a different processed file. Functions
# defined earlier that read the global `panel` (e.g. gen_thres_data) will use
# this table if they are called after this point.
panel <- readRDS('/nfs/sloanlab004/projects/covid_mobility_proj/data/PROCESSED_DATA/data_ego_alter_interactions.RData')

In [196]:
# Interactive check for a column named `alter_prcp11.r`. The recorded output
# (NULL plus an "Unknown or uninitialised column" warning) shows that the
# reloaded panel has no such column.
panel$alter_prcp11.r

“Unknown or uninitialised column: `alter_prcp11.r`.”


NULL