In [12]:
library(readr)
library(dplyr)
library(stringr)
library(lfe)
library(lubridate)
library(ggplot2)
library(doMC)
registerDoMC(4)

In [13]:
# Root of the data directory.
fp <- '/pool001/mfzhao/'
# Load the processed panel. The path is built by plain concatenation, so the
# trailing slash on `fp` produces a doubled separator in the final path.
df <- read_rds(paste0(fp, '/PROCESSED_DATA/panel_xgr.RDS'))

In [14]:
# Keep the panel's column names around; get_coefs() searches them by regex.
cns <- names(df)
cns

In [15]:
# Dependent-variable column names, and the same names with a 'log_' prefix.
dvs  <- c('mcbgv.r', 'pnchd.r', 'pgt1hafh.r', 'ppthlt75.r', 'pgt2kmt.r')
ldvs <- paste0('log_', dvs)

In [7]:
#' Fit one fixed-effects regression with `felm()` and tidy its coefficients.
#'
#' Reads three globals: `df` (estimation data, weighted by `df$n`), `cns`
#' (column names of `df`) and `f1` (the regressor string that marks the
#' 'base' specification).
#'
#' @param dv    Name of the dependent-variable column.
#' @param vars  Regressor part of the formula, as a single string.
#' @param ivmod If TRUE, add an IV term in which the alter version of `dv` is
#'              instrumented with every `stalter_*` column of `cns` matching
#'              the pattern below.
#' @param ...   Passed on to `felm()`.
#' @return A data.frame with columns model, dv, var, estimate, se, t, p-val.
#'         Rows whose name matches `[pPtT][rRmM][cCaA][pPxX]` are dropped.
get_coefs <- function(dv, vars, ivmod = FALSE, ...) {
    use_iv <- isTRUE(as.logical(ivmod))

    lhs <- str_c(dv, ' ~ ')
    # felm formula parts: regressors | fixed effects | IV part | cluster.
    rhs <- str_c(vars, '| key + date | ')

    if (use_iv) {
        instruments <- str_c(cns[str_detect(cns, '^stalter_[pt][rm][ca][px][01][0-9]\\.r$')], collapse = ' + ')
        # Endogenous regressor: 'log_x' -> 'log_alter_x', otherwise 'alter_x'.
        endog <- if (str_detect(dv, 'log')) {
            str_replace(dv, 'log', 'log_alter')
        } else {
            str_c('alter_', dv)
        }
        rhs <- str_c(rhs, '(', endog, ' ~ ', instruments, ') | cluster')
    } else {
        rhs <- str_c(rhs, '0 | cluster')
    }
    form <- as.formula(str_c(lhs, rhs))
    fit <- felm(form, df, weights = df$n, ...)

    # The label depends on an exact string match against the global `f1`.
    model_label <- if (isTRUE(vars == f1)) 'base' else 'ap'
    if (use_iv) {
        model_label <- str_c(model_label, ' + iv')
    }

    # Columns 1-4 of the coefficient matrix are estimate, s.e., t and p-value.
    as.data.frame(summary(fit)$coefficients) %>%
        mutate(var   = rownames(.),
               dv    = dv,
               model = model_label) %>%
        filter(!str_detect(var, '[pPtT][rRmM][cCaA][pPxX]')) %>%
        select(model, dv, var, estimate = 1, se = 2, t = 3, `p-val` = 4)
}

In [21]:
# Regressor strings handed to get_coefs(): f1 has the three policy terms, f2
# appends their stalter_* counterparts. Whitespace is significant because
# get_coefs() compares its `vars` argument to f1 with `==`.
f1 <- ' p1sdp.r + p2shp.r + p3rop.r '
f2 <- paste0(f1, '+ stalter_p1sdp.r + stalter_p2shp.r + stalter_p3rop.r ')

In [22]:
# Fit every combination of log outcome, regressor string and OLS/IV variant
# in parallel, and stack the resulting coefficient tables row-wise.
coefs <- foreach(dv = ldvs, .combine = rbind) %:%
    foreach(f = c(f1, f2), .combine = rbind) %:%
    foreach(ivmod = c(FALSE, TRUE), .combine = rbind) %dopar%
    get_coefs(dv, f, ivmod)

In [31]:
# Inline plot size for the notebook.
options(repr.plot.width = 15, repr.plot.height = 10)

# Facet strip labels: policy names for the columns, 'a'/'b' for the rows.
pn <- c('p1sdp' = "Initial Policies",
        'p2shp' = "Shelter-in-place",
        'p3rop' = "Reopening",
        'a'     = 'ego state policy',
        'b'     = 'alter state policy')

# Policy coefficients per model: rows containing 'fit' and the 'base + iv'
# model are excluded; stalter_* terms go to facet row 'b', the rest to 'a'.
# Ranges span +/- 1.98 * se.
p <- coefs %>%
    mutate(dv  = str_remove(dv, '\\.r'),
           var = str_remove(var, '\\.r'),
           var = ifelse(str_detect(var, 'fit'), 'Peer Effect', var)) %>%
    filter(var != 'Peer Effect',
           model != 'base + iv') %>%
    mutate(vtype = ifelse(str_detect(var, 'stalter'), 'b', 'a'),
           var   = str_remove(var, 'stalter_'),
           model = factor(model, levels = c('base', 'base + iv', 'ap', 'ap + iv'))) %>%
    ggplot(aes(x = model, y = estimate, color = dv)) +
    geom_hline(aes(yintercept = 0), linetype = 2) +
    geom_point(size = 2, position = position_dodge(width = .5)) +
    geom_linerange(aes(ymin = estimate - 1.98 * se, ymax = estimate + 1.98 * se),
                   size = .5, position = position_dodge(width = .5)) +
    facet_grid(vtype ~ var, scales = 'free_y', labeller = as_labeller(pn)) +
    labs(x = '', y = '', color = "Outcome") +
    theme_light() +
    theme(text = element_text(size = 20),
          legend.position = 'bottom')

# Write the figure to disk as a PDF.
ggsave('/home/mfzhao/p2.pdf', p, device = 'pdf', width = 6.5, height = 5, scale = 2)

In [75]:
# Show only the rows whose coefficient name contains 'fit', relabelled as
# 'Peer Effect'.
coefs %>%
    mutate(dv  = str_remove(dv, '\\.r'),
           var = str_remove(var, '\\.r')) %>%
    mutate(var = ifelse(str_detect(var, 'fit'), 'Peer Effect', var)) %>%
    filter(var == 'Peer Effect')

model,dv,var,estimate,se,t,p-val
<chr>,<chr>,<chr>,<dbl>,<dbl>,<dbl>,<dbl>
base + iv,log_mcbgv,Peer Effect,1.806846,0.1849542,9.76915,3.506861e-13
ap + iv,log_mcbgv,Peer Effect,1.834902,0.1899476,9.660044,5.080797e-13
base + iv,log_pnchd,Peer Effect,2.013705,0.1894995,10.626442,2.000159e-14
ap + iv,log_pnchd,Peer Effect,2.073432,0.1930915,10.738083,1.386617e-14
base + iv,log_pgt1hafh,Peer Effect,2.135973,0.1977885,10.799276,1.135099e-14
ap + iv,log_pgt1hafh,Peer Effect,2.193293,0.2174899,10.084572,1.210071e-13
base + iv,log_ppthlt75,Peer Effect,2.287997,0.2359575,9.696648,4.485881e-13
ap + iv,log_ppthlt75,Peer Effect,2.293271,0.2623006,8.742912,1.206497e-11
base + iv,log_pgt2kmt,Peer Effect,2.18212,0.210632,10.359868,4.827233e-14
ap + iv,log_pgt2kmt,Peer Effect,2.227914,0.2347766,9.489509,9.094317e-13


In [None]:
# Install paletteer only when it is missing, so re-running the notebook does
# not reinstall the package every time.
if (!requireNamespace('paletteer', quietly = TRUE)) {
    install.packages('paletteer')
}

In [None]:
# NOTE(review): this passes a numeric `type` as get_coefs()'s second argument
# and then uses `models` and `iv` columns that the get_coefs() defined at the
# top of the notebook does not return -- presumably written against an earlier
# version of the helper; confirm before re-running.
coefs <- foreach(dv = dvs, .combine = rbind) %:%
    foreach(type = c(1, 2, 3), .combine = rbind) %:%
    foreach(ivmod = c(FALSE, TRUE), .combine = rbind) %dopar%
    get_coefs(dv, type, ivmod)

# Label each fit 'model<k>', shifting the index by 3 where `iv` is true.
coefs <- coefs %>%
    mutate(model = str_c('model', models + ifelse(iv, 3, 0)))

In [None]:
# Inline plot size for the notebook.
options(repr.plot.width = 10, repr.plot.height = 5)

# Facet column labels for the three policy variables.
pn <- c('p1sdp' = "Initial Policies",
        'p2shp' = "Shelter-in-place",
        'p3rop' = "Reopening")

# model1 ('Base') against model2 ('Base + Slopes'): rows whose name contains
# 'DSS' go to the 'slope' facet row, everything else to 'delta'.
coefs %>%
    filter(model %in% c('model1', 'model2')) %>%
    mutate(model = ifelse(model == 'model1', 'Base', 'Base + Slopes'),
           vtype = ifelse(str_detect(var, 'DSS'), 'slope', 'delta'),
           var   = str_remove(var, 'DSS')) %>%
    ggplot(aes(x = model, y = estimate, color = dv)) +
    geom_hline(aes(yintercept = 0), linetype = 2) +
    geom_point(size = 2, position = position_dodge(width = .5)) +
    geom_linerange(aes(ymin = estimate - 1.98 * se, ymax = estimate + 1.98 * se),
                   size = .5, position = position_dodge(width = .5)) +
    facet_grid(vtype ~ var, scales = 'free_y', labeller = as_labeller(pn)) +
    labs(x = '', y = '', color = "Outcome") +
    theme_light() +
    theme(text = element_text(size = 15),
          strip.text.y = element_blank())
    

In [None]:
# model1 ('Base') against model3 ('Base + AP'): rows whose name contains
# 'alter_' go to the 'slope' facet row, everything else to 'delta'.
coefs %>%
    filter(model %in% c('model1', 'model3')) %>%
    mutate(model = ifelse(model == 'model1', 'Base', 'Base + AP'),
           vtype = ifelse(str_detect(var, 'alter_'), 'slope', 'delta'),
           var   = str_remove(var, 'alter_')) %>%
    ggplot(aes(x = model, y = estimate, color = dv)) +
    geom_hline(aes(yintercept = 0), linetype = 2) +
    geom_point(size = 2, position = position_dodge(width = .5)) +
    geom_linerange(aes(ymin = estimate - 1.98 * se, ymax = estimate + 1.98 * se),
                   size = .5, position = position_dodge(width = .5)) +
    facet_grid(vtype ~ var, scales = 'free_y', labeller = as_labeller(pn)) +
    labs(x = '', y = '', color = "Outcome") +
    theme_light() +
    theme(text = element_text(size = 15),
          strip.text.y = element_blank())
    

In [None]:
# Models 1, 2 and 5 relabelled 'Base', 'Slopes' and 'Slopes + IV'. Rows
# containing 'fit' are tagged 'Peer Effect' and then removed from the plot.
coefs %>%
    filter(model %in% c('model1', 'model2', 'model5')) %>%
    mutate(model = ifelse(model == 'model1', 'Base',
                          ifelse(model == 'model2', 'Slopes', 'Slopes + IV')),
           vtype = ifelse(str_detect(var, 'fit'), 'iv',
                          ifelse(str_detect(var, 'DSS'), 'slope', 'base')),
           var   = str_remove(var, 'DSS'),
           var   = ifelse(str_detect(var, 'fit'), 'Peer Effect', var)) %>%
    filter(var != 'Peer Effect') %>%
    ggplot(aes(x = model, y = estimate, color = dv)) +
    geom_hline(aes(yintercept = 0), linetype = 2) +
    geom_point(size = 2, position = position_dodge(width = .5)) +
    geom_linerange(aes(ymin = estimate - 1.98 * se, ymax = estimate + 1.98 * se),
                   size = .5, position = position_dodge(width = .5)) +
    facet_grid(vtype ~ var, scales = 'free', labeller = as_labeller(pn)) +
    labs(x = '', y = '', color = "Outcome") +
    theme_light() +
    theme(text = element_text(size = 15),
          strip.text.y = element_blank(),
          legend.position = 'bottom')
    

In [None]:
# Models 1, 3 and 6 relabelled 'Base', 'AP' and 'AP + IV', shown in that
# order. Rows containing 'fit' are tagged 'Peer Effect' and then removed.
coefs %>%
    filter(model %in% c('model1', 'model3', 'model6')) %>%
    mutate(model = ifelse(model == 'model1', 'Base',
                          ifelse(model == 'model3', 'AP', 'AP + IV')),
           vtype = ifelse(str_detect(var, 'fit'), 'iv',
                          ifelse(str_detect(var, 'alter_'), 'slope', 'base')),
           var   = str_remove(var, 'alter_'),
           var   = ifelse(str_detect(var, 'fit'), 'Peer Effect', var),
           model = factor(model, levels = c('Base', 'AP', 'AP + IV'))) %>%
    filter(var != 'Peer Effect') %>%
    ggplot(aes(x = model, y = estimate, color = dv)) +
    geom_hline(aes(yintercept = 0), linetype = 2) +
    geom_point(size = 2, position = position_dodge(width = .5)) +
    geom_linerange(aes(ymin = estimate - 1.98 * se, ymax = estimate + 1.98 * se),
                   size = .5, position = position_dodge(width = .5)) +
    facet_grid(vtype ~ var, scales = 'free', labeller = as_labeller(pn)) +
    labs(x = '', y = '', color = "Outcome") +
    theme_light() +
    theme(text = element_text(size = 15),
          strip.text.y = element_blank(),
          legend.position = 'bottom')
    

In [None]:
# Peer Effect rows only. model4 is labelled 'IV', model5 'slopes + IV', and
# every other model 'AP + IV'.
# NOTE(review): estimates and standard errors are multiplied by 1.2 before
# plotting; the reason for that factor is not visible here -- confirm.
coefs %>%
    mutate(var   = ifelse(str_detect(var, 'fit'), 'Peer Effect', var),
           model = ifelse(model == 'model4', 'IV',
                          ifelse(model == 'model5', 'slopes + IV', 'AP + IV')),
           estimate = estimate * 1.2,
           se       = se * 1.2) %>%
    filter(var == 'Peer Effect') %>%
    ggplot(aes(x = model, y = estimate, color = dv)) +
    geom_hline(aes(yintercept = 0), linetype = 2) +
    geom_point(size = 2, position = position_dodge(width = .5)) +
    geom_linerange(aes(ymin = estimate - 1.98 * se, ymax = estimate + 1.98 * se),
                   size = .5, position = position_dodge(width = .5)) +
    labs(x = '', y = '', color = "Outcome") +
    theme_light() +
    theme(text = element_text(size = 15),
          strip.text.y = element_blank(),
          legend.position = 'bottom')

In [None]:
# Multiply each Peer Effect estimate and s.e. by the matching 'base' estimates
# of the 'Base + AP + IV' model, append the original rows, and drop the
# remaining Peer Effect rows.
# NOTE(review): the join has no explicit `by`; it relies on the columns the
# two sides share -- confirm that this is only `dv`.
# The pipeline previously ended in a dangling `%>%`, which left the cell
# unparseable; it now ends at the final filter().
temp %>%
    filter(var == 'Peer Effect') %>%
    rename(var2 = var) %>%
    left_join(temp %>%
              filter(vtype == 'base', model == 'Base + AP + IV') %>%
              select(dv, var, m = estimate)) %>%
    mutate(estimate = estimate * m,
           se = se * m) %>%
    select(model, iv, dv, var, estimate, se, t, `p-val`, vtype) %>%
    bind_rows(temp) %>%
    filter(var != 'Peer Effect')

In [None]:
# Inline plot size for the notebook.
options(repr.plot.width = 15, repr.plot.height = 10)

# Every coefficient, faceted by variable group (rows) and model (columns).
# Groups are derived from the coefficient name; anything that matches none of
# the three patterns falls into 'peer effect'.
coefs %>%
    mutate(var = ifelse(str_detect(var, '(fit)'), 'pfx', var)) %>%
    mutate(vgrp = ifelse(str_detect(var, '^p[123]...$'), 'main effect',
                         ifelse(str_detect(var, '^alter_'), 'alter policy',
                                ifelse(str_detect(var, 'DSS$'), 'day slopes', 'peer effect')))) %>%
    ggplot(aes(x = var, y = estimate, color = dv)) +
    geom_hline(aes(yintercept = 0), linetype = 2) +
    geom_point(size = 4, position = position_dodge(width = .5)) +
    geom_linerange(aes(ymin = estimate - 1.98 * se, ymax = estimate + 1.98 * se),
                   size = 1, position = position_dodge(width = .5)) +
    facet_grid(vgrp ~ model, scales = 'free') +
    theme(legend.position = 'bottom')

In [None]:
# The three policy coefficients for models 1, 3, 4 and 6, one panel per model.
coefs %>%
    filter(var %in% c('p1sdp', 'p2shp', 'p3rop'),
           model %in% c('model1', 'model3', 'model4', 'model6')) %>%
    ggplot(aes(x = var, y = estimate, color = dv)) +
    geom_hline(aes(yintercept = 0), linetype = 2) +
    geom_point(size = 4, position = position_dodge(width = .5)) +
    geom_linerange(aes(ymin = estimate - 1.98 * se, ymax = estimate + 1.98 * se),
                   size = 1, position = position_dodge(width = .5)) +
    facet_grid(. ~ model) +
    theme(legend.position = 'bottom')
    

In [None]:
# The three alter-policy coefficients for models 1, 3, 4 and 6, one panel per
# model. The third name was misspelled 'atler_p3rop', which silently dropped
# that coefficient from the plot.
coefs %>%
    filter(var %in% c('alter_p1sdp', 'alter_p2shp', 'alter_p3rop')) %>%
    filter(model %in% c('model1', 'model3', 'model4', 'model6')) %>%
    ggplot(aes(x = var, y = estimate, color = dv)) +
    geom_hline(aes(yintercept = 0), linetype = 2) +
    geom_point(position = position_dodge(width = .5), size = 4) +
    geom_linerange(aes(ymin = estimate - 1.98 * se, ymax = estimate + 1.98 * se),
                   position = position_dodge(width = .5), size = 1) +
    facet_grid(. ~ model) +
    theme(legend.position = 'bottom')
    

In [None]:
# NOTE(review): both get_coefs() definitions in this notebook need a second
# argument, so this one-argument call presumably targets an earlier version
# of the helper -- confirm before re-running.
did <- foreach(dv = dvs, .combine = rbind) %dopar%
    get_coefs(dv)

# Estimates with +/- 1.98 * se ranges, colored by outcome.
did %>%
    ggplot(aes(x = var, y = estimate, color = dv)) +
    geom_hline(aes(yintercept = 0), linetype = 2) +
    geom_point(size = 4, position = position_dodge(width = .5)) +
    geom_linerange(aes(ymin = estimate - 1.98 * se, ymax = estimate + 1.98 * se),
                   size = 1, position = position_dodge(width = .5))

In [None]:
# NOTE(review): neither get_coefs() definition in this notebook has a `type`
# parameter (it would be forwarded through `...`), so this call presumably
# targets an earlier version of the helper -- confirm before re-running.
did_alterpolicy <- foreach(dv = dvs, .combine = rbind) %dopar%
    get_coefs(dv, type = 'did_alterpolicy')

# Estimates with +/- 1.98 * se ranges, colored by outcome.
did_alterpolicy %>%
    ggplot(aes(x = var, y = estimate, color = dv)) +
    geom_hline(aes(yintercept = 0), linetype = 2) +
    geom_point(size = 4, position = position_dodge(width = .5)) +
    geom_linerange(aes(ymin = estimate - 1.98 * se, ymax = estimate + 1.98 * se),
                   size = 1, position = position_dodge(width = .5))

In [None]:
# `iv = T` reached get_coefs() only through partial matching to `ivmod`; the
# argument is now spelled out and uses TRUE.
# NOTE(review): neither get_coefs() definition in this notebook has a `type`
# parameter (it would be forwarded through `...`), so this call presumably
# targets an earlier version of the helper -- confirm before re-running.
iv_alterpolicy <- foreach(dv = dvs, .combine = rbind) %dopar%
    get_coefs(dv, type = 'did_alterpolicy', ivmod = TRUE)

# Estimates with +/- 1.98 * se ranges; the y axis is limited to [-.1, .1].
ggplot(iv_alterpolicy, aes(x = var, y = estimate, color = dv)) +
    geom_hline(aes(yintercept = 0), linetype = 2) +
    geom_point(position = position_dodge(width = .5), size = 4) +
    geom_linerange(aes(ymin = estimate - 1.98 * se, ymax = estimate + 1.98 * se),
                   position = position_dodge(width = .5), size = 1) +
    ylim(-.1, .1)

In [None]:
#' Fit one fixed-effects regression with `felm()` and tidy its coefficients.
#'
#' Reads two globals: `df` (estimation data, weighted by `df$n`) and `cns`
#' (column names of `df`). Every column of `cns` matching
#' `^[pt][rm][ca][px][01][0-9]$` is added to the regressors.
#'
#' @param dv    Name of the dependent-variable column.
#' @param rform Extra regressors appended after those columns, as a string
#'              that starts with ' + '.
#' @param ivmod If TRUE, add an IV term in which the alter version of `dv` is
#'              instrumented with the matching `alter_*` columns.
#' @param ...   Passed on to `felm()`.
#' @return A data.frame with columns rform, iv, dv, var, estimate, se, t,
#'         p-val. Rows whose name matches `[pt][rm][ca][px]` are dropped.
get_coefs2 <- function(dv, rform, ivmod = FALSE, ...) {
    use_iv <- isTRUE(as.logical(ivmod))

    lhs <- str_c(dv, ' ~ ')
    weather <- str_c(cns[str_detect(cns, '^[pt][rm][ca][px][01][0-9]$')], collapse = ' + ')
    # felm formula parts: regressors | fixed effects | IV part | cluster.
    rhs <- str_c(weather,
                 rform,
                 '| key + date | ')
    if (use_iv) {
        instruments <- str_c(cns[str_detect(cns, '^alter_[pt][rm][ca][px][01][0-9]$')], collapse = ' + ')
        # Endogenous regressor: 'log_x' -> 'log_alter_x', otherwise 'alter_x'.
        endog <- if (str_detect(dv, 'log')) {
            str_replace(dv, 'log', 'log_alter')
        } else {
            str_c('alter_', dv)
        }
        rhs <- str_c(rhs, '(', endog, ' ~ ', instruments, ') | cluster')
    } else {
        rhs <- str_c(rhs, '0 | cluster')
    }
    form <- as.formula(str_c(lhs, rhs))
    fit <- felm(form, df, weights = df$n, ...)

    # Columns 1-4 of the coefficient matrix are estimate, s.e., t and p-value.
    as.data.frame(summary(fit)$coefficients) %>%
        mutate(var   = rownames(.),
               dv    = dv,
               model = rform,
               iv    = ivmod) %>%
        filter(!str_detect(var, '[pt][rm][ca][px]')) %>%
        select(rform = model, iv, dv, var, estimate = 1, se = 2, t = 3, `p-val` = 4)
}

In [None]:
# Fit the specification with policy, DSS, alter and alter-DSS terms for each
# outcome, then plot coefficients ending in 'DSS' ('slopes') separately from
# the rest ('intercept'). The parentheses only make the existing evaluation
# order explicit: the foreach result is what gets piped onward.
(foreach(dv = dvs, .combine = rbind) %do%
    get_coefs2(dv, ' + p1sdp + p2shp + p3rop + p1sdpDSS + p2shpDSS + p3ropDSS + alter_p1sdp + alter_p2shp + alter_p3rop + alter_p1sdpDSS + alter_p2shpDSS + alter_p3ropDSS')) %>%
    mutate(fac = ifelse(str_detect(var, 'DSS$'), 'slopes', 'intercept')) %>%
    ggplot(aes(x = var, y = estimate, color = dv)) +
    geom_hline(aes(yintercept = 0), linetype = 2) +
    geom_point(size = 4, position = position_dodge(width = .5)) +
    geom_linerange(aes(ymin = estimate - 1.98 * se, ymax = estimate + 1.98 * se),
                   size = 1, position = position_dodge(width = .5)) +
    facet_grid(fac ~ ., scales = 'free')

In [None]:
# IV fit of the three-policy specification for each outcome. Rows containing
# 'fit' are put in the 'slopes' facet row, the rest in 'intercept'.
# `iv = T` reached get_coefs2() only through partial matching to `ivmod`; the
# argument is now spelled out and uses TRUE.
foreach(dv = dvs, .combine = rbind) %do%
    get_coefs2(dv, ' + p1sdp + p2shp + p3rop', ivmod = TRUE) %>%
    mutate(fac = ifelse(str_detect(var, '(fit)'), 'slopes', 'intercept')) %>%
    ggplot(aes(x = var, y = estimate, color = dv)) +
    geom_hline(aes(yintercept = 0), linetype = 2) +
    geom_point(position = position_dodge(width = .5), size = 4) +
    geom_linerange(aes(ymin = estimate - 1.98 * se, ymax = estimate + 1.98 * se),
                   position = position_dodge(width = .5), size = 1) +
    facet_grid(fac ~ ., scales = 'free')

In [None]:
# Inspect the p3ropDSS rows of model2.
coefs %>%
    filter(str_detect(var, 'DSS'),
           model == 'model2',
           var == 'p3ropDSS')

In [None]:
# Display the current coefficient table.
coefs

In [None]:
# Inline plot size for the notebook.
options(repr.plot.width=8.28, repr.plot.height=3.8)

# For each date and outcome, take the n-weighted average across keys
# (weights n / sum(n)), standardize each outcome's series to mean 0 and sd 1,
# and plot the series over time.
df %>%
    select(key, date, mcbgv, pnchd, pgt1hafh, ppthlt75, pgt2kmt) %>%
    # library(tidyr) only appears in a later cell, so the call is qualified
    # to keep this cell independent of execution order.
    tidyr::gather(key = 'var', value = value, -key, -date) %>%
    # `key` is the only column the two sides share; naming it avoids the
    # implicit-join message.
    left_join(df %>% select(key, n) %>% distinct(), by = 'key') %>%
    group_by(date, var) %>%
    summarize(value = sum(value * n/sum(n))) %>%
    group_by(var) %>%
    mutate(value = (value - mean(value))/sd(value)) %>%
    ggplot(aes(x = date, y = value, color = var)) +
    geom_line() +
    scale_x_date(date_breaks = '1 month') +
    xlab('') +
    ylab('') +
    labs(color = "Outcome") +
    theme_light() +
    theme(text = element_text(size=15),
          strip.text.y = element_blank(),
          legend.position = 'bottom')

In [None]:
library(tidyr)

In [None]:
# Inline plot size for the notebook.
options(repr.plot.width = 10, repr.plot.height = 5)

# Split keys into five groups by their summed p3rop, then plot the daily mean
# of each log outcome per group for dates before 2020-03-01.
df %>%
    select(key, p3rop) %>%
    group_by(key) %>%
    summarize(n = sum(p3rop)) %>%
    mutate(rq = ntile(n, 5)) %>%
    select(-n) %>%
    left_join(df) %>%
    select(rq, date, log_mcbgv, log_pnchd, log_pgt1hafh, log_ppthlt75, log_pgt2kmt) %>%
    filter(date < as.Date('2020-03-01')) %>%
    group_by(rq, date) %>%
    summarize_all(mean) %>%
    gather(key = 'dv', value = 'value', -rq, -date) %>%
    ggplot(aes(x = date, y = value, color = as.factor(rq))) +
    geom_line(size = .75) +
    scale_x_date(expand = c(0.01, 0.01)) +
    facet_grid(dv ~ ., scales = 'free_y') +
    labs(x = '', y = '', color = "Outcome") +
    theme_light() +
    theme(text = element_text(size = 12),
          legend.position = 'None')

In [None]:
# Display the panel data frame.
df 

In [None]:
#' Fit one fixed-effects regression with `felm()` and return its coefficients.
#'
#' Reads two globals: `df` (estimation data, weighted by `df$n`) and `cns`
#' (column names of `df`). Every column of `cns` matching
#' `^[pt][rm][ca][px][01][0-9]$` is added to the regressors.
#'
#' @param dv    Name of the dependent-variable column.
#' @param form  Extra regressors appended after those columns, as a string
#'              that starts with ' + '.
#' @param ivmod If TRUE, add an IV term in which the alter version of `dv` is
#'              instrumented with the matching `alter_*` columns.
#' @param ...   Passed on to `felm()`.
#' @return The coefficient matrix as a data.frame, keeping felm's own column
#'         names, plus columns var, dv and iv. Rows whose name matches
#'         `[pt][rm][ca][px]` are dropped.
get_coefs <- function(dv, form, ivmod = FALSE, ...) {
    use_iv <- isTRUE(as.logical(ivmod))

    lhs <- str_c(dv, ' ~ ')
    weather <- str_c(cns[str_detect(cns, '^[pt][rm][ca][px][01][0-9]$')], collapse = ' + ')
    # felm formula parts: regressors | fixed effects | IV part | cluster.
    rhs <- str_c(weather,
                 form,
                 '| key + date | ')
    if (use_iv) {
        instruments <- str_c(cns[str_detect(cns, '^alter_[pt][rm][ca][px][01][0-9]$')], collapse = ' + ')
        # Endogenous regressor: 'log_x' -> 'log_alter_x', otherwise 'alter_x'.
        endog <- if (str_detect(dv, 'log')) {
            str_replace(dv, 'log', 'log_alter')
        } else {
            str_c('alter_', dv)
        }
        rhs <- str_c(rhs, '(', endog, ' ~ ', instruments, ') | cluster')
    } else {
        rhs <- str_c(rhs, '0 | cluster')
    }
    # Use a separate name so the `form` argument is not overwritten.
    model_formula <- as.formula(str_c(lhs, rhs))
    fit <- felm(model_formula, df, weights = df$n, ...)

    as.data.frame(summary(fit)$coefficients) %>%
        mutate(var   = rownames(.),
               dv    = dv,
               iv    = ivmod) %>%
        filter(!str_detect(var, '[pt][rm][ca][px]'))
}

In [None]:
# IV fit of the policy + stalter-policy specification for each outcome,
# stacked row-wise into `temp`.
form <- ' + p1sdp + p2shp + p3rop + stalter_p1sdp + stalter_p2shp + stalter_p3rop'
temp <- foreach(dv = dvs, .combine = rbind) %do%
    get_coefs(dv, form, ivmod = TRUE)

In [None]:
# Show estimates and clustered standard errors multiplied by 100.
temp %>%
    rename(estimate = Estimate,
           se       = `Cluster s.e.`) %>%
    mutate(estimate = estimate * 100,
           se       = se * 100)

In [None]:
# Same display as the previous cell: estimates and clustered standard errors
# multiplied by 100.
temp %>%
    rename(estimate = Estimate,
           se       = `Cluster s.e.`) %>%
    mutate(estimate = estimate * 100,
           se       = se * 100)

In [None]:
# Inline plot size for the notebook (tall: one facet row per cluster).
options(repr.plot.width=10, repr.plot.height=50)

# From 2020-03-01 on, compute the n-weighted share of p2shp, p3rop and their
# alter_* counterparts per cluster and date, and plot each as a line.
df %>%
    filter(date >= as.Date('2020-03-01')) %>%
    group_by(cluster, date) %>%
    mutate(p = n/sum(n)) %>%
    summarize(p2 = sum(p2shp * p),
              p3 = sum(p3rop * p),
              a2 = sum(alter_p2shp * p),
              a3 = sum(alter_p3rop * p)) %>%
    # library(tidyr) only appears in a later cell, so the call is qualified
    # to keep this cell independent of execution order.
    tidyr::gather(key = key, value = value, a2, a3, p2, p3) %>%
    ggplot(aes(x = date, y = value, color = key)) +
    geom_line() +
    facet_grid(cluster~.)
    

In [None]:
library(tidyr)

In [33]:
# List the distinct values of `key`.
df %>%
    distinct(key)

key
<chr>
01001
01003
01005
01007
01009
01011
01013
01015
01017
01019
