In [1]:
library(dplyr)
library(tidyr)
library(ggsci)
library(readr)
library(lfe)
library(stringr)
library(ggplot2)
library(doMC)
# Register the doMC parallel backend used by foreach's %dopar% with 4 cores.
registerDoMC(cores = 4)


Attaching package: ‘dplyr’


The following objects are masked from ‘package:stats’:

    filter, lag


The following objects are masked from ‘package:base’:

    intersect, setdiff, setequal, union


Loading required package: Matrix


Attaching package: ‘Matrix’


The following objects are masked from ‘package:tidyr’:

    expand, pack, unpack


Loading required package: foreach

Loading required package: iterators

Loading required package: parallel



In [3]:
# Load the two processed inputs: the 2020 dyadic movement table (via
# data.table's fread) and the policy-period table (via readr), whose columns
# are key, p1sdp, p2shp and p3rop.
df <- data.table::fread('/pool001/mfzhao/PROCESSED_DATA/dyadic_mvmt_2020.csv')
pp <- readr::read_csv(file = '/pool001/mfzhao/PROCESSED_DATA/policyPeriods.csv')

Parsed with column specification:
cols(
  key = col_character(),
  p1sdp = col_date(format = ""),
  p2shp = col_date(format = ""),
  p3rop = col_date(format = "")
)



In [4]:
# Keep only dyads whose origin and destination clusters differ, then add
# 0/1 indicators:
#   oRain / dRain - positive precipitation at origin / destination
#   nearby        - dist below 100 (units not shown here; TODO confirm)
df <- df %>%
    filter(origin_cluster != destination_cluster) %>%
    mutate(
        oRain  = as.numeric(oPRCP > 0),
        dRain  = as.numeric(dPRCP > 0),
        nearby = as.numeric(dist < 100)
    )

In [5]:
# Collapse the policy-period table to one row per distinct
# (policy dates, cluster) combination. The cluster id is the numeric value of
# the first two characters of `key` (presumably a state FIPS prefix - verify).
statePolicy <- pp %>%
    mutate(cluster = as.numeric(str_sub(key, 1, 2))) %>%
    select(-key) %>%
    rename(
        p1date = p1sdp,
        p2date = p2shp,
        p3date = p3rop
    ) %>%
    distinct()

In [18]:
# Build the right-hand-side control terms for the regression formulas by
# pattern-matching column names of `df`:
#   wctrls  - columns matching '[od][pt][rm][ca][px]..'
#   cdctrls - columns matching '[od][acs][st]_'
# Each set is joined with ' + ' and the two are concatenated into `ctrls`.
cns     <- colnames(df)
wctrls  <- str_c(str_subset(cns, '[od][pt][rm][ca][px]..'), collapse = ' + ')
cdctrls <- str_c(str_subset(cns, '[od][acs][st]_'), collapse = ' + ')
ctrls   <- str_c(wctrls, cdctrls, sep = ' + ')

In [33]:
# One placebo draw for Fisher randomization inference on the policy terms.
#
# Shuffles the cluster labels attached to the policy-date rows of the global
# `statePolicy`, rebuilds the origin/destination policy indicators
# (op1-op3, dp1-dp3) in `df` from the shuffled dates, refits both fixed-effect
# models and returns the policy coefficients.
#
# Reads the globals `statePolicy` (cluster, p1date, p2date, p3date) and
# `ctrls` (control terms joined with ' + ').
#
# Args:
#   df: dyad-by-date data containing origin_cluster, destination_cluster,
#       date, dyad, ndotd, pdotd, n and every column named in `ctrls`.
#
# Returns:
#   A data frame of the felm coefficient-table rows whose term name matches
#   '[od]p[123]', for both outcomes, with columns `estimate`, `se`, `var`
#   (term name) and `dv` ('ndotd' or 'pdotd') plus the remaining summary
#   columns.
fri <- function(df) {
    # Permute which cluster each set of policy dates belongs to.
    swap <- statePolicy %>%
        mutate(cluster = sample(cluster, n(), replace = FALSE))

    # The joins must use the shuffled table. Joining on `statePolicy` itself
    # would reproduce the true assignment, making every draw identical to the
    # baseline fit.
    origin_dates <- swap %>%
        rename(origin_cluster = cluster,
               odate1 = p1date,
               odate2 = p2date,
               odate3 = p3date)
    dest_dates <- swap %>%
        rename(destination_cluster = cluster,
               ddate1 = p1date,
               ddate2 = p2date,
               ddate3 = p3date)

    temp <- df %>%
        # Drop the existing policy indicators before rebuilding them.
        select(-matches('[od]p[123]')) %>%
        left_join(origin_dates, by = 'origin_cluster') %>%
        left_join(dest_dates, by = 'destination_cluster') %>%
        # Indicator is 1 on and after the corresponding policy date.
        mutate(op1 = as.numeric(difftime(date, odate1, units = 'day') >= 0),
               op2 = as.numeric(difftime(date, odate2, units = 'day') >= 0),
               op3 = as.numeric(difftime(date, odate3, units = 'day') >= 0),
               dp1 = as.numeric(difftime(date, ddate1, units = 'day') >= 0),
               dp2 = as.numeric(difftime(date, ddate2, units = 'day') >= 0),
               dp3 = as.numeric(difftime(date, ddate3, units = 'day') >= 0)) %>%
        # A missing p2 date yields NA; treat it as "policy never in effect"
        # (presumably clusters without that order - confirm).
        replace_na(list(op2 = 0, dp2 = 0)) %>%
        select(-matches('[od]date'))

    f1 <- str_c('log(ndotd) ~ op1 + op2 + op3 + dp1 + dp2 + dp3 + ', ctrls,
                ' | dyad + date | 0 | origin_cluster + destination_cluster')
    f2 <- str_c('log(pdotd) ~ op1 + op2 + op3 + dp1 + dp2 + dp3 + ', ctrls,
                ' | dyad + date | 0 | origin_cluster + destination_cluster')

    fit1 <- felm(as.formula(f1), temp)
    fit2 <- felm(as.formula(f2), temp, weights = temp$n)

    # Coefficient table of one fit, tagged with term names and outcome label.
    tidy_coefs <- function(fit, dv_name) {
        as.data.frame(summary(fit)$coefficients) %>%
            mutate(var = rownames(.),
                   dv  = dv_name)
    }

    out <- bind_rows(tidy_coefs(fit1, 'ndotd'),
                     tidy_coefs(fit2, 'pdotd')) %>%
        rename(estimate = Estimate, se = `Cluster s.e.`) %>%
        filter(str_detect(var, '[od]p[123]'))

    return(out)
}

# Run 500 placebo draws in parallel and stack the per-draw coefficient tables.
fri_results <- foreach(i = seq_len(500), .combine = rbind) %dopar% fri(df)

In [20]:
# Baseline (non-permuted) fits on `df`: the same two specifications used in
# the placebo draws, with dyad and date fixed effects and standard errors
# clustered by origin and destination cluster. The second model is weighted
# by `n`.
f1 <- str_c(
    'log(ndotd) ~ op1 + op2 + op3 + dp1 + dp2 + dp3 + ', ctrls,
    ' | dyad + date | 0 | origin_cluster + destination_cluster'
)
f2 <- str_c(
    'log(pdotd) ~ op1 + op2 + op3 + dp1 + dp2 + dp3 + ', ctrls,
    ' | dyad + date | 0 | origin_cluster + destination_cluster'
)

fit1 <- felm(as.formula(f1), data = df)
fit2 <- felm(as.formula(f2), data = df, weights = df$n)

# Stack both coefficient tables, label each row with its term and outcome,
# and keep only the policy terms.
base <- bind_rows(
    as.data.frame(summary(fit1)$coefficients) %>%
        mutate(var = rownames(.), dv = 'ndotd'),
    as.data.frame(summary(fit2)$coefficients) %>%
        mutate(var = rownames(.), dv = 'pdotd')
) %>%
    rename(estimate = Estimate, se = `Cluster s.e.`) %>%
    filter(str_detect(var, '[od]p[123]'))

“Negative eigenvalues set to zero in multiway clustered variance matrix. See felm(...,psdef=FALSE)”
“Negative eigenvalues set to zero in multiway clustered variance matrix. See felm(...,psdef=FALSE)”
“the matrix is either rank-deficient or indefinite”
“the matrix is either rank-deficient or indefinite”


In [None]:
# Facet labels: policy terms (columns) and outcomes (rows).
vns <- c('dp1'   = "Dest:(ip)",
         'dp2'   = "Dest:(sh)",
         'dp3'   = "Dest:(ro)",
         'op1'   = 'Ori:(ip)',
         'op2'   = 'Ori:(sh)',
         'op3'   = 'Ori:(ro)',
         'pdotd' = 'log_pdotd',
         'ndotd' = 'log_ndotd')

options(repr.plot.width = 15, repr.plot.height = 10)

# Histogram of the placebo estimates per outcome and policy term, with the
# baseline estimate from `base` drawn as a vertical reference line in the
# matching facet.
p <- fri_results %>%
    # NOTE(review): the placebo estimates are scaled by 0.8 while the baseline
    # reference line is not; no justification is visible here - confirm this
    # is intended before using the figure.
    mutate(estimate = .8 * estimate) %>%
    ggplot(aes(x = estimate, fill = dv)) +
    geom_histogram(bins = 40) +
    # Map the line position to the `estimate` column of `base`; `base` has no
    # column named `base`, so the previous mapping pointed at the data frame.
    geom_vline(aes(xintercept = estimate), data = base) +
    facet_grid(dv ~ var, scales = 'free', labeller = as_labeller(vns)) +
    theme_light() +
    scale_fill_d3() +
    theme(text = element_text(size = 20),
          legend.position = 'bottom')

ggsave('/home/mfzhao/SI_plots/rc3b_fri_dyad.pdf', p, device = 'pdf', width = 6.5, height = 6.5, scale = 2)