In [11]:
library(dplyr)
library(tidyr)
library(readr)
library(stringr)
library(sf)

In [50]:
# Print the default locale object (number format, date/time formats, timezone,
# encoding and date names) so the parsing defaults are visible in the notebook.
locale()

<locale>
Numbers:  123,456.78
Formats:  %AD / %AT
Timezone: UTC
Encoding: UTF-8
<date_names>
Days:   Sunday (Sun), Monday (Mon), Tuesday (Tue), Wednesday (Wed), Thursday
        (Thu), Friday (Fri), Saturday (Sat)
Months: January (Jan), February (Feb), March (Mar), April (Apr), May (May),
        June (Jun), July (Jul), August (Aug), September (Sep), October
        (Oct), November (Nov), December (Dec)
AM/PM:  AM/PM

In [12]:
# Root directory for every input read and output written by this notebook.
fp <- '/pool001/mfzhao/'

# Country-code crosswalk; later cells use its iso2 and iso3 columns.
gec_to_iso <- read_csv(str_c(fp, 'geo_data/gec_to_iso.csv'))

# European region table: the stored map object coerced to a plain data frame
# with its geometry column removed.
eu_data <- select(
  as.data.frame(read_rds(str_c(fp, 'geo_data/europe_LRmap.RDS'))),
  -geometry
)

# US county lookup, one distinct row per county. `fips` is converted from the
# parsed numeric column to character so it can be used as a join key.
fips <- str_c(fp, 'geo_data/county_fips_master.csv') %>%
  read_csv() %>%
  transmute(fips = as.character(fips), county_name, state_abbr, state_name) %>%
  distinct()

# Raw policy records in long format. The column types listed here are forced
# explicitly; every other column is left to readr's type guessing.
policy <- read_csv(
  str_c(fp, 'policy/hit-covid-longdata.csv'),
  col_types = cols(
    locality         = col_character(),
    reduced_capacity = col_character(),
    symp_screening   = col_character(),
    usa_county       = col_character(),
    usa_county_code  = col_character(),
    duration         = col_double()
  )
)

# ISO3 codes of the countries kept in the analysis.
countries <- data.frame(
  country = c(
    'AUT', 'BEL', 'BGR', 'HRV', 'CYP', 'CZE', 'DNK', 'EST', 'FIN', 'FRA',
    'DEU', 'IRL', 'ITA', 'LVA', 'LIE', 'LTU', 'LUX', 'MLT', 'NLD', 'NOR',
    'POL', 'PRT', 'ROU', 'SVK', 'SVN', 'ESP', 'SWE', 'CHE', 'GBR', 'USA'
  ),
  stringsAsFactors = FALSE
)

Parsed with column specification:
cols(
  Country = col_character(),
  gec = col_character(),
  iso2 = col_character(),
  iso3 = col_character()
)

Parsed with column specification:
cols(
  fips = col_double(),
  county_name = col_character(),
  state_abbr = col_character(),
  state_name = col_character(),
  long_name = col_character(),
  sumlev = col_double(),
  region = col_double(),
  division = col_double(),
  state = col_double(),
  county = col_double(),
  crosswalk = col_character(),
  region_name = col_character(),
  division_name = col_character()
)



In [16]:
# Keep only policy rows whose country appears in `countries`.
policy <- inner_join(policy, countries)

# US rows keep their original column names.
policy_us <- filter(policy, country == 'USA')

# Every other country: the county-code column is reused as a generic region
# `key`.
policy_eu <- rename(filter(policy, country != 'USA'), key = usa_county_code)

Joining, by = "country"



In [48]:
# County-level rows for US policies that were recorded against a named locality.
#
# 1. Rows with a locality get a county name and code from the hand-maintained
#    lookup below; localities not listed keep whatever the data already had.
# 2. Records 145/146 and record 40 each cover several counties, so they are
#    removed from the main set and re-added once per county they cover.
# 3. Only the columns needed downstream are kept, tagged with the policy level.
#
# Codes are written without leading zeros (e.g. '1077'); the final key is
# left-padded to five characters when the combined policy table is built.
# NOTE(review): 'Rare County' is presumably a data-entry typo for Dare County,
# and '1077' presumably refers to Lauderdale County, AL -- confirm against the
# source records.
policy_us_local <- policy_us %>%
  filter(!is.na(locality)) %>%
  mutate(
    usa_county = case_when(
      locality == 'Atlanta'           ~ 'Fulton County',
      locality == 'City of Jackson'   ~ 'Hinds County',
      locality == 'Galveston County'  ~ 'Galveston County',
      locality == 'Graham County'     ~ 'Graham County',
      locality == 'Hays County'       ~ 'Hays County',
      locality == 'Lauderdale County' ~ 'Lauderdale County',
      locality == 'Rare County'       ~ 'Dare County',
      TRUE                            ~ usa_county
    ),
    usa_county_code = case_when(
      locality == 'Atlanta'           ~ '13121',
      locality == 'City of Jackson'   ~ '28049',
      locality == 'Galveston County'  ~ '48167',
      locality == 'Graham County'     ~ '37075',
      locality == 'Hays County'       ~ '48209',
      locality == 'Lauderdale County' ~ '1077',
      locality == 'Rare County'       ~ '37055',
      TRUE                            ~ usa_county_code
    )
  ) %>%
  filter(record_id != 145, record_id != 146, record_id != 40) %>%
  bind_rows(
    # uncount() repeats each row consecutively, so cycling the county vector
    # over the result assigns every original row to every county. rep_len()
    # sizes the vector from the actual row count instead of a hard-coded
    # repeat factor that is only right for one particular number of rows.
    policy_us %>%
      filter(record_id == 145 | record_id == 146) %>%
      uncount(2) %>%
      mutate(
        usa_county = rep_len(
          c('Fairbanks North Star Borough', 'Ketchikan Gateway Borough'),
          n()
        ),
        usa_county_code = rep_len(c('2090', '2130'), n())
      ),
    # Record 40 is expanded to these five New York counties.
    policy_us %>%
      filter(record_id == 40) %>%
      uncount(5) %>%
      mutate(
        usa_county = rep_len(
          c('Bronx County', 'Kings County', 'New York County',
            'Queens County', 'Richmond County'),
          n()
        ),
        usa_county_code = rep_len(
          c('36005', '36047', '36061', '36081', '36085'),
          n()
        )
      )
  ) %>%
  select(country, usa_county_code, intervention_group, date_of_update,
         status_simp) %>%
  mutate(policy_level = 'us_county')

# Policies recorded directly against a county: no locality, but a county name.
policy_us_county <- policy_us %>%
  filter(is.na(locality), !is.na(usa_county)) %>%
  transmute(
    country,
    usa_county_code,
    intervention_group,
    date_of_update,
    status_simp,
    policy_level = 'us_county'
  )

# State-level policies (neither locality nor county recorded) copied onto every
# county of the state by matching the county table's state name to admin1_name.
# The left join keeps counties whose state has no such policy; their policy
# columns are NA.
policy_us_state <- fips %>%
  select(state_name, state_abbr, fips) %>%
  left_join(
    filter(policy_us, is.na(locality), is.na(usa_county)),
    by = c('state_name' = 'admin1_name')
  ) %>%
  transmute(
    country,
    usa_county_code = as.character(fips),
    intervention_group,
    date_of_update,
    status_simp,
    policy_level = 'us_state'
  )

# National entries copied onto every county: each county row is tagged with
# country 'USA' and joined to the rows flagged national_entry == 'Yes'.
policy_us_national <- fips %>%
  mutate(country = 'USA') %>%
  left_join(filter(policy_us, national_entry == 'Yes')) %>%
  transmute(
    country,
    usa_county_code = as.character(fips),
    intervention_group,
    date_of_update,
    status_simp,
    policy_level = 'us_national'
  )

# Stack the four US policy levels, rename the county code to the generic `key`
# and drop exact duplicate rows. This replaces the raw `policy_us` table.
policy_us <- distinct(
  rename(
    bind_rows(policy_us_local, policy_us_county, policy_us_state,
              policy_us_national),
    key = usa_county_code
  )
)

Joining, by = "country"



In [30]:
# Diagnostic: US rows that have no admin1 name and are not national entries.
# The query reads from the raw `policy` table because, by this point in the
# file, `policy_us` has been replaced by the combined table that no longer
# carries the admin1_name and national_entry columns.
policy %>%
  filter(country == 'USA', is.na(admin1_name), national_entry == 'No')

“number of rows of result is not a multiple of vector length (arg 2)”
“number of rows of result is not a multiple of vector length (arg 2)”
“number of rows of result is not a multiple of vector length (arg 2)”
“number of rows of result is not a multiple of vector length (arg 2)”


unique_id,record_id,update,entry_time,national_entry,country,country_name,admin1,admin1_name,locality,⋯,reduced_capacity,symp_screening,enforcement,size,duration,testing_population,details,url,source_document_url,entry_quality
<chr>,<dbl>,<chr>,<dttm>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,⋯,<chr>,<chr>,<chr>,<dbl>,<dbl>,<chr>,<chr>,<chr>,<chr>,<chr>


In [5]:
# Locality-level European policies mapped to a region key.
#
# Rows with a locality get a key from the hand-maintained lookup below;
# localities not listed keep their existing key, and rows still without a key
# are dropped. Record 2528 covers two regions, so it is removed from the main
# set and re-added once per region.
policy_eu_local <- policy_eu %>%
  filter(!is.na(locality)) %>%
  mutate(
    key = case_when(
      locality == 'Tandarei'            ~ 'RO315',
      str_detect(locality, 'Bertonico') ~ 'ITC49',
      locality == 'Istarska (county)'   ~ 'HR036',
      locality == 'Isle of Wight'       ~ 'UKJ34',
      locality == 'Oise'                ~ 'FRE22',
      # Was 'FREH04', which is six characters and not a valid NUTS-3 code;
      # FRH04 is Morbihan in the same code scheme as the other French keys.
      locality == 'Morbihan'            ~ 'FRH04',
      locality == 'Ajaccio'             ~ 'FRM01',
      TRUE                              ~ key
    )
  ) %>%
  filter(record_id != 2528) %>%
  bind_rows(
    # uncount() repeats each row consecutively, so cycling the two keys over
    # the result gives every original row both keys. rep_len() sizes the
    # vector from the actual row count rather than a hard-coded repeat factor.
    policy_eu %>%
      filter(record_id == 2528) %>%
      uncount(2) %>%
      mutate(key = rep_len(c('FRF11', 'FRF12'), n()))
  ) %>%
  filter(!is.na(key)) %>%
  mutate(policy_level = 'eu_local') %>%
  select(country, key, intervention_group, date_of_update, status_simp,
         policy_level)

#' List the NUTS level-3 regions whose ID matches a pattern.
#'
#' Reads the global `eu_data` table.
#'
#' @param nm Value written to the `admin1_name` column of every returned row.
#' @param ptrn Regular expression tested against `NUTS_ID` with `str_detect()`.
#' @return The rows of `eu_data` with `LEVL_CODE == 3` and a matching
#'   `NUTS_ID`, reduced to the columns `NUTS_ID` and `admin1_name`.
getNUTS3 <- function(nm, ptrn) {
  level3_matches <- filter(eu_data, str_detect(NUTS_ID, ptrn), LEVL_CODE == 3)
  mutate(select(level3_matches, NUTS_ID), admin1_name = nm)
}

# Admin1-level European policies copied onto every NUTS-3 region of the admin1
# unit.
#
# Each getNUTS3() call pairs an admin1 name, as it must appear in the policy
# data's admin1_name column, with a regex selecting that unit's NUTS-3 IDs.
# The resulting table is joined to the policy rows that have an admin1 name but
# no locality.
#
# Names with diacritics are written with the real characters; an ASCII
# transliteration such as 'Z"urich' can never equal the accented name.
# NOTE(review): assumes admin1_name in the policy file uses these accented
# spellings -- confirm against the data.
#
# The Italian patterns accept both the older ITD/ITE prefixes and the ITH/ITI
# prefixes that replaced them, so they match whichever NUTS version eu_data
# carries.
policy_eu_admin1 <- bind_rows(
  getNUTS3('Basel-Stadt', '^CH031'),
  getNUTS3('Vaud', '^CH011'),
  getNUTS3('Valais', '^CH012'),
  getNUTS3('Zürich', '^CH040'),
  getNUTS3('Aargau', '^CH033'),
  getNUTS3('Appenzell Innerrhoden', '^CH054'),
  getNUTS3('Bern', '^CH021'),
  getNUTS3('Genève', '^CH013'),
  getNUTS3('Jura', '^CH025'),
  getNUTS3('Neuchâtel', '^CH024'),
  getNUTS3('Baden-Württemberg', '^DE1'),
  getNUTS3('Berlin', '^DE3'),
  getNUTS3('Brandenburg', '^DE4'),
  getNUTS3('Sachsen', '^DED'),
  getNUTS3('Midtjylland', '^DK04'),
  getNUTS3('Nordjylland', '^DK05'),
  getNUTS3('Syddanmark', '^DK03'),
  getNUTS3('La Rioja', '^ES230'),
  getNUTS3('Región de Murcia', '^ES620'),
  getNUTS3('País Vasco', '^ES21'),
  getNUTS3('Galicia', '^ES11'),
  getNUTS3('Comunidad de Madrid', '^ES300'),
  getNUTS3('England', '^UK[CDEFGHIJK]'),
  getNUTS3('Northern Ireland', '^UKN'),
  getNUTS3('Scotland', '^UKM'),
  getNUTS3('Wales', '^UKL'),
  getNUTS3('Carlow', '^IE052'),
  # Cavan and Donegal share one NUTS-3 pattern, as do Cork and Kerry below.
  getNUTS3('Cavan', '^IE041'),
  getNUTS3('Donegal', '^IE041'),
  getNUTS3('Clare', '^IE051'),
  getNUTS3('Cork', '^IE053'),
  getNUTS3('Galway', '^IE042'),
  getNUTS3('Kerry', '^IE053'),
  getNUTS3('Lombardia', '^ITC4'),
  getNUTS3('Abruzzo', '^ITF1'),
  getNUTS3('Liguria', '^ITC3'),
  getNUTS3('Piemonte', '^ITC1'),
  getNUTS3('Trentino-Alto Adige', '^IT[DH][12]'),
  getNUTS3('Friuli-Venezia Giulia', '^IT[DH]4'),
  getNUTS3('Apulia', '^ITF4'),
  getNUTS3('Basilicata', '^ITF5'),
  getNUTS3('Calabria', '^ITF6'),
  getNUTS3('Campania', '^ITF3'),
  getNUTS3('Emilia-Romagna', '^IT[DH]5'),
  getNUTS3('Veneto', '^IT[DH]3'),
  getNUTS3('Toscana', '^IT[EI]1'),
  getNUTS3("Valle d'Aosta", '^ITC2'),
  getNUTS3('Umbria', '^IT[EI]2'),
  getNUTS3('Schellenberg', '^LI000'),
  getNUTS3('Mauren', '^LI000'),
  getNUTS3('Noord-Brabant', '^NL41'),
  getNUTS3('Suceava', '^RO215')
) %>%
  left_join(
    policy_eu %>%
      filter(is.na(locality)) %>%
      filter(!is.na(admin1_name)),
    by = 'admin1_name'
  ) %>%
  # Rows without a key of their own take the NUTS-3 ID as key.
  mutate(key = ifelse(is.na(key), NUTS_ID, key)) %>%
  mutate(policy_level = 'eu_admin1') %>%
  select(country, key, intervention_group, date_of_update, status_simp,
         policy_level)

# National-level European policies copied onto every NUTS-3 region of the
# country.
#
# Countries are translated from ISO3 to the two-letter code used to join on
# eu_data's CNTR_CODE, expanded to their level-3 regions, and joined to the
# rows flagged national_entry == 'Yes'.
policy_eu_national <- countries %>%
  inner_join(gec_to_iso, by = c('country' = 'iso3')) %>%
  select(country, CNTR_CODE = iso2) %>%
  # The NUTS IDs used elsewhere in this file start with 'UK', whereas the ISO
  # two-letter code for GBR is 'GB'. When eu_data has no 'GB' country code,
  # fall back to 'UK' so the United Kingdom is not silently dropped by the
  # join. If eu_data does contain 'GB', nothing changes.
  mutate(CNTR_CODE = ifelse(
    CNTR_CODE == 'GB' & !('GB' %in% eu_data$CNTR_CODE),
    'UK',
    CNTR_CODE
  )) %>%
  inner_join(eu_data, by = 'CNTR_CODE') %>%
  filter(LEVL_CODE == 3) %>%
  select(country, NUTS_ID) %>%
  left_join(policy_eu %>% filter(national_entry == 'Yes'), by = 'country') %>%
  # Rows without a key of their own take the NUTS-3 ID as key.
  mutate(key = ifelse(is.na(key), NUTS_ID, key),
         policy_level = 'eu_national') %>%
  select(country, key, intervention_group, date_of_update, status_simp,
         policy_level)

# Stack the three European policy levels and drop exact duplicate rows. This
# replaces the raw `policy_eu` table.
policy_eu <- distinct(
  bind_rows(policy_eu_local, policy_eu_admin1, policy_eu_national)
)

Joining, by = "admin1_name"

Joining, by = "CNTR_CODE"

Joining, by = "country"



In [6]:
# Combined long table of policy changes for both regions.
# Rows with a missing intervention group, or a missing or 'Unknown' status, are
# dropped. The ISO3 country code is swapped for its two-letter code, and keys
# are left-padded with zeros to a minimum width of five characters.
policy <- bind_rows(policy_eu, policy_us) %>%
  filter(!is.na(intervention_group)) %>%
  filter(!is.na(status_simp)) %>%
  filter(status_simp != 'Unknown') %>%
  inner_join(gec_to_iso, by = c('country' = 'iso3')) %>%
  transmute(
    country = iso2,
    key = str_pad(key, 5, pad = '0'),
    intervention_group,
    date = date_of_update,
    status_simp,
    policy_level
  )

write_csv(policy, str_c(fp, 'PROCESSED_DATA/policy_dates_and_levels.csv'))

In [7]:
# Score each status (strong = 1, partial = 0.5, anything else = 0), keep the
# lowest score per country/key/intervention/date, then spread to one column per
# intervention group and drop the country column.
policy <- policy %>%
  mutate(
    status = (status_simp == 'Strongly Implemented') +
      0.5 * (status_simp == 'Partially Implemented')
  ) %>%
  group_by(country, key, intervention_group, date) %>%
  summarize(status = min(status)) %>%
  spread(intervention_group, status) %>%
  ungroup() %>%
  select(-country)

In [8]:
# Daily panel of policy status: one row per key per day from 2020-01-01 to
# 2020-05-31.
#
# The grid is joined to the dated policy changes, each key's last known status
# is carried forward, and days before a key's first recorded change are set
# to 0.
#
# expand.grid() varies `key` fastest, so within each key the rows are already
# in ascending date order, which is what fill() relies on.
# NOTE(review): policy rows dated outside the grid are not matched by the join,
# so a status set before 2020-01-01 is not carried into the panel -- confirm
# this is intended.
policy <- expand.grid(
  key = unique(policy$key),
  date = seq.Date(as.Date('2020-01-01'), as.Date('2020-05-31'), 'day'),
  stringsAsFactors = FALSE
) %>%
  left_join(policy, by = c('key', 'date')) %>%
  group_by(key) %>%
  fill('closed_border', 'contact_tracing', 'enforcement_deployed',
       'entertainment_closed', 'household_confined', 'limited_mvt', 'mask',
       'nursing_home_closed', 'office_closed', 'public_space_closed',
       'public_transport_closed', 'quar_iso', 'religion_closed',
       'restaurant_closed', 'school_closed', 'social_group_limits',
       'state_of_emergency', 'store_closed', 'symp_screening',
       'testing_asymp', 'testing_symp') %>%
  # Drop the grouping so the saved object is a plain table for later steps.
  ungroup() %>%
  replace_na(list(closed_border = 0, contact_tracing = 0,
                  enforcement_deployed = 0, entertainment_closed = 0,
                  household_confined = 0, limited_mvt = 0, mask = 0,
                  nursing_home_closed = 0, office_closed = 0,
                  public_space_closed = 0, public_transport_closed = 0,
                  quar_iso = 0, religion_closed = 0, restaurant_closed = 0,
                  school_closed = 0, social_group_limits = 0,
                  state_of_emergency = 0, store_closed = 0,
                  symp_screening = 0, testing_asymp = 0, testing_symp = 0))

write_csv(policy, str_c(fp, 'PROCESSED_DATA/policy_full.csv'))

Joining, by = c("key", "date")

