In [8]:
import csv
import pandas as pd
import numpy as np
from os import listdir
import re
from datetime import date

# Read in stringency index data.
# Only rows whose sixth field is 'NAT_TOTAL' are kept; for each of them the
# third field becomes the key and every field from the seventh onward becomes
# the value series (still as the raw strings read from the CSV).
with open('Data/stringency_index_avg.csv', newline='') as stringency_file:
    records = csv.reader(stringency_file, delimiter=',', quotechar='"')
    next(records, None)  # skip the headers
    stringency_dic = {
        record[2]: record[6:]
        for record in records
        if record[5] == 'NAT_TOTAL'
    }

# One column per key of stringency_dic, one row per position in the series.
# Align name for 'Slovakia' with comix name.
stringency_table = pd.DataFrame(data=stringency_dic).rename(
    columns={'Slovak Republic': 'Slovakia'}
)


In [9]:
# Reference day: convertDate returns offsets counted from this date.
start = date(2020,1,1)

# Compute number of days since 1/1/2020
def convertDate(survey_date):
    """Convert a survey date string to the number of days since ``start``.

    Parameters
    ----------
    survey_date : str, float or None
        Date text with the year at positions 0-3, the month at positions 5-6
        and the day at positions 8-9, e.g. ``'2020-03-24'`` or
        ``'2020.03.24'``. The characters at positions 4 and 7 are ignored, so
        any single-character separator works. ``None`` or a value whose text
        form is ``'nan'`` is treated as a missing date.

    Returns
    -------
    int or None
        Day offset from ``start`` (0 for ``start`` itself, negative for
        earlier dates), or ``None`` when the date is missing or shorter than
        10 characters. Because 0 is a valid result, callers must test the
        result with ``is None`` rather than by truthiness.

    Raises
    ------
    ValueError
        If the sliced year/month/day fields are not integers or do not form
        a valid calendar date.
    """
    # Missing values: None used to fall through to len() and raise TypeError.
    if survey_date is None or str(survey_date)=='nan':
        return None

    # Too short to contain the year, month and day fields sliced below.
    if len(survey_date) < 10:
        print('Invalid date format')
        return None

    year = int(survey_date[0:4])
    month = int(survey_date[5:7])
    day = int(survey_date[8:10])

    return (date(year, month, day) - start).days


# Compute average stringency across all survey days
def getAvgStringency(country,dates):
    """Return the mean stringency for ``country`` over the given survey dates.

    Parameters
    ----------
    country : str
        Column label in ``stringency_table``.
    dates : iterable
        Survey dates in the format accepted by ``convertDate``. Entries for
        which ``convertDate`` returns ``None`` are skipped.

    Returns
    -------
    float
        ``np.mean`` of the looked-up values. If no date could be converted the
        list is empty and ``np.mean`` returns ``nan`` (with a RuntimeWarning).
    """
    stringency_vals = []

    for d in dates:

        d_converted = convertDate(d)

        # Compare against None explicitly: convertDate returns 0 for
        # 1/1/2020, which is a valid day offset but is falsy, so a bare
        # truth test would silently drop that day from the average.
        if d_converted is not None:
            # Table cells are read from CSV as text, hence the float() cast.
            stringency_vals.append(float(stringency_table[country][d_converted]))

    return np.mean(stringency_vals)

# Compute all stringency values and dates across all survey days
def getStringency(country,dates):
    """Look up the stringency value for ``country`` on each survey date.

    Parameters
    ----------
    country : str
        Column label in ``stringency_table``.
    dates : iterable
        Survey dates in the format accepted by ``convertDate``.

    Returns
    -------
    tuple(list, list)
        ``(stringency_vals, dates_out)``. ``dates_out`` has one entry per
        input date: the day offset, or ``None`` when the date could not be
        converted. ``stringency_vals`` holds one float per *converted* date,
        in input order, so it is shorter than ``dates_out`` whenever any date
        is missing or invalid.
    """
    stringency_vals = []

    dates_out = []

    for d in dates:

        d_converted = convertDate(d)

        dates_out.append(d_converted)

        # Compare against None explicitly: convertDate returns 0 for
        # 1/1/2020, which is a valid day offset but is falsy, so a bare
        # truth test would record the date without its stringency value.
        if d_converted is not None:
            # Table cells are read from CSV as text, hence the float() cast.
            stringency_vals.append(float(stringency_table[country][d_converted]))

    return (stringency_vals, dates_out)


In [10]:
## UK STRINGENCY INVESTIGATION

# Assign stringency value to each participant and contact based on intervention stringency on survey day

comix_files = ['Data/United Kingdom (C)']


for file in comix_files:

    # Country name = folder name without the leading 'Data/' and the trailing
    # ' (C)'. It is passed to getStringency, which uses it as the column label
    # in stringency_table, so each folder gets its own country's values
    # (previously 'United Kingdom' was hard-coded and this name was unused).
    country = file[5:-4]

    # get sday filename (first directory entry matching the pattern)
    dir_contents = listdir(file)
    r = re.compile(".*sday.csv")
    filename = file + '/' + list(filter(r.match, dir_contents))[0]

    data_day = pd.read_csv(filename)

    # The contact and participant tables both live in the Processed folder,
    # so its listing is read once and reused for both lookups.
    proc = file + '/Processed'
    dir_contents = listdir(proc)

    r = re.compile(".*contact_common.csv")
    filename = proc + '/' + list(filter(r.match, dir_contents))[0]

    data_contacts = pd.read_csv(filename)

    # Right join: keep every contact row and attach the survey-day columns of
    # the matching participant.
    merged_contacts = pd.merge(data_day, data_contacts, on="part_id", how='right')

    data_si_contacts = getStringency(country, merged_contacts['sday_id'])

    # Assigning a list to a column needs exactly one value per row.
    # NOTE(review): assumes each part_id occurs once in the sday file and that
    # every sday_id converts to a date; otherwise the lengths differ and
    # pandas raises ValueError - TODO confirm against the data.
    data_contacts['si'] = data_si_contacts[0]

    r = re.compile(".*participant_common.csv")
    filename = proc + '/' + list(filter(r.match, dir_contents))[0]

    data_parts = pd.read_csv(filename)

    # Same right join for the participant table.
    merged_parts = pd.merge(data_day, data_parts, on="part_id", how='right')

    data_si_parts = getStringency(country, merged_parts['sday_id'])

    data_parts['si'] = data_si_parts[0]
    

In [15]:
# Filter survey into contact events occurring on survey days with intervention stringency between the split values

splits = [40,55,70]

filtered_dir = 'Data/United Kingdom (C)/Filtered/'


def _split_by_stringency(frame, cuts):
    """Partition ``frame`` into four bands of its 'si' column.

    The bands are ``si <= cuts[0]``, ``cuts[0] < si <= cuts[1]``,
    ``cuts[1] < si <= cuts[2]`` and ``si > cuts[2]``. Each resulting frame has
    the 'si' column removed and 'part_id' set as its index. Returns a dict
    keyed by band name, ordered from 'Weakest' to 'Strongest'.
    """
    si = frame['si']
    band_masks = {
        'Weakest': si <= cuts[0],
        'Weak': (si > cuts[0]) & (si <= cuts[1]),
        'Strong': (si > cuts[1]) & (si <= cuts[2]),
        'Strongest': si > cuts[2],
    }
    return {
        band: frame.loc[mask].drop('si', axis=1).set_index('part_id')
        for band, mask in band_masks.items()
    }


# Contact events, split by the stringency on the survey day
contact_bands = _split_by_stringency(data_contacts, splits)
weakest_contacts = contact_bands['Weakest']
weak_contacts = contact_bands['Weak']
strong_contacts = contact_bands['Strong']
strongest_contacts = contact_bands['Strongest']

# One file per band, written in order from weakest to strongest
for band, subset in contact_bands.items():
    subset.to_csv(filtered_dir + band + '/CoMix_uk_contact_common.csv', na_rep='NA')

# Participants, split the same way
part_bands = _split_by_stringency(data_parts, splits)
weakest_parts = part_bands['Weakest']
weak_parts = part_bands['Weak']
strong_parts = part_bands['Strong']
strongest_parts = part_bands['Strongest']

for band, subset in part_bands.items():
    subset.to_csv(filtered_dir + band + '/CoMix_uk_participant_common.csv', na_rep='NA')