In [1]:
# Import statements
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
from statsmodels.tsa.stattools import acf

In [2]:
# Preprocessing case data
# Load the 2020 and 2021 LTLA case files and keep the same four columns from each
# so that the frames can be stacked.
case_columns = ['date', 'area_code', 'area_name', 'value']
cases21 = pd.read_csv('Data/newCasesPCROnlyBySpecimenDate_ltla_2021.csv')[case_columns]
cases20 = pd.read_csv('Data/newCasesPCROnlyBySpecimenDate_ltla_2020.csv')[case_columns]

# Supplementary extract: its columns are renamed to the schema above, and only rows
# for areas already present in cases20 with 2020-10-01 <= date < 2020-11-01 are kept.
cases20_additional = pd.read_csv('Data/ltla_2023-12-14 (1).csv').rename(
    columns={'newCasesBySpecimenDate': 'value', 'areaCode': 'area_code', 'areaName': 'area_name'}
)
additional_dates = pd.to_datetime(cases20_additional['date'])
in_known_ltla = cases20_additional['area_name'].isin(cases20['area_name'].values)
in_october_2020 = (additional_dates >= '2020-10-01') & (additional_dates < '2020-11-01')
# Select the shared columns explicitly (rather than dropping a fixed list of extras)
# and build the filtered frame in one step, so no column is assigned on a slice of
# another frame (which triggers SettingWithCopyWarning).
cases20_additional = (
    cases20_additional.loc[in_known_ltla & in_october_2020, case_columns]
    .assign(date=additional_dates)
)
cases20 = pd.concat([cases20, cases20_additional]).sort_index()
pop = pd.read_csv('Data/census_estimate.csv')

# Stack both years, index by date, and take a 7-row rolling sum per LTLA.
# NOTE(review): rolling(7) is a window of 7 observations, so it equals a 7-day sum
# only if every LTLA has exactly one row per consecutive date - confirm there are no
# missing or duplicated dates (e.g. October 2020 present in both 2020 sources).
cases = pd.concat([cases20, cases21])
cases['date'] = pd.to_datetime(cases['date'])
cases = cases.set_index('date').sort_index()
weekly_counts_total = cases.groupby(['area_code', 'area_name']).rolling(7).sum().reset_index()

# Convert incidence to a proportion to allow for cross-LTLA comparison
# (inner merge: LTLAs without a row in the population file are dropped).
weekly_counts_total = pd.merge(weekly_counts_total, pop, on='area_code')
weekly_counts_total['incidence'] = weekly_counts_total['value'] / weekly_counts_total['population']

In [3]:
# Build the LTLA -> NHS England region lookup (NHSER_name) used for spatial detrending
npis = (
    pd.read_csv('Data/England_LTLA_NHSER.csv')[['NHSER_name', 'LTLA_name']]
    .rename(columns={'NHSER_name': 'region', 'LTLA_name': 'ltla20nm'})
    .drop_duplicates()
)

# Hand-entered LTLA names and their regions, appended to the lookup.
# NOTE(review): presumably these names are absent from the lookup file - verify.
manual_regions = {
    'Wycombe': 'South East',
    'South Bucks': 'South East',
    'Chiltern': 'South East',
    'Aylesbury Vale': 'South East',
    'Cornwall and Isles of Scilly': 'South West',
    'Hackney and City of London': 'London',
}
manual_dat = pd.DataFrame({
    'ltla20nm': list(manual_regions.keys()),
    'region': list(manual_regions.values()),
})
npis = pd.concat([npis, manual_dat])

In [4]:
# Daily case rows with each LTLA's region attached (matched on the LTLA name)
cases_region = (
    cases.reset_index()
    .merge(npis, left_on='area_name', right_on='ltla20nm')
    .drop(columns='ltla20nm')
)

In [5]:
# Attach the region to the weekly counts for spatial detrending.
# The lookup's name column is only needed as the join key, so it is dropped afterwards.
weekly_counts_total = (
    weekly_counts_total
    .merge(npis, left_on='area_name', right_on='ltla20nm')
    .drop(columns='ltla20nm')
)

In [6]:
# Spatial detrending: subtract the mean incidence of each (region, date) from
# every LTLA's incidence on that date.
regional_mean_total = (
    weekly_counts_total
    .groupby(['region', 'date'])['incidence']
    .mean()
    .reset_index()
    .rename(columns={'incidence': 'regional_mean_incidence'})
)
weekly_counts_total = weekly_counts_total.merge(regional_mean_total, on=['region', 'date'])
weekly_counts_total['detrended'] = weekly_counts_total['incidence'].sub(
    weekly_counts_total['regional_mean_incidence']
)

In [7]:
# Wide layout (one row per date, one column per LTLA) for ease of future calculations
pivot_by_ltla = dict(index='date', columns='area_name')
wide_data_total = weekly_counts_total.pivot(values='detrended', **pivot_by_ltla)
wide_data_incidence = weekly_counts_total.pivot(values='incidence', **pivot_by_ltla)

In [8]:
# Write the case tables and the LTLA -> region lookup to disk
for _frame, _csv_path in (
    (wide_data_incidence, 'Data/wide_incidence.csv'),
    (wide_data_total, 'Data/wide_total.csv'),
    (npis, 'Data/ltla_to_nhs.csv'),
    (weekly_counts_total, 'Data/weekly_total.csv'),
):
    _frame.to_csv(_csv_path)

In [9]:
# Preprocessing hospital admissions data: stack the 2020 and 2021 files,
# index by date, and take a 7-row rolling sum per area.
hosp_columns = ['date', 'area_code', 'area_name', 'value']
hosps21 = pd.read_csv('Data/newAdmissions_nhsRegion_2021.csv')[hosp_columns]
hosps20 = pd.read_csv('Data/newAdmissions_nhsRegion_2020.csv')[hosp_columns]
hosps = pd.concat([hosps20, hosps21])
hosps['date'] = pd.to_datetime(hosps['date'])
hosps = hosps.set_index('date').sort_index()
hosps_weekly = (
    hosps.groupby(['area_code', 'area_name'])
    .rolling(7)
    .sum()
    .reset_index()
)

In [10]:
# Add in the NHS region populations for normalisation

# LTLA -> NHS England region lookup, plus hand-entered LTLA names.
npis = (
    pd.read_csv('Data/England_LTLA_NHSER.csv')[['NHSER_name', 'LTLA_name']]
    .rename(columns={'NHSER_name': 'region', 'LTLA_name': 'ltla20nm'})
    .drop_duplicates()
)
pop = pd.read_csv('Data/census_estimate.csv')
manual_dat = pd.DataFrame({'ltla20nm': ['Wycombe','South Bucks','Chiltern','Aylesbury Vale','Cornwall and Isles of Scilly', 'Hackney and City of London'],
                           'region': ['South East', 'South East', 'South East', 'South East', 'South West', 'London']})
npis = pd.concat([npis, manual_dat])

# One row per LTLA (codes/names taken from the 2020 case file), tagged with its
# region and joined to the population table on their shared column(s).
ltla_pop = (
    pd.read_csv('Data/newCasesPCROnlyBySpecimenDate_ltla_2020.csv')[['area_code', 'area_name']]
    .drop_duplicates()
    .merge(npis, left_on='area_name', right_on='ltla20nm')
    .drop(columns='ltla20nm')
    .merge(pop)
)

# Sum LTLA populations within each region. numeric_only=True skips the string
# columns (area_code, area_name) instead of summing and then dropping them: the
# original sum-then-drop raised KeyError on pandas versions that omit
# non-numeric columns from a groupby sum.
nhs_pop = ltla_pop.groupby('region').sum(numeric_only=True).reset_index()

# In the hospital data, area_name holds the region name, so it is the join key.
hosps_weekly = pd.merge(hosps_weekly, nhs_pop, left_on='area_name', right_on='region').drop(columns='region')
hosps_weekly['incidence'] = hosps_weekly['value'] / hosps_weekly['population']

In [11]:
# Spatial detrending: subtract the per-date mean incidence across all areas
national_mean_total = (
    hosps_weekly.groupby(['date'])['incidence']
    .mean()
    .reset_index()
    .rename(columns={'incidence': 'national_mean_incidence'})
)
hosps_weekly = hosps_weekly.merge(national_mean_total, on='date')
hosps_weekly['detrended'] = hosps_weekly['incidence'].sub(hosps_weekly['national_mean_incidence'])

# Wide layout (one row per date, one column per area) for ease of future calculations
wide_data_hosp_inc = hosps_weekly.pivot(index='date', columns='area_name', values='detrended')

In [12]:
# Preprocessing hospital cases data (the hospitalCases_nhsRegion files)
# Each variable now holds the year its name says; the 2020 and 2021 file names
# were previously swapped between hospcases20 and hospcases21.
hospcases_columns = ['date', 'area_code', 'area_name', 'value']
hospcases20 = pd.read_csv('Data/hospitalCases_nhsRegion_2020.csv')[hospcases_columns]
hospcases21 = pd.read_csv('Data/hospitalCases_nhsRegion_2021.csv')[hospcases_columns]
hospcases = pd.concat([hospcases20, hospcases21])
hospcases['date'] = pd.to_datetime(hospcases['date'])
hospcases = hospcases.set_index('date').sort_index()
# NOTE(review): this is a 7-row rolling *sum*; if hospitalCases is a daily count of
# patients in hospital, the result is patient-days over the window rather than a
# weekly incidence - confirm that this is the intended quantity.
hospcases_weekly = hospcases.groupby(['area_code', 'area_name']).rolling(7).sum().reset_index()

# Add in the NHS region populations for normalisation
# (area_name holds the region name here, so it is matched against nhs_pop's region).
hospcases_weekly = pd.merge(hospcases_weekly, nhs_pop, left_on='area_name', right_on='region').drop(columns='region')
hospcases_weekly['incidence'] = hospcases_weekly['value'] / hospcases_weekly['population']

# Get the national mean incidence (per-date mean across areas) to spatially detrend
national_mean_total_cases = (
    hospcases_weekly.groupby(['date'])['incidence']
    .mean()
    .reset_index()
    .rename(columns={'incidence': 'national_mean_incidence'})
)
hospcases_weekly = pd.merge(hospcases_weekly, national_mean_total_cases, on='date')
hospcases_weekly['detrended'] = hospcases_weekly['incidence'] - hospcases_weekly['national_mean_incidence']
# Convert to wide format for ease of future calculations
wide_data_hosp_cases = hospcases_weekly.pivot(index='date', columns='area_name', values='detrended')

In [13]:
# Write the hospital tables (wide detrended and long formats) to disk
for _frame, _csv_path in (
    (wide_data_hosp_cases, 'Data/wide_hosp_cases.csv'),
    (wide_data_hosp_inc, 'Data/wide_hosp_inc.csv'),
    (hospcases_weekly, 'Data/long_hosp_cases.csv'),
    (hosps_weekly, 'Data/long_hosp_inc.csv'),
):
    _frame.to_csv(_csv_path)