In [1]:
# Import statements
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
from statsmodels.tsa.stattools import acf

In [29]:
# 10 year age classes
def map_to_age_class(age):
    """Collapse a 5-year age band label into its 10-year age class.

    Labels '00_04' through '85_89' map to the decade band that contains them
    (for example '05_09' -> '00_09'), and '90+' maps to itself. Any value that
    is not one of those labels is returned unchanged.
    """
    # '90+' has no upper bound, so it is the only entry that is not generated.
    decade_of = {'90+': '90+'}
    for start in range(0, 90, 10):
        decade = f'{start:02d}_{start + 9:02d}'
        # Each decade is made of two 5-year bands: lower half and upper half.
        decade_of[f'{start:02d}_{start + 4:02d}'] = decade
        decade_of[f'{start + 5:02d}_{start + 9:02d}'] = decade
    return decade_of.get(age, age)

# Population per area and 10-year age class: keep the AGE_ONLY rows of the
# individual age bands (the aggregate bands listed below are excluded), keep
# area codes starting with 'E', relabel the bands with map_to_age_class and
# sum what remains within each (area_code, age_class) pair.
pop_full = (
    pd.read_csv('ONS-population_2021-08-05.csv')
    .loc[lambda df: (~df['age'].isin(['00_59', '60+', 'unassigned', '18_64', 'ALL', '65_84', '6_17']))
                    & (df['category'] == 'AGE_ONLY')]
    .drop(columns=['category', 'gender'])
    .loc[lambda df: df['areaCode'].str.startswith('E')]
    .rename(columns={'areaCode': 'area_code'})
    .assign(age_class=lambda df: df['age'].apply(map_to_age_class))
    .drop(columns='age')
    .groupby(['area_code', 'age_class'])
    .sum()
    .reset_index()
)

# Daily cases per area and 10-year age class. The unfiltered file stays in
# cases21; `cases` ends up indexed by date with one row per
# (area_code, area_name, age_class, date).
cases21 = pd.read_csv('newCasesBySpecimenDateAgeDemographics_ltla_2021.csv')
cases = (
    cases21.loc[~cases21['age'].isin(['00_59', '60+', 'unassigned'])]
    .drop(columns=['area_type', 'metric_name', 'rollingSum', 'rollingRate', 'metric'])
    .assign(
        date=lambda df: pd.to_datetime(df['date']),
        age_class=lambda df: df['age'].apply(map_to_age_class),
    )
    .set_index('date')
    .drop(columns='age')
    .groupby(['area_code', 'area_name', 'age_class', 'date'])
    .sum()
    .reset_index()
    .set_index('date')
)

# Sum over a window of 7 consecutive rows within each area / age class. The
# window counts rows, not calendar days, and yields NaN until it is full.
weekly_counts_total = (
    cases
    .groupby(['area_code', 'area_name', 'age_class'])
    .rolling(7)
    .sum()
    .reset_index()
)
weekly_counts_total

Unnamed: 0,area_code,area_name,age_class,date,cases
0,E06000001,Hartlepool,00_09,2021-01-01,
1,E06000001,Hartlepool,00_09,2021-01-02,
2,E06000001,Hartlepool,00_09,2021-01-03,
3,E06000001,Hartlepool,00_09,2021-01-04,
4,E06000001,Hartlepool,00_09,2021-01-05,
...,...,...,...,...,...
1149745,E09000033,Westminster,90+,2021-12-27,15.0
1149746,E09000033,Westminster,90+,2021-12-28,16.0
1149747,E09000033,Westminster,90+,2021-12-29,16.0
1149748,E09000033,Westminster,90+,2021-12-30,11.0


In [31]:
# Convert incidence to a proportion to allow for cross-LTLA comparison:
# rolling case sum divided by the population of the same area and age class.
# NOTE: this rebinds weekly_counts_total, so re-running the cell without first
# re-running the cell that builds it merges the population a second time.
weekly_counts_total = (
    weekly_counts_total
    .merge(pop_full, on=['area_code', 'age_class'])
    .assign(incidence=lambda df: df['cases'] / df['population'])
)

# Area-name -> region lookup taken from the NHSER_name / LTLA_name columns,
# extended with six hand-entered area names.
npis = (
    pd.read_csv('England_LTLA_NHSER.csv')[['NHSER_name', 'LTLA_name']]
    .rename(columns={'NHSER_name': 'region', 'LTLA_name': 'ltla20nm'})
    .drop_duplicates()
)
manual_dat = pd.DataFrame({
    'ltla20nm': ['Wycombe', 'South Bucks', 'Chiltern', 'Aylesbury Vale',
                 'Cornwall and Isles of Scilly', 'Hackney and City of London'],
    'region': ['South East', 'South East', 'South East', 'South East', 'South West', 'London'],
})
npis = pd.concat([npis, manual_dat])

# Daily cases tagged with their region. The default inner join drops any area
# whose name has no match in the lookup.
cases_region = (
    cases.reset_index()
    .merge(npis, left_on='area_name', right_on='ltla20nm')
    .drop(columns='ltla20nm')
)

# Same region tagging for the rolling incidence table.
weekly_counts_total = (
    weekly_counts_total
    .merge(npis, left_on='area_name', right_on='ltla20nm')
    .drop(columns='ltla20nm')
)

# Unweighted mean incidence across the areas of each region, per date and
# age class.
regional_mean_total = (
    weekly_counts_total
    .groupby(['region', 'date', 'age_class'])['incidence']
    .mean()
    .rename('regional_mean_incidence')
    .reset_index()
)
regional_mean_total

Unnamed: 0,region,date,age_class,regional_mean_incidence
0,East of England,2021-01-01,00_09,
1,East of England,2021-01-01,10_19,
2,East of England,2021-01-01,20_29,
3,East of England,2021-01-01,30_39,
4,East of England,2021-01-01,40_49,
...,...,...,...,...
25545,South West,2021-12-31,50_59,0.011950
25546,South West,2021-12-31,60_69,0.007921
25547,South West,2021-12-31,70_79,0.005142
25548,South West,2021-12-31,80_89,0.003847


In [32]:
# Attach the regional mean to every area row and subtract it, leaving each
# area's deviation from its region for the same date and age class.
weekly_counts_total = (
    weekly_counts_total
    .merge(regional_mean_total, on=['region', 'date', 'age_class'])
    .assign(detrended=lambda df: df['incidence'] - df['regional_mean_incidence'])
)
weekly_counts_total

Unnamed: 0,area_code,area_name,age_class,date,cases,population,incidence,region,regional_mean_incidence,detrended
0,E06000001,Hartlepool,00_09,2021-01-01,,10921,,North East and Yorkshire,,
1,E06000001,Hartlepool,00_09,2021-01-02,,10921,,North East and Yorkshire,,
2,E06000001,Hartlepool,00_09,2021-01-03,,10921,,North East and Yorkshire,,
3,E06000001,Hartlepool,00_09,2021-01-04,,10921,,North East and Yorkshire,,
4,E06000001,Hartlepool,00_09,2021-01-05,,10921,,North East and Yorkshire,,
...,...,...,...,...,...,...,...,...,...,...
1149745,E09000033,Westminster,90+,2021-12-27,15.0,1863,0.008052,London,0.008779,-0.000728
1149746,E09000033,Westminster,90+,2021-12-28,16.0,1863,0.008588,London,0.010165,-0.001576
1149747,E09000033,Westminster,90+,2021-12-29,16.0,1863,0.008588,London,0.011481,-0.002893
1149748,E09000033,Westminster,90+,2021-12-30,11.0,1863,0.005904,London,0.013040,-0.007135


In [33]:
# Wide layout: one row per date, one column per (area_name, age_class) pair.
wide_data_total = pd.pivot(
    weekly_counts_total,
    index='date',
    columns=['area_name', 'age_class'],
    values='detrended',
)
wide_data_incidence = pd.pivot(
    weekly_counts_total,
    index='date',
    columns=['area_name', 'age_class'],
    values='incidence',
)

# Persist the wide tables and the long table for the 10-year age classes.
wide_data_incidence.to_csv('wide_incidence_10year.csv')
wide_data_total.to_csv('wide_total_10year.csv')
weekly_counts_total.to_csv('weekly_total_10year.csv')

In [2]:
# Population per area and 5-year age band: keep the AGE_ONLY rows of the
# individual bands (the aggregate bands listed below are excluded) and keep
# area codes starting with 'E'.
pop_full = (
    pd.read_csv('ONS-population_2021-08-05.csv')
    .loc[lambda df: (~df['age'].isin(['00_59', '60+', 'unassigned', '18_64', 'ALL', '65_84', '6_17']))
                    & (df['category'] == 'AGE_ONLY')]
    .drop(columns=['category', 'gender'])
    .loc[lambda df: df['areaCode'].str.startswith('E')]
    .rename(columns={'areaCode': 'area_code'})
)

# Daily cases per area and 5-year age band, indexed by date. The unfiltered
# file stays in cases21.
cases21 = pd.read_csv('newCasesBySpecimenDateAgeDemographics_ltla_2021.csv')
cases = (
    cases21.loc[~cases21['age'].isin(['00_59', '60+', 'unassigned'])]
    .drop(columns=['area_type', 'metric_name', 'rollingSum', 'rollingRate', 'metric'])
    .assign(date=lambda df: pd.to_datetime(df['date']))
    .set_index('date')
)

# rolling(7) sums 7 consecutive ROWS of each group, so it is only a trailing
# 7-day total when the rows of every group are in chronological order. Sort by
# date first (stable, so ties keep their file order) instead of relying on the
# order of the CSV; `cases` itself is left in file order for later cells.
weekly_counts_total = (
    cases
    .sort_index(kind='mergesort')
    .groupby(['area_code', 'area_name', 'age'])
    .rolling(7)
    .sum()
    .reset_index()
)
weekly_counts_total

Unnamed: 0,area_code,area_name,age,date,cases
0,E06000001,Hartlepool,00_04,2021-01-01,
1,E06000001,Hartlepool,00_04,2021-01-02,
2,E06000001,Hartlepool,00_04,2021-01-03,
3,E06000001,Hartlepool,00_04,2021-01-04,
4,E06000001,Hartlepool,00_04,2021-01-05,
...,...,...,...,...,...
2184520,E09000033,Westminster,90+,2021-12-27,15.0
2184521,E09000033,Westminster,90+,2021-12-28,16.0
2184522,E09000033,Westminster,90+,2021-12-29,16.0
2184523,E09000033,Westminster,90+,2021-12-30,11.0


In [3]:
# Convert incidence to a proportion to allow for cross-LTLA comparison:
# rolling case sum divided by the population of the same area and age band.
# NOTE: this rebinds weekly_counts_total, so re-running the cell without first
# re-running the cell that builds it merges the population a second time.
weekly_counts_total = (
    weekly_counts_total
    .merge(pop_full, on=['area_code', 'age'])
    .assign(incidence=lambda df: df['cases'] / df['population'])
)

# Area-name -> region lookup taken from the NHSER_name / LTLA_name columns,
# extended with six hand-entered area names.
npis = (
    pd.read_csv('England_LTLA_NHSER.csv')[['NHSER_name', 'LTLA_name']]
    .rename(columns={'NHSER_name': 'region', 'LTLA_name': 'ltla20nm'})
    .drop_duplicates()
)
manual_dat = pd.DataFrame({
    'ltla20nm': ['Wycombe', 'South Bucks', 'Chiltern', 'Aylesbury Vale',
                 'Cornwall and Isles of Scilly', 'Hackney and City of London'],
    'region': ['South East', 'South East', 'South East', 'South East', 'South West', 'London'],
})
npis = pd.concat([npis, manual_dat])

# Daily cases tagged with their region. The default inner join drops any area
# whose name has no match in the lookup.
cases_region = (
    cases.reset_index()
    .merge(npis, left_on='area_name', right_on='ltla20nm')
    .drop(columns='ltla20nm')
)

# Same region tagging for the rolling incidence table.
weekly_counts_total = (
    weekly_counts_total
    .merge(npis, left_on='area_name', right_on='ltla20nm')
    .drop(columns='ltla20nm')
)

# Unweighted mean incidence across the areas of each region, per date and
# age band.
regional_mean_total = (
    weekly_counts_total
    .groupby(['region', 'date', 'age'])['incidence']
    .mean()
    .rename('regional_mean_incidence')
    .reset_index()
)
regional_mean_total

Unnamed: 0,region,date,age,regional_mean_incidence
0,East of England,2021-01-01,00_04,
1,East of England,2021-01-01,05_09,
2,East of England,2021-01-01,10_14,
3,East of England,2021-01-01,15_19,
4,East of England,2021-01-01,20_24,
...,...,...,...,...
48540,South West,2021-12-31,70_74,0.005350
48541,South West,2021-12-31,75_79,0.004852
48542,South West,2021-12-31,80_84,0.003891
48543,South West,2021-12-31,85_89,0.003783


In [4]:
# Attach the regional mean to every area row and subtract it, leaving each
# area's deviation from its region for the same date and age band.
weekly_counts_total = (
    weekly_counts_total
    .merge(regional_mean_total, on=['region', 'date', 'age'])
    .assign(detrended=lambda df: df['incidence'] - df['regional_mean_incidence'])
)
weekly_counts_total

Unnamed: 0,area_code,area_name,age,date,cases,population,incidence,region,regional_mean_incidence,detrended
0,E06000001,Hartlepool,00_04,2021-01-01,,5147,,North East and Yorkshire,,
1,E06000001,Hartlepool,00_04,2021-01-02,,5147,,North East and Yorkshire,,
2,E06000001,Hartlepool,00_04,2021-01-03,,5147,,North East and Yorkshire,,
3,E06000001,Hartlepool,00_04,2021-01-04,,5147,,North East and Yorkshire,,
4,E06000001,Hartlepool,00_04,2021-01-05,,5147,,North East and Yorkshire,,
...,...,...,...,...,...,...,...,...,...,...
2184520,E09000033,Westminster,90+,2021-12-27,15.0,1863,0.008052,London,0.008779,-0.000728
2184521,E09000033,Westminster,90+,2021-12-28,16.0,1863,0.008588,London,0.010165,-0.001576
2184522,E09000033,Westminster,90+,2021-12-29,16.0,1863,0.008588,London,0.011481,-0.002893
2184523,E09000033,Westminster,90+,2021-12-30,11.0,1863,0.005904,London,0.013040,-0.007135


In [None]:
# Wide layout: one row per date, one column per (area_name, age) pair.
wide_data_total = pd.pivot(
    weekly_counts_total,
    index='date',
    columns=['area_name', 'age'],
    values='detrended',
)
wide_data_incidence = pd.pivot(
    weekly_counts_total,
    index='date',
    columns=['area_name', 'age'],
    values='incidence',
)

# Persist the wide tables, the area -> region lookup and the long table.
# to_csv writes the row index by default, so these files gain an unnamed
# leading column when read back with pd.read_csv.
wide_data_incidence.to_csv('wide_incidence.csv')
wide_data_total.to_csv('wide_total.csv')
npis.to_csv('ltla_to_nhs.csv')
weekly_counts_total.to_csv('weekly_total.csv')

In [49]:
# Total population of the nine listed E12… area codes: rows with category
# 'ALL', with the category / gender / age columns dropped afterwards.
pop_full = (
    pd.read_csv('ONS-population_2021-08-05.csv')
    .loc[lambda df: (df['category'] == 'ALL')
                    & (df['areaCode'].isin(['E12000001',
                                            'E12000002', 'E12000003', 'E12000004', 'E12000005',
                                            'E12000006', 'E12000007', 'E12000008', 'E12000009']))]
    .drop(columns=['category', 'gender', 'age'])
    .rename(columns={'areaCode': 'area_code'})
)

# Daily case counts per region, indexed by date.
cases_nhs = (
    pd.read_csv('newCasesPCROnlyBySpecimenDate_region_2021.csv')
    .drop(columns=['area_type', 'metric_name', 'metric'])
    .assign(date=lambda df: pd.to_datetime(df['date']))
    .set_index('date')
)

# rolling(7) sums 7 consecutive ROWS of each group, so it is only a trailing
# 7-day total when the rows of every group are in chronological order. Sort by
# date first (stable, so ties keep their file order) instead of relying on the
# order of the CSV; `cases_nhs` itself is left in file order.
weekly_counts_total_nhs = (
    cases_nhs
    .sort_index(kind='mergesort')
    .groupby(['area_code', 'area_name'])
    .rolling(7)
    .sum()
    .reset_index()
)
weekly_counts_total_nhs

Unnamed: 0,area_code,area_name,date,value
0,E12000001,North East,2021-01-01,
1,E12000001,North East,2021-01-02,
2,E12000001,North East,2021-01-03,
3,E12000001,North East,2021-01-04,
4,E12000001,North East,2021-01-05,
...,...,...,...,...
3280,E12000009,South West,2021-12-27,44380.0
3281,E12000009,South West,2021-12-28,46296.0
3282,E12000009,South West,2021-12-29,49150.0
3283,E12000009,South West,2021-12-30,51772.0


In [50]:
# Rolling case sum as a proportion of each region's population.
# NOTE: this rebinds weekly_counts_total_nhs, so re-running the cell without
# first re-running the cell that builds it merges the population again.
weekly_counts_total_nhs = (
    weekly_counts_total_nhs
    .merge(pop_full, on='area_code')
    .assign(incidence=lambda df: df['value'] / df['population'])
)

# Per-date mean of the regional incidences (every region weighs the same).
national_mean_total_cases = (
    weekly_counts_total_nhs
    .groupby(['date'])['incidence']
    .mean()
    .rename('national_mean_incidence')
    .reset_index()
)

# Deviation of each region from that per-date mean.
weekly_counts_total_nhs = (
    weekly_counts_total_nhs
    .merge(national_mean_total_cases, on='date')
    .assign(detrended=lambda df: df['incidence'] - df['national_mean_incidence'])
)

# Wide layout: one row per date, one column per area_name.
wide_data_total_nhs = pd.pivot(
    weekly_counts_total_nhs, index='date', columns=['area_name'], values='detrended'
)
wide_data_incidence_nhs = pd.pivot(
    weekly_counts_total_nhs, index='date', columns=['area_name'], values='incidence'
)

wide_data_incidence_nhs.to_csv('wide_incidence_nhs.csv')
wide_data_total_nhs.to_csv('wide_total_nhs.csv')
weekly_counts_total_nhs.to_csv('weekly_total_nhs.csv')

In [103]:
# Daily case counts, indexed by date.
cases_nhs_full = pd.read_csv('cases_total.csv')
cases_nhs_full['date'] = pd.to_datetime(cases_nhs_full['date'])
cases_nhs_full.set_index('date',inplace=True)

# Area-name -> region lookup. The notebook writes 'ltla_to_nhs.csv' from a
# lookup that already contains the manual rows below, so appending them again
# produces duplicate (ltla20nm, region) pairs. Every merge on area_name would
# then duplicate those areas and double-count their population and cases in
# the regional sums. De-duplicate on the two key columns; this is a no-op if
# the file turns out not to contain the manual rows.
npis = pd.read_csv('ltla_to_nhs.csv')
manual_dat = pd.DataFrame({'ltla20nm': ['Wycombe','South Bucks','Chiltern','Aylesbury Vale','Cornwall and Isles of Scilly', 'Hackney and City of London'],
                           'region': ['South East', 'South East', 'South East', 'South East', 'South West', 'London']})
npis = pd.concat([npis,manual_dat]).drop_duplicates(subset=['ltla20nm','region'])

# Regional population: unique (area_code, area_name) pairs, tagged with their
# region, joined to the population estimates on the columns the two frames
# share, then summed per region.
pop_nhs = pd.read_csv('census_estimate.csv')
ltla_pop = pd.read_csv('newCasesPCROnlyBySpecimenDate_ltla_2020.csv')
ltla_pop = ltla_pop[['area_code','area_name']].drop_duplicates()
ltla_pop = pd.merge(ltla_pop, npis, left_on='area_name', right_on='ltla20nm').drop(columns='ltla20nm')
ltla_pop = pd.merge(ltla_pop, pop_nhs)
# The non-population columns are summed too and discarded straight afterwards.
nhs_pop = ltla_pop.groupby('region').sum().drop(columns=['area_code','area_name','Unnamed: 0'])
nhs_pop.reset_index(inplace=True)

# Regional daily cases: tag each area with its region, sum per (date, region),
# then sum over a window of 7 consecutive rows within each region. The
# groupby-sum leaves rows ordered by date, so the window is chronological.
cases_region = pd.merge(cases_nhs_full.reset_index(), npis, left_on='area_name', right_on='ltla20nm')
cases_region.drop(columns=['ltla20nm','area_name','area_code','Unnamed: 0'],inplace=True)
cases_region = cases_region.groupby(['date','region']).sum().reset_index().set_index('date')
weekly_cases_region = cases_region.groupby('region').rolling(7).sum().reset_index()
weekly_cases_region = pd.merge(weekly_cases_region,nhs_pop,on='region').set_index('date')
weekly_cases_region['incidence'] = weekly_cases_region['value'] / weekly_cases_region['population']

# Per-date mean of the regional incidences (every region weighs the same),
# and each region's deviation from it.
national_weekly_mean = weekly_cases_region.reset_index().groupby('date')['incidence'].mean().reset_index().rename(columns={'incidence': 'national_mean_incidence'})
weekly_cases_region.reset_index(inplace=True)
weekly_cases_region = pd.merge(weekly_cases_region,national_weekly_mean,on='date')
weekly_cases_region['detrended'] = weekly_cases_region['incidence'] - weekly_cases_region['national_mean_incidence']

In [106]:
# Wide layout: one row per date, one column per region.
wide_total_nhs = pd.pivot(
    weekly_cases_region, index='date', columns=['region'], values='detrended'
)
wide_incidence_nhs = pd.pivot(
    weekly_cases_region, index='date', columns=['region'], values='incidence'
)

# Persist the wide tables and the long table for the regional series.
wide_incidence_nhs.to_csv('wide_incidence_region.csv')
wide_total_nhs.to_csv('wide_total_region.csv')
weekly_cases_region.to_csv('weekly_total_region.csv')