# UK local authority COVID-19 cases plotted as heatmaps
2020 Nick Plummer (nickopotamus.co.uk)
Modified from Jason Bowling's US heatmap https://github.com/JasonRBowling/covid19NewCasesPer100KHeatmap
See https://towardsdatascience.com/visualization-of-covid-19-new-cases-over-time-in-python-8c6ac4620c88
Pulls UK data using COVID-19 API https://publichealthengland.github.io/coronavirus-dashboard-api-python-sdk/

In [None]:
# To add to Jupyter use:
#    import sys
#    !{sys.executable} -m pip install uk-covid19

In [None]:
from uk_covid19 import Cov19API
import numpy as np
import seaborn as sns
import matplotlib.pylab as plt
import pandas as pd
import datetime
import dateutil

## Get baseline LTLA data

In [None]:
# Load the Lower Tier Local Authority (LTLA) lookup table
# Populations: ONS 2019 mid-year population estimates
# https://www.ons.gov.uk/peoplepopulationandcommunity/populationandmigration/populationestimates/bulletins/annualmidyearpopulationestimates/latest
# LTLA-to-region mapping: ONS Open Geography Portal
# https://geoportal.statistics.gov.uk/datasets/local-authority-district-to-region-april-2019-lookup-in-england?orderBy=LAD19NM&page=7

ltlaRegions = pd.read_csv('ltlaRegions.csv')
# Make sure the population column is numeric before it is used for scaling
ltlaRegions['population'] = pd.to_numeric(ltlaRegions['population'])

# Distinct region names (the keys of the groupby groups)
regions = list(ltlaRegions.groupby("region").groups)

# Shorthand code per region: upper-cased first letter of each word,
# e.g. "South West" -> "SW"
codes = [
    ''.join(word[0] for word in region.split()).upper()
    for region in regions
]

# TODO: Turn this into a lookup table
regionCodes = list(zip(codes, regions))
print(regionCodes)

In [None]:
# Split the LTLA table into one (area, population) table per region
# TODO: Must be a quicker way of doing this as a for loop using the _regionCodes_ list
# Probably using a dictionary eg d = {} for x in range(1, 10): d["string{0}".format(x)] = "Hello"
def _region_population(region):
    """Return the 'area' and 'population' columns of the LTLAs in *region*."""
    in_region = ltlaRegions['region'] == region
    return ltlaRegions[in_region][['area', 'population']]


popn_EM = _region_population('East Midlands')
popn_EOE = _region_population('East of England')
popn_L = _region_population('London')
popn_NE = _region_population('North East')
popn_NW = _region_population('North West')
popn_S = _region_population('Scotland')
popn_SE = _region_population('South East')
popn_SW = _region_population('South West')
popn_WM = _region_population('West Midlands')
popn_YATH = _region_population('Yorkshire and The Humber')

## API call for LTLA level data

In [None]:
# Select the region to plot. Only region_name needs editing: the shorthand
# code and the population table are derived from it, so the three values
# cannot drift out of sync.
region_name = "South West"

# Shorthand code built the same way as the `codes` list: upper-cased first
# letter of each word in the region name (e.g. "South West" -> "SW")
region_code = ''.join(word[0] for word in region_name.split()).upper()

# 'area' and 'population' columns of the LTLAs belonging to the region
popn_region = ltlaRegions[ltlaRegions['region'] == region_name][['area', 'population']]

# Fail early on a misspelt region instead of issuing zero API requests later
if popn_region.empty:
    raise ValueError(f"No LTLAs found for region {region_name!r}")

In [None]:
# TODO: Turn this into a loop across all the regions

# Fields requested from the API, keyed by the column name used in this notebook
cases_only = {
    "date": "date",
    "area": "areaName",
    "cases": "newCasesByPublishDate",
}

# One API request per area in the selected region; collect the per-area tables
full_region = []
for name in popn_region['area'].to_list():
    print(name)
    regionString = [f'areaType=ltla;areaName={name}']
    full_region.append(
        Cov19API(filters=regionString, structure=cases_only).get_dataframe()
    )

# Stack the per-area tables into a single table
full_region = pd.concat(full_region)
print(full_region)

In [None]:
# Get data release timestamp and extract just the date (formatted DD-MM-YYYY)
# The parser submodule is imported explicitly: a bare `import dateutil` does
# not by itself guarantee that `dateutil.parser` has been loaded.
import dateutil.parser

release_timestamp = (
    dateutil.parser.parse(Cov19API.get_release_timestamp())
    .date()
    .strftime("%d-%m-%Y")
)
print(release_timestamp)

In [None]:
# Keep the downloaded table under a second name so later cells can be re-run
# without calling the API again (takes a long time!)
data = full_region

# Distinct dates and (alphabetically sorted) area names present in the data
dates = data['date'].unique().tolist()
areas = sorted(data['area'].unique().tolist())

# Skeleton of the output table: one row per distinct date. The per-area
# columns are left-merged onto this frame on 'date' afterwards.
result = pd.DataFrame({'date': dates})

# Map each area name to a one-element list holding its population
areaPopulations = popn_region.set_index("area").T.to_dict("list")
print(areas)

In [None]:
# Add one column per area to `result`, holding daily cases per 100,000 people
for area in areas:
    population = int(areaPopulations[area][0])
    print(area + ": " + str(population))

    # Rows for this area only; 'date' is the merge key, 'cases' the raw count
    area_rows = data.loc[data['area'].eq(area), ['date', 'cases']]

    # Missing counts become 0, then scale to cases per 100,000 population
    per_100k = area_rows['cases'].fillna(0).div(population).mul(100000.0)

    # Build a fresh two-column frame (date + column named after the area)
    # rather than assigning onto a filtered slice of `data`
    areaData = pd.DataFrame({'date': area_rows['date'], area: per_100k})

    result = pd.merge(result, areaData, how='left', on='date')

In [None]:
# Tidy up table: drop repeated rows and fill missing values with 0
result = result.drop_duplicates().fillna(0)

# log10(x+1) transform to improve readability, applied to all area columns at once
result[areas] = np.log10(result[areas].add(1.0))

# Timeseriesify, and start after T1/2 data combined
result = result.sort_values(by=['date'], ascending=True)
result['date'] = pd.to_datetime(result['date'])
# .copy() makes the filtered table independent of the unfiltered one, so the
# column assignment below does not write to a slice (SettingWithCopyWarning)
result = result[result['date'] >= '2020-07-02'].copy()
result['date'] = result['date'].dt.strftime('%Y-%m-%d')

# Transpose table: dates become the columns, areas become the rows
result = result.set_index('date').transpose()

print(result)

In [None]:
%matplotlib inline

# Draw the heatmap of the transposed table: one row per area, one column per date
plt.figure(figsize=(20, 10))
g = sns.heatmap(result, cmap="coolwarm", linewidth=0.05, linecolor='lightgrey')

# Axis titles are left blank
plt.xlabel('')
plt.ylabel('')

plt.title(f"Daily new COVID-19 cases in {region_name} by LTLA /100,000 population", fontsize=20)

# Small header line with the data release date and credits
updateText = (
    f"Updated: {release_timestamp}"
    ". Data source: coronavirus.data.gov.uk. Visualisation by @Nickopotamus, modified from @JRBowling"
)
plt.suptitle(updateText, fontsize=10)

# Put one tick at the centre of each heatmap row and label it with the area name
row_centres = np.arange(.5, (result.shape[0]+0.5), 1.0)
plt.yticks(row_centres, areas)

# Small tick text; dates drawn vertically, area names horizontally
plt.yticks(fontsize=8)
plt.xticks(fontsize=8)
g.set_xticklabels(g.get_xticklabels(), rotation=90)
g.set_yticklabels(g.get_yticklabels(), rotation=0)

# Save the figure as <region code>-<release date>-cases.png
plt.savefig(f"{region_code}-{release_timestamp}-cases.png")