In [None]:
# UK COVID-19 cases/deaths plotted as heatmaps
# 2020 Nick Plummer (nickopotamus.co.uk)
# Modified from Jason Bowling's US heatmap https://github.com/JasonRBowling/covid19NewCasesPer100KHeatmap
# See https://towardsdatascience.com/visualization-of-covid-19-new-cases-over-time-in-python-8c6ac4620c88

In [None]:
# Pulls UK data using COVID-19 API https://publichealthengland.github.io/coronavirus-dashboard-api-python-sdk/
# To add to Jupyter use:
#    import sys
#    !{sys.executable} -m pip install uk-covid19

In [None]:
# Import required libraries
from uk_covid19 import Cov19API
import numpy as np
import seaborn as sns
import matplotlib.pylab as plt
import pandas as pd
import datetime
import dateutil

In [None]:
# Population figures: ONS mid-2019 population estimates
# https://www.ons.gov.uk/peoplepopulationandcommunity/populationandmigration/populationestimates/bulletins/annualmidyearpopulationestimates/latest

# [area name, population] rows: the nine English regions followed by
# Scotland, Wales and Northern Ireland.
regionPopns = [
    ['London', 8961989],
    ['South East', 9180135],
    ['East of England', 6236072],
    ['South West', 5624696],
    ['North West', 7341196],
    ['East Midlands', 4835928],
    ['West Midlands', 5934037],
    ['Yorkshire and The Humber', 5502967],
    ['North East', 2669941],
    ['Scotland', 5463300],
    ['Wales', 3152879],
    ['Northern Ireland', 1893667],
]

# Look-up keyed by area name. Each value is the remainder of the row, i.e. a
# one-element list holding the population. If a name were repeated, the last
# row seen would win.
areaPopulations = {name: rest for name, *rest in regionPopns}

In [None]:
# Filters: one query covering the English regions, plus one query per nation
all_regions = ["areaType=region"]
scotland_only = ['areaType=nation', 'areaName=Scotland']
wales_only = ['areaType=nation', 'areaName=Wales']
ni_only = ['areaType=nation', 'areaName=Northern Ireland']

# Response structure: output column name -> API metric name
cases_only = dict(
    date="date",
    area="areaName",
    cases="newCasesByPublishDate",
)

# One API client per filter, all sharing the same response structure
api_regions, api_scotland, api_wales, api_ni = (
    Cov19API(filters=query, structure=cases_only)
    for query in (all_regions, scotland_only, wales_only, ni_only)
)

# Fetch each result as a DataFrame, then stack them into a single long table
data_regions, data_scotland, data_wales, data_ni = (
    client.get_dataframe()
    for client in (api_regions, api_scotland, api_wales, api_ni)
)
full_data = pd.concat([data_regions, data_scotland, data_wales, data_ni])

print(full_data)

In [None]:
# Get the data release timestamp and keep just the date, formatted DD-MM-YYYY.
# The resulting string is reused in the figure caption and the output file name.
#
# Parsed with pandas rather than `dateutil.parser.parse`: a bare
# `import dateutil` does not reliably expose the `parser` submodule, so the
# attribute lookup only succeeds when some other library has already imported
# `dateutil.parser`.
release_timestamp = pd.to_datetime(Cov19API.get_release_timestamp()).strftime("%d-%m-%Y")
print(release_timestamp)

In [None]:
# Build a wide table of daily new cases per 100,000 population:
# a 'date' column plus one column per area, areas in alphabetical order.
dates = full_data['date'].unique().tolist()  # not used in this cell
areas = sorted(full_data['area'].unique().tolist())

# Seed the result with every date entry of the long table. full_data stacks
# several areas, so dates (and therefore rows) repeat here; the tidy-up step
# that follows removes the repeats with drop_duplicates().
result = pd.DataFrame()
result['date'] = full_data['date']

data = full_data
for area in areas:
    # Raises KeyError if the API returns an area missing from areaPopulations
    population = int(areaPopulations[area][0])
    print(area + ": " + str(population))

    # Take an explicit copy of this area's rows. Adding a column to a
    # boolean-filtered slice of `data` triggers SettingWithCopyWarning.
    areaData = data.loc[data.area.eq(area), ['date', 'cases']].copy()

    # Missing counts become 0, then scale to cases per 100,000 population
    areaData[area] = areaData['cases'].fillna(0).div(population).mul(100000.0)
    areaData = areaData.drop(columns=['cases'])

    # Left-join on date so every seeded row picks up this area's value
    result = pd.merge(result, areaData, how='left', on='date')

In [None]:
# Tidy up table: drop the repeated rows left by the merges and treat any
# date/area combination without data as 0.
# NOTE: this cell rebinds `result` (and finally transposes it), so it must be
# run exactly once after the cell that builds `result`.
result = result.drop_duplicates().fillna(0)

# log10(x + 1) transform to improve readability; the +1 keeps zero-case days at 0
result[areas] = np.log10(result[areas].add(1.0))

# Timeseriesify, and start after T1/2 data combined
# NOTE(review): "T1/2" presumably refers to the pillar 1 / pillar 2 testing
# data being combined from 2020-07-02 — confirm.
result = result.sort_values(by=['date'], ascending=True)
result['date'] = pd.to_datetime(result['date'])
# .copy() so the column assignment below acts on an independent frame rather
# than a filtered slice (avoids SettingWithCopyWarning).
result = result[result['date'] >= '2020-07-02'].copy()
result['date'] = result['date'].dt.strftime('%Y-%m-%d')

# Transpose table: one row per area, one column per date
result = result.set_index('date').transpose()

print(result)

In [None]:
%matplotlib inline

plt.figure(figsize=(20, 10))
g = sns.heatmap(result, cmap="coolwarm", linewidth=0.05, linecolor='lightgrey')
plt.xlabel('')
plt.ylabel('')

plt.title("Daily new COVID-19 cases by region /100,000 population", fontsize=20)

updateText = "Updated: " + release_timestamp + \
". Data source: coronavirus.data.gov.uk. Visualisation by @Nickopotamus, modified from @JRBowling"

plt.suptitle(updateText, fontsize=10)

plt.yticks(np.arange(.5, (result.shape[0]+0.5), 1.0), areas)

plt.yticks(fontsize=8)
plt.xticks(fontsize=8)
g.set_xticklabels(g.get_xticklabels(), rotation=90)
g.set_yticklabels(g.get_yticklabels(), rotation=0)
plt.savefig(f"UK-{release_timestamp}-cases.png")