In [1]:
import datetime
import itertools
import operator
import re
from collections import defaultdict
from pathlib import Path
from urllib.error import HTTPError

import numpy as np
import pandas as pd
import plotly.graph_objects as go
import requests
import xmltodict
from IPython.display import display

In [2]:
def get_england_deaths_df(date: datetime.date) -> pd.DataFrame:
    """Return daily and cumulative deaths per NHS England region, as announced on ``date``.

    The workbook is downloaded from NHS England:
    https://www.england.nhs.uk/statistics/statistical-work-areas/covid-19-daily-deaths/

    Args:
        date: Publication date of the workbook. It selects both the year/month
            upload folder and the file name in the download URL.

    Returns:
        A DataFrame indexed by ``date`` whose columns are a three-level MultiIndex
        (``area_name``, ``measure``, ``submeasure``). Each region gets a
        ``("deaths", "new")`` column of daily deaths; the matching
        ``("deaths", "total")`` running-sum columns are appended after all of the
        "new" columns.

    Raises:
        Whatever ``pd.read_excel`` raises when the download or parsing fails
        (e.g. ``urllib.error.HTTPError`` when no workbook exists for ``date``).
    """
    # Use `date.day` instead of the "%-d" strftime directive: "%-d" is
    # platform-specific and is not supported by every C library. The upload
    # folder is derived from `date` as well, so years other than 2020 resolve.
    url = (
        f"https://www.england.nhs.uk/statistics/wp-content/uploads/sites/2/{date:%Y}/{date:%m}/"
        f"COVID-19-total-announced-deaths-{date.day}-{date:%B-%Y}.xlsx"
    )

    raw_df = pd.read_excel(
        url,
        sheet_name="COVID19 total deaths by region",
        header=15,
        index_col=1,
    )

    # tidy up the raw dataframe by:
    #   1. removing any fully empty rows and columns
    #   2. dropping the summary columns and the all-England summary row
    #   3. converting type to int
    #   4. transposing, so that dates run down the index and regions across the columns
    deaths_df = (
        raw_df
        .dropna(how="all", axis="index")
        .dropna(how="all", axis="columns")
        .drop(["Up to 01-Mar-20", "Awaiting verification", "Total"], axis="columns")
        .drop(["England"], axis="index")
        .astype(int)
        .T
    )

    # smarten up the index and columns
    deaths_df.index = pd.to_datetime(deaths_df.index, dayfirst=True).rename("date")
    deaths_df = deaths_df.rename(columns={
        "East Of England": "East of England",
        "North East And Yorkshire": "North East and Yorkshire",
    })

    # the workbook holds the new deaths per day; label them as such...
    region_names = list(deaths_df.columns)
    deaths_df.columns = pd.MultiIndex.from_product(
        [region_names, ["deaths"], ["new"]], names=["area_name", "measure", "submeasure"]
    )

    # ...and derive the daily cumulative total for each region from them.
    # `region_names` was captured before any column was appended, so the loop
    # visits each region exactly once.
    for area in region_names:
        deaths_df[area, "deaths", "total"] = deaths_df[area, "deaths", "new"].cumsum()

    return deaths_df

In [3]:
# deaths data taken from https://www.england.nhs.uk/statistics/statistical-work-areas/covid-19-daily-deaths/
today = datetime.date.today()
last_week = today - datetime.timedelta(days=7)
date_range = pd.date_range(start=last_week, end=today, freq="D")

# walk backwards from today and keep the most recent workbook that can be downloaded
for date in date_range[::-1]:
    try:
        england_deaths_df = get_england_deaths_df(date)
    except HTTPError:
        # data not available for this day, try again with the day before
        continue
    else:
        break
else:
    # Every date in the window failed. Stop here rather than carrying on with an
    # undefined `england_deaths_df` (or a stale one left over from an earlier run).
    raise RuntimeError(
        f"No NHS England deaths workbook could be downloaded for any date "
        f"between {last_week} and {today}"
    )

england_deaths_df.tail()

area_name,East of England,London,Midlands,North East and Yorkshire,North West,South East,South West,East of England,London,Midlands,North East and Yorkshire,North West,South East,South West
measure,deaths,deaths,deaths,deaths,deaths,deaths,deaths,deaths,deaths,deaths,deaths,deaths,deaths,deaths
submeasure,new,new,new,new,new,new,new,total,total,total,total,total,total,total
date,Unnamed: 1_level_3,Unnamed: 2_level_3,Unnamed: 3_level_3,Unnamed: 4_level_3,Unnamed: 5_level_3,Unnamed: 6_level_3,Unnamed: 7_level_3,Unnamed: 8_level_3,Unnamed: 9_level_3,Unnamed: 10_level_3,Unnamed: 11_level_3,Unnamed: 12_level_3,Unnamed: 13_level_3,Unnamed: 14_level_3
2020-04-27,45,47,67,58,51,35,13,2284,4981,3925,2658,2922,2308,900
2020-04-28,46,40,65,50,49,37,15,2330,5021,3990,2708,2971,2345,915
2020-04-29,25,39,48,62,57,35,12,2355,5060,4038,2770,3028,2380,927
2020-04-30,28,28,37,46,41,20,19,2383,5088,4075,2816,3069,2400,946
2020-05-01,14,10,9,23,14,4,1,2397,5098,4084,2839,3083,2404,947


In [4]:
# Non-England-deaths data taken from PHE's tracker: https://coronavirus.data.gov.uk

covid_data_url = "https://c19downloads.azureedge.net/downloads/data/data_latest.json"

# Bound the wait with a timeout and fail loudly on an HTTP error status, instead of
# hanging indefinitely or trying to parse an error page as JSON.
covid_data_response = requests.get(covid_data_url, timeout=30)
covid_data_response.raise_for_status()
covid_data = covid_data_response.json()

In [5]:
# Build one daily running-total deaths series per country from the PHE data.
swni_deaths_data = defaultdict(dict)
for country in covid_data["countries"].values():
    country_name = country["name"]["value"]
    if country_name == "England":
        # only the non-England countries are taken from this dataset
        continue
    swni_deaths_data[country_name]["daily_total_deaths"] = (
        pd.DataFrame(country["dailyTotalDeaths"])
        # pd.to_datetime instead of a unit-less "datetime64" cast, which newer
        # pandas versions no longer accept
        .assign(date=lambda frame: pd.to_datetime(frame["date"]))
        .set_index("date")
        # one row per calendar day; gaps inside a series repeat the last known total
        .asfreq("D", method="ffill")
        .rename(columns={"value": country_name})
    )

countries = swni_deaths_data.keys()
swni_deaths_df = pd.concat([swni_deaths_data[country]["daily_total_deaths"] for country in countries], axis=1)
# A running total can only stay flat or grow: carry the last known value forward for
# any country whose series ends before the others (otherwise its total would drop to 0),
# and use 0 for the days before a country's series starts.
swni_deaths_df = swni_deaths_df.ffill().fillna(0.).astype(int)

# same level names as the England dataframes, so that they line up when concatenated
cols = pd.MultiIndex.from_product(
    [swni_deaths_df.columns, ["deaths"], ["total"]], names=["area_name", "measure", "submeasure"]
)
swni_deaths_df.columns = cols

# the PHE data gives us the daily running total; add in a column for daily new deaths
areas = swni_deaths_df.columns.get_level_values(0)
for area in areas:
    running_total = swni_deaths_df[area, "deaths", "total"]
    # diff() has no value for the first day; everything counted by then is "new" on that day
    swni_deaths_df[area, "deaths", "new"] = running_total.diff().fillna(running_total)

swni_deaths_df = (
    swni_deaths_df
    .sort_index(axis=1)
    .astype(int)
)

swni_deaths_df.tail()

area_name,Northern Ireland,Northern Ireland,Scotland,Scotland,Wales,Wales
measure,deaths,deaths,deaths,deaths,deaths,deaths
Unnamed: 0_level_2,new,total,new,total,new,total
date,Unnamed: 1_level_3,Unnamed: 2_level_3,Unnamed: 3_level_3,Unnamed: 4_level_3,Unnamed: 5_level_3,Unnamed: 6_level_3
2020-04-28,10,309,13,1262,17,813
2020-04-29,20,329,70,1332,73,886
2020-04-30,9,338,83,1415,22,908
2020-05-01,9,347,60,1475,17,925
2020-05-02,18,365,40,1515,44,969


In [6]:
# There is no time series of cases for Scotland, Wales and NI, so give each of
# those areas all-NaN "cases" placeholder columns.
# NOTE: no copy is taken - `swni_df` and `swni_deaths_df` are the same object, so
# the placeholder columns are visible through both names.
swni_df = swni_deaths_df
for area, submeasure in itertools.product(areas, ("new", "total")):
    swni_df[area, "cases", submeasure] = np.nan

swni_df.sort_index(axis=1).tail()

area_name,Northern Ireland,Northern Ireland,Northern Ireland,Northern Ireland,Scotland,Scotland,Scotland,Scotland,Wales,Wales,Wales,Wales
measure,cases,cases,deaths,deaths,cases,cases,deaths,deaths,cases,cases,deaths,deaths
Unnamed: 0_level_2,new,total,new,total,new,total,new,total,new,total,new,total
date,Unnamed: 1_level_3,Unnamed: 2_level_3,Unnamed: 3_level_3,Unnamed: 4_level_3,Unnamed: 5_level_3,Unnamed: 6_level_3,Unnamed: 7_level_3,Unnamed: 8_level_3,Unnamed: 9_level_3,Unnamed: 10_level_3,Unnamed: 11_level_3,Unnamed: 12_level_3
2020-04-28,,,10,309,,,13,1262,,,17,813
2020-04-29,,,20,329,,,70,1332,,,73,886
2020-04-30,,,9,338,,,83,1415,,,22,908
2020-05-01,,,9,347,,,60,1475,,,17,925
2020-05-02,,,18,365,,,40,1515,,,44,969


In [7]:
# Put the Scotland/Wales/NI columns and the England regional columns side by side,
# aligned on date; a date present in only one of the frames leaves NaN in the other's columns.
deaths_df = pd.concat(
    [swni_deaths_df, england_deaths_df],
    axis="columns",
)
deaths_df

area_name,Northern Ireland,Northern Ireland,Scotland,Scotland,Wales,Wales,Northern Ireland,Northern Ireland,Scotland,Scotland,...,North West,South East,South West,East of England,London,Midlands,North East and Yorkshire,North West,South East,South West
measure,deaths,deaths,deaths,deaths,deaths,deaths,cases,cases,cases,cases,...,deaths,deaths,deaths,deaths,deaths,deaths,deaths,deaths,deaths,deaths
Unnamed: 0_level_2,new,total,new,total,new,total,new,total,new,total,...,new,new,new,total,total,total,total,total,total,total
date,Unnamed: 1_level_3,Unnamed: 2_level_3,Unnamed: 3_level_3,Unnamed: 4_level_3,Unnamed: 5_level_3,Unnamed: 6_level_3,Unnamed: 7_level_3,Unnamed: 8_level_3,Unnamed: 9_level_3,Unnamed: 10_level_3,Unnamed: 11_level_3,Unnamed: 12_level_3,Unnamed: 13_level_3,Unnamed: 14_level_3,Unnamed: 15_level_3,Unnamed: 16_level_3,Unnamed: 17_level_3,Unnamed: 18_level_3,Unnamed: 19_level_3,Unnamed: 20_level_3,Unnamed: 21_level_3
2020-03-01,,,,,,,,,,,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2020-03-02,,,,,,,,,,,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2020-03-03,,,,,,,,,,,...,0.0,1.0,0.0,2.0,0.0,1.0,0.0,0.0,1.0,0.0
2020-03-04,,,,,,,,,,,...,0.0,0.0,0.0,2.0,0.0,1.0,0.0,0.0,1.0,0.0
2020-03-05,,,,,,,,,,,...,1.0,1.0,0.0,2.0,0.0,1.0,0.0,1.0,2.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2020-04-28,10.0,309.0,13.0,1262.0,17.0,813.0,,,,,...,49.0,37.0,15.0,2330.0,5021.0,3990.0,2708.0,2971.0,2345.0,915.0
2020-04-29,20.0,329.0,70.0,1332.0,73.0,886.0,,,,,...,57.0,35.0,12.0,2355.0,5060.0,4038.0,2770.0,3028.0,2380.0,927.0
2020-04-30,9.0,338.0,83.0,1415.0,22.0,908.0,,,,,...,41.0,20.0,19.0,2383.0,5088.0,4075.0,2816.0,3069.0,2400.0,946.0
2020-05-01,9.0,347.0,60.0,1475.0,17.0,925.0,,,,,...,14.0,4.0,1.0,2397.0,5098.0,4084.0,2839.0,3083.0,2404.0,947.0


In [8]:
# extract the relevant data, and parse it into a more user-friendly format, from the PHE dataset
england_regional_data = {
    region["name"]["value"]: {
        "daily_total_cases": (
            pd.DataFrame(
                region.get(
                    "dailyTotalConfirmedCases",
                    [{"date": None, "value": None}]
                )
            )
            # pd.to_datetime instead of a unit-less "datetime64" cast, which newer
            # pandas versions no longer accept
            .assign(date=lambda frame: pd.to_datetime(frame["date"]))
            .set_index("date")
            # one row per calendar day; gaps inside a series repeat the last known total
            .asfreq("D", method="ffill")
            .rename(columns={"value": region["name"]["value"]})
        ),
    }
    for region in covid_data["regions"].values()
}

# compile a dataframe of cases in England from the PHE JSON data
areas = england_regional_data.keys()
england_cases_df = pd.concat([england_regional_data[area]["daily_total_cases"] for area in areas], axis=1)

# A running total can only stay flat or grow: carry the last known value forward for
# any region whose series ends before the others, so it does not drop out of the merged
# regions below or fall back to 0. Days before a region's first report stay NaN here.
england_cases_df = england_cases_df.ffill()

# the PHE data divides England up into regions which are very similar but not exactly the same as NHS England regions.
# To make it comparable to other data we are using (for deaths), it would be best to merge regions together to
# match that of the NHS England regions. This isn't 100% accurate - South Cumbria is not quite in the right region,
# for example, but by and large it is comparable.
regions_to_merge = {
    "Midlands": ["West Midlands", "East Midlands"],
    "North East and Yorkshire": ["Yorkshire and The Humber", "North East"]
}

# create the new regions to match the NHS England regions before dropping the old constituent regions
# and doing a bit of house-keeping
for new_region, old_regions in regions_to_merge.items():
    england_cases_df[new_region] = england_cases_df[old_regions].sum(axis=1)

merged_away_regions = list(itertools.chain.from_iterable(regions_to_merge.values()))
england_cases_df = england_cases_df.drop(columns=merged_away_regions)
# no cases had been reported before a region's series starts
england_cases_df = england_cases_df.fillna(0.)
cols = pd.MultiIndex.from_product(
    [england_cases_df.columns, ["cases"], ["total"]], names=["area_name", "measure", "submeasure"]
)
england_cases_df.columns = cols

# the PHE data gives us the daily running total; add in a column for daily new cases
areas = england_cases_df.columns.get_level_values(0)
for area in areas:
    running_total = england_cases_df[area, "cases", "total"]
    # diff() has no value for the first day; everything counted by then is "new" on that day
    england_cases_df[area, "cases", "new"] = running_total.diff().fillna(running_total)

england_cases_df = (
    england_cases_df
    .sort_index(axis=1)
    .astype(int)
)

england_cases_df

area_name,East of England,East of England,London,London,Midlands,Midlands,North East and Yorkshire,North East and Yorkshire,North West,North West,South East,South East,South West,South West
measure,cases,cases,cases,cases,cases,cases,cases,cases,cases,cases,cases,cases,cases,cases
submeasure,new,total,new,total,new,total,new,total,new,total,new,total,new,total
date,Unnamed: 1_level_3,Unnamed: 2_level_3,Unnamed: 3_level_3,Unnamed: 4_level_3,Unnamed: 5_level_3,Unnamed: 6_level_3,Unnamed: 7_level_3,Unnamed: 8_level_3,Unnamed: 9_level_3,Unnamed: 10_level_3,Unnamed: 11_level_3,Unnamed: 12_level_3,Unnamed: 13_level_3,Unnamed: 14_level_3
2020-01-30,0,0,0,0,0,0,1,1,0,0,0,0,0,0
2020-01-31,0,0,0,0,0,0,0,1,0,0,1,1,0,0
2020-02-01,0,0,0,0,0,0,0,1,0,0,0,1,0,0
2020-02-02,0,0,0,0,0,0,0,1,0,0,0,1,0,0
2020-02-03,1,1,0,0,0,0,1,2,0,0,1,2,2,2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2020-04-27,226,10396,184,24281,418,19592,503,17488,380,18591,196,16776,128,6136
2020-04-28,184,10580,165,24446,372,19964,410,17898,399,18990,276,17052,109,6245
2020-04-29,135,10715,191,24637,309,20273,388,18286,344,19334,172,17224,109,6354
2020-04-30,63,10778,60,24697,116,20389,208,18494,99,19433,89,17313,46,6400


In [9]:
# merge the cases and deaths dataframes into one, aligned on date, with the columns
# sorted so that each area's measures sit next to each other.
# (`levels=` only has an effect together with `keys=`, so it is not passed here.)
deaths_and_cases_df = pd.concat([england_cases_df, deaths_df], axis=1)
deaths_and_cases_df = deaths_and_cases_df.sort_index(axis=1)
deaths_and_cases_df.tail()

area_name,East of England,East of England,East of England,East of England,London,London,London,London,Midlands,Midlands,...,South East,South East,South West,South West,South West,South West,Wales,Wales,Wales,Wales
measure,cases,cases,deaths,deaths,cases,cases,deaths,deaths,cases,cases,...,deaths,deaths,cases,cases,deaths,deaths,cases,cases,deaths,deaths
submeasure,new,total,new,total,new,total,new,total,new,total,...,new,total,new,total,new,total,new,total,new,total
date,Unnamed: 1_level_3,Unnamed: 2_level_3,Unnamed: 3_level_3,Unnamed: 4_level_3,Unnamed: 5_level_3,Unnamed: 6_level_3,Unnamed: 7_level_3,Unnamed: 8_level_3,Unnamed: 9_level_3,Unnamed: 10_level_3,Unnamed: 11_level_3,Unnamed: 12_level_3,Unnamed: 13_level_3,Unnamed: 14_level_3,Unnamed: 15_level_3,Unnamed: 16_level_3,Unnamed: 17_level_3,Unnamed: 18_level_3,Unnamed: 19_level_3,Unnamed: 20_level_3,Unnamed: 21_level_3
2020-04-28,184.0,10580.0,46.0,2330.0,165.0,24446.0,40.0,5021.0,372.0,19964.0,...,37.0,2345.0,109.0,6245.0,15.0,915.0,,,17.0,813.0
2020-04-29,135.0,10715.0,25.0,2355.0,191.0,24637.0,39.0,5060.0,309.0,20273.0,...,35.0,2380.0,109.0,6354.0,12.0,927.0,,,73.0,886.0
2020-04-30,63.0,10778.0,28.0,2383.0,60.0,24697.0,28.0,5088.0,116.0,20389.0,...,20.0,2400.0,46.0,6400.0,19.0,946.0,,,22.0,908.0
2020-05-01,7.0,10785.0,14.0,2397.0,3.0,24700.0,10.0,5098.0,17.0,20406.0,...,4.0,2404.0,5.0,6405.0,1.0,947.0,,,17.0,925.0
2020-05-02,,,,,,,,,,,...,,,,,,,,,44.0,969.0


In [10]:
# Population of every area, keyed by area name.
#
# Sources:
#   * NHS England population statistics:
#     https://www.ons.gov.uk/peoplepopulationandcommunity/populationandmigration/populationestimates/datasets/clinicalcommissioninggroupmidyearpopulationestimates
#   * Scotland, Wales and Northern Ireland population statistics:
#     https://www.ons.gov.uk/peoplepopulationandcommunity/populationandmigration/populationestimates
regional_populations = dict([
    ("London", 8_908_081),
    ("North West", 7_012_947),
    ("North East and Yorkshire", 8_566_925),
    ("Midlands", 10_537_679),
    ("East of England", 6_493_188),
    ("South East", 8_852_361),
    ("South West", 5_605_997),
    ("England", 55_977_178),
    ("Wales", 3_136_400),
    ("Scotland", 5_454_000),
    ("Northern Ireland", 1_876_000),
])

# the UK figure is the sum of its four constituent countries
countries = ["England", "Scotland", "Wales", "Northern Ireland"]
regional_populations["UK"] = sum(map(regional_populations.__getitem__, countries))

In [11]:
# add prevalence and case fatality rates
# Work on a copy so that `deaths_and_cases_df` keeps holding just the merged counts
# and is not modified behind the scenes by this cell.
complete_df = deaths_and_cases_df.copy()
areas = complete_df.columns.get_level_values("area_name").unique()
for area in areas:
    area_total_cases = complete_df[area, "cases", "total"]
    area_total_deaths = complete_df[area, "deaths", "total"]

    # prevalence: cumulative cases as a fraction of the area's population
    complete_df[area, "cases", "prevalence"] = area_total_cases / regional_populations[area]
    # case fatality rate: cumulative deaths per cumulative case. It is undefined while
    # there are no cases, so keep it as NaN rather than letting x / 0 produce inf.
    complete_df[area, "deaths", "case_fatality_rate"] = (
        area_total_deaths / area_total_cases.replace(0, np.nan)
    )
complete_df = complete_df.sort_index(axis=1)
complete_df.tail()

area_name,East of England,East of England,East of England,East of England,East of England,East of England,London,London,London,London,...,South West,South West,South West,South West,Wales,Wales,Wales,Wales,Wales,Wales
measure,cases,cases,cases,deaths,deaths,deaths,cases,cases,cases,deaths,...,cases,deaths,deaths,deaths,cases,cases,cases,deaths,deaths,deaths
submeasure,new,prevalence,total,case_fatality_rate,new,total,new,prevalence,total,case_fatality_rate,...,total,case_fatality_rate,new,total,new,prevalence,total,case_fatality_rate,new,total
date,Unnamed: 1_level_3,Unnamed: 2_level_3,Unnamed: 3_level_3,Unnamed: 4_level_3,Unnamed: 5_level_3,Unnamed: 6_level_3,Unnamed: 7_level_3,Unnamed: 8_level_3,Unnamed: 9_level_3,Unnamed: 10_level_3,Unnamed: 11_level_3,Unnamed: 12_level_3,Unnamed: 13_level_3,Unnamed: 14_level_3,Unnamed: 15_level_3,Unnamed: 16_level_3,Unnamed: 17_level_3,Unnamed: 18_level_3,Unnamed: 19_level_3,Unnamed: 20_level_3,Unnamed: 21_level_3
2020-04-28,184.0,0.001629,10580.0,0.220227,46.0,2330.0,165.0,0.002744,24446.0,0.205391,...,6245.0,0.146517,15.0,915.0,,,,,17.0,813.0
2020-04-29,135.0,0.00165,10715.0,0.219785,25.0,2355.0,191.0,0.002766,24637.0,0.205382,...,6354.0,0.145892,12.0,927.0,,,,,73.0,886.0
2020-04-30,63.0,0.00166,10778.0,0.221099,28.0,2383.0,60.0,0.002772,24697.0,0.206017,...,6400.0,0.147813,19.0,946.0,,,,,22.0,908.0
2020-05-01,7.0,0.001661,10785.0,0.222253,14.0,2397.0,3.0,0.002773,24700.0,0.206397,...,6405.0,0.147853,1.0,947.0,,,,,17.0,925.0
2020-05-02,,,,,,,,,,,...,,,,,,,,,44.0,969.0


In [15]:
HOVER_TEMPLATE = "%{text}<extra></extra>"
LOCKDOWN_DATE = datetime.date(2020, 3, 23)


def _day_month_labels(index):
    """Return "1 May"-style labels for the dates in `index`, as a Series aligned to it.

    Built from `Timestamp.day` because the "%-d" strftime directive is
    platform-specific and not supported everywhere.
    """
    return pd.Series([f"{ts.day} {ts:%B}" for ts in index], index=index)


def _long_date(timestamp):
    """Format a single date as e.g. "1 May 2020" (portable replacement for "%-d %B %Y")."""
    return f"{timestamp.day} {timestamp:%B %Y}"


def _trimmed_xs(frame, key, level):
    """Select `key` from the column `level`(s) of `frame`, dropping all-NaN rows and columns."""
    return (
        frame.xs(key, level=level, axis=1)
        .dropna(how="all", axis=0)  # trim the start and end dates
        .dropna(how="all", axis=1)  # drop columns with no data
    )


def _count_traces(counts_df, noun, visible):
    """Build one line per area showing its running total.

    `counts_df` has (area_name, submeasure) columns; the hover text shows the
    "total" value followed by the signed "new" value for the day.
    """
    day_labels = _day_month_labels(counts_df.index)
    traces = []
    for area in sorted(counts_df.columns.get_level_values("area_name").unique()):
        area_total = counts_df[area, "total"]
        area_new = counts_df[area, "new"]
        text = (
            day_labels + " | " + area + "<br>"
            + f"Total {noun}: " + area_total.apply("{:,.0f}".format)
            + " (" + area_new.apply("{:+,.0f}".format) + ")"
        )
        traces.append(
            go.Scatter(
                x=counts_df.index,
                y=area_total,
                name=area,
                visible=visible,
                text=text,
                hovertemplate=HOVER_TEMPLATE,
            )
        )
    return traces


def _rate_traces(rates_df, label):
    """Build one initially hidden line per column (area) of `rates_df`, hover shown as a percentage."""
    day_labels = _day_month_labels(rates_df.index)
    traces = []
    for area, area_rates in rates_df.items():
        text = (
            day_labels + " | " + area + "<br>"
            + f"{label}: " + area_rates.apply("{:.1%}".format)
        )
        traces.append(
            go.Scatter(
                x=area_rates.index,
                y=area_rates,
                name=area,
                visible=False,
                text=text,
                hovertemplate=HOVER_TEMPLATE,
            )
        )
    return traces


def _latest_totals(frame, measure):
    """Return the last row of the per-area running totals of `measure`.

    Areas without any data are dropped; remaining gaps are forward-filled first,
    so every area contributes its most recent known total.
    """
    return (
        frame.xs((measure, "total"), level=["measure", "submeasure"], axis=1)
        .dropna(how="all", axis=1)
        .ffill()
        .iloc[-1]
    )


def _headline_annotation(text):
    """Return the annotation dict for the headline figure in the top-left corner of the plot."""
    return dict(
        xref="paper",
        yref="paper",
        x=0.05,
        y=0.95,
        text=text,
        # A nested dict rather than the `font_color` shorthand: the shorthand is only
        # expanded by plotly.py for figure properties, not inside button `args`.
        font={"color": "#000"},
        showarrow=False,
        align="left",
    )


fig = go.Figure()

# --- traces: one group per button, only the total cases group is visible to begin with ---
total_cases_df = _trimmed_xs(complete_df, "cases", level="measure")
total_cases_traces = _count_traces(total_cases_df, "cases", visible=True)

total_deaths_df = _trimmed_xs(complete_df, "deaths", level="measure")
total_deaths_traces = _count_traces(total_deaths_df, "deaths", visible=False)

prevalence_df = _trimmed_xs(complete_df, ("cases", "prevalence"), level=["measure", "submeasure"])
prevalence_traces = _rate_traces(prevalence_df, "Prevalence")

case_fatality_rate_df = _trimmed_xs(
    complete_df, ("deaths", "case_fatality_rate"), level=["measure", "submeasure"]
)
case_fatality_rate_traces = _rate_traces(case_fatality_rate_df, "Case fatality rate")

# The order of the groups fixes the order of the traces in the figure, which the
# visibility masks of the buttons below rely on.
trace_groups = [total_cases_traces, total_deaths_traces, prevalence_traces, case_fatality_rate_traces]
for trace in itertools.chain.from_iterable(trace_groups):
    fig.add_trace(trace)

# add a vertical line showing when the UK went into "lockdown"...
fig.add_shape(
    type="line",
    xref="x",
    yref="paper",
    x0=LOCKDOWN_DATE,
    y0=0,
    x1=LOCKDOWN_DATE,
    y1=0.9,
    opacity=0.8,
)

# ...and then annotate it with some text
lockdown_annotation = dict(
    xref="x",
    yref="paper",
    x=LOCKDOWN_DATE,
    y=0.7,
    text="Lockdown commenced 23/03/2020",
    font={"color": "#000"},
    showarrow=True,
    align="center",
    arrowhead=2,
    arrowsize=1,
    arrowwidth=1,
    arrowcolor="#636363",
    ax=-20,
    ay=0,
    xanchor="right",
    opacity=0.8,
)

# --- headline figures shown in the top-left corner, one per button ---
total_cases_series = _latest_totals(complete_df, "cases")
latest_total_cases = total_cases_series.sum()
total_cases_annotation = _headline_annotation(
    f"Total England cases as of {_long_date(total_cases_series.name)}:<br>"
    f"{latest_total_cases:,.0f}"
)

total_deaths_series = _latest_totals(complete_df, "deaths")
latest_total_deaths = total_deaths_series.sum()
total_deaths_annotation = _headline_annotation(
    f"Total UK deaths as of {_long_date(total_deaths_series.name)}:<br>"
    f"{latest_total_deaths:,.0f}"
)

latest_total_prevalence = latest_total_cases / regional_populations["England"]
total_prevalence_annotation = _headline_annotation(
    f"Total England period prevalence as of {_long_date(total_cases_series.name)}:<br>"
    f"{latest_total_prevalence:.2%}"
)

# Only count the deaths of the areas that also have case data: dividing the deaths of
# every area by the cases of a subset of them would overstate the rate.
latest_deaths_where_cases_known = total_deaths_series.reindex(total_cases_series.index).sum()
latest_case_fatality_rate = latest_deaths_where_cases_known / latest_total_cases
total_case_fatality_annotation = _headline_annotation(
    f"Total England case fatality rate as of "
    f"{_long_date(max(total_cases_series.name, total_deaths_series.name))}:<br>"
    f"{latest_case_fatality_rate:.1%}"
)


def _only_group_visible(group_position):
    """Return a per-trace visibility mask that shows just the traces of one group."""
    return [
        position == group_position
        for position, group in enumerate(trace_groups)
        for _ in group
    ]


def _button(label, group_position, title, yaxis, headline_annotation):
    """Return the button that switches the figure over to one group of traces."""
    return {
        "label": label,
        "method": "update",
        "args": [
            {"visible": _only_group_visible(group_position)},
            {
                "title": title,
                "yaxis": yaxis,
                "annotations": [lockdown_annotation, headline_annotation],
            },
        ],
    }


fig.update_layout(
    updatemenus=[{
        "type": "buttons",
        "direction": "right",
        "x": 0.505,
        "y": 1.125,
        "buttons": [
            _button(
                "Cases",
                0,
                "Confirmed Covid-19 Cases Per Region",
                # same tick format as the initial layout below
                {"title": "Number of Confirmed Cases", "tickformat": ",d"},
                total_cases_annotation,
            ),
            _button(
                "Deaths",
                1,
                "Covid-19 Deaths Per Region",
                {"title": "Number of Deaths", "tickformat": ",d"},
                total_deaths_annotation,
            ),
            _button(
                "Prevalence",
                2,
                "Covid-19 Prevalence Per Region",
                {"title": "Prevalence", "tickformat": ".2%"},
                total_prevalence_annotation,
            ),
            _button(
                "Case Fatality Rate",
                3,
                "Covid-19 Case Fatality Rate Per Region",
                {"title": "Case Fatality Rate", "tickformat": ".1%"},
                total_case_fatality_annotation,
            ),
        ],
    }],
    title={
        "text": "Confirmed Covid-19 Cases Per Region",
        "x": 0.45,
    },
    xaxis={
        "title": "Date",
        "tickformat": '%d %b',
        "tickangle": -45,
    },
    yaxis={
        "title": "Number of Confirmed Cases",
        "tickformat": ",d",
    },
    hovermode="closest",
    annotations=[lockdown_annotation, total_cases_annotation],
)


fig.show()

In [21]:
# the one area that is drawn by default; every other area starts off in the legend only
default_area = "South West"
lockdown_day = datetime.date(2020, 3, 23)


def _default_visibility(areas):
    """Return plotly `visible` values for `areas`: True for the default area, "legendonly" otherwise."""
    return [True if area == default_area else "legendonly" for area in areas]


def _new_count_bars(new_counts_df, noun, visibility):
    """Build one bar trace per column (area) of `new_counts_df`.

    `visibility` holds the initial `visible` value of each trace, in column order.
    """
    # "1 May"-style labels built from `Timestamp.day`: the "%-d" strftime
    # directive is platform-specific and not supported everywhere.
    day_labels = pd.Series(
        [f"{ts.day} {ts:%B}" for ts in new_counts_df.index], index=new_counts_df.index
    )
    bars = []
    for (area, new_counts), visible in zip(new_counts_df.items(), visibility):
        text = (
            day_labels + " | " + area + "<br>"
            + f"New {noun}: " + new_counts.apply("{:,.0f}".format)
        )
        bars.append(
            go.Bar(
                x=new_counts.index,
                y=new_counts,
                name=area,
                visible=visible,
                text=text,
                hovertemplate="%{text}<extra></extra>"
            )
        )
    return bars


fig = go.Figure(
    layout={
        "title": {
            "text": "Daily Number of New Covid-19 Cases Per Region",
            "x": 0.5
        },
        "xaxis": {
            "title": "Date",
            "tickformat": '%d %b',
            "tickangle": -45
        },
        "yaxis": {
            "title": "Daily Number of New Cases",
            "tickformat": ',d',
        },
        "legend": {
            "x": 0,
            "y": 1,
            "bgcolor": "RGBA(0,0,0,0)"
        },
        "hovermode": "closest"
    }
)

new_cases_df = (
    complete_df.xs(("cases", "new"), level=["measure", "submeasure"], axis=1)
    .dropna(how="all", axis=0)  # trim the start and end dates
    .dropna(how="all", axis=1)  # drop columns with no data
)
new_cases_visibility = _default_visibility(new_cases_df.columns)
new_cases_traces = _new_count_bars(new_cases_df, "cases", new_cases_visibility)

new_deaths_df = (
    complete_df.xs(("deaths", "new"), level=["measure", "submeasure"], axis=1)
    .dropna(how="all", axis=0)  # trim the start and end dates
    .dropna(how="all", axis=1)  # drop columns with no data
)
new_deaths_visibility = _default_visibility(new_deaths_df.columns)
# hide new deaths traces by default; the "New Deaths" button reveals them
new_deaths_traces = _new_count_bars(new_deaths_df, "deaths", [False] * len(new_deaths_df.columns))

# the cases traces come first, then the deaths traces; the button masks below follow that order
for trace in new_cases_traces + new_deaths_traces:
    fig.add_trace(trace)

# add a vertical line showing when the UK went into "lockdown"...
fig.add_shape(
    type="line",
    xref="x",
    yref="paper",
    x0=lockdown_day,
    y0=0,
    x1=lockdown_day,
    y1=0.9,
    opacity=0.8,
)

# ...and then annotate it with some text
fig.add_annotation(
    xref="x",
    yref="paper",
    x=lockdown_day,
    y=0.45,
    text="Lockdown commenced 23/03/2020",
    font_color="#000",
    showarrow=True,
    align="center",
    arrowhead=2,
    arrowsize=1,
    arrowwidth=1,
    arrowcolor="#636363",
    ax=-20,
    ay=0,
    xanchor="right",
    opacity=0.8,
)

# The masks are derived from the traces themselves instead of hard-coded counts, and
# each button restores the same default-area highlighting as the initial view.
fig.update_layout(
    updatemenus=[{
        "type": "buttons",
        "direction": "right",
        "x": 0.225,
        "y": 1.125,
        "buttons": [
            {
                "label": "New Cases",
                "method": "update",
                "args": [
                    {
                        "visible": new_cases_visibility + [False] * len(new_deaths_traces)
                    },
                    {
                        "title": "Daily Number of New Covid-19 Cases Per Region",
                        "yaxis": {"title": "Number of New Confirmed Cases", "tickformat": ",d"},
                    }
                ]
            },
            {
                "label": "New Deaths",
                "method": "update",
                "args": [
                    {
                        "visible": [False] * len(new_cases_traces) + new_deaths_visibility,
                    },
                    {
                        "title": "Daily Number of New Covid-19 Deaths Per Region",
                        "yaxis": {"title": "Number of New Deaths", "tickformat": ",d"},
                    }
                ]
            }
        ]
    }]
)

fig