In [None]:
import json
import numpy as np
import pandas as pd
import geopandas as gpd
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
from mpl_toolkits.axes_grid1 import make_axes_locatable
import seaborn as sns

# import cartopy.crs as crs
# import holoviews  # noqa: F401
# import hvplot.pandas  # noqa: F401
# from holoviews import opts  # noqa: F401

import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots

from covis.utils import get_project_root

# regional total deaths and deaths due to covid19, 2020-2022
* again, the data in the files do not have the same schema / format, so we cannot write a common function and will investigate each individually

In [None]:
# map regions: load the mapping that is later passed to `rename(columns=eer)`
# to relabel the region columns of the 2020 and 2021 sheets.
jpath = get_project_root() / "output/eer_dict.json"
# JSON text is UTF-8 by specification, so state the encoding explicitly
# instead of relying on the platform-dependent default of open().
with open(jpath, encoding="utf-8") as f:
    eer = json.load(f)

eer

## regional weekly total deaths

In [None]:
# Row indices to keep: 4 and 5, followed by the block 86-95.
# The first kept row (index 4) supplies the column labels.
rows = [4, 5, *range(86, 96)]

weekly_reg_total_2020 = (
    pd.read_excel(
        get_project_root() / "data/publishedweek532020.xlsx",
        sheet_name="Weekly figures 2020",
        skiprows=lambda idx: idx not in rows,
        usecols=lambda name: name not in ["Unnamed: 1"],
    )
    # one row per week after the transpose, one column per original sheet row
    .set_index("Week number")
    .transpose()
    .rename(columns={"Total deaths, all ages":"Total deaths, all ages (2020)"})
    .rename(columns=eer)
)

print(f"total deaths 2020\n{weekly_reg_total_2020.shape}")
weekly_reg_total_2020.head()

In [None]:
# [c for c in weekly_reg_total_2021.columns if c not in ["Week ended"]] == [k for k in eer.keys()]

In [None]:
# eer.keys()

In [None]:
# weekly_reg_total_2020.dtypes

In [None]:
# Row indices to keep: 4 and 5, followed by the block 82-91.
# The first kept row (index 4) supplies the column labels.
rows = [4, 5, *range(82, 92)]

weekly_reg_total_2021 = (
    pd.read_excel(
        get_project_root() / "data/publishedweek522021.xlsx",
        sheet_name="Weekly figures 2021",
        skiprows=lambda idx: idx not in rows,
        # two columns of this sheet are not wanted
        usecols=lambda name: name not in ["53 7", "Unnamed: 1"],
    )
    # one row per week after the transpose, one column per original sheet row
    .set_index("Week number")
    .transpose()
    .rename(columns={"Total deaths, all ages":"Total deaths, all ages (2021)"})
    .rename(columns=eer)
)

print(f"total deaths 2021\n{weekly_reg_total_2021.shape}")
weekly_reg_total_2021.head()

In [None]:
# weekly_reg_total_2021.dtypes

#### format of the file for 2022 is completely different:

In [None]:
# The 2022 workbook is already laid out one row per week, so no transpose is
# needed; only row indices 6-58 of sheet "12a" are read.
weekly_reg_total_2022 = pd.read_excel(
    get_project_root() / "data/publicationfileweek522022.xlsx",
    sheet_name="12a",
    skiprows=lambda idx: idx not in range(6, 59),
    index_col="Week number",
).rename(
    # align the labels with the names used for the 2020 / 2021 frames
    columns={"East of England":"Eastern", "Week ending":"Week ended"}
)

print(f"total deaths 2022\n{weekly_reg_total_2022.shape}")
weekly_reg_total_2022.head()

## regional deaths due to COVID-19

In [None]:
# Row indices to keep: 4 and 5, followed by the block 76-85.
# The first kept row (index 4) supplies the column labels.
rows = [4, 5, *range(76, 86)]

weekly_reg_covid19_2020 = (
    pd.read_excel(
        get_project_root() / "data/publishedweek532020.xlsx",
        sheet_name="Covid-19 - Weekly registrations",
        skiprows=lambda idx: idx not in rows,
        usecols=lambda name: name not in ["Unnamed: 1"],
    )
    # one row per week after the transpose, one column per original sheet row
    .set_index("Week number")
    .transpose()
    .rename(columns={"covid19 deaths, all ages":"covid19 deaths, all ages (2020)"})
    .rename(columns=eer)
)

print(f"covid19 deaths 2020\n{weekly_reg_covid19_2020.shape}")
weekly_reg_covid19_2020.head()

In [None]:
# Row indices to keep: 4 and 5, followed by the block 76-85.
# The first kept row (index 4) supplies the column labels.
rows = [4, 5, *range(76, 86)]

weekly_reg_covid19_2021 = (
    pd.read_excel(
        get_project_root() / "data/publishedweek522021.xlsx",
        sheet_name="Covid-19 - Weekly registrations",
        skiprows=lambda idx: idx not in rows,
        usecols=lambda name: name not in ["Unnamed: 1"],
    )
    # one row per week after the transpose, one column per original sheet row
    .set_index("Week number")
    .transpose()
    .rename(columns={"covid19 deaths, all ages":"covid19 deaths, all ages (2021)"})
    .rename(columns=eer)
)

print(f"covid19 deaths 2021\n{weekly_reg_covid19_2021.shape}")
weekly_reg_covid19_2021.head()

In [None]:
# Same sheet ("12a") as the 2022 totals, but a different block of rows:
# only row indices 61-113 are read.
weekly_reg_covid19_2022 = pd.read_excel(
    get_project_root() / "data/publicationfileweek522022.xlsx",
    sheet_name="12a",
    skiprows=lambda idx: idx not in range(61, 114),
    index_col="Week number",
).rename(
    # align the labels with the names used for the 2020 / 2021 frames
    columns={"East of England":"Eastern", "Week ending":"Week ended"}
)

print(f"covid19 deaths 2022\n{weekly_reg_covid19_2022.shape}")
weekly_reg_covid19_2022.head()

---
## concat dfs

In [None]:
# Stack the three yearly frames row-wise, in chronological order.
# The week-number index is kept as-is, so it repeats from year to year.
weekly_reg_total = pd.concat(
    (weekly_reg_total_2020, weekly_reg_total_2021, weekly_reg_total_2022)
)
weekly_reg_covid19 = pd.concat(
    (weekly_reg_covid19_2020, weekly_reg_covid19_2021, weekly_reg_covid19_2022)
)

In [None]:
# weekly_reg_total.head()

### regional deaths over time

In [None]:
sns.set_style("whitegrid")
sns.set_palette("tab20")

# Two stacked panels: all deaths on top, covid19 deaths underneath.
fig = plt.figure(figsize=(9,5))
ax1 = fig.add_subplot(211)
ax2 = fig.add_subplot(212)

# Columns that are not individual regions. "england_wales" is an aggregate
# added to these frames further down the notebook; excluding it keeps this
# cell correct when it is re-run after that aggregate has been created.
non_region_cols = ["Week ended", "england_wales"]
region_cols = [col for col in weekly_reg_total.columns if col not in non_region_cols]

# Draw both panels from the same ordered column list so that a region gets the
# same position in the colour cycle on each axes. Only the top panel carries
# labels, so the shared figure legend lists each region once.
for col in region_cols:
    ax1.plot(
        weekly_reg_total["Week ended"],
        weekly_reg_total[col],
        label=col
    )
    ax2.plot(
        weekly_reg_covid19["Week ended"],
        weekly_reg_covid19[col],
    )

# one major tick every six months on both panels
for ax in [ax1,ax2]:
    ax.xaxis.set_major_locator(mdates.MonthLocator(interval=6))

ax1.set_ylabel("total deaths recorded")
ax2.set_ylabel("covid19 deaths recorded")

ax2.set_xlabel("time")

# legend sits below the panels, centred horizontally
fig.legend(ncol=5, loc="lower center", bbox_to_anchor=(0.5,-0.1))
fig.suptitle("regional total deaths and regional covid19 deaths 2020-2022")

fig.tight_layout();

In [None]:
# fig.savefig(
#     get_project_root() / "figures/reg_tot_reg_covid_time.png"
# )

In [None]:
# [col for col in weekly_reg_total.columns if col not in ["Week ended"]]

---
## non-regional (England and Wales) weekly deaths 2020-2022: part-to-whole

In [None]:
# Row-wise sum over the region columns gives the combined weekly figure.
# "england_wales" itself is excluded from the columns being summed: without
# that, re-running this cell would add the previous aggregate into the new one
# and double the totals.
non_region_cols = ["Week ended", "england_wales"]

weekly_reg_total["england_wales"] = weekly_reg_total[
    [col for col in weekly_reg_total.columns if col not in non_region_cols]
].sum(axis=1)

weekly_reg_covid19["england_wales"] = weekly_reg_covid19[
    [col for col in weekly_reg_covid19.columns if col not in non_region_cols]
].sum(axis=1)

In [None]:
# weekly_reg_covid19.head()

In [None]:
# Pair the total and covid19 aggregates week by week (inner join on the date).
# The shared column name "england_wales" is disambiguated by the suffixes.
weekly_england_wales = pd.merge(
    weekly_reg_total[["Week ended", "england_wales"]],
    weekly_reg_covid19[["Week ended", "england_wales"]],
    on="Week ended",
    suffixes=("_total", "_covid19"),
)

In [None]:
# separate covid and non-covid deaths for the stackplot:
weekly_england_wales["england_wales_non_covid"] = weekly_england_wales["england_wales_total"].sub(
    weekly_england_wales["england_wales_covid19"]
)

# share of deaths due to covid, as a fraction of the weekly total (0-1)
weekly_england_wales["england_wales_pc_covid"] = weekly_england_wales["england_wales_covid19"].div(
    weekly_england_wales["england_wales_total"]
)

In [None]:
# first 30 rows of the merged weekly table
weekly_england_wales.iloc[:30]

In [None]:
# Single-panel stacked area chart: non-covid deaths at the bottom,
# covid19 deaths stacked on top, so the outline is the weekly total.
fig, ax = plt.subplots(figsize=(7,5))

ax.stackplot(
    weekly_england_wales["Week ended"],
    weekly_england_wales["england_wales_non_covid"],
    weekly_england_wales["england_wales_covid19"],
    labels=[
        "deaths not related to covid19",
        "deaths relating to covid19"
    ],
)

# one major tick every six months
ax.xaxis.set_major_locator(mdates.MonthLocator(interval=6))
ax.set(xlabel="time", ylabel="number of deaths recorded")

fig.suptitle("weekly covid19 and non covid19 related deaths, $2020$-$2022$")

ax.legend()
fig.tight_layout();

### a twin axis can be used to add the \% of deaths due to covid

In [None]:
# largest weekly covid19 share of deaths
weekly_england_wales.loc[:, "england_wales_pc_covid"].max()

In [None]:
# the week(s) in which the covid19 share of deaths reached its maximum
weekly_england_wales[
    weekly_england_wales["england_wales_pc_covid"].eq(
        weekly_england_wales["england_wales_pc_covid"].max()
    )
]

In [None]:
sns.set_style("white")
sns.set_palette("tab20")

# ax1 carries the stacked death counts; ax2 shares its x-axis and carries the
# covid19 share on an independent y-scale.
fig = plt.figure(figsize=(9,5))
ax1 = fig.add_subplot(111)
ax2=ax1.twinx()

ax1.stackplot(
    weekly_england_wales["Week ended"],
    [
        weekly_england_wales["england_wales_non_covid"],
        weekly_england_wales["england_wales_covid19"]
    ],
    labels=[
        "deaths not related to covid19",
        "deaths relating to covid19"
    ]
)

ax2.plot(
    weekly_england_wales["Week ended"],
    weekly_england_wales["england_wales_pc_covid"],
    ls="--",
    lw=1.3,
    c="tab:orange",
    label="percentage of deaths due to covid19"
)

# one major tick every six months
ax1.xaxis.set_major_locator(mdates.MonthLocator(interval=6))
# ax1.xaxis.set_minor_locator(mdates.MonthLocator(interval=1))

ax1.set_xlabel("date")
ax1.set_ylabel("number of deaths recorded")
ax1.set_ylim(0,25000)
ax2.set_ylabel("percentage of deaths relating to covid19")
# the plotted share is a fraction of the weekly total, hence the 0-0.5 range
ax2.set_ylim(0,0.5)
# ax2.set_yticks(np.linspace(0,0.20,5))

# xticks, _ = plt.xticks(minor=True)
# for x0, x1 in zip(xticks[::2], xticks[1::2]):
#     ax1.axvspan(x0, x1, color="black", alpha=0.1, zorder=0, lw=0)

# ax1.set_axisbelow(True)
# ax2.set_axisbelow(True)
# plt.grid(which='major', axis='y', zorder=-3)
# ax2.set_zorder(-1)
# ax.patch.set_visible(True)
# ax2.patch.set_visible(False)

fig.suptitle("weekly covid19 and non-covid19 related deaths, 2020-2022")
# handling the legend https://stackoverflow.com/questions/5484922/secondary-axis-with-twinx-how-to-add-to-legend
# A figure-level legend collects the handles of both twin axes. It is anchored
# in the coordinates of this cell's own axes (ax1); the bare name `ax` is not
# assigned in this cell and would refer to an axes left over from an earlier
# figure.
fig.legend(loc="upper right", bbox_to_anchor=(1.24,0.99), bbox_transform=ax1.transAxes)
fig.tight_layout();

In [None]:
# fig.savefig(
#     get_project_root() / "figures/weekly_covid_non_covid.png"
# )

In [None]:
# inspect rows at positions 6-19 of the merged weekly table
weekly_england_wales.iloc[6:20]

---
## covid19 deaths per year as a percentage of total deaths 2020-2022

In [None]:
# dfs = [weekly_reg_total_2020, weekly_reg_total_2021, weekly_reg_total_2022, weekly_reg_covid19_2020, weekly_reg_covid19_2021, weekly_reg_covid19_2022]

In [None]:
# Column sums over all weeks of 2020, reshaped into a single row.
# The "Week ended" date column is dropped before summing: dates cannot be
# added together, and recent pandas raises a TypeError rather than silently
# skipping such a column. errors="ignore" keeps this safe if it is absent.
total_2020 = pd.DataFrame(
    weekly_reg_total_2020.drop(columns="Week ended", errors="ignore").sum()
).transpose()
# label used as the row key once the yearly summaries are concatenated
total_2020["desc"] = ["2020 total"]
total_2020

In [None]:
# total_2020.dtypes

In [None]:
def _yearly_sum(weekly, label):
    """Collapse a weekly frame into a one-row frame of column sums.

    Parameters
    ----------
    weekly : pandas.DataFrame
        Weekly figures, one row per week.
    label : str
        Value stored in the "desc" column of the returned row.

    Returns
    -------
    pandas.DataFrame
        A single row holding the sum of every column except "Week ended",
        plus a "desc" column set to ``label``.
    """
    # Dates cannot be summed; recent pandas raises a TypeError instead of
    # silently skipping the column, so remove it explicitly first.
    summed = pd.DataFrame(
        weekly.drop(columns="Week ended", errors="ignore").sum()
    ).transpose()
    summed["desc"] = [label]
    return summed


total_2021 = _yearly_sum(weekly_reg_total_2021, "2021 total")
total_2022 = _yearly_sum(weekly_reg_total_2022, "2022 total")
covid19_2020 = _yearly_sum(weekly_reg_covid19_2020, "2020 covid")
covid19_2021 = _yearly_sum(weekly_reg_covid19_2021, "2021 covid")
covid19_2022 = _yearly_sum(weekly_reg_covid19_2022, "2022 covid")

In [None]:
# yearly one-row summaries: the three totals first, then the three covid19 sums
dfs = [
    total_2020,
    total_2021,
    total_2022,
    covid19_2020,
    covid19_2021,
    covid19_2022,
]

In [None]:
# join the yearly summaries into one table keyed by their description
summary = pd.concat(dfs).set_index("desc")

# covid19 deaths as a share of total deaths, appended as one new row per year
for year in ("2020", "2021", "2022"):
    summary.loc[f"{year}_covid_pc"] = summary.loc[f"{year} covid"] / summary.loc[f"{year} total"]

# flip the table so the summary labels become the columns
summary = summary.transpose()
summary

In [None]:
# the datatype seemed to be a problem after summing, so cast every column to float
summary = summary.astype(float)

In [None]:
# inspect the column dtypes of the summary table
summary.dtypes

## plot choropleth for UK regional \% deaths due to covid

In [None]:
# Load the region geometries used for the choropleth maps.
# NOTE(review): the file carries a .pkl extension but is read with
# read_parquet — confirm the on-disk format really is Parquet.
geo = gpd.read_parquet(
    get_project_root() / "output/EWgeo.pkl",
)

In [None]:
# Label the geometries with EPSG:27700. set_crs only assigns the CRS; it does
# not reproject the coordinates.
geo = geo.set_crs("EPSG:27700")

In [None]:
# The GeoDataFrame is the left operand so that the merged result keeps its
# geometry; the "EER13NM" column is matched against the index of `summary`.
geo_summary = geo.merge(summary, left_on="EER13NM", right_index=True)

geo_summary

## choropleth map (nat regional)

In [None]:
# reproject the merged frame to EPSG:4326 before mapping
geo_summary = geo_summary.to_crs("EPSG:4326")

In [None]:
# One map per year, side by side.
fig, (ax1, ax2, ax3) = plt.subplots(1, 3, figsize=(12,4.5))

# Colourbar settings shared by the three maps.
# NOTE(review): the second name `legend_kwds` is also bound to this dict; it
# looks like an accidental alias — confirm nothing else relies on it.
cbar_adj = legend_kwds = dict(
    shrink=.5,
    orientation="horizontal",
    location="bottom",
    pad=-0.005,
)

for year, a in zip(("2020", "2021", "2022"), (ax1, ax2, ax3)):
    a.axis("off")
    # colour each region by that year's covid19 share of deaths
    geo_summary.plot(
        ax=a,
        column=f"{year}_covid_pc",
        cmap="Reds",
        edgecolor="0.1",
        linewidth=0.5,
        legend=True,
        legend_kwds=cbar_adj,
    )
    a.set_title(f"{year}")

fig.suptitle("UK regional percentage of deaths due to covid 2020-2022")
fig.tight_layout()

In [None]:
# fig.savefig(
#     get_project_root() / "figures/choro.png"
# )

In [None]:
# Interactive version of the 2020 map. Regions are matched to their geometries
# through the frame's index, and the view is fitted to those geometries with
# the base map hidden.
fig = px.choropleth(
    geo_summary,
    geojson=geo_summary.geometry,
    locations=geo_summary.index,
    color="2020_covid_pc",
    color_continuous_scale="reds",
    hover_name="EER13NM",
    hover_data=["2020 total", "2020 covid", "2020_covid_pc"],
).update_geos(fitbounds="geojson", visible=False)

fig

In [None]:
# fig.write_html(get_project_root()/"output/choro.html")

### regional population density

|Region                   | Number of people per square km |
|:------------------------|:-------------------------------|
|London                   | 5,596                          |
|North West               | 526                            |
|South East               | 487                            |
|West Midlands            | 458                            |
|Yorkshire and The Humber | 356                            |
|Eastern                  | 332                            |
|North East               | 308                            |
|East Midlands            | 312                            |
|South West               | 240                            |
|Wales                    | 150                           