### Input.csv contains 

- Age 
- Sex 
- bmi value 
- Care Home type (PN, PC, PS, U) 
- Ethnicity (6 categories) 
- MSOA 
- STP 
- IMD (rounded to nearest 100) 
- rural/urban 
- Condition Flags:
    - bone_marrow_transplant
    - cancer_excl_lung_and_haem
    - chemo_or_radio
    - chronic_kidney_disease
    - chronic_liver_disease
    - chronic_cardiac_disease
    - current_copd
    - cystic_fibrosis
    - dementia
    - diabetes
    - dialysis
    - haematological_cancer
    - heart_failure
    - intel_dis_incl_downs_syndrome
    - lung_cancer
    - non_severe_asthma
    - other_heart_disease
    - other_neuro
    - other_respiratory_conditions
    - permanant_immunosuppression
    - temporary_immunosuppression
    - severe_asthma
    - sickle_cell_disease
    - solid_organ_transplantation
    - stroke

## Packages

In [None]:
from random import randint

import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
import seaborn as sns

import folium
import geopandas as gpd

## Data

In [None]:
# Patient-level extract (one row per person) produced by the study definition.
df = pd.read_csv(filepath_or_buffer="../output/input.csv")
# Workforce reference table; later cells filter it on its "workforce" and
# "metric" columns and read its "value" column.
df_workforce = pd.read_csv(filepath_or_buffer="./data/demographics.csv")

In [None]:
# Names of the condition-flag columns in df. The list is used below to turn
# the raw columns into 0/1 flags and to count conditions per row.
# order matches codelist.py
condition_cols = [
    "solid_organ_transplantation",
    "chemo_or_radio",
    "lung_cancer",
    "cancer_excl_lung_and_haem",
    "haematological_cancer",
    "bone_marrow_transplant",
    "cystic_fibrosis",
    "severe_asthma",
    "current_copd",
    "sickle_cell_disease",
    "permanant_immunosuppression",
    "temporary_immunosuppression",
    "chronic_cardiac_disease",
    "intel_dis_incl_downs_syndrome",
    "dialysis",
    "non_severe_asthma",
    "other_respiratory_conditions",
    "heart_failure",
    "other_heart_disease",
    "diabetes",
    "chronic_kidney_disease",
    "chronic_liver_disease",
    "other_neuro",
    "dementia",
    "stroke",
]

In [None]:
# Clean - replace NaNs
# Condition columns become 0/1 flags: any non-null entry -> 1, missing -> 0.
# The columns are replaced via df[cols] rather than df.loc[:, cols], because
# .loc[:, cols] writes into the existing columns and can keep their old dtype
# instead of the integer dtype requested here.
df[condition_cols] = df[condition_cols].notnull().astype(int)

# Sentinel used to mark a missing ethnicity / BMI value.
senti_val = -99

# Assign the filled column back to df. fillna(inplace=True) called on the
# result of df.loc[:, col] may operate on a temporary copy and leave df
# untouched (and warns on recent pandas).
df["ethnicity"] = df["ethnicity"].fillna(value=senti_val)
df["bmi"] = df["bmi"].fillna(value=senti_val)

## Feature Generation

Set a risk level from the number of condition flags: 0 conditions = low, 1-2 = medium, 3 or more = high.  

In [None]:
# Number of condition columns set for each row.
df["num_conditions"] = df.loc[:, condition_cols].sum(axis=1)

# Risk level by number of conditions: 0 -> low, 1-2 -> medium, 3+ -> high.
conditions = [
    (df["num_conditions"] == 0),
    (df["num_conditions"] > 0) & (df["num_conditions"] < 3),
    (df["num_conditions"] >= 3),
]
values = ["low", "medium", "high"]

# The default must be a string like the choices: np.select's implicit default
# of 0 cannot be combined with string choices on recent NumPy (TypeError).
# "unknown" is only used for a row matching none of the conditions above.
df["risk"] = np.select(conditions, values, default="unknown")

Set Priority Flag based on September JCVI advice (excluding workforce considerations)

In [None]:
# Building blocks for the priority rules.
age_years = df["age"]
care_home_resident = df["care_home_type"] != "U"
not_in_care_home = df["care_home_type"] == "U"
under_65 = age_years < 65
low_risk = df["risk"] == "low"

# np.select assigns the value of the FIRST condition that matches, so the
# order of this list is the priority order (1 = highest priority). Rows that
# match no condition get np.select's default of 0.
conditions = [
    (age_years >= 65) & care_home_resident,
    (age_years >= 80) & not_in_care_home,
    (age_years >= 75) & not_in_care_home,
    (age_years >= 70) & not_in_care_home,
    (age_years >= 65) & not_in_care_home,
    under_65 & (df["risk"] == "high"),
    under_65 & (df["risk"] == "medium"),
    (age_years >= 60) & low_risk,
    (age_years >= 55) & low_risk,
    (age_years >= 50) & low_risk,
    (age_years < 50),
]
# Priority groups 1..11, one per condition above.
values = [group for group in range(1, 12)]

df["priority"] = np.select(conditions, values)

In [None]:
# Number of rows in each priority group, indexed by group number 1..11.
# Reindexing guarantees every group is present (with a count of 0 if empty):
# later cells read freq[1]..freq[11] directly and the waterfall chart supplies
# exactly 11 tick labels, so a missing group would otherwise raise.
# NOTE: rows with any other priority value (e.g. the np.select default of 0)
# are not included in this table.
freq = df["priority"].value_counts().reindex(range(1, 12), fill_value=0)

print(freq)

Create workforce calculations for age bands (including risk exclusions)

In [None]:
# As we need to adapt profiles, we need to first have a factor to use for the dummy data:
# the number of rows in df divided by the England "all" value in df_workforce.
# .item() pulls out the single matching value explicitly (and raises if there
# is not exactly one); calling int() directly on a one-element Series is
# deprecated in pandas.
england_all = df_workforce.loc[
    (df_workforce["workforce"] == "England") & (df_workforce["metric"] == "all"),
    "value",
].item()
factor = len(df.index) / int(england_all)

# From the calculation above, we also need a factor for the number of higher risk people:
# the share of all rows that fall in priority groups 6 and 7. A group with no
# rows counts as 0 instead of raising KeyError.
risk_proportion = (freq.get(6, 0) + freq.get(7, 0)) / len(df.index)

In [None]:
# The "all" value of every workforce, keyed by workforce name. Looking values
# up by label yields a scalar, which avoids int() on a one-element Series
# (deprecated in pandas) and removes six copies of the same filter.
all_by_workforce = df_workforce.loc[df_workforce["metric"] == "all"].set_index(
    "workforce"
)["value"]

# Each total is the integer "all" value scaled by `factor`.

# Calculate total for care home workers
chw = int(all_by_workforce["SocialCare_Residential"]) * factor

# Calculate total for non residential social care
sc = int(all_by_workforce["SocialCare_NonResidential"]) * factor

#  Calculate total Primary care workers
pw = int(all_by_workforce["PrimaryCare"]) * factor

# Calculate total for health and care workers
hcw = int(all_by_workforce["HCHS"]) * factor

# Calculate total for dentists and DCP
dent = int(all_by_workforce["Dentists"]) * factor

# Calculate total community pharmacists
phar = int(all_by_workforce["CommunityPharmacy"]) * factor

In [None]:
# 5-year age-bands for workforce (assume no workforce above 70)
# Residential social care values keyed by metric name. Label lookup returns a
# scalar, avoiding float() on a one-element Series (deprecated in pandas).
chw_by_metric = df_workforce.loc[
    df_workforce["workforce"] == "SocialCare_Residential"
].set_index("metric")["value"]

# The "under_55" value is divided between the 18-50 and 50-55 bands in the
# ratio 32:5.
chw_1850 = 32 / 37 * float(chw_by_metric["under_55"])
chw_5055 = 5 / 37 * float(chw_by_metric["under_55"])

chw_5560 = float(chw_by_metric["55_59"])
chw_6065 = float(chw_by_metric["60_64"])
# "65+" is used as the 65-70 band, per the assumption above.
chw_6570 = float(chw_by_metric["65+"])

In [None]:
# Non-residential social care values keyed by metric name. Label lookup
# returns a scalar, avoiding float() on a one-element Series (deprecated in
# pandas).
sc_by_metric = df_workforce.loc[
    df_workforce["workforce"] == "SocialCare_NonResidential"
].set_index("metric")["value"]

# The "under_55" value is divided between the 18-50 and 50-55 bands in the
# ratio 32:5.
sc_1850 = 32 / 37 * float(sc_by_metric["under_55"])
sc_5055 = 5 / 37 * float(sc_by_metric["under_55"])

sc_5560 = float(sc_by_metric["55_59"])
sc_6065 = float(sc_by_metric["60_64"])
sc_6570 = float(sc_by_metric["65+"])

In [None]:
# Primary care values keyed by metric name. Label lookup returns a scalar,
# avoiding float() on a one-element Series (deprecated in pandas).
pw_by_metric = df_workforce.loc[
    df_workforce["workforce"] == "PrimaryCare"
].set_index("metric")["value"]

# The "under_55" value is divided between the 18-50 and 50-55 bands in the
# ratio 32:4.
# NOTE(review): the social care cells split 32:5 (denominator 37) while this
# one uses 32:4 (denominator 36) - confirm the difference is intended.
pw_1850 = 32 / 36 * float(pw_by_metric["under_55"])
pw_5055 = 4 / 36 * float(pw_by_metric["under_55"])

pw_5560 = float(pw_by_metric["55_59"])
pw_6065 = float(pw_by_metric["60_64"])
pw_6570 = float(pw_by_metric["65+"])

In [None]:
# HCHS values keyed by metric name. Label lookup returns a scalar, avoiding
# float() on a one-element Series (deprecated in pandas).
hcw_by_metric = df_workforce.loc[
    df_workforce["workforce"] == "HCHS"
].set_index("metric")["value"]

# The "under_55" value is divided between the 18-50 and 50-55 bands in the
# ratio 32:4.
# NOTE(review): the social care cells split 32:5 (denominator 37) while this
# one uses 32:4 (denominator 36) - confirm the difference is intended.
hcw_1850 = 32 / 36 * float(hcw_by_metric["under_55"])
hcw_5055 = 4 / 36 * float(hcw_by_metric["under_55"])

hcw_5560 = float(hcw_by_metric["55_59"])
hcw_6065 = float(hcw_by_metric["60_64"])
hcw_6570 = float(hcw_by_metric["65+"])

In [None]:
# Dentists values keyed by metric name. Label lookup returns a scalar,
# avoiding float() on a one-element Series (deprecated in pandas).
dent_by_metric = df_workforce.loc[
    df_workforce["workforce"] == "Dentists"
].set_index("metric")["value"]

# The "under_55" value is divided between the 18-50 and 50-55 bands in the
# ratio 32:4.
# NOTE(review): the social care cells split 32:5 (denominator 37) while this
# one uses 32:4 (denominator 36) - confirm the difference is intended.
dent_1850 = 32 / 36 * float(dent_by_metric["under_55"])
dent_5055 = 4 / 36 * float(dent_by_metric["under_55"])

dent_5560 = float(dent_by_metric["55_59"])
dent_6065 = float(dent_by_metric["60_64"])
dent_6570 = float(dent_by_metric["65+"])

In [None]:
# Community pharmacy values keyed by metric name. Label lookup returns a
# scalar, avoiding float() on a one-element Series (deprecated in pandas).
phar_by_metric = df_workforce.loc[
    df_workforce["workforce"] == "CommunityPharmacy"
].set_index("metric")["value"]

# The "under_55" value is divided between the 18-50 and 50-55 bands in the
# ratio 32:4.
# NOTE(review): the social care cells split 32:5 (denominator 37) while this
# one uses 32:4 (denominator 36) - confirm the difference is intended.
phar_1850 = 32 / 36 * float(phar_by_metric["under_55"])
phar_5055 = 4 / 36 * float(phar_by_metric["under_55"])

phar_5560 = float(phar_by_metric["55_59"])
phar_6065 = float(phar_by_metric["60_64"])
phar_6570 = float(phar_by_metric["65+"])

Need to ensure no negatives!

In [None]:
# Workforce-adjusted counts per priority group.
# Start from a float COPY of freq. A plain `freq_wf = freq` only creates a
# second name for the same Series, so every assignment below would overwrite
# freq as well and the before/after comparison would show identical data.
# The adjustments are fractional, hence the float dtype.
freq_wf = freq.astype(float)

# Workforce totals, in the order used by every share tuple below.
workforce_totals = (chw, hcw, pw, sc, dent, phar)


def _band_headcount(shares):
    """Return sum(total * share) over the six workforces.

    `shares` must list one value per workforce in the same order as
    `workforce_totals` (chw, hcw, pw, sc, dent, phar).
    """
    return sum(total * share for total, share in zip(workforce_totals, shares))


wf_6570 = _band_headcount(
    (chw_6570, hcw_6570, pw_6570, sc_6570, dent_6570, phar_6570)
)
wf_6065 = _band_headcount(
    (chw_6065, hcw_6065, pw_6065, sc_6065, dent_6065, phar_6065)
)
wf_5560 = _band_headcount(
    (chw_5560, hcw_5560, pw_5560, sc_5560, dent_5560, phar_5560)
)
wf_5055 = _band_headcount(
    (chw_5055, hcw_5055, pw_5055, sc_5055, dent_5055, phar_5055)
)
wf_1850 = _band_headcount(
    (chw_1850, hcw_1850, pw_1850, sc_1850, dent_1850, phar_1850)
)
# All bands below 65 combined, per workforce.
wf_under_65 = _band_headcount(
    (
        chw_6065 + chw_5560 + chw_5055 + chw_1850,
        hcw_6065 + hcw_5560 + hcw_5055 + hcw_1850,
        pw_6065 + pw_5560 + pw_5055 + pw_1850,
        sc_6065 + sc_5560 + sc_5055 + sc_1850,
        dent_6065 + dent_5560 + dent_5055 + dent_1850,
        phar_6065 + phar_5560 + phar_5055 + phar_1850,
    )
)

# Groups 1 and 2 gain the workforce: care home workers join group 1, all
# other workforces join group 2. Groups 3 and 4 are left unchanged.
freq_wf[1] = freq[1] + chw
freq_wf[2] = freq[2] + hcw + pw + sc + dent + phar

# The same workers are removed from the groups they were counted in.
# NOTE: these subtractions are not clipped, so a group can become negative
# if the workforce estimate exceeds the group's count.
freq_wf[5] = freq[5] - wf_6570

# Under-65 workers: a risk_proportion share is taken from groups 6 and 7,
# half from each ...
freq_wf[6] = freq[6] - (0.5 * risk_proportion) * wf_under_65
freq_wf[7] = freq[7] - (0.5 * risk_proportion) * wf_under_65

# ... and the remaining (1 - risk_proportion) share from the age-band groups.
freq_wf[8] = freq[8] - (1 - risk_proportion) * wf_6065
freq_wf[9] = freq[9] - (1 - risk_proportion) * wf_5560
freq_wf[10] = freq[10] - (1 - risk_proportion) * wf_5055
freq_wf[11] = freq[11] - (1 - risk_proportion) * wf_1850

In [None]:
# Show the raw counts and the workforce-adjusted counts, each followed by its
# total, with blank lines between the two tables.
print(freq, sum(freq), "\n", freq_wf, sum(freq_wf), sep="\n")

## Waterfall chart

In [None]:
# Base height of each waterfall bar: the running total of all PREVIOUS groups
# (the cumulative sum shifted down one position, with 0 for the first group).
running_total = freq_wf.cumsum()
csum = running_total.shift(1).fillna(value=0)

print(csum)

In [None]:
# Tick labels for the 11 bars, in index order of freq_wf.
labels = [
    "Older Residents & care workers in Care Homes",
    "80+ & Health and care Workers",
    "75+",
    "70+",
    "65+",
    "High Risk under 65",
    "Medium Risk under 65",
    "60+",
    "55+",
    "50+",
    "Rest",
]

# Waterfall: each bar is drawn starting at the cumulative total of the
# preceding bars (csum).
wf_plot = freq_wf.plot.bar(
    stacked=True,
    bottom=csum,
    legend=None,
    title="Eligibility by Priority Waterfall",
    figsize=(16, 6),
)

wf_plot.set_xticklabels(labels)
wf_plot.grid()

# Ethnicity, IMD, Rurality

In [None]:
# Kernel density estimate of priority, one curve per ethnicity value.
sns.displot(
    data=df,
    x="priority",
    hue="ethnicity",
    kind="kde",
    bw_adjust=2,
)

In [None]:
# Kernel density estimate of priority, one curve per IMD value.
sns.displot(
    data=df,
    x="priority",
    hue="imd",
    kind="kde",
    bw_adjust=2,
)

In [None]:
# Kernel density estimate of priority, one curve per rural/urban value.
sns.displot(
    data=df,
    x="priority",
    hue="rural_urban",
    kind="kde",
    bw_adjust=2,
)

# STP, MSOA

In [None]:
# Kernel density estimate of priority, one curve per STP.
sns.displot(
    data=df,
    x="priority",
    hue="stp",
    kind="kde",
    bw_adjust=2,
)

In [None]:
# Kernel density estimate of priority, one curve per MSOA.
sns.displot(
    data=df,
    x="priority",
    hue="msoa",
    kind="kde",
    bw_adjust=2,
)

### Map

In [None]:
# STP boundaries for the choropleth.
dfmap = gpd.read_file("./data/STP.geojson")

# Placeholder data: one random integer between 0 and 9 (inclusive) per row.
rand = [randint(0, 9) for _ in dfmap.index]
dfmap["Value"] = rand

dfmap.head()

In [None]:
# Latitude / longitude limits handed to folium.Map below.
min_lat, max_lat = 48.77, 60
min_lon, max_lon = -9.05, 5

# Base map. "OpenStreetMap" replaces "Mapbox Bright", which is no longer
# available as a built-in folium tile set.
kanton_map = folium.Map(
    location=[54.6, -3.4],
    tiles="OpenStreetMap",
    zoom_start=7,
    min_lat=min_lat,
    max_lat=max_lat,
    min_lon=min_lon,
    max_lon=max_lon,
    max_zoom=18,
    min_zoom=5,
)

# Colour each STP area by dfmap["Value"], joining the GeoJSON features to the
# data on the "stp20cd" code. folium.Choropleth(...).add_to(map) replaces the
# Map.choropleth() method, which is deprecated and has been removed from
# recent folium releases.
folium.Choropleth(
    geo_data="./data/STP.geojson",
    data=dfmap,
    columns=["stp20cd", "Value"],
    key_on="feature.properties.stp20cd",
    # bins=[0, 1, 3, 5, 9],  # optional custom colour thresholds
    fill_color="BuPu",
).add_to(kanton_map)

kanton_map