### Input.csv contains 

- Age 
- Sex 
- bmi value 
- Care Home type (PN, PC, PS, U) 
- Ethnicity (6 categories) 
- MSOA 
- STP 
- IMD (rounded to nearest 100) 
- rural/urban 
- Condition Flags:
    - bone_marrow_transplant
    - cancer_excl_lung_and_haem
    - chemo_or_radio
    - chronic_kidney_disease
    - chronic_liver_disease
    - chronic_cardiac_disease
    - current_copd
    - cystic_fibrosis
    - dementia
    - diabetes
    - dialysis
    - haematological_cancer
    - heart_failure
    - intel_dis_incl_downs_syndrome
    - lung_cancer
    - non_severe_asthma
    - other_heart_disease
    - other_neuro
    - other_respiratory_conditions
    - permanant_immunosuppression
    - temporary_immunosuppression
    - severe_asthma
    - sickle_cell_disease
    - solid_organ_transplantation
    - stroke

## Packages

In [None]:
from random import randint
from copy import copy

import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
import seaborn as sns

import folium
import geopandas as gpd

## Data

In [None]:
# Main extract: the fields listed at the top of this notebook.
df = pd.read_csv(filepath_or_buffer="../output/input.csv")
# Workforce reference table; later cells read its "workforce", "metric"
# and "value" columns.
df_workforce = pd.read_csv(filepath_or_buffer="./data/demographics.csv")

In [None]:
# Names of the condition flag columns, one per line.
# The order matches codelist.py - keep it that way when editing.
condition_cols = """
    solid_organ_transplantation
    chemo_or_radio
    lung_cancer
    cancer_excl_lung_and_haem
    haematological_cancer
    bone_marrow_transplant
    cystic_fibrosis
    severe_asthma
    current_copd
    sickle_cell_disease
    permanant_immunosuppression
    temporary_immunosuppression
    chronic_cardiac_disease
    intel_dis_incl_downs_syndrome
    dialysis
    non_severe_asthma
    other_respiratory_conditions
    heart_failure
    other_heart_disease
    diabetes
    chronic_kidney_disease
    chronic_liver_disease
    other_neuro
    dementia
    stroke
""".split()

In [None]:
# Clean - replace NaNs
#
# Condition columns become 0/1 flags: any recorded (non-null) value means
# the condition is present (1), a missing value means absent (0).
# Whole-column assignment (df[cols] = ...) is used instead of
# df.loc[:, cols] = ... so the columns take the new integer dtype rather
# than having the flags written into the original columns' dtype.
df[condition_cols] = df[condition_cols].notna().astype(int)

# Sentinel marking "not recorded" in the remaining columns cleaned here.
senti_val = -99

# Assign the filled column back rather than calling fillna(inplace=True) on
# df.loc[:, col]: that selection can be a temporary copy, in which case the
# in-place fill never reaches df.
df["ethnicity"] = df["ethnicity"].fillna(value=senti_val)
df["bmi"] = df["bmi"].fillna(value=senti_val)

## Feature Generation

Set a risk level (low, medium or high) based on the number of condition flags: none is low, one or two is medium, three or more is high.

In [None]:
# Number of condition flags set for each row (the flags are 0/1 after
# cleaning, so the row sum is a count).
df["num_conditions"] = df.loc[:, condition_cols].sum(axis=1)

# Risk level from the condition count: none -> low, 1-2 -> medium, 3+ -> high.
conditionals = [
    (df["num_conditions"] == 0),
    (df["num_conditions"] > 0) & (df["num_conditions"] < 3),
    (df["num_conditions"] >= 3),
]
risk_values = ["low", "medium", "high"]

# np.select's implicit default is the integer 0, which recent NumPy refuses
# to combine with string choices (TypeError). Pass the default as a string;
# "0" is what older NumPy produced for rows matching no condition.
df["risk"] = np.select(conditionals, risk_values, default="0")

Set Priority Flag based on September JCVI advice (excluding workforce considerations)

In [None]:
# Display names of the priority groups, highest priority first.
# The label at position i (counting from 1) belongs to priority value i.
priority_labels = [
    "Older Residents & care workers in Care Homes",
    "80+ & Health and care Workers",
    *(f"{age}+" for age in (75, 70, 65)),
    "High Risk under 65",
    "Medium Risk under 65",
    *(f"{age}+" for age in (60, 55, 50)),
    "Rest",
]

In [None]:
# Assign each row a priority group number (1 = first in line).
# np.select takes the first matching rule, so the rule order below is
# significant; rows matching no rule keep the default of 0.
df["priority"] = 0

_age = df["age"]
_risk = df["risk"]
_in_care_home = df["care_home_type"] != "U"
_not_in_care_home = df["care_home_type"] == "U"

conditionals = [
    # care home residents aged 65 and over
    _in_care_home & (_age >= 65),
    # everyone else aged 65 and over, in descending age bands
    _not_in_care_home & (_age >= 80),
    _not_in_care_home & (_age >= 75),
    _not_in_care_home & (_age >= 70),
    _not_in_care_home & (_age >= 65),
    # under 65 with at least one condition
    (_risk == "high") & (_age < 65),
    (_risk == "medium") & (_age < 65),
    # low risk, in descending age bands
    (_risk == "low") & (_age >= 60),
    (_risk == "low") & (_age >= 55),
    (_risk == "low") & (_age >= 50),
    # anyone under 50 not matched above
    _age < 50,
]
priority_values = [rank for rank, _label in enumerate(priority_labels, start=1)]

df["priority"] = np.select(conditionals, priority_values)

In [None]:
# Number of rows in each priority group, indexed by priority value.
# Reindex over every priority value so that a group with no rows appears as
# 0 instead of being absent: later cells look groups up by number
# (e.g. freq[6]) and label the bars with all of priority_labels. Rows that
# matched no priority rule (value 0) are not part of this table.
freq = (
    df["priority"]
    .value_counts()
    .reindex(priority_values, fill_value=0)
)

print(freq)

Create workforce calculations for age bands (including risk exclusions)

In [None]:
# As we need to adapt profiles, we need to first have a factor to use for the dummy data:
# the ratio of rows in df to the England "all" total in the workforce table.
# .item() extracts the single matching value as a scalar (and raises if there
# is not exactly one match); calling int() directly on a one-element Series
# is deprecated in pandas.
_england_total = df_workforce.loc[
    (df_workforce["workforce"] == "England")
    & (df_workforce["metric"] == "all"),
    "value",
].item()

factor = len(df.index) / int(_england_total)

# From the calculation above, we also need a factor for the number of higher risk people:
# the share of all rows in priority groups 6 and 7 (high / medium risk under 65).
risk_prop = (freq[6] + freq[7]) / len(df.index)

In [None]:
# Calculate total for each group, scaled by `factor` to the size of df.
_is_total_row = df_workforce["metric"] == "all"

df_wf_tot = (
    df_workforce.loc[_is_total_row, ["workforce", "value"]]
    .copy(deep=True)
    .reset_index(drop=True)
)

df_wf_tot = (
    df_wf_tot.assign(factor_total=df_wf_tot["value"].astype(int) * factor)
    .set_index("workforce")
    .drop(columns="value")
    # England is the overall total rather than a workforce group
    .drop(index="England")
)

# Scaled headcount per workforce group, indexed by workforce name.
wf_tot = df_wf_tot["factor_total"]

In [None]:
# 5-year age-bands for workforce (assume no workforce above 70)
sc_list = ["SocialCare_Residential", "SocialCare_NonResidential"]

# Share of the "under_55" band allocated to 18-49; the remainder goes to
# 50-54. Presumably 32 single years of age (18-49) out of the years covered
# by the band - TODO confirm why the denominators differ for social care.
lower_ab_sc = 32 / 37
lower_ab_not_sc = 32 / 36

age_bands = ["under_55", "55_59", "60_64", "65+"]

df_wf_ab = (
    df_workforce.loc[
        df_workforce["metric"].isin(age_bands),
        ["workforce", "metric", "value"],
    ]
    .copy(deep=True)
    .reset_index(drop=True)
)

# Treat the open-ended top band as 65-70 (see assumption above).
df_wf_ab.loc[df_wf_ab["metric"] == "65+", "metric"] = "65_70"

# Split each workforce's "under_55" value into "18_49" and "50_54" rows.
# Rows are collected and added with a single pd.concat: DataFrame.append no
# longer exists in pandas 2.x, and one concat avoids re-copying the frame on
# every iteration.
new_ab_rows = []

for wfg in df_wf_ab["workforce"].unique():
    wfg_lower = df_wf_ab.loc[
        (df_wf_ab["workforce"] == wfg) & (df_wf_ab["metric"] == "under_55"),
        "value",
    ].values[0]

    multiplier = lower_ab_sc if wfg in sc_list else lower_ab_not_sc

    new_ab_rows.append(
        {"workforce": wfg, "metric": "18_49", "value": wfg_lower * multiplier}
    )
    new_ab_rows.append(
        {
            "workforce": wfg,
            "metric": "50_54",
            "value": wfg_lower * (1 - multiplier),
        }
    )

df_wf_ab = pd.concat(
    [df_wf_ab, pd.DataFrame(new_ab_rows, columns=df_wf_ab.columns)],
    ignore_index=True,
)

age_bands_updated = ["18_49", "50_54", "55_59", "60_64", "65_70"]

# The original "under_55" rows are now replaced by the two split bands, and
# England (the overall total) is not a workforce group.
df_wf_ab = df_wf_ab[df_wf_ab["metric"] != "under_55"]
df_wf_ab = df_wf_ab[df_wf_ab["workforce"] != "England"]
df_wf_ab = df_wf_ab.sort_values(["workforce", "metric"]).reset_index(drop=True)

In [None]:
# Per-workforce sum of "value" over every age band except 65_70.
_is_65_70 = df_wf_ab["metric"] == "65_70"
wf_not_65_70 = df_wf_ab.loc[~_is_65_70].groupby("workforce")["value"].sum()

# One Series per age band: "value" indexed by workforce name.
wf_ab = {
    band: df_wf_ab.loc[df_wf_ab["metric"] == band].set_index("workforce")["value"]
    for band in age_bands_updated
}

Need to ensure that no priority group ends up with a negative count once the workforce has been moved between groups!

In [None]:
# Priority-group counts adjusted for the workforce: workers are added to
# groups 1 and 2 and removed from the age / risk groups they would otherwise
# have been counted in.
#
# Start from a float copy of freq: the adjustments are fractional, and
# writing floats into the integer counts Series is an incompatible-dtype
# assignment that pandas is deprecating.
# NOTE: nothing here stops a group going negative - check the printed totals.
freq_wf = freq.astype(float)

# Residential social care staff join group 1; all other workforce groups
# join group 2.
_sc_residential = wf_tot["SocialCare_Residential"]
freq_wf[1] = freq[1] + _sc_residential
freq_wf[2] = freq[2] + (wf_tot.sum() - _sc_residential)

# Groups 3 and 4 (75+ and 70+) are left as copied from freq: the workforce
# is assumed to have nobody above 70.

# Workers aged 65-70 come out of group 5.
freq_wf[5] = freq[5] - wf_ab["65_70"] @ wf_tot

# Workers under 65 at raised risk (a share risk_prop of them) are removed
# from groups 6 and 7, split evenly between the two.
_wf_under_65 = wf_not_65_70 @ wf_tot
freq_wf[6] = freq[6] - (0.5 * risk_prop) * _wf_under_65
freq_wf[7] = freq[7] - (0.5 * risk_prop) * _wf_under_65

# The remaining (low risk) workers come out of their age-band groups.
freq_wf[8] = freq[8] - (1 - risk_prop) * (wf_ab["60_64"] @ wf_tot)
freq_wf[9] = freq[9] - (1 - risk_prop) * (wf_ab["55_59"] @ wf_tot)
freq_wf[10] = freq[10] - (1 - risk_prop) * (wf_ab["50_54"] @ wf_tot)
freq_wf[11] = freq[11] - (1 - risk_prop) * (wf_ab["18_49"] @ wf_tot)

In [None]:
# Show the counts and their total before and after the workforce
# adjustment, separated by blank lines, so the two totals can be compared.
print(freq, sum(freq), sep="\n")
print("\n")
print(freq_wf, sum(freq_wf), sep="\n")

## Waterfall chart

In [None]:
# Starting height of each waterfall bar: the running total of all earlier
# groups, so the first group starts at 0.
_running_total = freq_wf.cumsum()
csum = _running_total.shift(periods=1).fillna(value=0)

print(csum)

In [None]:
# Waterfall: one bar per priority group, each starting where the previous
# groups' running total (csum) ends.
_waterfall_options = {
    "kind": "bar",
    "stacked": True,
    "bottom": csum,
    "legend": None,
    "title": "Eligibility by Priority Waterfall",
    "figsize": (16, 6),
}
wf_plot = freq_wf.plot(**_waterfall_options)

# Replace the numeric priority ticks with the group names.
wf_plot.set_xticklabels(priority_labels)
wf_plot.grid()

# Ethnicity, IMD, Rurality

In [None]:
# Density of priority group, one curve per ethnicity value.
sns.displot(
    data=df,
    x="priority",
    hue="ethnicity",
    kind="kde",
    bw_adjust=2,
)

In [None]:
# Density of priority group, one curve per IMD value.
sns.displot(
    data=df,
    x="priority",
    hue="imd",
    kind="kde",
    bw_adjust=2,
)

In [None]:
# Density of priority group, one curve per rural/urban category.
sns.displot(
    data=df,
    x="priority",
    hue="rural_urban",
    kind="kde",
    bw_adjust=2,
)

# STP, MSOA

In [None]:
# Density of priority group, one curve per STP.
sns.displot(
    data=df,
    x="priority",
    hue="stp",
    kind="kde",
    bw_adjust=2,
)

In [None]:
# Density of priority group, one curve per MSOA.
sns.displot(
    data=df,
    x="priority",
    hue="msoa",
    kind="kde",
    bw_adjust=2,
)

### Map

In [None]:
# STP boundaries, one row per feature in the GeoJSON file.
dfmap = gpd.read_file("./data/STP.geojson")

# Placeholder data for the map: a random integer 0-9 per feature.
rand = [randint(0, 9) for _ in dfmap.index]
dfmap["Value"] = rand

dfmap.head()

In [None]:
# Bounding box passed to the map below.
# NOTE(review): folium appears to apply these limits only when
# max_bounds=True is also passed - confirm whether panning should be
# restricted.
min_lat, max_lat = 48.77, 60
min_lon, max_lon = -9.05, 5

# Base map. The "Mapbox Bright" tiles are no longer offered by folium, so a
# light built-in basemap that needs no API key is used instead.
kanton_map = folium.Map(
    location=[54.6, -3.4],
    tiles="cartodbpositron",
    zoom_start=7,
    min_lat=min_lat,
    max_lat=max_lat,
    min_lon=min_lon,
    max_lon=max_lon,
    max_zoom=18,
    min_zoom=5,
)

# Shade each STP by its "Value", joining the GeoJSON features to dfmap on
# the stp20cd code. folium.Choropleth(...).add_to(map) replaces the
# Map.choropleth method, which current folium releases no longer provide.
folium.Choropleth(
    geo_data="./data/STP.geojson",
    data=dfmap,
    columns=["stp20cd", "Value"],
    key_on="feature.properties.stp20cd",
    # bins=[0, 1, 3, 5, 9],  # optional custom colour breaks
    fill_color="BuPu",
).add_to(kanton_map)

kanton_map