# Covid Vaccination in England by region and age group

In this notebook we'll learn how to visualise England's Covid vaccination data by region and age group.

---
**NOTE**
When I say 'vaccinated' in this notebook, take that to mean people that have received at least one dose of a Covid vaccine. It will be way too verbose to add that caveat each time.
----

In [1]:
import pandas as pd
import altair as alt
import numpy as np

In [61]:
vaccination_rates_by_region = pd.DataFrame([
    {"Region": "East Midlands", "Overall": 48.877331},
    {"Region": "London", "Overall": 32.58}
])
vaccination_rates_by_region = vaccination_rates_by_region.convert_dtypes()
vaccination_rates_by_region

Unnamed: 0,Region,Overall
0,East Midlands,48.877331
1,London,32.58


In [15]:
vaccination_rates_by_region.to_csv("vaccination_rates_by_region.csv", index=False)

In [13]:
chart = (alt.Chart(vaccination_rates_by_region).mark_bar().encode(
            x=alt.X('Region'),
            y=alt.Y('Overall', axis=alt.Axis(title='Vaccinations')),    
            tooltip=[alt.Tooltip('Overall', format=",")])
.properties(width=600))
chart

TypeError: Cannot interpret 'Float64Dtype()' as a data type

alt.Chart(...)

In [2]:
import sys
sys.path.append("..")

import pandas as pd
import altair as alt
import data as dt
import numpy as np
from dateutil import parser

import logging
logger = logging.getLogger()
logger.setLevel(logging.CRITICAL)

latest_weekly_date = parser.parse("2021-04-22")
spreadsheet = f"../data/COVID-19-weekly-announced-vaccinations-{latest_weekly_date.strftime('%-d-%B-%Y')}.xlsx"

In [3]:
vaccinations = dt.vaccinations_dataframe(spreadsheet)    
population = dt.population_dataframe(spreadsheet)
population = population.merge(vaccinations[["UTLA Name", "Region Name (administrative)", "LTLA Code"]], 
                              left_on="LTLA Code", right_on="LTLA Code")

In [4]:
population_by_region = population.groupby(["Region Name (administrative)"]).sum()
population_by_region.insert(0, "Region", list(population_by_region.index))
population_by_region.loc[:, "Overall"] = population_by_region.sum(axis=1).astype("int32")

vaccinations_by_region = vaccinations.groupby(["Region Name (administrative)"]).sum()
vaccinations_by_region.insert(0, "Region", list(vaccinations_by_region.index))
vaccinations_by_region.loc[:, "Overall"] = vaccinations_by_region.sum(axis=1).astype("int32")

In [31]:
vaccination_rates_by_region = ((vaccinations_by_region
    .select_dtypes(exclude='object')
    .div(population_by_region.select_dtypes(exclude='object')) * 100)
    .combine_first(vaccinations_by_region)[vaccinations_by_region.columns])

vaccination_rates_by_region.loc[:, "Overall"] = vaccinations_by_region["Overall"].div(population_by_region["Overall"]) * 100
# vaccination_rates_by_region = vaccination_rates_by_region.convert_dtypes()
vaccination_rates_by_region

Unnamed: 0_level_0,Region,Under 45,45-49,50-54,55-59,60-64,65-69,70-74,75-79,80+,Overall
Region Name (administrative),Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
East Midlands,East Midlands,17.069002,66.290826,85.547283,87.98528,90.251743,93.021329,95.31394,96.288023,96.073968,48.877332
East of England,East of England,15.27893,54.445024,85.317557,87.881799,89.715456,92.13494,94.692416,95.809976,95.910826,46.846857
London,London,12.408192,47.580674,69.682782,73.462612,77.31452,80.898686,84.871764,86.20538,86.241193,32.582798
North East,North East,16.448244,53.614154,86.155653,89.578176,91.884398,94.246912,96.099774,97.143304,96.782161,49.042552
North West,North West,16.303058,53.513413,82.570179,86.004898,88.84743,91.689731,94.332372,95.537199,95.478666,46.163862
South East,South East,14.972864,54.206753,85.729001,88.034643,89.792686,92.206861,94.481063,95.590852,95.75793,46.676111
South West,South West,15.936903,57.637714,87.384272,89.319349,90.734293,93.172764,95.160962,96.247263,96.502342,50.400086
West Midlands,West Midlands,15.725881,60.611814,82.701015,85.573148,88.650971,91.544867,94.20287,95.285345,94.523955,45.830636
Yorkshire and The Humber,Yorkshire and The Humber,15.761452,61.106731,84.545536,88.088435,90.248426,92.851659,95.191933,96.248414,95.944478,46.781886


In [67]:
# https://martinjc.github.io/UK-GeoJSON/
regions = alt.topo_feature("../data/topo_eer.json", 'eer')
background = alt.Chart(regions).mark_geoshape(
    stroke='white',
    strokeWidth=2
).encode(
    tooltip=["Region:N", f"{field}:Q"],
    color = alt.Color(f"{field}:Q", scale=alt.Scale(scheme="turbo"))
).transform_lookup(
    lookup='properties.EER13NM',
    from_=alt.LookupData(
        data=vaccination_rates_by_region, 
        key='Region', 
        fields=list(vaccination_rates_by_region.columns))
)
background

In [81]:
vaccinations_by_region

Unnamed: 0_level_0,Region,Under 45,45-49,50-54,55-59,60-64,65-69,70-74,75-79,80+,Overall
Region Name (administrative),Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
East Midlands,East Midlands,485460,218006,311742,313686,274581,243581,253313,186637,239302,2526308
East of England,East of England,569351,241143,401257,401575,347677,305560,326115,241330,331956,3165964
London,London,853819,343168,463764,431490,354518,277006,249018,176440,256057,3405280
North East,North East,249748,91514,166563,179486,166025,145734,144437,99398,135586,1378491
North West,North West,726932,267231,453918,468098,413607,356795,366003,261963,347803,3662350
South East,South East,817299,357448,596745,595969,511997,442604,466436,349245,487608,4625351
South West,South West,498757,213180,363100,378890,340328,310365,332343,250958,341985,3029906
West Midlands,West Midlands,569091,244542,363858,358329,315726,278907,283366,219142,291458,2924419
Yorkshire and The Humber,Yorkshire and The Humber,521389,223196,341706,345881,308026,268514,275831,196469,265771,2746783


In [21]:
from vega_datasets import data
data.us_10m.url

'https://cdn.jsdelivr.net/npm/vega-datasets@v1.29.0/data/us-10m.json'

In [17]:
import pandas as pd

df = pd.DataFrame(data=[{"name": "Mark", "age": 37}, {"name": "David", "age": 36}])
display(df)
df.set_index("name", inplace=True)
display(df)

Unnamed: 0,name,age
0,Mark,37
1,David,36


Unnamed: 0_level_0,age
name,Unnamed: 1_level_1
Mark,37
David,36


In [21]:
to_append = pd.DataFrame([
    {"age": 42}
])
df.append(to_append)

Unnamed: 0,age
Mark,37
David,36
Ryan,40
0,42


In [19]:
to_append = pd.DataFrame([
    {"age": 42}
], index = ["John"])
df.append(to_append)

Unnamed: 0,age
Mark,37
David,36
John,42


In [20]:
df.loc["Ryan"] = [40]
df

Unnamed: 0_level_0,age
name,Unnamed: 1_level_1
Mark,37
David,36
Ryan,40
