## Exploration of time/geo data using UK COVID datasets and GeoJSON Local Authority boundaries

### version 0.91


# Please note: currently you have to run interact twice for each map to display

todo: fix 'disappearing map' problem 

todo: use output widgets for map display

todo: initial time based data exploration


In [39]:
# imports 
from datetime import datetime, date
import geopandas as gpd
import pandas as pd

from ipywidgets import interact, interactive, fixed, interact_manual
import plotly.express as px
import json
import ipywidgets as widgets
from ipywidgets import interact, interact_manual, interactive

In [None]:
# SET UP CONSTANTS
from  pathlib import Path

# --- Input files -------------------------------------------------------------
POP_DATA_PATH = Path("./data/ukpopulation2018.csv")
COVID_DATA_PATH = Path("./data/utla_2023-01-05.csv")
GEOJSON_PATH = Path("data/CTYUA_(Apr_2019)_Ultra_Generalised_Clipped_Boundaries_Great_Britain.geojson")

# --- Raw COVID dataset, read once when this cell runs -------------------------
COVID_DATA = pd.read_csv(COVID_DATA_PATH, header=0)

# --- Column names used across the cleaning / plotting helpers -----------------
AREA_CODE = "areaCode"
POP = "pop"
DEATHS_COL = "cum_deaths"
VACC_COL = "cum_vaccinated"
FORM_DATE = "Formatted Date"

# --- Derived metric columns (also shown as choices in the plot widget) --------
DEATH_100K = "Deaths per 100k"
VACC_100K = "Vaccinated per 100k"
PCT_VACC = "% vaccinated"

# --- Plot title template; ``map_title`` is filled with the plotted column -----
MAP_TITLE = "COVID 19 {map_title}, by Local Authority"

In [3]:

def load_and_clean_geo_data():
    """Load the boundary file and reduce it to what the map needs.

    Reads ``GEOJSON_PATH`` with geopandas, renames the ``ctyua19cd`` column to
    ``AREA_CODE``, drops every column other than ``geometry`` and the area
    code, and uses the area code as the index.

    Returns:
        The reduced GeoDataFrame, indexed by ``AREA_CODE``.
    """
    gdf = gpd.read_file(GEOJSON_PATH)
    gdf = gdf.rename(columns={'ctyua19cd': AREA_CODE})
    unwanted = [col for col in gdf.columns if col not in ['geometry', AREA_CODE]]
    # The previous version also serialised the frame to JSON and parsed it
    # back into a dict that was never used; that round trip has been removed.
    return gdf.drop(columns=unwanted).set_index(AREA_CODE)

In [52]:
 def clean_covid_data(df, start_date=pd.to_datetime('2020-02-01'), end_date=pd.to_datetime('2021-12-01')):
     """Rename the raw COVID columns, add date helpers and filter by date.

     The caller's frame is left untouched; all changes are made on a copy.

     Args:
         df: Raw COVID frame with a ``date`` column and, optionally, the
             ``cumDeaths28DaysByDeathDate`` and
             ``cumPeopleVaccinatedFirstDoseByVaccinationDate`` columns.
         start_date: Inclusive lower bound; anything ``pd.to_datetime``
             accepts. ``None`` leaves the range open at the start.
         end_date: Inclusive upper bound; ``None`` leaves the range open at
             the end.

     Returns:
         A new DataFrame restricted to the requested range, with the columns
         renamed to ``cum_deaths`` / ``cum_vaccinated`` and two added columns:
         ``date_sorter`` (parsed datetime) and ``FORM_DATE`` (``dd/mm/yy``).
     """
     start_date, end_date = map(pd.to_datetime, [start_date, end_date])

     # ``rename`` without ``inplace`` returns a new frame, so the column
     # assignments below never leak back into the caller's data.
     df = df.rename(columns={'cumDeaths28DaysByDeathDate': 'cum_deaths',
                             'cumPeopleVaccinatedFirstDoseByVaccinationDate': 'cum_vaccinated'})
     df['date_sorter'] = pd.to_datetime(df['date'])
     df[FORM_DATE] = df['date_sorter'].dt.strftime('%d/%m/%y')

     # Build the mask bound by bound so that a missing bound (None / NaT)
     # means "unbounded" instead of raising on comparison.
     in_range = pd.Series(True, index=df.index)
     if not pd.isna(start_date):
         in_range &= df['date_sorter'] >= start_date
     if not pd.isna(end_date):
         in_range &= df['date_sorter'] <= end_date
     return df[in_range]


In [12]:
def aggregate_covid_data(cv_data, time_compression='W') -> pd.DataFrame:
    """Down-sample cleaned COVID data per area and prepare it for plotting.

    Args:
        cv_data: Frame containing ``AREA_CODE`` and a datetime
            ``date_sorter`` column.
        time_compression: ``'W'``, ``'M'`` or ``'Q'`` to resample per area
            with that frequency, taking the minimum of every column in each
            bin. Any other value skips resampling.

    Returns:
        A frame sorted by date with a fresh 0..n-1 index, ``date_sorter``
        removed and missing values replaced by 0. When resampling, ``date``
        is rewritten as the bin label formatted ``%d%m%Y`` and ``FORM_DATE``
        (if present) is the earliest observed date in the bin as ``dd/mm/yy``.
    """
    if time_compression in ('W', 'M', 'Q'):
        # Carry the real dates through the aggregation: taking ``min`` of the
        # dd/mm/yy strings compares them lexicographically and picks the
        # wrong day whenever a bin spans a month boundary.
        with_start = cv_data.assign(_period_start=cv_data['date_sorter'])
        resampled = (
            with_start.groupby(AREA_CODE)
            .resample(time_compression, on='date_sorter')
            .min()
        )
        # Depending on the pandas version the grouping column is either kept
        # among the aggregated columns or only present as an index level.
        # Drop the level only in the former case so the code is never lost.
        if AREA_CODE in resampled.columns:
            resampled = resampled.droplevel(AREA_CODE)
        resampled = resampled.reset_index()

        if FORM_DATE in resampled.columns:
            resampled[FORM_DATE] = resampled['_period_start'].dt.strftime('%d/%m/%y')
        resampled = resampled.drop(columns='_period_start')

        resampled = resampled.sort_values(by='date_sorter')
        resampled['date'] = resampled['date_sorter'].dt.strftime('%d%m%Y')
        cv_data = resampled.drop(columns='date_sorter').reset_index(drop=True).fillna(0)
    else:
        print('no time compression selected - expect delayed response times !')

        cv_data = cv_data.sort_values('date_sorter').drop(columns='date_sorter').reset_index(drop=True).fillna(0)

    return cv_data

In [6]:
def clean_pop_data(df):
    """Keep only the area-code and total-population columns, renamed.

    ``Code`` becomes ``AREA_CODE`` and ``All Ages`` becomes ``POP``; every
    other column is discarded. A new frame is returned.
    """
    wanted = ['Code', 'All Ages']
    # Boolean column mask: keeps original column order and tolerates a
    # missing column, exactly like dropping everything that is not wanted.
    kept = df.loc[:, df.columns.isin(wanted)]
    return kept.rename(columns={'Code': AREA_CODE, 'All Ages': POP})

In [7]:
def add_population_data(df):
    """Attach population figures and per-capita metrics to a COVID frame.

    Loads the population CSV from ``POP_DATA_PATH``, left-joins it on
    ``AREA_CODE`` and adds three columns: ``DEATH_100K``, ``VACC_100K`` and
    ``PCT_VACC`` (stored as the vaccinated / population ratio).

    Returns:
        The joined frame with the extra columns.
    """
    population = clean_pop_data(pd.read_csv(POP_DATA_PATH, header=0)).set_index(AREA_CODE)
    merged = df.join(population, on=AREA_CODE, how='left')

    # Share of the population vaccinated, reused for two of the metrics.
    vaccinated_share = merged[VACC_COL] / merged[POP]

    merged[DEATH_100K] = merged[DEATHS_COL] / merged[POP] * 1E5
    merged[VACC_100K] = vaccinated_share * 1E5
    merged[PCT_VACC] = vaccinated_share
    return merged
    

In [8]:
# Cleaned (default date range) COVID data enriched with population metrics.
cv_data = add_population_data(clean_covid_data(COVID_DATA))

In [9]:
import ipywidgets as widgets
from ipywidgets import interact, interact_manual, interactive

# Stand-alone dropdown for choosing the time granularity. Each option is a
# (label, value) pair; the value is the pandas resampling frequency code.
granularity = widgets.Dropdown(
    description='Granularity',
    options=[
        ('by month', 'M'),
        ('by week', 'W'),
        ('by quarter', 'Q'),
    ],
    value='M',
    disabled=False,
)


In [10]:
def acquire_data(covid_data, time_compression, start_date: datetime, end_date:datetime)-> pd.DataFrame:
    """Run the full preparation pipeline for the map.

    Cleans and date-filters the raw COVID data, aggregates it to the
    requested granularity and then attaches population-based metrics.

    Args:
        covid_data: Raw COVID frame.
        time_compression: Frequency code forwarded to ``aggregate_covid_data``.
        start_date: Start of the date range, forwarded to ``clean_covid_data``.
        end_date: End of the date range, forwarded to ``clean_covid_data``.

    Returns:
        The aggregated frame including the per-capita columns.
    """
    covid_data = clean_covid_data(covid_data, start_date=start_date, end_date=end_date)
    covid_data = aggregate_covid_data(covid_data, time_compression=time_compression)
    # ``add_population_data`` loads the population file itself, so the extra
    # read that used to follow (its result was never used) has been removed.
    return add_population_data(covid_data)



In [55]:
@interact_manual
def plot_map(col_to_plot=[VACC_100K, DEATH_100K, PCT_VACC],
             start_date=widgets.DatePicker(value=date(2020, 1, 1)), 
             end_date=widgets.DatePicker(value=date(2022, 12, 31)),
             granularity = widgets.Dropdown(
                                            options=[('by month', 'M'), ('by week', 'W'), ('by quarter', 'Q')],
                                            value='M',
                                            description='Granularity',
                                            disabled=False,
                                        )

            ):
    """Draw an animated choropleth of the chosen metric per local authority.

    The defaults are widget specifications consumed by ``interact_manual``
    (the list becomes a dropdown), not ordinary default values.

    Args:
        col_to_plot: Name of the metric column used for colouring.
        start_date: First date to include.
        end_date: Last date to include.
        granularity: Resampling frequency code (``'M'``, ``'W'`` or ``'Q'``).
    """
    # A date picker that has been cleared yields None; there is nothing
    # sensible to plot until both ends of the range are chosen.
    if start_date is None or end_date is None:
        print('Please select both a start date and an end date.')
        return

    # The hover label has to follow the selected granularity; it used to read
    # "Week beginning" even for monthly and quarterly maps.
    period_labels = {'W': 'Week beginning', 'M': 'Month beginning', 'Q': 'Quarter beginning'}
    date_label = period_labels.get(granularity, 'Period beginning')

    cv_data = COVID_DATA.copy()
    cv_data = acquire_data(cv_data, time_compression=granularity, start_date=start_date, end_date=end_date)
    geo_data = load_and_clean_geo_data()
    # Upper end of the colour scale: the largest value across all frames, so
    # the scale stays fixed while the animation plays.
    range_bar_scale =  cv_data[col_to_plot].max()
    fig = px.choropleth_mapbox(data_frame=cv_data,
                               geojson=geo_data,
                               locations=cv_data[AREA_CODE],
                               color=col_to_plot,
                               # center={"lon": -1.88141, "lat": 52.484039},  # Brum
                               # featureidkey='properties.ctyua19cd',
                               zoom=5,
                               color_continuous_scale='RdBu',
                               range_color=(0.50, range_bar_scale),
                               animation_frame='date',
                               hover_name="areaName",
                               hover_data={'date': False, FORM_DATE:True,  POP: True, col_to_plot: True, 'areaCode': False},
                               labels={POP: 'Population 2018',  FORM_DATE: date_label},
                               width=800,
                               height=1300,
                               title=MAP_TITLE.format(map_title=col_to_plot),
                              )
    fig.update_layout(mapbox_style="carto-positron")
    # fig.update_layout(margin={"r": 0, "t": 20, "l": 0, "b": 0})
    fig.update_layout(mapbox_bounds={"west": -8, "east": 2, "south": 49, "north": 61})
    # NOTE(review): update_geos targets "geo" subplots; it may have no effect
    # on a mapbox figure - confirm before relying on it.
    fig.update_geos(fitbounds="locations")
    # fig_url = fig.write_html('covid_deaths_series.html')
    fig.show()

interactive(children=(Dropdown(description='col_to_plot', options=('Vaccinated per 100k', 'Deaths per 100k', '…