# Keshma's Covid-19 Dashboard

This is a template for your DIY Covid Dashboard, which shows the data for England's new cases and number of PCR tests conducted on a particular date. 

In [1]:
from IPython.display import clear_output
import ipywidgets as wdg
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import json
from uk_covid19 import Cov19API

In [2]:
pip install voila==0.3.0a2

Collecting voila==0.3.0a2
  Downloading voila-0.3.0a2-py3-none-any.whl (1.7 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.7/1.7 MB[0m [31m27.7 MB/s[0m eta [36m0:00:00[0m00:01[0m
Installing collected packages: voila
  Attempting uninstall: voila
    Found existing installation: voila 0.3.6
    Uninstalling voila-0.3.6:
[31mERROR: Could not install packages due to an OSError: [Errno 13] Permission denied: '/opt/conda/etc/jupyter/jupyter_notebook_config.d/voila.json'
Consider using the `--user` option or check the permissions.
[0m[31m
[0mNote: you may need to restart the kernel to use updated packages.


In [3]:
%matplotlib inline
# make figures larger
plt.rcParams['figure.dpi'] = 100

## Load initial data from disk

The data in the initial .json file is load below:

In [4]:
with open("Englandnewcasesandtests.json", "rt") as INFILE: 
    jsondata = json.load(INFILE)

In [5]:
data_list = jsondata['data']
data_list

[{'date': '2022-12-01',
  'NewCasesByPublishDate': 22375,
  'PCRTestsByPublishDate': None},
 {'date': '2022-11-30',
  'NewCasesByPublishDate': 0,
  'PCRTestsByPublishDate': None},
 {'date': '2022-11-29',
  'NewCasesByPublishDate': 0,
  'PCRTestsByPublishDate': 173358841},
 {'date': '2022-11-28',
  'NewCasesByPublishDate': 0,
  'PCRTestsByPublishDate': 173337231},
 {'date': '2022-11-27',
  'NewCasesByPublishDate': 0,
  'PCRTestsByPublishDate': 173319956},
 {'date': '2022-11-26',
  'NewCasesByPublishDate': 0,
  'PCRTestsByPublishDate': 173306678},
 {'date': '2022-11-25',
  'NewCasesByPublishDate': 0,
  'PCRTestsByPublishDate': 173290401},
 {'date': '2022-11-24',
  'NewCasesByPublishDate': 20261,
  'PCRTestsByPublishDate': 173272799},
 {'date': '2022-11-23',
  'NewCasesByPublishDate': 0,
  'PCRTestsByPublishDate': 173240581},
 {'date': '2022-11-22',
  'NewCasesByPublishDate': 0,
  'PCRTestsByPublishDate': 173216513},
 {'date': '2022-11-21',
  'NewCasesByPublishDate': 0,
  'PCRTestsByPubli

## Wrangle the data

The logic to wrangle the raw data into a ```DataFrame``` that will be used for plotting is listed below. 

In [6]:
dates = [dictionary['date'] for dictionary in data_list]
dates.sort()
dates

['2020-01-31',
 '2020-02-01',
 '2020-02-02',
 '2020-02-03',
 '2020-02-04',
 '2020-02-05',
 '2020-02-06',
 '2020-02-07',
 '2020-02-08',
 '2020-02-09',
 '2020-02-10',
 '2020-02-11',
 '2020-02-12',
 '2020-02-13',
 '2020-02-14',
 '2020-02-15',
 '2020-02-16',
 '2020-02-17',
 '2020-02-18',
 '2020-02-19',
 '2020-02-20',
 '2020-02-21',
 '2020-02-22',
 '2020-02-23',
 '2020-02-24',
 '2020-02-25',
 '2020-02-26',
 '2020-02-27',
 '2020-02-28',
 '2020-02-29',
 '2020-03-01',
 '2020-03-02',
 '2020-03-03',
 '2020-03-04',
 '2020-03-05',
 '2020-03-06',
 '2020-03-07',
 '2020-03-08',
 '2020-03-09',
 '2020-03-10',
 '2020-03-11',
 '2020-03-12',
 '2020-03-13',
 '2020-03-14',
 '2020-03-15',
 '2020-03-16',
 '2020-03-17',
 '2020-03-18',
 '2020-03-19',
 '2020-03-20',
 '2020-03-21',
 '2020-03-22',
 '2020-03-23',
 '2020-03-24',
 '2020-03-25',
 '2020-03-26',
 '2020-03-27',
 '2020-03-28',
 '2020-03-29',
 '2020-03-30',
 '2020-03-31',
 '2020-04-01',
 '2020-04-02',
 '2020-04-03',
 '2020-04-04',
 '2020-04-05',
 '2020-04-

In [7]:
def parse_date(datestring):
    """ Convert a date string into a pandas datetime object """
    return pd.to_datetime(datestring, format="%Y-%m-%d")

In [8]:
start_date = parse_date(dates[0])
end_date = parse_date(dates[-1])
print(start_date,' to ',end_date) 

2020-01-31 00:00:00  to  2022-12-01 00:00:00


In [9]:
index = pd.date_range(start_date, end_date, freq='D')
Englandnewcasesandtestsdf=pd.DataFrame(index = index, columns = ['NewCasesByPublishDate', 'PCRTestsByPublishDate'])
Englandnewcasesandtestsdf

Unnamed: 0,NewCasesByPublishDate,PCRTestsByPublishDate
2020-01-31,,
2020-02-01,,
2020-02-02,,
2020-02-03,,
2020-02-04,,
...,...,...
2022-11-27,,
2022-11-28,,
2022-11-29,,
2022-11-30,,


In [10]:
for entry in data_list:
    date = parse_date(entry['date'])
    for column in ['NewCasesByPublishDate', 'PCRTestsByPublishDate']:
        if pd.isna(Englandnewcasesandtestsdf.loc[date, column]):
            value = float(entry[column]) if entry[column]!=None else 0.0
            Englandnewcasesandtestsdf.loc[date,column]=value

Englandnewcasesandtestsdf.fillna(0.0, inplace=True)

Englandnewcasesandtestsdf

Unnamed: 0,NewCasesByPublishDate,PCRTestsByPublishDate
2020-01-31,2.0,0.0
2020-02-01,0.0,0.0
2020-02-02,0.0,0.0
2020-02-03,0.0,0.0
2020-02-04,0.0,0.0
...,...,...
2022-11-27,0.0,173319956.0
2022-11-28,0.0,173337231.0
2022-11-29,0.0,173358841.0
2022-11-30,0.0,0.0


## Download current data

The fucntions below gives users an option to refresh the dataset - a "refresh" button will do. The button callback will
* call the code that accesses the API and download some fresh raw data;
* wrangle that data into a dataframe and update the corresponding (global) variable for plotting;
* optionally: force a redraw of the graph and give the user some fredback.

In [11]:
def wrangle_data(rawdata):
    """ Parameters: rawdata - data from json file or API call. Returns a dataframe.
    Edit to include the code that wrangles the data, creates the dataframe and fills it in. """
    df=pd.DataFrame(index=index, columns=['NewCasesByPublishDate', 'newCasesByPublishDate'])
    for entry in data_list:
        date = parse_date(entry['date'])
        for column in ['NewCasesByPublishDate', 'PCRTestsByPublishDate']:
            if pd.isna(Englandnewcasesandtestsdf.loc[date, column]):
                value = float(entry[column]) if entry[column]!=None else 0.0
                Englandnewcasesandtestsdf.loc[date,column]=value
    return df

df=wrangle_data(jsondata)

In [12]:
def access_api():
    """ Accesses the PHE API. Returns raw data in the same format as data loaded from the "canned" JSON file. """
    return {}

def access_api(button):
    filters = [
    'areaType=nation', 
    'areaName=England'
    ]
    structure = {
    "date": "date",
    "NewCasesByPublishDate":"newCasesByPublishDate",
    "PCRTestsByPublishDate":"cumPCRTestsByPublishDate"
    }
    api = Cov19API(filters=filters, structure=structure)

    Englandnewcasesandtests = api.get_json()
    apidata=access_api(Englandnewcasesandtests)
    
    global df
    df=wrangle_data(apidata)
    refresh_graph()
    apibutton.icon="check"
    #apibutton.disabled=True
   
apibutton=wdg.Button(
    description='Refresh data',
    disabled=False,
    button_style='success', # 'success', 'info', 'warning', 'danger' or ''
    tooltip='Click to download current Public Health England data',
    icon='download' # (FontAwesome names without the `fa-` prefix)
)

apibutton.on_click(access_api)

display(apibutton)

Button(button_style='success', description='Refresh data', icon='download', style=ButtonStyle(), tooltip='Clic…

## Graphs and Analysis

The graph below shows the data for England's new cases and tests by publish date and the user can click on the interactive controls to view data for either data.

In [13]:
Englandnewcasesandtestsdf=pd.read_pickle("Englandnewcasesandtestsdf.pkl")

In [14]:
series=wdg.SelectMultiple(
    options=['NewCasesByPublishDate', 'PCRTestsByPublishDate'],
    value=['NewCasesByPublishDate', 'PCRTestsByPublishDate'],
    rows=2,
    description='Stats:',
    disabled=False
)

scale=wdg.RadioButtons(
    options=['linear', 'log'],
#    value='pineapple', # Defaults to 'pineapple'
#    layout={'width': 'max-content'}, # If the items' names are long
    description='Scale:',
    disabled=False
)

# try replacing HBox with a VBox
controls=wdg.HBox([series, scale])

def newcasesandtests_graph(gcols, gscale):
    if gscale=='linear':
        logscale=False
    else:
        logscale=True
    ncols=len(gcols)
    if ncols>0:
        Englandnewcasesandtestsdf[list(gcols)].plot(logy=logscale)
        plt.show() # important - graphs won't update if this is missing 
    else:
        print("Click to select data for graph")
        print("(CTRL-Click to select more than one category)")

# keep calling timeseries_graph(gcols=value_of_series, gscale=value_of_scale); 
# capture output in widget graph   
graph=wdg.interactive_output(newcasesandtests_graph, {'gcols': series, 'gscale': scale})

display(controls, graph)

HBox(children=(SelectMultiple(description='Stats:', index=(0, 1), options=('NewCasesByPublishDate', 'PCRTestsB…

Output()

In [15]:

ctrls=wdg.VBox([series, scale])

form=wdg.HBox([graph, ctrls])

display(form)

HBox(children=(Output(), VBox(children=(SelectMultiple(description='Stats:', index=(0, 1), options=('NewCasesB…