[DIY Covid-19 Dashboard Kit](https://github.com/fsmeraldi/diy-covid19dash) (C) Fabrizio Smeraldi, 2020 ([f.smeraldi@qmul.ac.uk](mailto:f.smeraldi@qmul.ac.uk) - [web](http://www.eecs.qmul.ac.uk/~fabri/)). All rights reserved.

# DIY Covid-19 Dashboard

In [1]:
# setup
import ipywidgets as wdg
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import json
from uk_covid19 import Cov19API

%matplotlib inline
# make figures larger
plt.rcParams['figure.dpi'] = 100


In [2]:

# data wrangling functions

def parse_date(datestring):
    """Turn a 'YYYY-MM-DD' date string into the matching pandas datetime object."""
    iso_day_format = "%Y-%m-%d"
    timestamp = pd.to_datetime(datestring, format=iso_day_format)
    return timestamp


def wrangle_data(data):
    """Build the daily time-series dataframe from raw JSON/API data.

    Parameters:
        data: dictionary as loaded from the JSON file or returned by the API
            call. Its 'data' entry is a list of dictionaries, each with a
            'date' string (YYYY-MM-DD) and, optionally, 'hospital' and
            'deaths' values (numbers or None).

    Returns:
        A dataframe indexed by every calendar day between the earliest and
        the latest date found, with float columns 'hospital' and 'deaths'.
        Missing days and missing/None values are reported as 0.0. If the
        'data' list is empty, an empty dataframe with the same columns is
        returned.

    When a date appears more than once, the first non-None value seen for
    each column is kept, so entries carrying different columns for the same
    date complement each other.
    """
    columns = ['hospital', 'deaths']
    datalist = data['data']

    if not datalist:
        # nothing to index on - hand back an empty, correctly typed frame
        return pd.DataFrame(index=pd.DatetimeIndex([]), columns=columns, dtype=float)

    # parse all the dates in one go rather than once per entry
    dates = pd.to_datetime([entry['date'] for entry in datalist], format="%Y-%m-%d")

    index = pd.date_range(dates.min(), dates.max(), freq='D')
    # dtype=float keeps the columns numeric (and therefore plottable); without
    # it the empty frame is created with object columns
    timeseriesdf = pd.DataFrame(index=index, columns=columns, dtype=float)

    for date, entry in zip(dates, datalist):
        for column in columns:
            value = entry.get(column)
            # Skip absent/None values so that a later entry for the same date
            # can still provide the number; only write into cells that are
            # still empty, in case some dates are duplicated.
            if value is not None and pd.isna(timeseriesdf.loc[date, column]):
                # access a specific location with .loc and index,column
                # in a single set of [ ]
                timeseriesdf.loc[date, column] = float(value)

    # fill in the remaining "holes": missing dates and None values
    return timeseriesdf.fillna(0.0)


In [3]:
# load the saved JSON snapshot from disk and turn it into a dataframe
with open("timeseries.json", "rt") as INFILE:
    data = json.loads(INFILE.read())

timeseriesdf = wrangle_data(data)

# round-trip the dataframe through a pickle file - pandas handles both directions
timeseriesdf.to_pickle("timeseriesdf.pkl")
timeseriesdf = pd.read_pickle("timeseriesdf.pkl")

In [4]:
# multiple-selection list of the statistics to draw; both are selected at start
series = wdg.SelectMultiple(
    description='Stats:',
    options=['hospital', 'deaths'],
    value=['hospital', 'deaths'],
    rows=3,
    disabled=False,
)

# container holding the controls (swap HBox for VBox to try a different layout)
controls = wdg.HBox(children=[series])

def timeseries_graph(gcols):
    """Plot the selected columns of the global dataframe ``timeseriesdf``.

    Parameters:
        gcols: sequence of column names to plot (the current value of the
            selection widget). When it is empty, a short usage hint is
            printed instead of a graph.
    """
    if gcols:
        timeseriesdf[list(gcols)].plot()
        # render the figure now, so that every call made through
        # interactive_output shows an up-to-date graph
        plt.show()
    else:
        print("Click to select data for graph")
        print("(CTRL-Click to select more than one category)")

# keep calling timeseries_graph(gcols=value_of_series) and capture
# its output in the variable graph
graph = wdg.interactive_output(
    timeseries_graph,
    {'gcols': series},
)

display(controls, graph)

HBox(children=(SelectMultiple(description='Stats:', index=(0, 1), options=('hospital', 'deaths'), rows=3, valu…

Output()

In [5]:
# Place your API access code in this function. Do not call this function directly; it will be called by 
# the button callback. 

def access_api():
    """ Query the PHE API and hand back the raw data, in the same format as
    the data loaded from the "canned" JSON file. """
    print("Getting data...")

    # each metric-value pair lives inside a single string
    area_filters = ['areaType=overview']

    # keys are our own column names, values are the names of the PHE metrics
    requested_metrics = dict(
        date="date",
        hospital="newAdmissions",
        deaths="cumDeaths28DaysByDeathDateRate",
    )

    raw_timeseries = Cov19API(filters=area_filters, structure=requested_metrics).get_json()
    print("... data retrieved")
    # data exactly as read from the API
    return raw_timeseries

# Printout from this function will be lost in Voila unless captured in an
# output widget - therefore, we give feedback to the user by changing the
# appearance of the button
def _refresh_graph():
    """ Force the graph to redraw after the dataframe has been replaced.
    The graph won't refresh until the selection widget changes, so we simulate
    the interaction: clear the selection, then put it back. """
    selected = series.value
    if selected:
        series.value = ()        # forces a redraw with nothing selected...
        series.value = selected  # ...then redraws the user's selection


def api_button_callback(button):
    """ Button callback - it must take the button as its parameter (unused in this case).
    Accesses the API, wrangles the data and overwrites the global dataframe
    timeseriesdf used for plotting, then redraws the graph. The button icon
    switches to "check" on success; on failure the icon becomes "unlink", the
    button text becomes "Unavailable" and the existing data is left untouched. """
    # timeseriesdf is the dataframe read by timeseries_graph, so it is the one
    # that must be replaced for new data to show up in the graph
    global timeseriesdf
    try:
        # get fresh data from the API and wrangle it
        apidata = access_api()
        fresh_df = wrangle_data(apidata)
    except Exception as err:
        # UI boundary: report any failure through the button instead of
        # letting the callback die with a traceback the user may never see
        print("Could not get fresh data:", err)
        apibutton.icon = "unlink"
        apibutton.description = "Unavailable"
        return
    timeseriesdf = fresh_df
    _refresh_graph()
    # after all is done, switch the icon on the button to a "check" sign
    apibutton.icon = "check"
    # optionally disable the button as well - it won't be needed again:
    # apibutton.disabled=True

    
# button that fetches fresh data when clicked
apibutton = wdg.Button(
    description='Get Data',  # caption shown on the button
    tooltip="This will get the latest data for you",
    icon='download',  # FontAwesome name without the `fa-` prefix
    button_style='Info',  # 'success', 'info', 'warning', 'danger' or ''
    disabled=False,
)

# register the callback function so that a click on the button runs it
apibutton.on_click(api_button_callback)

display(apibutton)

# run all cells before clicking on this button

# fetch the data from the API and save it as the JSON file read at startup
timeseries = access_api()

with open("timeseries.json", "wt") as OUTF:
    OUTF.write(json.dumps(timeseries))

Button(button_style='info', description='Get Data', icon='download', style=ButtonStyle(), tooltip='This will g…

**Author and Copyright Notice** *This dashboard was made by [Naomi Christie](https://github.com/nchristie), based on UK Government [data](https://coronavirus.data.gov.uk/) published by [Public Health England](https://www.gov.uk/government/organisations/public-health-england).*