# Analyse CoViD-19 data

Import necessary packages

In [34]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.patheffects as pe
import mplcursors
from adjustText import adjust_text
from bqplot import *
import bqplot.pyplot as bqplt
# The widget cells below refer to `widgets.<Class>`; bind that name explicitly
# so the notebook works from a fresh kernel.
import ipywidgets as widgets
from ipywidgets import Layout, Dropdown, Button
from ipywidgets import Image as ImageIpy

Define a function that retrieves the raw data from the Johns Hopkins CSSE repository

In [35]:
def getRawData(casesOrDeaths):
    """Download one of the CSSE global CoViD-19 time-series files and tidy it.

    Parameters
    ----------
    casesOrDeaths : str
        "cases" (compared case-insensitively) selects the confirmed-cases
        file; any other value selects the deaths file.

    Returns
    -------
    pandas.DataFrame
        One row per date column of the CSV (in file order, re-indexed
        0..n-1) and one column per country name. Columns that share a name
        after renaming are summed into a single column.
    """
    baseUrl = ("https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/"
               "csse_covid_19_data/csse_covid_19_time_series/")
    if casesOrDeaths.lower() == "cases":
        fileName = "time_series_covid19_confirmed_global.csv"
    else:
        fileName = "time_series_covid19_deaths_global.csv"

    # Alternative spellings used for the country columns. Territories are
    # folded into their parent country (they are summed together below).
    countryNames = {
        "Iran": "Iran, Islamic Rep.",
        "Korea, South": "Korea, Rep.",
        "US": "United States",
        "Brunei": "Brunei Darussalam",
        "Czechia": "Czech Republic",
        "Egypt": "Egypt, Arab Rep.",
        "Russia": "Russian Federation",
        "Slovakia": "Slovak Republic",
        "Congo (Kinshasa)": "Congo, Dem. Rep.",
        # Guadeloupe is a French territory (it was previously mapped to Mexico).
        "Guadeloupe": "France",
        "Jersey": "United Kingdom",
        "Martinique": "France",
        "Reunion": "France",
        "Venezuela": "Venezuela, RB",
    }

    # The second CSV column becomes the index; transposing then puts the
    # remaining CSV columns (metadata and dates) in rows, one column per entry.
    rawData = pd.read_csv(baseUrl + fileName, index_col=1).T

    # Remove the metadata rows so that only the date rows remain.
    rawData = rawData.drop(["Lat", "Long", "Province/State"])
    # The cruise ship is not a country; tolerate it being absent upstream.
    rawData = rawData.drop(columns=["Diamond Princess"], errors="ignore")
    rawData = rawData.rename(columns=countryNames)

    # The transpose of the mixed-type CSV leaves object dtype; restore numbers
    # before summing.
    rawData = rawData.infer_objects()

    # Sum columns that share a country name. Grouping on the transposed frame
    # avoids the deprecated `groupby(..., axis=1)`.
    rawData = rawData.T.groupby(level=0).sum().T

    return rawData.reset_index(drop=True)

Initialise global variables

In [36]:
# Current settings of the interactive controls. manipulateData() overwrites
# these on every call, so they always mirror the last plotted state.
gAlign = 10                # threshold a country's count must exceed to be kept
gMinimum = 15              # minimum number of data points per country
gGradient = "Cumulative"   # one of the toggleGradient options
gRollingMean = 0           # rolling-mean window; 0 disables it
gInterpolate = True        # interpolate over repeated values

# Data set shown initially.
gRawData = getRawData("deaths")

Define the function that updates the plot; it is called by all the interactive buttons

In [37]:
def updatePlot(rawData, lineChart, align, minimum, gradient, rollingMean, interpolate):
    """Recompute the plotted series and push them into ``lineChart``.

    The settings are forwarded unchanged to ``manipulateData``; its result is
    assigned to ``lineChart.y`` and its index values become the line labels.
    """
    plotData = manipulateData(rawData, align, minimum, gradient, rollingMean, interpolate)
    countryLabels = list(plotData.index.to_numpy())
    lineChart.y = plotData
    lineChart.labels = countryLabels

Define function that manipulates the data according to the input from the interactive buttons

In [46]:
def manipulateData(rawData, align, minimum, gradient, rollingMean, interpolate):
    """Align, filter and optionally differentiate the raw time series.

    The five settings are first stored in the module-level ``g*`` variables,
    so later callbacks can reuse whatever was plotted last.

    Steps:
      1. Keep only values greater than ``align`` and shift each column up so
         its first kept value sits at row 0.
      2. Drop columns with fewer than ``minimum`` remaining values.
      3. If ``interpolate`` is true, blank out values equal to the previous
         row and fill the gaps linearly (at most 2 consecutive, backwards).
      4. For "New cases/deaths", take the first difference, then the rolling
         mean when ``rollingMean`` is non-zero. For "Derivative of new
         cases/deaths", difference once more after that.

    Returns the transposed frame: one row per column of ``rawData`` that
    survived filtering.
    """
    global gAlign
    global gMinimum
    global gGradient
    global gRollingMean
    global gInterpolate

    gAlign, gMinimum, gGradient = align, minimum, gradient
    gRollingMean, gInterpolate = rollingMean, interpolate

    # Mask values at or below the threshold, then compact each column.
    aboveThreshold = rawData.where(rawData > align)
    aligned = aboveThreshold.apply(lambda column: pd.Series(column.dropna().values))

    # Drop countries without minimum data
    series = aligned.dropna(axis='columns', thresh=minimum)

    if interpolate:
        # A repeated value has a zero difference; blank it and interpolate.
        series = series.where(series.diff() != 0)
        series = series.interpolate(method="linear", limit=2, limit_direction="backward")

    wantsDerivative = gradient == "Derivative of new cases/deaths"
    wantsNew = wantsDerivative or gradient == "New cases/deaths"
    if not wantsNew:
        return series.T

    series = series.diff()
    if rollingMean:
        series = series.rolling(rollingMean).mean()
    if wantsDerivative:
        series = series.diff()
    return series.T
# # Normalize
# for individualNormalizeDict in normalizeDicts:
#     data = data.apply(lambda countryData: countryData / individualNormalizeDict[countryData.name])


Create a toggle widget to change between displaying deaths or cases

In [44]:
# Selector for the data set to plot; it starts on "Deaths".
toggleCasesOrDeaths = widgets.ToggleButtons(
    description='Data:',
    options=['Deaths', 'Cases'],
    value="Deaths",
    disabled=False,
    # layout={'width': 'max-content'},  # If the items' names are long
)

Create a slider widget that sets the value at which the cases or deaths curves are aligned

In [45]:
# Alignment threshold (1-100), starting at the current global setting.
sliderAlign = widgets.IntSlider(
    description='Align:',
    value=gAlign,
    min=1,
    max=100,
    step=1,
    orientation='horizontal',
    continuous_update=True,
    readout=True,
    readout_format='d',
    disabled=False,
)

Create a slider widget that sets the minimum number of data points a country needs in order to be displayed

In [27]:
# Minimum number of data points per country; the upper bound is the number
# of rows in the raw data that was loaded at start-up.
sliderMinimum = widgets.IntSlider(
    description='Minimum:',
    value=gMinimum,
    min=1,
    max=len(gRawData),
    step=1,
    orientation='horizontal',
    continuous_update=True,
    readout=True,
    readout_format='d',
    disabled=False,
)

Create a toggle widget to switch between displaying cumulative values, new cases/deaths, or the derivative of new cases/deaths

In [47]:
# Which quantity to plot. The option labels are compared verbatim inside
# manipulateData, so they must not be reworded here alone.
toggleGradient = widgets.ToggleButtons(
    description='Gradient:',
    options=['Cumulative', 'New cases/deaths', 'Derivative of new cases/deaths'],
    value=gGradient,
    disabled=False,
    # layout={'width': 'max-content'},  # If the items' names are long
)

Create a slider widget that can vary the rolling mean of new cases/deaths

In [48]:
# Rolling-mean window (0-15); manipulateData skips the rolling mean when it is 0.
sliderRollingMean = widgets.IntSlider(
    description='Rolling Mean:',
    value=gRollingMean,
    min=0,
    max=15,
    step=1,
    orientation='horizontal',
    continuous_update=True,
    readout=True,
    readout_format='d',
    disabled=False,
)

Create a toggle widget to enable or disable interpolation when data is missing

In [49]:
# On/off switch for interpolation. The tooltip previously carried the
# placeholder text 'Description'; it now says what the button does.
toggleInterpolate = widgets.ToggleButton(
    value=gInterpolate,
    description='Interpolate',
    disabled=False,
    button_style='', # 'success', 'info', 'warning', 'danger' or ''
    tooltip='Interpolate linearly over repeated (unchanged) values',
    icon='check' # (FontAwesome names without the `fa-` prefix)
)

Link the interactive buttons to the `updatePlot` function

In [50]:
def onCasesOrDeathsChange(change):
    """Load the newly selected data set, remember it, and redraw the plot.

    The fetched frame is stored in the global ``gRawData`` so that the other
    controls keep operating on the data set that is currently displayed.
    """
    global gRawData
    gRawData = getRawData(change.new)
    updatePlot(gRawData, lineChart, gAlign, gMinimum, gGradient, gRollingMean, gInterpolate)


# Each control redraws the plot with its own new value and the last-used
# values of all the other settings. `lineChart` and the g* globals are looked
# up when a callback fires, not when it is registered.
toggleCasesOrDeaths.observe(onCasesOrDeathsChange, 'value')
sliderAlign.observe(lambda change : updatePlot(gRawData, lineChart, change.new, gMinimum, gGradient, gRollingMean, gInterpolate), 'value')
sliderMinimum.observe(lambda change : updatePlot(gRawData, lineChart, gAlign, change.new, gGradient, gRollingMean, gInterpolate), 'value')
toggleGradient.observe(lambda change : updatePlot(gRawData, lineChart, gAlign, gMinimum, change.new, gRollingMean, gInterpolate), 'value')
sliderRollingMean.observe(lambda change : updatePlot(gRawData, lineChart, gAlign, gMinimum, gGradient, change.new, gInterpolate), 'value')
toggleInterpolate.observe(lambda change : updatePlot(gRawData, lineChart, gAlign, gMinimum, gGradient, gRollingMean, change.new), 'value')

Create a line chart with the initialised data

In [62]:
# Initial plot data: x is the row index of the raw data, y holds one row per
# country as returned by manipulateData for the start-up settings.
xData = gRawData.index.to_numpy()
yData = manipulateData(gRawData, gAlign, gMinimum, gGradient, gRollingMean, gInterpolate)

fig = bqplt.figure(animation_duration=500, legend_location='top-left')
lineChart = bqplt.plot(
    x=xData,
    y=yData,
    labels=list(yData.index.to_numpy()),
    display_legend=True,
    marker_str='sr',
)

Stack the widgets and fig and display

In [65]:
# Two rows of controls, then the data-set selector, then the figure.
alignRow = widgets.HBox([sliderAlign, toggleGradient ])
filterRow = widgets.HBox([sliderMinimum, sliderRollingMean, toggleInterpolate])
widgets.VBox([alignRow, filterRow, toggleCasesOrDeaths, fig])

VBox(children=(HBox(children=(IntSlider(value=100, continuous_update=False, description='Align:', min=1), Togg…