# Analyse COVID-19 data

In [42]:
import pandas as pd
import numpy as np
from bqplot import *
import bqplot.pyplot as bqplt
import ipywidgets as widgets
from ipywidgets import Layout, Dropdown, Button
from ipywidgets import Image as ImageIpy
import matplotlib.colors as colors
from IPython.display import HTML, display

In [2]:
# Palette of 25 RGB triples (each component in [0, 1]); one entry per plotted line.
colorListRGB = [
    [0, 0, 1.0],
    [1.0, 0, 0],
    [0, 1.0, 0],
    [0, 0, 0.172413793103448],
    [1.0, 0.103448275862069, 0.724137931034483],
    [1.0, 0.827586206896552, 0],
    [0, 0.344827586206897, 0],
    [0.517241379310345, 0.517241379310345, 1.0],
    [0.620689655172414, 0.310344827586207, 0.275862068965517],
    [0, 1.0, 0.758620689655172],
    [0, 0.517241379310345, 0.586206896551724],
    [0, 0, 0.482758620689655],
    [0.586206896551724, 0.827586206896552, 0.310344827586207],
    [0.965517241379310, 0.620689655172414, 0.862068965517241],
    [0.827586206896552, 0.068965517241379, 1.0],
    [0.482758620689655, 0.103448275862069, 0.413793103448276],
    [0.965517241379310, 0.068965517241379, 0.379310344827586],
    [1.0, 0.758620689655172, 0.517241379310345],
    [0.137931034482759, 0.137931034482759, 0.034482758620690],
    [0.551724137931034, 0.655172413793103, 0.482758620689655],
    [0.965517241379310, 0.517241379310345, 0.034482758620690],
    [0.517241379310345, 0.448275862068966, 0],
    [0.448275862068966, 0.965517241379310, 1.0],
    [0.620689655172414, 0.758620689655172, 1.0],
    [0.448275862068966, 0.379310344827586, 0.482758620689655],
]

In [3]:
# Hex-string version of colorListRGB, built with matplotlib's colour converter.
# A comprehension keeps the cell idempotent and avoids leaving a stray
# module-level loop variable behind in the notebook namespace.
colorListHTML = [colors.to_hex(rgb) for rgb in colorListRGB]

In [4]:
def updatePlot(casesOrDeaths, figure, align, minimum, gradient, rollingMean, interpolate, normalizeString, logY):
    """Recompute the plotted series for the given settings and refresh the UI.

    Fetches the raw table for `casesOrDeaths`, runs it through
    `manipulateData`, pushes the result into `figure` and finally syncs the
    dependent widgets.

    Parameters are forwarded unchanged to `getRawData`, `manipulateData` and
    `updateFigure`; see those functions for their meaning.
    """
    source, asOf = getRawData(casesOrDeaths)
    series = manipulateData(
        source, align, minimum, gradient, rollingMean, interpolate, normalizeString
    )
    updateFigure(figure, series, align, logY, asOf, casesOrDeaths)

    # manipulateData maintains gNormalizeDictName; mirror it into the label widget.
    textNormalizeDictName.value = gNormalizeDictName
    updateWidgets()

In [5]:
def getRawData(casesOrDeaths):
    """Return the cumulative per-country table and the date of its last row.

    The table is downloaded from the JHU CSSE repository only when
    `casesOrDeaths` differs from the series currently cached in the module
    globals; otherwise the cached `gRawData` / `gEndDate` are returned.

    Parameters
    ----------
    casesOrDeaths : str
        "cases" (case-insensitive) selects the confirmed-cases series; any
        other value selects the deaths series.

    Returns
    -------
    (pandas.DataFrame, str)
        Table with one row per date (integer index) and one column per
        country, and the last date formatted as "%d %B, %Y".
    """
    global gCasesOrDeaths
    global gRawData
    global gEndDate

    if gCasesOrDeaths != casesOrDeaths:
        if casesOrDeaths.lower() == "cases":
            dataFile = "https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_confirmed_global.csv"
        else:
            dataFile = "https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_deaths_global.csv"

        # Index by the second CSV column (country) and transpose so that
        # rows are dates and columns are countries.
        rawData = pd.read_csv(dataFile, index_col=1).T

        # Remove the non-date rows and the entries that are not wanted in the
        # plot. errors="ignore" keeps the load working if upstream stops
        # publishing one of these labels.
        rawData = rawData.drop(index=["Lat", "Long", "Province/State"], errors="ignore")
        rawData = rawData.drop(columns=["Diamond Princess", "Taiwan*"], errors="ignore")

        # Rename countries to the names used as keys by the World Bank table,
        # and fold territories into their parent country.
        rawData = rawData.rename(columns={
            "Iran": "Iran, Islamic Rep.",
            "Korea, South": "Korea, Rep.",
            "US": "United States",
            "Brunei": "Brunei Darussalam",
            "Czechia": "Czech Republic",
            "Egypt": "Egypt, Arab Rep.",
            "Russia": "Russian Federation",
            "Slovakia": "Slovak Republic",
            "Congo (Kinshasa)": "Congo, Dem. Rep.",
            # Guadeloupe is a French territory (was wrongly folded into Mexico).
            "Guadeloupe": "France",
            "Jersey": "United Kingdom",
            "Martinique": "France",
            "Reunion": "France",
            "Venezuela": "Venezuela, RB",
        })

        # Sum columns that share a name (several provinces / merged
        # territories). Grouping the transposed frame avoids the deprecated
        # groupby(axis=1).
        rawData = rawData.T.groupby(level=0).sum().T

        endDate = pd.to_datetime(rawData.index[-1]).strftime("%d %B, %Y")
        rawData = rawData.reset_index(drop=True)

        # Publish the cache only after everything succeeded, so a failed
        # download cannot leave gCasesOrDeaths pointing at data that was
        # never loaded.
        gRawData = rawData
        gEndDate = endDate
        gCasesOrDeaths = casesOrDeaths
    return gRawData, gEndDate

In [6]:
def updateFigure(figure, data, align, logY, endDate, casesOrDeaths):
    """Push new series, labels, scale type and captions into `figure`.

    Parameters
    ----------
    figure : object
        Figure whose first mark receives the data and whose axes are
        [x-axis, y-axis].
    data : pandas.DataFrame
        One row per plotted line; the row index supplies the line labels.
    align : int
        Alignment threshold, used only for the x-axis caption.
    logY : bool
        Whether the y axis should be logarithmic.
    endDate : str
        Date shown in the figure title.
    casesOrDeaths : str
        Series name, used for the axis captions.
    """
    global gLogY
    line = figure.marks[0]
    line.y = data
    line.labels = list(data.index.to_numpy())

    # Only rebuild the y scale when the log/linear choice actually changed.
    if gLogY != logY:
        gLogY = logY
        figure.axes[1].scale = LogScale() if logY else LinearScale()
        line.scales = {'x': figure.axes[0].scale, 'y': figure.axes[1].scale}

    # Use the date that was passed in rather than the module-level gEndDate,
    # so the title always matches the data being displayed.
    figure.title = f"Current as of {endDate}"
    figure.axes[1].label = casesOrDeaths
    figure.axes[0].label = f"Days since {align} {casesOrDeaths.lower()}"

In [7]:
def manipulateData(rawData, align, minimum, gradient, rollingMean, interpolate, normalizeString):
    """Turn the raw cumulative table into the series that get plotted.

    Steps, in order: align every country on the first value above `align`,
    drop countries with too few points, optionally interpolate over repeated
    values, optionally differentiate / smooth, and optionally normalise by a
    World Bank indicator.

    The chosen settings are also stored in the module globals (gAlign,
    gMinimum, gGradient, gRollingMean, gInterpolate, gNormalizeString,
    gNormalizeDict, gNormalizeDictName) so that widget callbacks can re-use
    them.

    Parameters
    ----------
    rawData : pandas.DataFrame
        One column per country, one row per date.
    align : int
        Only values strictly greater than this are kept; each country's
        series starts at its first such value.
    minimum : int
        Countries with fewer than ``minimum + 1`` remaining values are dropped.
    gradient : str
        "Cumulative", "New cases/deaths" or "Derivative of new cases/deaths".
    rollingMean : int
        Rolling-mean window applied after the first difference; falsy
        disables it.
    interpolate : bool
        Replace runs of unchanged values by interpolated ones.
    normalizeString : str
        World Bank indicator code, or '' for no normalisation.

    Returns
    -------
    pandas.DataFrame
        One row per country, one column per day since alignment.
    """
    global gAlign
    global gMinimum
    global gGradient
    global gRollingMean
    global gInterpolate
    global gNormalizeString
    global gNormalizeDict
    global gNormalizeDictName

    gAlign = align
    gMinimum = minimum
    gGradient = gradient
    gRollingMean = rollingMean
    gInterpolate = interpolate

    # Align: blank out everything up to the threshold, then shift each
    # country's remaining values up so that they all start at row 0.
    data = rawData.where(rawData > align)
    data = data.apply(lambda countryData: pd.Series(countryData.dropna().values))

    # Drop countries without minimum data
    data = data.dropna(axis='columns', thresh=minimum + 1)

    # Interpolate: values that did not change from the previous day are
    # treated as missing and filled from the following values.
    if interpolate:
        data = data.where(data.diff() != 0)
        data = data.interpolate(method="linear", limit=1, limit_direction="backward")
        data = data.bfill()

    # Take gradients
    if gradient in ("New cases/deaths", "Derivative of new cases/deaths"):
        data = data.diff()

        # Rolling mean
        if rollingMean:
            data = data.rolling(rollingMean).mean()

        if gradient == "Derivative of new cases/deaths":
            data = data.diff()
        else:
            # Non-positive daily counts are replaced by a small positive
            # value so that they can still be drawn on a log axis.
            data = data.mask(data <= 0, other=1e-3)

    # Normalize: the indicator is only fetched again when the code changes.
    if gNormalizeString != normalizeString:
        gNormalizeString = normalizeString
        if normalizeString != '':
            try:
                gNormalizeDict, gNormalizeDictName = worldBank(normalizeString)
            except Exception:
                # Clear the previous indicator so the plot is not silently
                # normalised by stale values while reporting an invalid code.
                gNormalizeDict = {}
                gNormalizeDictName = "Invalid World Bank Code"
        else:
            gNormalizeDict = {}
            gNormalizeDictName = ""

    if gNormalizeDict:
        # Countries absent from the indicator table become NaN instead of
        # raising KeyError and aborting the whole update.
        data = data.apply(
            lambda countryData: countryData / gNormalizeDict.get(countryData.name, np.nan)
        )

    return data.T

In [8]:
def worldBank (code, factor = 1):
    """Download a World Bank indicator and return its latest value per country.

    Parameters
    ----------
    code : str
        World Bank indicator code, inserted into the download URL.
    factor : number, optional
        Multiplier applied to every value (default 1).

    Returns
    -------
    (dict, str)
        Mapping from the first workbook column (used as index) to the last
        non-missing value among the selected columns, and a short indicator
        name built from the third sheet.
    """
    url  = "http://api.worldbank.org/v2/en/indicator/" + code + "?downloadformat=excel"

    # Open the workbook once and read both sheets from it, instead of
    # downloading the same file twice.
    with pd.ExcelFile(url) as workbook:
        values = pd.read_excel(workbook, sheet_name = 0, header = 0, skiprows = 3,
                               index_col = 0, usecols = [0] + list(range(4, 63)))
        nameSheet = pd.read_excel(workbook, sheet_name = 2, usecols = [1])

    # Forward-fill along each row so the last column holds the most recent
    # value that is available for that row.
    latest = values.ffill(axis=1).iloc[:, -1] * factor

    # The name is the 2nd to 4th space-separated tokens of the sheet rendered
    # as text. NOTE(review): this depends on to_string() padding; confirm it
    # still yields the intended words for the indicators in use.
    nameList = nameSheet.to_string(header = False, index = False).split(" ")
    normalizeDictName = " ".join(nameList[1:4])
    return latest.to_dict(), normalizeDictName

In [9]:
def updateWidgets():
    """Enable, disable and re-range widgets to match the current global settings."""
    # The rolling mean only applies to differenced data.
    sliderRollingMean.disabled = gGradient == "Cumulative"

    # The second derivative view forces a linear axis and locks the toggle.
    derivativeView = gGradient == "Derivative of new cases/deaths"
    if derivativeView:
        toggleLogY.value = False
    toggleLogY.disabled = derivativeView

    # Cases allow a larger alignment threshold than deaths.
    sliderAlign.max = 1000 if gCasesOrDeaths == "Cases" else 100

In [10]:
# Module-level state shared between the widget callbacks and the plotting
# helpers. Each value is the "current setting" that the callbacks pass back
# into updatePlot for every argument the triggering widget did not change.
gAlign = 10
gMinimum = 15
gGradient = 'Cumulative'
gRollingMean = 0
gInterpolate = False
# getRawData only downloads when the requested series differs from
# gCasesOrDeaths, so start on "Cases" and request "Deaths" to force the
# initial load. After this call gCasesOrDeaths holds "Deaths".
gCasesOrDeaths = "Cases"
gRawData, gEndDate = getRawData("Deaths")
# Normalisation state: indicator code, lookup table and display name.
gNormalizeString = ''
gNormalizeDict = {}
gNormalizeDictName = ''
gLogY = False

In [11]:
# Selector for the series to plot; the initial choice comes from gCasesOrDeaths.
toggleCasesOrDeaths = widgets.ToggleButtons(
    description='Data:',
    options=['Deaths', 'Cases'],
    value=gCasesOrDeaths,
    disabled=False,
)

In [75]:
# Alignment threshold slider. updateWidgets adjusts `max` depending on the
# selected series.
sliderAlign = widgets.IntSlider(
    description='Align:',
    value=gAlign,
    min=1,
    max=100,
    step=1,
    orientation='horizontal',
    readout=True,
    readout_format='d',
    continuous_update=True,
    disabled=False,
    layout={'width': '50ex'},
)

In [76]:
# Minimum number of data points a country needs to be plotted; bounded above
# by the number of rows in the raw table.
sliderMinimum = widgets.IntSlider(
    description='Minimum:',
    value=gMinimum,
    min=1,
    max=len(gRawData),
    step=1,
    orientation='horizontal',
    readout=True,
    readout_format='d',
    continuous_update=True,
    disabled=False,
    layout={'width': '50ex'},
)

In [77]:
# Chooses between the cumulative series, its first difference and its second
# difference. The option strings are compared verbatim in manipulateData and
# updateWidgets.
toggleGradient = widgets.ToggleButtons(
    description='Gradient:',
    options=['Cumulative', 'New cases/deaths', 'Derivative of new cases/deaths'],
    value=gGradient,
    disabled=False,
)

In [78]:
# Rolling-mean window. Starts disabled; updateWidgets enables it whenever the
# gradient mode is not 'Cumulative'.
# NOTE(review): gRollingMean starts at 0, which is below `min`; confirm how
# the slider treats an out-of-range initial value.
sliderRollingMean = widgets.IntSlider(
    description='Rolling Mean:',
    value=gRollingMean,
    min=1,
    max=15,
    step=1,
    orientation='horizontal',
    readout=True,
    readout_format='d',
    continuous_update=True,
    disabled=True,
    layout={'width': '50ex'},
)

In [79]:
# On/off switch for the interpolation step in manipulateData.
toggleInterpolate = widgets.ToggleButton(
    description='Interpolate',
    value=gInterpolate,
    tooltip='Description',
    button_style='',  # one of 'success', 'info', 'warning', 'danger' or ''
    disabled=False,
)

In [80]:
# Free-text field for the World Bank indicator code used for normalisation;
# an empty string means no normalisation.
textNormalizeString = widgets.Text(
    description='World Bank Code:',
    placeholder='Type something',
    value='',
    disabled=False,
)

In [81]:
# On/off switch for a logarithmic y axis; updateWidgets locks it off in the
# 'Derivative of new cases/deaths' mode.
toggleLogY = widgets.ToggleButton(
    description='Log Y',
    value=gLogY,
    tooltip='Description',
    button_style='',  # one of 'success', 'info', 'warning', 'danger' or ''
    disabled=False,
)

In [82]:
# Read-only label showing the name of the active normalisation indicator;
# updatePlot rewrites its value after every refresh.
textNormalizeDictName = widgets.HTML(
    description="Normalize by: ",
    value=gNormalizeDictName,
    layout={'width': 'max-content'},  # let long indicator names use the space they need
)

In [83]:
def _replotOn(argName):
    """Return an observer that re-plots with `argName` set to the widget's new value.

    Every other updatePlot argument is taken from the module globals at the
    moment the event fires (not when the observer is registered), so the
    handler always sees the latest settings and the `fig` defined further
    down the notebook.
    """
    def handler(change):
        settings = {
            "casesOrDeaths": gCasesOrDeaths,
            "figure": fig,
            "align": gAlign,
            "minimum": gMinimum,
            "gradient": gGradient,
            "rollingMean": gRollingMean,
            "interpolate": gInterpolate,
            "normalizeString": gNormalizeString,
            "logY": gLogY,
        }
        settings[argName] = change.new
        updatePlot(**settings)
    return handler


# One observer per control; each one overrides exactly one updatePlot argument.
toggleCasesOrDeaths.observe(_replotOn("casesOrDeaths"), 'value')
sliderAlign.observe(_replotOn("align"), 'value')
sliderMinimum.observe(_replotOn("minimum"), 'value')
toggleGradient.observe(_replotOn("gradient"), 'value')
sliderRollingMean.observe(_replotOn("rollingMean"), 'value')
toggleInterpolate.observe(_replotOn("interpolate"), 'value')
textNormalizeString.observe(_replotOn("normalizeString"), 'value')
toggleLogY.observe(_replotOn("logY"), 'value')

In [84]:
# Build the initial figure from the current global settings.
# x values are the row index of the raw table; y values are the processed
# series returned by manipulateData (one row per plotted line).
xData = gRawData.index.to_numpy()
yData = manipulateData(gRawData, gAlign, gMinimum, gGradient, gRollingMean, gInterpolate, gNormalizeString)

# Both axes start linear; updateFigure replaces the y scale when Log Y is toggled.
xScale = LinearScale()
yScale = LinearScale()

# Tooltip showing the line's name under the heading 'Country'.
defaultTool = Tooltip(fields=['name'], formats=[''], labels=['Country'])
# NOTE(review): marker_str looks like a pyplot-style argument; confirm that
# Lines accepts it as a keyword.
lineChart = Lines(x=xData,
                  y=yData, 
                  scales= {'x': xScale, 'y': yScale}, 
                  tooltip=defaultTool, 
                  display_legend=True, 
                  labels=list(yData.index.to_numpy()),
                  marker_str = 'sr', 
                  colors=colorListHTML,
#                   marker= 'circle',
#                   marker_size= 15,
                  selected_style = {"stroke-width" : 5},
                  unselected_style = {"opacity" : 0.5},
#                   interactions = {'hover': 'tooltip', 'click': 'select'},
#                   interactions = {'hover': 'select'},
                  )

# Axis captions mirror the ones that updateFigure writes on every refresh.
XAxis = Axis(scale=xScale, label = f"Days since {gAlign} {gCasesOrDeaths.lower()}")
YAxis = Axis(scale=yScale, orientation='vertical', tick_format='0.2f', label = gCasesOrDeaths, label_offset = "5em")

# updateFigure relies on this ordering: marks[0] is the line chart,
# axes[0] is the x axis and axes[1] is the y axis.
fig = Figure(marks=[lineChart], 
             axes=[XAxis, YAxis],
             animation_duration=500, 
             legend_location = 'bottom-right', 
             legend_style = {'stroke': 'none'}, 
             title = f"Current as of {gEndDate}",
             fig_margin={"top" : 80, "left":80, "right":80, "bottom":80},
#              fig_margin={"top" : 80, "left":0, "right":0, "bottom":80},
            )
# Toolbar bound to the figure above.
toolBar = Toolbar(figure = fig)

In [85]:
# Inject a CSS rule giving every widget label a minimum width of 20ex.
labelWidthCss = '''<style>
    .widget-label { min-width: 20ex !important; }
</style>'''
display(HTML(labelWidthCss))

In [86]:
# Assemble the dashboard: two rows of controls, the series selector, the
# normalisation inputs, then the figure and its toolbar. The VBox is the last
# expression of the cell so that it is rendered as the cell output.
alignRow = widgets.HBox([sliderAlign, toggleGradient])
filterRow = widgets.HBox([sliderMinimum, sliderRollingMean, toggleInterpolate, toggleLogY])
widgets.VBox([
    alignRow,
    filterRow,
    toggleCasesOrDeaths,
    textNormalizeString,
    textNormalizeDictName,
    fig,
    toolBar,
])

VBox(children=(HBox(children=(IntSlider(value=10, description='Align:', layout=Layout(width='50ex'), min=1), T…

#### To do:
- LineStyles
- Persistent colors / styles
- Make legend fit better
- Highlight on hover/click
- Improve layout

In [23]:
# lineChart.interactions

In [24]:
# def clickCallback(line, event):
#     print(event)
#     print("\n\n\n\n\n")
#     raise Exception("click callback reached")
# #     line.curves_subset = [event["data"]["index"]]
# #     time.sleep(1)
# #     line.curves_subset = []
# lineChart.on_element_click(clickCallback)

In [25]:
# # import time
# def hoverCallback(line, event):
#     print(event["data"]["index"])
#     print("\n\n\n\n\n")
#     lineChart.selected = event["data"]["index"]
# #     line.curves_subset = [event["data"]["index"]]
# #     time.sleep(1)
# #     line.curves_subset = []

# lineChart.on_hover(hoverCallback)