# Overview
Some fun visualizations of coronavirus (COVID-19) case data, rendered with deck.gl through the pydeck Python bindings.

In [1]:
# Install pydeck, which the visualizations in this notebook are built with.
# NOTE(review): the version is not pinned, so a rerun may pick up a different release.
!pip install pydeck

In [1]:
from kaggle_secrets import UserSecretsClient
import os
# Read the Kaggle secret stored under the name "mapbox" and export it as the
# MAPBOX_API_KEY environment variable (presumably picked up by pydeck when it
# renders base maps - confirm against the pydeck version in use).
user_secrets = UserSecretsClient()
os.environ.update(MAPBOX_API_KEY=user_secrets.get_secret("mapbox"))

In [1]:
import pydeck as pdk
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from IPython.display import FileLink

# Corona Virus Report Data


In [1]:
def _country_state_label(c_row):
    """Build the display label for one row: the country, plus the province when it is a string."""
    province = c_row['Province/State']
    # Missing provinces are not strings, so they contribute an empty suffix.
    suffix = ' {Province/State}'.format(**c_row) if isinstance(province, str) else ''
    return c_row['Country/Region'] + suffix


# Load the report, parse the dates, and order the rows newest-first.
covid_df = (
    pd.read_csv('../input/corona-virus-report/complete_data_new_format.csv')
    .assign(Date=lambda frame: pd.to_datetime(frame['Date']))
    .sort_values('Date', ascending=False)
)
# One label per row, combining country and (optional) province/state.
covid_df['Country_State'] = covid_df.apply(_country_state_label, axis=1)
covid_df.head(3)

In [1]:
# show a few days of data
# Map centre used for the initial camera position.
lat_mean, long_mean = 48.2323, -1.415
view_state = pdk.ViewState(
    latitude=lat_mean,
    longitude=long_mean,
    zoom=3,
    min_zoom=3,
    max_zoom=15,
    pitch=40.5,
    bearing=-27.36,
)

# One scatter layer per snapshot, appended from the oldest (rank 3) to the
# newest (rank 0) row of each Country_State group.
layer_list = []
for row_rank in (3, 2, 1, 0):
    snapshot_df = (
        covid_df
        .groupby(['Country_State'])
        # Sort each group newest-first and keep the row at position `row_rank`
        # (or the last available row when the group is shorter than that).
        .apply(lambda grp: grp.sort_values('Date', ascending=False).head(row_rank + 1).tail(1))
        .reset_index(drop=True)
    )
    # Radius is twice the confirmed count, with the count capped at 100000.
    snapshot_df['Radius'] = snapshot_df['Confirmed'].map(lambda n: 2*np.clip(n, 0, 100000))
    # Colour from the magma colormap: rank 3 maps to 0.25, rank 0 to 1.0.
    rgba = [int(255*channel) for channel in plt.cm.magma((4 - row_rank)/4.0)]
    rgba[3] = 100  # fixed alpha for every layer
    snapshot_layer = pdk.Layer(
        'ScatterplotLayer',
        snapshot_df,
        get_position=['Long', 'Lat'],
        auto_highlight=True,
        get_radius='Radius',          # Radius is given in meters
        get_fill_color=rgba,          # Set an RGBA value for fill
        pickable=True,
    )
    layer_list.append(snapshot_layer)

# Render all layers into a standalone HTML file and link to it.
r = pdk.Deck(layers=layer_list, initial_view_state=view_state)
r.to_html('covid_history.html')
FileLink('covid_history.html')

# More Granular Data
Here we use the `COVID19_open_line_list.csv` data and see how well it works with the same kinds of layers.

In [1]:
# Load the open line list from the Kaggle input directory.
covid_open_df = pd.read_csv('../input/novel-corona-virus-2019-dataset/COVID19_open_line_list.csv')
# Attempt to parse the confirmation dates.
# NOTE(review): with errors='ignore', to_datetime hands back its input unchanged
# when parsing fails, so this column may still hold raw strings afterwards -
# confirm before relying on datetime operations. Recent pandas releases also
# deprecate this option.
covid_open_df['date_confirmation'] = pd.to_datetime(covid_open_df['date_confirmation'], errors='ignore')
# Show five random rows, transposed so each record's fields read top to bottom.
covid_open_df.sample(5).T

In [1]:
# Initial camera for the hexagon view (latitude 45.2323, longitude 8).
view_state = pdk.ViewState(
    latitude=45.2323,
    longitude=8,
    zoom=4,
    min_zoom=2,
    max_zoom=15,
    pitch=40.5,
    bearing=-27.36,
)

# Options for the HexagonLayer built from the line-list rows' coordinates.
hex_layer_options = dict(
    get_position=['longitude', 'latitude'],
    auto_highlight=True,
    elevation_scale=500,
    pickable=True,
    elevation_range=[0, 3000],
    radius=50000,
    extruded=True,
    coverage=.5,
)
hex_layer = pdk.Layer('HexagonLayer', covid_open_df, **hex_layer_options)

# Write the deck to a standalone HTML file.
r = pdk.Deck(layers=[hex_layer], initial_view_state=view_state)
r.to_html('covid_hex.html')

In [1]:
# Transposed view of every line-list row whose country is exactly 'United Kingdom'.
uk_mask = covid_open_df['country'] == 'United Kingdom'
covid_open_df[uk_mask].T

In [1]:
# Map centre for the scatter view.
lat_mean, long_mean = 45.2323, 9
view_state = pdk.ViewState(
    latitude=lat_mean,
    longitude=long_mean,
    zoom=6,
    min_zoom=2,
    max_zoom=15,
    pitch=40.5,
    bearing=-27.36,
)

# Only these columns are handed to the layer.
scatter_columns = ['longitude', 'latitude', 'additional_information', 'country', 'province', 'city', 'sex', 'age']
confirmed_layer = pdk.Layer(
    'ScatterplotLayer',     # Change the `type` positional argument here
    covid_open_df[scatter_columns],
    get_position=['longitude', 'latitude'],
    auto_highlight=True,
    get_radius=10000,                   # Radius is given in meters
    get_fill_color=[180, 0, 200, 140],  # Set an RGBA value for fill
    pickable=True,
)

# Write the deck to a standalone HTML file.
r = pdk.Deck(layers=[confirmed_layer], initial_view_state=view_state)
r.to_html('covid_scatter.html')

In [1]:
# FileLink is already imported in the notebook's main import cell; the repeat is harmless.
from IPython.display import FileLink
# Display a link to the scatter-plot HTML file written by the previous cell.
FileLink('covid_scatter.html')

In [1]:
# Daily report for 03-28-2020, read straight from the CSSEGISandData GitHub repository.
new_df = pd.read_csv(
    'https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/'
    'csse_covid_19_data/csse_covid_19_daily_reports/03-28-2020.csv'
)
# Peek at three random rows.
new_df.sample(3)

In [1]:
# De-aggregate the daily report: emit one row per counted case so that a
# count-based layer (e.g. a hexagon bin) reflects the number of cases at each
# coordinate rather than the number of report rows.
count_var = 'Confirmed'

# Rows without coordinates cannot be placed on the map; the previous
# groupby-based version dropped them implicitly, here it is explicit.
located_df = new_df[['Lat', 'Long_', count_var]].dropna(subset=['Lat', 'Long_'])

# Number of copies of each row: its count, with missing values treated as 0
# and negative values clamped to 0 so np.repeat never sees an invalid count.
repeat_counts = located_df[count_var].fillna(0).astype(int).clip(lower=0).to_numpy()

# Repeat each row positionally. This is deterministic (the earlier unseeded
# sample(..., replace=True) was not) and avoids a Python-level apply per
# coordinate group, while producing the same number of rows per coordinate.
de_agg = (
    located_df
    .iloc[np.repeat(np.arange(len(located_df)), repeat_counts)]
    .reset_index(drop=True)
)
de_agg.shape

In [1]:
# Initial camera for the de-aggregated hexagon view (latitude 45.2323, longitude 8).
view_state = pdk.ViewState(
    latitude=45.2323,
    longitude=8,
    zoom=4,
    min_zoom=2,
    max_zoom=15,
    pitch=40.5,
    bearing=-27.36,
)

# The layer only needs the coordinates of the de-aggregated rows.
hex_points_df = de_agg[['Lat', 'Long_']]
hex_layer_options = dict(
    get_position=['Long_', 'Lat'],
    auto_highlight=True,
    elevation_scale=100,
    pickable=True,
    elevation_range=[0, 3000],
    radius=10000,
    extruded=True,
    coverage=.5,
)
hex_layer = pdk.Layer('HexagonLayer', hex_points_df, **hex_layer_options)

# Write the deck to a standalone HTML file and link to it.
r = pdk.Deck(layers=[hex_layer], initial_view_state=view_state)
r.to_html('covid_better_hex.html')
FileLink('covid_better_hex.html')

In [1]:
# Per-location frame for the grid-cell view: coordinates, a square-root-scaled
# height, and a magma colour computed from Confirmed / 10000.
grid_view_df = new_df[['Lat', 'Long_']].assign(
    value=np.sqrt(new_df['Confirmed']),
    Color=new_df['Confirmed'].map(
        lambda confirmed: [int(255*channel) for channel in plt.cm.magma(confirmed/10000)]
    ),
)

# Initial camera (latitude 45.2323, longitude 8), facing north.
view_state = pdk.ViewState(
    latitude=45.2323,
    longitude=8,
    zoom=4,
    min_zoom=0,
    max_zoom=15,
    pitch=40.5,
    bearing=0,
)

# Column heights come from 'value' and colours from 'Color'.
grid_layer_options = dict(
    get_position=['Long_', 'Lat'],
    get_elevation='value',
    get_color='Color',
    auto_highlight=True,
    elevationScale=2000,
    pickable=True,
    extruded=True,
    material=True,
    cellSize=20000,
    coverage=.5,
)
grid_layer = pdk.Layer('GridCellLayer', grid_view_df, **grid_layer_options)

# Write the deck to a standalone HTML file and link to it.
r = pdk.Deck(layers=[grid_layer], initial_view_state=view_state)
r.to_html('covid_gridcell.html')
FileLink('covid_gridcell.html')

In [1]:
# Same grid-cell construction as the previous cell, with a Wistia colour scale
# computed from Confirmed / 5000 and a different camera.
grid_view_df = new_df[['Lat', 'Long_']].assign(
    value=np.sqrt(new_df['Confirmed']),
    Color=new_df['Confirmed'].map(
        lambda confirmed: [int(255*channel) for channel in plt.cm.Wistia(confirmed/5000)]
    ),
)

# Initial camera (latitude 40, longitude -115), steeply pitched and rotated 90 degrees.
view_state = pdk.ViewState(
    latitude=40,
    longitude=-115,
    zoom=4,
    min_zoom=0,
    max_zoom=15,
    pitch=80,
    bearing=90,
)

# Column heights come from 'value' and colours from 'Color'.
grid_layer_options = dict(
    get_position=['Long_', 'Lat'],
    get_elevation='value',
    get_color='Color',
    auto_highlight=True,
    elevationScale=6000,
    pickable=True,
    extruded=True,
    material=True,
    cellSize=50000,
    coverage=.5,
)
grid_layer = pdk.Layer('GridCellLayer', grid_view_df, **grid_layer_options)

# Write the deck to a standalone HTML file and link to it.
r = pdk.Deck(layers=[grid_layer], initial_view_state=view_state)
r.to_html('usa_gridcell.html')
FileLink('usa_gridcell.html')

# Time-series Cases

In [1]:
def get_day_df(month, day, year=2020):
    """Download one daily report from the CSSEGISandData COVID-19 repository.

    Args:
        month: Month number (1-12).
        day: Day of the month.
        year: Four-digit year of the report; defaults to 2020.

    Returns:
        The report as a DataFrame with extra ``month``, ``day`` and ``date``
        columns (``date`` is the ``MM-DD-YYYY`` string used in the file name),
        or ``None`` when the date does not exist on the calendar or the report
        could not be downloaded or parsed.
    """
    import datetime  # local import keeps this cell self-contained

    # Skip impossible dates (e.g. 02-30) without making a network request.
    try:
        datetime.date(year, month, day)
    except ValueError:
        return None

    date_str = f'{month:02d}-{day:02d}-{year:04d}'
    url = ('https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/'
           f'csse_covid_19_data/csse_covid_19_daily_reports/{date_str}.csv')
    try:
        day_df = pd.read_csv(url)
    except (OSError, pd.errors.ParserError, pd.errors.EmptyDataError):
        # OSError covers urllib's HTTPError/URLError (missing report, no
        # network). Unlike a bare ``except:``, this does not swallow
        # KeyboardInterrupt or programming errors.
        return None
    return day_df.assign(month=month, day=day, date=date_str)

In [1]:
# Try every (month, day) pair for months 1-3 and days 1-31; get_day_df returns
# None for the ones it could not load.
all_days_list = []
for report_month in range(1, 4):
    for report_day in range(1, 32):
        all_days_list.append(get_day_df(month=report_month, day=report_day))

# Stack the reports that were actually loaded, keeping column order as encountered.
loaded_reports = [day_df for day_df in all_days_list if day_df is not None]
all_days_df = pd.concat(loaded_reports, sort=False)
all_days_df.sample(3)

In [1]:
# Parse both date-like columns and order the combined reports chronologically.
all_days_df = all_days_df.assign(
    date=pd.to_datetime(all_days_df['date']),
    Last_Update=pd.to_datetime(all_days_df['Last_Update']),
).sort_values('date')

In [1]:
# Position columns used by the layer. This must be defined here: previously it
# was only assigned inside the animation cell further down, so running the
# notebook top to bottom on a fresh kernel raised NameError.
pos_cols = ['Long_', 'Lat']

grid_view_df = all_days_df[['Lat', 'Long_', 'Latitude', 'Longitude', 'date', 'month', 'day']].copy()
# Some reports carry their coordinates in Latitude/Longitude rather than
# Lat/Long_ (the animation cell treats the two pairs as alternatives). The
# layer's position accessor is fixed when the layer is built, so fill the gaps
# in Lat/Long_ from the other pair. np.where works positionally, which avoids
# index alignment on the non-unique index left by pd.concat.
grid_view_df['Lat'] = np.where(grid_view_df['Lat'].isna(), grid_view_df['Latitude'], grid_view_df['Lat'])
grid_view_df['Long_'] = np.where(grid_view_df['Long_'].isna(), grid_view_df['Longitude'], grid_view_df['Long_'])

# Column height is the square root of the confirmed count; colour comes from
# the YlOrRd colormap evaluated at Confirmed / 5000.
grid_view_df['value'] = np.sqrt(all_days_df['Confirmed'])
grid_view_df['Color'] = all_days_df['Confirmed'].map(lambda x: [int(255*c) for c in  plt.cm.YlOrRd(x/5000)])
view_state = pdk.ViewState(
    longitude=-100,
    latitude=40,
    zoom=3,
    min_zoom=0,
    max_zoom=15,
    pitch=45,
    bearing=0)

# The layer starts without data; the animation cell assigns one day's rows at a time.
grid_layer = pdk.Layer(
        'GridCellLayer',
        data=None,
        get_position=pos_cols,
        get_elevation='value',
        get_color='Color',
        auto_highlight=True,
        elevationScale=6000,
        pickable=True,
        extruded=True,
        material=True,
        cellSize=50000)

r = pdk.Deck(layers=[grid_layer], initial_view_state=view_state)

In [1]:
import time
import ipywidgets
from IPython.display import clear_output
# HTML widget used as a caption for the date currently being shown.
display_date = ipywidgets.HTML('Date')
display(display_date)
r.show()
# Show the current visualization

# Step through the month==3 rows one date at a time. Group by the bare column
# name rather than a one-element list: with a list, pandas >= 2.0 yields
# 1-tuples as keys, which would render the caption as a tuple repr.
for c_date, c_rows in grid_view_df.query('month==3').groupby('date'):
    display_date.value = f'<h1>{c_date}</h1>'
    # Drop columns that are entirely empty for this date.
    out_rows = c_rows.copy().reset_index(drop=True).dropna(axis=1, how='all')
    # Work out which coordinate pair this date's rows carry.
    # NOTE(review): pos_cols is only assigned here; nothing in this cell passes
    # it to grid_layer, whose position accessor was set when the layer was
    # created - confirm whether the layer needs updating per frame.
    if 'Lat' in out_rows:
        pos_cols = ['Long_', 'Lat']
    else:
        pos_cols = ['Longitude', 'Latitude']
    grid_layer.data = out_rows

    # Push the new data to the displayed deck, then pause before the next date.
    r.update()
    time.sleep(2)