# Overview
- Basically trying to reproduce this graph: https://twitter.com/MarkJHandley/status/1237119688578138112/photo/1 showing a linear (on a semilog scale) growth and a particularly slow/quick peaking growth in Japan.
- I also try to see how the number of people infected relates to the average spread in a country
- Finally, I drop the country pretense and just look at grids of latitude and longitude

In [None]:
# Install the extra 3D dependencies imported below: trimesh and a pinned
# pre-release of ipyvolume (0.6.0a2).
!pip install trimesh ipyvolume==0.6.0a2 

In [None]:
from itertools import cycle
import plotly_express as px
import seaborn as sns
import matplotlib.pyplot as plt
import bqplot
import ipyvolume as ipv
import trimesh
import numpy as np  # linear algebra
import pandas as pd  # data processing, CSV file I/O (e.g. pd.read_csv)
%matplotlib inline
plt.rcParams["figure.figsize"] = (15, 10)
plt.rcParams["figure.dpi"] = 125
plt.rcParams["font.size"] = 14
plt.rcParams['font.family'] = ['sans-serif']
plt.rcParams['font.sans-serif'] = ['DejaVu Sans']
plt.style.use('ggplot')
sns.set_style("whitegrid", {'axes.grid': False})
plt.rcParams['image.cmap'] = 'gray'  # grayscale looks better
prop_cycle = plt.rcParams['axes.prop_cycle']
colors = prop_cycle.by_key()['color']

In [None]:
# Read the cleaned case report and add a parsed datetime copy of the textual
# 'Date' column; the original string column is kept as-is.
corona_df = pd.read_csv("../input/corona-virus-report/covid_19_clean_complete.csv")
corona_df = corona_df.assign(DateCode=lambda raw_df: pd.to_datetime(raw_df['Date']))
corona_df.head(5)

In [None]:
# Sum the counts over all rows that share a date and a country, giving one
# row per (DateCode, Country/Region) pair.
count_columns = {'Confirmed': 'sum', 'Deaths': 'sum', 'Recovered': 'sum'}
date_country_df = (
    corona_df
    .groupby(['DateCode', 'Country/Region'])
    .agg(count_columns)
    .reset_index()
)
date_country_df.head(3)

In [None]:
# Zero counts cannot be drawn on a logarithmic axis, so keep only the rows
# with at least one confirmed case before plotting one line per country.
has_cases = date_country_df['Confirmed'] > 0
px.line(
    date_country_df[has_cases],
    x='DateCode',
    y='Confirmed',
    color='Country/Region',
    log_y=True,
)

## Big Summary Table
This should show all the countries; I am not sure why some of them are missing.

In [None]:
# Country x date table of log(confirmed), ordered by the 2020-04-05 counts.
# The figure is tall because every country gets its own labelled row.
fig, ax1 = plt.subplots(1, 1, figsize=(15, 40))
sns.heatmap(
    corona_df.
    pivot_table(index='Country/Region', columns='Date', values='Confirmed', aggfunc='sum').
    sort_values('2020-04-05').
    # log of the count; cells without any cases are pinned to -1
    applymap(lambda x: np.log(x) if x > 0 else -1),
    # The default ('auto') thins the row labels until they no longer overlap,
    # which made most countries look like they were missing from the table.
    yticklabels=True,
    ax=ax1
)

In [None]:
# Two helper columns for the animated map:
#   grp  - "<Lat>-<Long>" key so each location keeps its identity across frames
#   size - square root of the active count (clipped to [0, 1e6]) plus one
geo_points = corona_df.assign(
    grp=lambda c_df: c_df.apply(
        lambda c_row: f"{c_row['Lat']}-{c_row['Long']}", axis=1),
    size=lambda c_df: 1 + np.sqrt(np.clip(c_df['Active'], 0, 1e6)),
)
px.scatter_geo(
    geo_points,
    lat='Lat',
    lon='Long',
    color='Deaths',
    range_color=[0, 100_000],
    animation_frame='Date',
    animation_group='grp',
    hover_data=['Active', 'Confirmed', 'Deaths', 'Recovered'],
    size='size',
)

In [None]:
def filt_rows(x, countries=None, after_date=None):
    """Optionally restrict the case table to some countries and/or recent dates.

    With the default arguments the input is returned untouched, which is what
    the notebook uses. The previously hard-coded debugging filter corresponds
    to ``filt_rows(df, countries=['Canada'], after_date='2020-02-20')``.

    Args:
        x: DataFrame with 'Country/Region' and 'Date' columns.
        countries: iterable of country names to keep, or None to keep all.
        after_date: keep only rows whose 'Date' compares greater than this
            value (same comparison as the column's own dtype), or None.

    Returns:
        The filtered DataFrame, or ``x`` itself when no filter is given.
    """
    if countries is not None:
        x = x[x['Country/Region'].isin(countries)]
    if after_date is not None:
        x = x[x['Date'] > after_date]
    return x
def _log_clipped(value):
    """Natural log of ``value`` after clamping it to the range [1, 10000]."""
    return np.log(np.clip(value, 1, 10000))


# One row per (Lat, Long) location and one column per date, holding the summed
# active count; the counts are then log-scaled and the index turned back into
# ordinary 'Lat' / 'Long' columns.
active_by_location = filt_rows(corona_df).pivot_table(
    index=['Lat', 'Long'],
    columns='Date',
    values='Active',
    aggfunc='sum',
)
pv_df = active_by_location.applymap(_log_clipped).reset_index()
pv_df.head(3)

In [None]:
# Convert each location to spherical angles in radians. The 90-degree offsets
# were found by trial and error so the points line up with the globe model.
lat_deg = pv_df['Lat'].values
long_deg = pv_df['Long'].values
theta = (90 - lat_deg) * np.pi / 180.0
phi = (long_deg + 90) * np.pi / 180.0

# Radius of the sphere the bars are placed on.
base_radius = 100

# Everything after the two coordinate columns is one column per date;
# transposing puts the dates on the first axis.
rad_values = pv_df.iloc[:, 2:].values.T

In [None]:
# Delay between animation frames; also used to derive the fps of the gif export.
interval_ms = 100

# Unit direction of every location. y runs along the polar axis (it depends
# on theta, i.e. latitude, only); x and z span the equatorial plane.
vz = np.cos(phi) * np.sin(theta)
vx = np.sin(phi) * np.sin(theta)
vy = np.cos(theta)

# Bar anchor points: the same directions scaled out to the sphere radius.
zs = base_radius * vz
xs = base_radius * vx
ys = base_radius * vy

# Now also include color, which contains RGBA values: the colormap call adds a
# trailing channel axis to rad_values. Values are normalised by mean + std.
# (The earlier swapaxes/transpose pair cancelled out, so it has been dropped.)
color = plt.cm.magma(rad_values / (rad_values.mean() + rad_values.std()))

# the aux range is from -1 to 1, but if we put 0 as min, negative values will go inside
# the max determines the 'height' of the bars
aux_scale = bqplot.LinearScale(min=0, max=rad_values.max())

In [None]:
# Build the 3D scene: the globe mesh plus one animated cylinder per location.
# Load the globe model; the loop below walks over its named sub-geometries.
c_mesh = trimesh.load('../input/globe-model/Globe.obj')
fig = ipv.figure()
# Pair up to three sub-geometries with a colour. The first one is paired with
# None and therefore skipped; the others are drawn as coloured surfaces.
for c_color, (k, v) in zip([None, 'green', 'blue'], c_mesh.geometry.items()):
    if c_color is not None:
        ipv.plot_trisurf(*v.vertices.T, triangles=v.faces, color=c_color)
# One cylinder marker per location.
# we use the coordinates as the normals, and thus direction
# rad_values is passed as the per-frame 'aux' data that drives the bar length.
s = ipv.scatter(xs, ys, zs, vx=vx, vy=vy, vz=vz, color=color,
                aux=rad_values, marker="cylinder_hr")
# Leave room for the tallest bar on top of the sphere radius.
ipv.xyzlim(base_radius+rad_values.max())
# Adds the animation control for the scatter, stepping every interval_ms.
ipv.animation_control(s, interval=interval_ms)
s.aux_scale = aux_scale
# Custom shader snippet: the marker's y size is set from the scaled aux value
# (relative to the scaled value of 0).
s.shader_snippets = {'size':
                     '''float sc = (SCALE_AUX(aux_current) - SCALE_AUX(0.0)); size_vector.y = sc;
 '''}
s.material.side = "DoubleSide"
s.size = 1
# NOTE(review): 4x4 matrix that is identity apart from a 0.5 entry in the last
# row - presumably offsets the cylinder so it grows from its base; confirm
# against the ipyvolume documentation.
s.geo_matrix = [1, 0, 0, 0,   0, 1, 0, 0,   0, 0, 1, 0,  0.0, 0.5, 0, 1]
# NOTE(review): called after xyzlim() above and may readjust those limits - confirm.
ipv.squarelim()
# Hide the bounding box and the axes.
ipv.style.use("nobox")
ipv.style.box_off()
ipv.style.axes_off()
ipv.show()

In [None]:

def set_view(figure, framenr, fraction):
    """Frame callback for ``ipv.movie``: select the animation step to render.

    Sets the scatter's ``sequence_index`` to ``framenr - 1``. ``figure`` and
    ``fraction`` are part of the callback signature but are not used.
    """
    s.sequence_index = framenr - 1


# gif export doesn't work well on kaggle, so it stays switched off
export_gif = False
if export_gif:
    n_frames = rad_values.shape[0]
    ipv.movie('covid_spread.gif', set_view,
              fps=1000 / interval_ms, frames=n_frames)

In [None]:
# Export to 'covid_spread.html' (presumably the current ipyvolume figure as a
# standalone page - confirm against the ipyvolume docs).
ipv.pylab.save('covid_spread.html')

# LatLong Grids
Since confirmed infections are fairly difficult to measure (and highly dependent on testing), using deaths as an end-point is probably more reliable.

In [None]:
# Bucket every location into a grid: each axis is cut into the same number of
# equal-width intervals, stored as the interval label per row.
GRID_BINS = 100
corona_df['QLat'] = pd.cut(corona_df['Lat'], bins=GRID_BINS)
corona_df['QLong'] = pd.cut(corona_df['Long'], bins=GRID_BINS)

In [None]:
def summarize_grid(in_rows):
    """Collapse the rows of one grid cell into a single row per date.

    The three count columns are summed per ``DateCode``; for the country and
    province labels the first available value within each date is kept.
    Returns a frame with ``DateCode`` as an ordinary column.
    """
    how_to_combine = {
        'Confirmed': 'sum',
        'Deaths': 'sum',
        'Recovered': 'sum',
        'Country/Region': 'first',
        'Province/State': 'first',
    }
    per_date = in_rows.groupby('DateCode').agg(how_to_combine)
    return per_date.reset_index()


def cut_to_num(in_str: str) -> float:
    """Return the midpoint of an interval label such as ``'(1.5, 2.5]'``.

    The argument is converted with ``str``, every bracket or parenthesis is
    removed, and the remaining comma-separated numbers are averaged.
    """
    bare = str(in_str).translate(str.maketrans('', '', '()[]'))
    edges = [float(part) for part in bare.split(',')]
    return np.mean(edges)


# Per grid cell and date: summed counts plus the cell's midpoint coordinates.
#  * observed=True groups only the (QLat, QLong) cells that actually contain
#    rows instead of relying on the default handling of categorical groupers.
#  * dropna is limited to the count columns. A bare dropna() also removed
#    every cell whose 'Province/State' is NaN, i.e. all cells made up solely
#    of rows without a province name.
#  * Lat / Long are rebuilt as the midpoint of each interval label.
date_grid_df = corona_df.\
    groupby(['QLat', 'QLong'], observed=True).\
    apply(summarize_grid).\
    reset_index().\
    dropna(subset=['Confirmed', 'Deaths', 'Recovered']).\
    assign(Lat=lambda x: x['QLat'].astype(str).map(cut_to_num),
           Long=lambda x: x['QLong'].astype(str).map(cut_to_num))
date_grid_df.head(3)

In [None]:
# Total 'Confirmed' per grid cell over all dates, keeping only non-empty cells.
# NOTE(review): this adds up the per-date values; if 'Confirmed' is a running
# total, the result is not a case count - confirm this is the intended size.
sum_grid_df = (
    date_grid_df
    .groupby(['Lat', 'Long'])
    .agg({'Confirmed': 'sum'})
    .reset_index()
    .loc[lambda totals: totals['Confirmed'] > 0]
)

In [None]:
from mpl_toolkits.basemap import Basemap
# Orthographic globe centred on 45N, 100E with thin outlines and lightly
# tinted land masses.
world_map = Basemap(projection='ortho', lat_0=45, lon_0=100, resolution='l')
outline_style = {'linewidth': 0.25}
world_map.drawcoastlines(**outline_style)
world_map.drawcountries(**outline_style)
world_map.fillcontinents(color='lightgreen', lake_color='aqua', alpha=0.25)

# One red marker per grid cell; the marker area grows with log10 of the
# cell's confirmed total.
marker_sizes = 10 * np.log10(sum_grid_df['Confirmed'])
grid_longs = sum_grid_df['Long'].values
grid_lats = sum_grid_df['Lat'].values
world_map.scatter(grid_longs, grid_lats, s=marker_sizes, c='r', latlon=True)