## Import Libraries

In [66]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
#import chart_studio as py
#from plotly.grid_objs import Grid, Column
from plotly.tools import FigureFactory as FF
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
plt.rcParams['figure.figsize'] = [15, 5]
from IPython import display
from ipywidgets import interact, widgets
import pycountry
import pycountry_convert

## Load data for Cases, Deaths and Recovered

In [132]:
# Load Dataset
# Source: RamiKrispin GitHub
dataset_url = 'https://raw.githubusercontent.com/RamiKrispin/coronavirus-csv/master/coronavirus_dataset.csv'

raw_data_all = pd.read_csv(dataset_url)
raw_data_all.head()

Unnamed: 0,Province.State,Country.Region,Lat,Long,date,cases,type
0,,Afghanistan,33.0,65.0,2020-01-22,0,confirmed
1,,Afghanistan,33.0,65.0,2020-01-23,0,confirmed
2,,Afghanistan,33.0,65.0,2020-01-24,0,confirmed
3,,Afghanistan,33.0,65.0,2020-01-25,0,confirmed
4,,Afghanistan,33.0,65.0,2020-01-26,0,confirmed


In [106]:
print('Size/Shape of the dataset: ', raw_data_all.shape)
print('\n')
print('Checking for missing values:')
print(raw_data_all.isnull().sum())
print('\n')
print('Checking data type of each column:')
print(raw_data_all.dtypes)

Size/Shape of the dataset:  (68464, 7)


Checking for missing values:
Province.State    48136
Country.Region        0
Lat                   0
Long                  0
date                  0
cases                 0
type                  0
dtype: int64


Checking data type of each column:
Province.State     object
Country.Region     object
Lat               float64
Long              float64
date               object
cases               int64
type               object
dtype: object


## Spread of COVID-19 around the World

[Solution for datetime in plot animation](https://github.com/plotly/plotly.py/issues/1737)

In [133]:
# Copy dataframe, as dfs are mutable
df_coords = raw_data_all.copy()

df_coords['coords'] = df_coords[['Long', 'Lat']].values.tolist()

# convert to string, otherwise not hashable & can't be used for comparison
# https://stackoverflow.com/questions/14535730/what-does-hashable-mean-in-python
df_coords['coords'] = df_coords['coords'].astype('str') 
df_coords.drop(['Province.State', 'Country.Region'], axis=1, inplace=True)

In [134]:
df_coords.head()

Unnamed: 0,Lat,Long,date,cases,type,coords
0,33.0,65.0,2020-01-22,0,confirmed,"[65.0, 33.0]"
1,33.0,65.0,2020-01-23,0,confirmed,"[65.0, 33.0]"
2,33.0,65.0,2020-01-24,0,confirmed,"[65.0, 33.0]"
3,33.0,65.0,2020-01-25,0,confirmed,"[65.0, 33.0]"
4,33.0,65.0,2020-01-26,0,confirmed,"[65.0, 33.0]"


In [135]:
selected_type = 'confirmed'

#print(df_coords.shape)
#print(df_coords[df_coords['type'] == selected_type].shape)

df_selected = df_coords[df_coords['type'] == selected_type]

#print(len(df_selected['coords'].unique()))

df_selected['cumulative'] = df_selected.groupby(['coords'])['cases'].apply(lambda x: x.cumsum())



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy



In [136]:
df_selected.tail()

Unnamed: 0,Lat,Long,date,cases,type,coords,cumulative
23227,29.1832,120.0934,2020-04-14,0,confirmed,"[120.0934, 29.1832]",1267
23228,29.1832,120.0934,2020-04-15,1,confirmed,"[120.0934, 29.1832]",1268
23229,29.1832,120.0934,2020-04-16,0,confirmed,"[120.0934, 29.1832]",1268
23230,29.1832,120.0934,2020-04-17,0,confirmed,"[120.0934, 29.1832]",1268
23231,29.1832,120.0934,2020-04-18,0,confirmed,"[120.0934, 29.1832]",1268


In [137]:
# TODO: update styles
# https://plotly.com/python-api-reference/generated/plotly.express.scatter_geo.html

fig = px.scatter_geo(df_selected,
                     lat='Lat', lon='Long',
                     size='cumulative',
                     animation_frame='date'
                    )

fig.update_layout(
    title_text = 'Spread of Coronavirus around the world'
)

fig.show()

In [123]:
# Change animation speed
# https://community.plotly.com/t/how-to-slow-down-animation-in-plotly-express/31309/5

fig.layout.updatemenus[0].buttons[0].args[1]['frame']['duration'] = 300

## Clean Up Province vs Country

## Cases on Choropleth Map

In [138]:
raw_data_all.head()

Unnamed: 0,Province.State,Country.Region,Lat,Long,date,cases,type
0,,Afghanistan,33.0,65.0,2020-01-22,0,confirmed
1,,Afghanistan,33.0,65.0,2020-01-23,0,confirmed
2,,Afghanistan,33.0,65.0,2020-01-24,0,confirmed
3,,Afghanistan,33.0,65.0,2020-01-25,0,confirmed
4,,Afghanistan,33.0,65.0,2020-01-26,0,confirmed


## Analysis by Date