In [42]:
# Load Packages

import pandas as pd
import numpy as np


# Standard plotly imports
#import chart_studio.plotly as py
import plotly.graph_objs as go
import plotly.express as px
import plotly.io as pio
from plotly.subplots import make_subplots
from plotly.offline import iplot, init_notebook_mode, plot

In [31]:
## Load Data
covid = pd.read_csv('./Data/covid_19_data.csv')

mob = pd.read_csv('./Data/Global_Mobility_Report .csv')
mob = mob[mob['country_region']=='Singapore'] # restrict to Singapore

<a id='Frequencies'></a>
## **2. Confirmed Cases and Deaths Across Countries/Cities** ##

First lets look at the worldwide COVID-19 cases to see where Singapore is standing compared to other countries

In [32]:
covid.head()

Unnamed: 0,SNo,ObservationDate,Province/State,Country/Region,Last Update,Confirmed,Deaths,Recovered
0,1,01/22/2020,Anhui,Mainland China,1/22/2020 17:00,1.0,0.0,0.0
1,2,01/22/2020,Beijing,Mainland China,1/22/2020 17:00,14.0,0.0,0.0
2,3,01/22/2020,Chongqing,Mainland China,1/22/2020 17:00,6.0,0.0,0.0
3,4,01/22/2020,Fujian,Mainland China,1/22/2020 17:00,1.0,0.0,0.0
4,5,01/22/2020,Gansu,Mainland China,1/22/2020 17:00,0.0,0.0,0.0


In [36]:
covid.isnull().sum() # check for null values

SNo                    0
ObservationDate        0
Province/State     14187
Country/Region         0
Last Update            0
Confirmed              0
Deaths                 0
Recovered              0
dtype: int64

In [39]:
covid.ObservationDate.min(), covid.ObservationDate.max()

('01/22/2020', '05/21/2020')

In [40]:
# Here we get the latest confirmed, deaths and recovered per country per city
latestcovid = covid.groupby(['Country/Region','Province/State'],as_index=False).agg({'Confirmed': 'max', 'Deaths': 'max', 'Recovered': 'max'})

In [41]:
latestcovid.head()

Unnamed: 0,Country/Region,Province/State,Confirmed,Deaths,Recovered
0,Australia,Australian Capital Territory,107.0,3.0,104.0
1,Australia,Diamond Princess cruise ship,0.0,0.0,0.0
2,Australia,External territories,0.0,0.0,0.0
3,Australia,From Diamond Princess,8.0,0.0,0.0
4,Australia,Jervis Bay Territory,0.0,0.0,0.0


In [43]:
# Plot treemaps

fig = px.treemap(latestcovid.sort_values(by='Confirmed', ascending=False).reset_index(drop=True), 
                 path=["Country/Region", "Province/State"], values="Confirmed", height=700, width=900,
                 title='Number of Confirmed Cases',
                 color_discrete_sequence = px.colors.qualitative.Prism)
fig.data[0].textinfo = 'label+text+value'
fig.show()

fig = px.treemap(latestcovid.sort_values(by='Deaths', ascending=False).reset_index(drop=True), 
                 path=["Country/Region", "Province/State"], values="Deaths", height=700, width=900,
                 title='Number of Deaths',
                 color_discrete_sequence = px.colors.qualitative.Prism)
fig.data[0].textinfo = 'label+text+value'
fig.show()

fig = px.treemap(latestcovid.sort_values(by='Recovered', ascending=False).reset_index(drop=True), 
                 path=["Country/Region", "Province/State"], values="Recovered", height=700, width=900,
                 title='Number of Recovered',
                 color_discrete_sequence = px.colors.qualitative.Prism)
fig.data[0].textinfo = 'label+text+value'
fig.show()

We cannot even see Singapore in the treemaps due to its small population. Normalizing each country's data by its population will allow us to compare Singapore with the other countries.

In [15]:
mob.head()


Unnamed: 0,country_region_code,country_region,sub_region_1,sub_region_2,date,retail_and_recreation_percent_change_from_baseline,grocery_and_pharmacy_percent_change_from_baseline,parks_percent_change_from_baseline,transit_stations_percent_change_from_baseline,workplaces_percent_change_from_baseline,residential_percent_change_from_baseline
105316,SG,Singapore,,,2020-02-15,-14.0,-9.0,-10.0,-17.0,-4.0,8.0
105317,SG,Singapore,,,2020-02-16,-19.0,-17.0,-18.0,-22.0,-7.0,8.0
105318,SG,Singapore,,,2020-02-17,-12.0,-6.0,-7.0,-14.0,-4.0,7.0
105319,SG,Singapore,,,2020-02-18,-13.0,-9.0,-7.0,-15.0,-5.0,7.0
105320,SG,Singapore,,,2020-02-19,-14.0,-14.0,-6.0,-15.0,-5.0,8.0


In [30]:
# Change date column to datetime and display range
mob.date = pd.to_datetime(mob.date)
mob.date.min(), mob.date.max()

(Timestamp('2020-02-15 00:00:00'), Timestamp('2020-05-16 00:00:00'))