In [1]:
import pandas
import matplotlib
import seaborn
import sklearn
import dash
import dash_core_components as dcc
import dash_html_components as html
import plotly.graph_objects as go
from dash.dependencies import Input, Output 

---
## 1st Dataset : Johns Hopkins Data

In [3]:
## Johns Hopkins Center for Systems Science and Engineering (CSSE) base URL.
## Contains cumulative global time-series data.
## This set of data is updated daily, so shapes and values change between runs.
baseURL = "https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/"

cumulated_confirmed_url = baseURL + "time_series_covid19_confirmed_global.csv"
cumulated_deaths_url    = baseURL + "time_series_covid19_deaths_global.csv"
cumulated_recovered_url = baseURL + "time_series_covid19_recovered_global.csv"

## The import cell binds the module as `pandas` (there is no `pd` alias), so the
## full module name is used here to keep the cell working on a fresh kernel.
cumulated_confirmed     = pandas.read_csv(cumulated_confirmed_url)
cumulated_deaths        = pandas.read_csv(cumulated_deaths_url)
cumulated_recovered     = pandas.read_csv(cumulated_recovered_url)

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [14]:
## Report the shape of each cumulative time-series frame.
print(f"Confirmed data shape : {cumulated_confirmed.shape}")
print(f"Recovered data shape : {cumulated_recovered.shape}")
print(f"Deaths data shape : {cumulated_deaths.shape}")

Confirmed data shape : (266, 158)
Recovered data shape : (253, 158)
Deaths data shape : (266, 158)


In [20]:
## Preview the first five rows of the confirmed-cases frame.
cumulated_confirmed.head(n=5)

Unnamed: 0,Province/State,Country/Region,Lat,Long,1/22/20,1/23/20,1/24/20,1/25/20,1/26/20,1/27/20,...,6/14/20,6/15/20,6/16/20,6/17/20,6/18/20,6/19/20,6/20/20,6/21/20,6/22/20,6/23/20
0,,Afghanistan,33.0,65.0,0,0,0,0,0,0,...,24766,25527,26310,26874,27532,27878,28424,28833,29157,29481
1,,Albania,41.1533,20.1683,0,0,0,0,0,0,...,1521,1590,1672,1722,1788,1838,1891,1962,1995,2047
2,,Algeria,28.0339,1.6596,0,0,0,0,0,0,...,10919,11031,11147,11268,11385,11504,11631,11771,11920,12076
3,,Andorra,42.5063,1.5218,0,0,0,0,0,0,...,853,853,854,854,855,855,855,855,855,855
4,,Angola,-11.2027,17.8739,0,0,0,0,0,0,...,140,142,148,155,166,172,176,183,186,189


In [22]:
## Preview the last five rows of the confirmed-cases frame.
cumulated_confirmed.tail(n=5)

Unnamed: 0,Province/State,Country/Region,Lat,Long,1/22/20,1/23/20,1/24/20,1/25/20,1/26/20,1/27/20,...,6/14/20,6/15/20,6/16/20,6/17/20,6/18/20,6/19/20,6/20/20,6/21/20,6/22/20,6/23/20
261,,Sao Tome and Principe,0.18636,6.613081,0,0,0,0,0,0,...,661,662,671,683,688,693,698,698,702,707
262,,Yemen,15.552727,48.516388,0,0,0,0,0,0,...,728,844,885,902,909,919,922,941,967,992
263,,Comoros,-11.6455,43.3333,0,0,0,0,0,0,...,176,176,197,197,210,210,247,247,247,265
264,,Tajikistan,38.861034,71.276093,0,0,0,0,0,0,...,5035,5097,5160,5221,5279,5338,5399,5457,5513,5567
265,,Lesotho,-29.609988,28.233608,0,0,0,0,0,0,...,4,4,4,4,4,4,4,4,12,17


In [41]:
## List the distinct Province/State values (NaN included) in the confirmed-cases frame.
cumulated_confirmed.loc[:, "Province/State"].unique()

array([nan, 'Australian Capital Territory', 'New South Wales',
       'Northern Territory', 'Queensland', 'South Australia', 'Tasmania',
       'Victoria', 'Western Australia', 'Alberta', 'British Columbia',
       'Grand Princess', 'Manitoba', 'New Brunswick',
       'Newfoundland and Labrador', 'Nova Scotia', 'Ontario',
       'Prince Edward Island', 'Quebec', 'Saskatchewan', 'Anhui',
       'Beijing', 'Chongqing', 'Fujian', 'Gansu', 'Guangdong', 'Guangxi',
       'Guizhou', 'Hainan', 'Hebei', 'Heilongjiang', 'Henan', 'Hong Kong',
       'Hubei', 'Hunan', 'Inner Mongolia', 'Jiangsu', 'Jiangxi', 'Jilin',
       'Liaoning', 'Macau', 'Ningxia', 'Qinghai', 'Shaanxi', 'Shandong',
       'Shanghai', 'Shanxi', 'Sichuan', 'Tianjin', 'Tibet', 'Xinjiang',
       'Yunnan', 'Zhejiang', 'Faroe Islands', 'Greenland',
       'French Guiana', 'French Polynesia', 'Guadeloupe', 'Mayotte',
       'New Caledonia', 'Reunion', 'Saint Barthelemy', 'St Martin',
       'Martinique', 'Aruba', 'Curacao', 'Sint

In [39]:
## Look up the rows belonging to a single country (here: China).
## Grouping by country may simplify the analysis, since one country can span several rows.
cumulated_confirmed.loc[
    cumulated_confirmed["Country/Region"].eq("China")
].head()

Unnamed: 0,Province/State,Country/Region,Lat,Long,1/22/20,1/23/20,1/24/20,1/25/20,1/26/20,1/27/20,...,6/14/20,6/15/20,6/16/20,6/17/20,6/18/20,6/19/20,6/20/20,6/21/20,6/22/20,6/23/20
49,Anhui,China,31.8257,117.2264,1,9,15,39,60,70,...,991,991,991,991,991,991,991,991,991,991
50,Beijing,China,40.1824,116.4142,14,22,36,41,68,80,...,673,700,731,752,777,777,821,830,843,850
51,Chongqing,China,30.0572,107.874,6,9,27,57,75,110,...,582,582,582,582,582,582,582,582,582,582
52,Fujian,China,26.0789,117.9874,1,5,10,18,35,59,...,362,362,362,362,362,362,363,363,363,363
53,Gansu,China,37.8099,101.0583,0,2,2,4,7,14,...,139,139,149,150,151,151,151,151,158,158


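There's a row-count difference between the recovered and confirmed datasets (253 vs. 266 rows in the shapes printed above), so the two do not cover the same set of country/province entries.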

---
## 2nd Dataset : Starschema Enhanced Dataset

In [17]:
## Load the Starschema-enhanced JHU dataset from the local Data directory.
## The import cell binds the module as `pandas` (there is no `pd` alias), so the
## full module name is used here to keep the cell working on a fresh kernel.
star_schema_dataset = pandas.read_csv("../Data/JHU_COVID-19- Starschema.csv")

## Label corrected: this frame is the whole Starschema dataset, not only deaths.
print("Starschema data shape : " + str(star_schema_dataset.shape))

<IPython.core.display.Javascript object>

Deaths data shape : (736148, 14)


In [18]:
## Display the Starschema frame (pandas truncates the rendering to the first and last rows).
## NOTE(review): the original cell referenced `temp`, which is not defined anywhere in the
## visible notebook; the recorded output matches `star_schema_dataset` - confirm.
star_schema_dataset

Unnamed: 0,Country/Region,Province/State,County,FIPS,Date,Case_Type,Cases,Long,Lat,ISO3166-1,ISO3166-2,Difference,Last_Update_Date,Last_Reported_Flag
0,Afghanistan,,,,2020-01-22 00:00:00.000000,Confirmed,0,65.0,33.0,AF,,,2020-06-22 05:32:47.332978,False
1,Afghanistan,,,,2020-02-24 00:00:00.000000,Confirmed,1,65.0,33.0,AF,,1.0,2020-06-22 05:32:47.332978,False
2,Afghanistan,,,,2020-02-25 00:00:00.000000,Confirmed,1,65.0,33.0,AF,,0.0,2020-06-22 05:32:47.332978,False
3,Afghanistan,,,,2020-02-26 00:00:00.000000,Confirmed,1,65.0,33.0,AF,,0.0,2020-06-22 05:32:47.332978,False
4,Afghanistan,,,,2020-02-27 00:00:00.000000,Confirmed,1,65.0,33.0,AF,,0.0,2020-06-22 05:32:47.332978,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
736143,United States,Wyoming,unassigned,,2020-06-18 00:00:00.000000,Deaths,17,,,US,WY,0.0,2020-06-22 05:32:47.332978,False
736144,United States,Wyoming,unassigned,,2020-06-19 00:00:00.000000,Deaths,19,,,US,WY,2.0,2020-06-22 05:32:47.332978,False
736145,United States,Wyoming,unassigned,,2020-06-20 00:00:00.000000,Deaths,19,,,US,WY,0.0,2020-06-22 05:32:47.332978,False
736146,United States,Wyoming,unassigned,,2020-06-21 00:00:00.000000,Deaths,19,,,US,WY,0.0,2020-06-22 05:32:47.332978,True


---
## 3rd Dataset