In [43]:
import numpy as np
import pandas as pd
import plotly.offline as py
import plotly.graph_objs as go
import plotly.express as px 
from plotly.offline import download_plotlyjs,init_notebook_mode, iplot
import plotly.tools as tls 
import plotly.figure_factory as ff
import folium
from folium.plugins import MarkerCluster

In [19]:
# Load the two CSV inputs used by this notebook: the case records and the
# confirmed-cases time series.
covid_data = pd.read_csv(filepath_or_buffer='Datasets/covid_19_data.csv')
confimered_cases_data = pd.read_csv(
    filepath_or_buffer='Datasets/time_series_covid_19_confirmed.csv'
)

In [20]:
# Print the column dtypes and non-null counts to spot missing data early.
covid_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 116805 entries, 0 to 116804
Data columns (total 8 columns):
 #   Column           Non-Null Count   Dtype  
---  ------           --------------   -----  
 0   SNo              116805 non-null  int64  
 1   ObservationDate  116805 non-null  object 
 2   Province/State   81452 non-null   object 
 3   Country/Region   116805 non-null  object 
 4   Last Update      116805 non-null  object 
 5   Confirmed        116805 non-null  float64
 6   Deaths           116805 non-null  float64
 7   Recovered        116805 non-null  float64
dtypes: float64(3), int64(1), object(4)
memory usage: 7.1+ MB


In [21]:
# Preview the first five rows of the case records.
covid_data.head(n=5)

Unnamed: 0,SNo,ObservationDate,Province/State,Country/Region,Last Update,Confirmed,Deaths,Recovered
0,1,01/22/2020,Anhui,Mainland China,1/22/2020 17:00,1.0,0.0,0.0
1,2,01/22/2020,Beijing,Mainland China,1/22/2020 17:00,14.0,0.0,0.0
2,3,01/22/2020,Chongqing,Mainland China,1/22/2020 17:00,6.0,0.0,0.0
3,4,01/22/2020,Fujian,Mainland China,1/22/2020 17:00,1.0,0.0,0.0
4,5,01/22/2020,Gansu,Mainland China,1/22/2020 17:00,0.0,0.0,0.0


In [22]:
# Count the missing values in each column.
covid_data.isna().sum()

SNo                    0
ObservationDate        0
Province/State     35353
Country/Region         0
Last Update            0
Confirmed              0
Deaths                 0
Recovered              0
dtype: int64

In [23]:
# Replace missing province/state entries with the placeholder 'Unknown'.
province = covid_data['Province/State']
covid_data['Province/State'] = province.where(province.notna(), 'Unknown')

In [24]:
# Re-count the missing values per column to confirm none remain.
covid_data.isna().sum()

SNo                0
ObservationDate    0
Province/State     0
Country/Region     0
Last Update        0
Confirmed          0
Deaths             0
Recovered          0
dtype: int64

In [25]:
# The three count columns are read in as float64; store them as integers.
count_columns = ["Confirmed", "Deaths", "Recovered"]
covid_data[count_columns] = covid_data[count_columns].astype(int)
covid_data.head(n=5)

Unnamed: 0,SNo,ObservationDate,Province/State,Country/Region,Last Update,Confirmed,Deaths,Recovered
0,1,01/22/2020,Anhui,Mainland China,1/22/2020 17:00,1,0,0
1,2,01/22/2020,Beijing,Mainland China,1/22/2020 17:00,14,0,0
2,3,01/22/2020,Chongqing,Mainland China,1/22/2020 17:00,6,0,0
3,4,01/22/2020,Fujian,Mainland China,1/22/2020 17:00,1,0,0
4,5,01/22/2020,Gansu,Mainland China,1/22/2020 17:00,0,0,0


In [26]:
# Active = Confirmed minus the cases already resolved (Deaths + Recovered).
resolved_cases = covid_data['Deaths'].add(covid_data['Recovered'])
covid_data['Active'] = covid_data['Confirmed'].sub(resolved_cases)
covid_data.head(n=5)

Unnamed: 0,SNo,ObservationDate,Province/State,Country/Region,Last Update,Confirmed,Deaths,Recovered,Active
0,1,01/22/2020,Anhui,Mainland China,1/22/2020 17:00,1,0,0,1
1,2,01/22/2020,Beijing,Mainland China,1/22/2020 17:00,14,0,0,14
2,3,01/22/2020,Chongqing,Mainland China,1/22/2020 17:00,6,0,0,6
3,4,01/22/2020,Fujian,Mainland China,1/22/2020 17:00,1,0,0,1
4,5,01/22/2020,Gansu,Mainland China,1/22/2020 17:00,0,0,0,0


In [27]:
# Per-country totals for the most recent observation date.
#
# ObservationDate is a text column in MM/DD/YYYY form, so calling .max() on the
# raw strings compares them lexicographically ('12/31/2020' would sort after
# '01/01/2021'). Parse the dates first so "latest" really means latest.
# NOTE(review): the format is taken from the sample rows; pd.to_datetime will
# raise if any row uses a different layout.
observation_dates = pd.to_datetime(covid_data['ObservationDate'], format='%m/%d/%Y')
latest_snapshot = covid_data[observation_dates == observation_dates.max()]

# Sum the province-level rows of that day into one row per country.
df1 = (
    latest_snapshot
    .groupby(["Country/Region"])[["Confirmed", "Active", "Recovered", "Deaths"]]
    .sum()
    .reset_index()
)
df1.head()

Unnamed: 0,Country/Region,Confirmed,Active,Recovered,Deaths
0,Afghanistan,39145,5089,32610,1446
1,Albania,12787,5278,7139,370
2,Algeria,50400,13274,35428,1698
3,Andorra,1753,497,1203,53
4,Angola,4363,2731,1473,159


In [30]:
# Keep only the country name and its coordinates from the time-series table.
# reset_index() without drop=True also adds the old row labels as an 'index'
# column.
location_columns = ['Country/Region', 'Lat', 'Long']
df2 = confimered_cases_data.loc[:, location_columns].reset_index()
df2.head(n=5)

Unnamed: 0,index,Country/Region,Lat,Long
0,0,Afghanistan,33.93911,67.709953
1,1,Albania,41.1533,20.1683
2,2,Algeria,28.0339,1.6596
3,3,Andorra,42.5063,1.5218
4,4,Angola,-11.2027,17.8739


In [32]:
# drop_duplicates() returns a new frame instead of modifying df2 in place, so
# the result must be assigned back. Otherwise any repeated country rows survive
# and a later merge on 'Country/Region' repeats that country once per row.
# keep='first' retains the first row seen for each country, so that row's
# Lat/Long is the one kept for the country.
df2 = df2.drop_duplicates(subset=['Country/Region'], keep='first')
df2.head()

Unnamed: 0,index,Country/Region,Lat,Long
0,0,Afghanistan,33.93911,67.709953
1,1,Albania,41.1533,20.1683
2,2,Algeria,28.0339,1.6596
3,3,Andorra,42.5063,1.5218
4,4,Angola,-11.2027,17.8739


In [37]:
# Attach the coordinates in df2 to the rows of df1. The inner join keeps only
# countries whose name appears in both tables.
merge_dfs = df1.merge(df2, how='inner', on='Country/Region')
merge_dfs.head(n=5)

Unnamed: 0,Country/Region,Confirmed,Active,Recovered,Deaths,index,Lat,Long
0,Afghanistan,39145,5089,32610,1446,0,33.93911,67.709953
1,Albania,12787,5278,7139,370,1,41.1533,20.1683
2,Algeria,50400,13274,35428,1698,2,28.0339,1.6596
3,Andorra,1753,497,1203,53,3,42.5063,1.5218
4,Angola,4363,2731,1473,159,4,-11.2027,17.8739


In [38]:
# The 'index' column is not needed in the merged table; remove it.
merge_dfs = merge_dfs.drop('index', axis='columns')
merge_dfs.head(n=5)

Unnamed: 0,Country/Region,Confirmed,Active,Recovered,Deaths,Lat,Long
0,Afghanistan,39145,5089,32610,1446,33.93911,67.709953
1,Albania,12787,5278,7139,370,41.1533,20.1683
2,Algeria,50400,13274,35428,1698,28.0339,1.6596
3,Andorra,1753,497,1203,53,42.5063,1.5218
4,Angola,4363,2731,1473,159,-11.2027,17.8739


In [42]:
# Collect the coordinates as a plain Python list of [lat, long] pairs and
# show the first five.
loc = merge_dfs.loc[:, ['Lat', 'Long']]
loc_list = loc.to_numpy().tolist()
loc_list[0:5]

[[33.93911, 67.709953],
 [41.1533, 20.1683],
 [28.0339, 1.6596],
 [42.5063, 1.5218],
 [-11.2027, 17.8739]]

In [45]:
# Base map using the 'CartoDB dark_matter' tiles, centred at [20.5937, 0].
map2 = folium.Map(location=[20.5937, 0], tiles='CartoDB dark_matter', zoom_start=2)

# All markers go into one cluster layer attached to the map.
marker_cluster = MarkerCluster().add_to(map2)

# Pair each coordinate with its country name by position. Looking the name up
# with merge_dfs['Country/Region'][i] is a label lookup, which only matches the
# i-th coordinate while the frame has a default 0..n-1 index; zip() keeps the
# two sequences aligned regardless of the index labels.
for coordinates, country in zip(loc_list, merge_dfs['Country/Region']):
    folium.Marker(coordinates, popup=country).add_to(marker_cluster)

map2