In [1]:
import pandas as pd 
import numpy as np

import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline

import geopandas as gpd
from shapely.geometry import Point, Polygon
import descartes

In [2]:
#load dataset
covid_confirmed = pd.read_csv('time_series_covid19_recovered_global.csv')
covid_deaths = pd.read_csv('time_series_covid19_deaths_global.csv')
covid_recovered = pd.read_csv('time_series_covid19_recovered_global.csv')

In [3]:
covid_confirmed.head()

Unnamed: 0,Province/State,Country/Region,Lat,Long,1/22/20,1/23/20,1/24/20,1/25/20,1/26/20,1/27/20,...,4/30/20,5/1/20,5/2/20,5/3/20,5/4/20,5/5/20,5/6/20,5/7/20,5/8/20,5/9/20
0,,Afghanistan,33.0,65.0,0,0,0,0,0,0,...,260,310,331,345,397,421,458,468,472,502
1,,Albania,41.1533,20.1683,0,0,0,0,0,0,...,470,488,519,531,543,570,595,605,620,627
2,,Algeria,28.0339,1.6596,0,0,0,0,0,0,...,1779,1821,1872,1936,1998,2067,2197,2323,2467,2546
3,,Andorra,42.5063,1.5218,0,0,0,0,0,0,...,468,468,472,493,499,514,521,526,537,545
4,,Angola,-11.2027,17.8739,0,0,0,0,0,0,...,7,11,11,11,11,11,11,11,11,13


In [4]:
covid_recovered.head()

Unnamed: 0,Province/State,Country/Region,Lat,Long,1/22/20,1/23/20,1/24/20,1/25/20,1/26/20,1/27/20,...,4/30/20,5/1/20,5/2/20,5/3/20,5/4/20,5/5/20,5/6/20,5/7/20,5/8/20,5/9/20
0,,Afghanistan,33.0,65.0,0,0,0,0,0,0,...,260,310,331,345,397,421,458,468,472,502
1,,Albania,41.1533,20.1683,0,0,0,0,0,0,...,470,488,519,531,543,570,595,605,620,627
2,,Algeria,28.0339,1.6596,0,0,0,0,0,0,...,1779,1821,1872,1936,1998,2067,2197,2323,2467,2546
3,,Andorra,42.5063,1.5218,0,0,0,0,0,0,...,468,468,472,493,499,514,521,526,537,545
4,,Angola,-11.2027,17.8739,0,0,0,0,0,0,...,7,11,11,11,11,11,11,11,11,13


In [5]:
covid_deaths.head()

Unnamed: 0,Province/State,Country/Region,Lat,Long,1/22/20,1/23/20,1/24/20,1/25/20,1/26/20,1/27/20,...,4/30/20,5/1/20,5/2/20,5/3/20,5/4/20,5/5/20,5/6/20,5/7/20,5/8/20,5/9/20
0,,Afghanistan,33.0,65.0,0,0,0,0,0,0,...,64,68,72,85,90,95,104,106,109,115
1,,Albania,41.1533,20.1683,0,0,0,0,0,0,...,31,31,31,31,31,31,31,31,31,31
2,,Algeria,28.0339,1.6596,0,0,0,0,0,0,...,450,453,459,463,465,470,476,483,488,494
3,,Andorra,42.5063,1.5218,0,0,0,0,0,0,...,42,43,44,45,45,46,46,47,47,48
4,,Angola,-11.2027,17.8739,0,0,0,0,0,0,...,2,2,2,2,2,2,2,2,2,2


In [6]:
#how many missing values
covid_confirmed.isna().sum()[covid_confirmed.isna().sum()>0]

Province/State    185
dtype: int64

In [7]:
covid_deaths.isna().sum()[covid_confirmed.isna().sum()>0]

Province/State    184
dtype: int64

In [8]:
covid_recovered.isna().sum()[covid_confirmed.isna().sum()>0]

Province/State    185
dtype: int64

In [9]:
#replace missing values with unknown
covid_confirmed = covid_confirmed.fillna('unknown')
covid_deaths = covid_deaths.fillna('unknown')
covid_recovered = covid_recovered.fillna('unknown')

In [10]:
#death cases per country 
covid_deaths['Country/Region'].value_counts()

China             33
Canada            15
France            11
United Kingdom    11
Australia          8
                  ..
India              1
Zambia             1
Malta              1
Burkina Faso       1
Malaysia           1
Name: Country/Region, Length: 187, dtype: int64

In [28]:
#death cases per country 
covid_deaths['Country/Region'].value_counts().plot(kind='bar', figsize= [300, 40])

In [29]:
#recovered cases per country
covid_recovered['Country/Region'].value_counts().plot(kind='bar', figsize= [300, 40])

In [30]:
#confirmed cases per country
covid_confirmed['Country/Region'].value_counts().plot(kind='bar', figsize= [300, 40])

In [14]:
#create new dataset for cases in China only - looking at the last updated entry 
last_update = '5/9/20'
china_cases = covid_confirmed[['Province/State', last_update]][covid_confirmed['Country/Region']=='Mainland China']

In [15]:
china_cases['recovered'] = covid_recovered[last_update][covid_recovered['Country/Region']=='Mainland China']
china_cases['deaths'] = covid_deaths[last_update][covid_deaths['Country/Region']=='Mainland China']

In [23]:
#data for may 9 2020
china_cases

In [17]:
#setting state as index 
china_cases = china_cases.set_index('Province/State')

In [24]:
china_cases.head()

In [19]:
#rename columns
china_cases = china_cases.rename(columns= {last_update:'confirmed', 'covid_recovered':'recovered', 'covid_deaths':'deaths'})

In [25]:
china_cases

In [26]:
#sorting values by confirmed - see most cases 
china_cases.sort_values(by='confirmed', ascending=True)

In [27]:
#plot
china_cases.sort_values(by='confirmed', ascending=True).plot(kind='barh', figsize=[14, 50])

Deaths/Recovered Only All Regions

In [33]:
#creating a new dataframe for all countries/regions
covid_deaths = covid_deaths.drop(columns=['Lat', 'Long'])
covid_deaths['Case'] = 'Deaths'

covid_recovered = covid_recovered.drop(columns=['Lat', 'Long'])
covid_recovered['Case'] = 'Recovered'

In [34]:
#concatenating data from global deaths and global recoveries
covid_19 = pd.concat([covid_deaths, covid_recovered])

In [35]:
#groupby dataframe with case type and country/region
cols = list(covid_19)
df = covid_19.loc[:, cols].groupby(["Country/Region", "Case"]).sum()
df.loc[:, ::-1]

Unnamed: 0_level_0,Unnamed: 1_level_0,5/9/20,5/8/20,5/7/20,5/6/20,5/5/20,5/4/20,5/3/20,5/2/20,5/1/20,4/30/20,...,1/31/20,1/30/20,1/29/20,1/28/20,1/27/20,1/26/20,1/25/20,1/24/20,1/23/20,1/22/20
Country/Region,Case,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
Afghanistan,Deaths,115,109,106,104,95,90,85,72,68,64,...,0,0,0,0,0,0,0,0,0,0
Afghanistan,Recovered,502,472,468,458,421,397,345,331,310,260,...,0,0,0,0,0,0,0,0,0,0
Albania,Deaths,31,31,31,31,31,31,31,31,31,31,...,0,0,0,0,0,0,0,0,0,0
Albania,Recovered,627,620,605,595,570,543,531,519,488,470,...,0,0,0,0,0,0,0,0,0,0
Algeria,Deaths,494,488,483,476,470,465,463,459,453,450,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Yemen,Recovered,1,1,1,1,1,1,1,1,1,1,...,0,0,0,0,0,0,0,0,0,0
Zambia,Deaths,7,4,4,4,3,3,3,3,3,3,...,0,0,0,0,0,0,0,0,0,0
Zambia,Recovered,112,111,103,101,92,78,78,75,74,55,...,0,0,0,0,0,0,0,0,0,0
Zimbabwe,Deaths,4,4,4,4,4,4,4,4,4,4,...,0,0,0,0,0,0,0,0,0,0


In [37]:
df.columns

Index(['1/22/20', '1/23/20', '1/24/20', '1/25/20', '1/26/20', '1/27/20',
       '1/28/20', '1/29/20', '1/30/20', '1/31/20',
       ...
       '4/30/20', '5/1/20', '5/2/20', '5/3/20', '5/4/20', '5/5/20', '5/6/20',
       '5/7/20', '5/8/20', '5/9/20'],
      dtype='object', length=109)

In [41]:
df.index

MultiIndex([(       'Afghanistan',    'Deaths'),
            (       'Afghanistan', 'Recovered'),
            (           'Albania',    'Deaths'),
            (           'Albania', 'Recovered'),
            (           'Algeria',    'Deaths'),
            (           'Algeria', 'Recovered'),
            (           'Andorra',    'Deaths'),
            (           'Andorra', 'Recovered'),
            (            'Angola',    'Deaths'),
            (            'Angola', 'Recovered'),
            ...
            ('West Bank and Gaza',    'Deaths'),
            ('West Bank and Gaza', 'Recovered'),
            (    'Western Sahara',    'Deaths'),
            (    'Western Sahara', 'Recovered'),
            (             'Yemen',    'Deaths'),
            (             'Yemen', 'Recovered'),
            (            'Zambia',    'Deaths'),
            (            'Zambia', 'Recovered'),
            (          'Zimbabwe',    'Deaths'),
            (          'Zimbabwe', 'Recovered')],
   

In [42]:
df.reset_index(level=['Country/Region', 'Case'])

Unnamed: 0,Country/Region,Case,1/22/20,1/23/20,1/24/20,1/25/20,1/26/20,1/27/20,1/28/20,1/29/20,...,4/30/20,5/1/20,5/2/20,5/3/20,5/4/20,5/5/20,5/6/20,5/7/20,5/8/20,5/9/20
0,Afghanistan,Deaths,0,0,0,0,0,0,0,0,...,64,68,72,85,90,95,104,106,109,115
1,Afghanistan,Recovered,0,0,0,0,0,0,0,0,...,260,310,331,345,397,421,458,468,472,502
2,Albania,Deaths,0,0,0,0,0,0,0,0,...,31,31,31,31,31,31,31,31,31,31
3,Albania,Recovered,0,0,0,0,0,0,0,0,...,470,488,519,531,543,570,595,605,620,627
4,Algeria,Deaths,0,0,0,0,0,0,0,0,...,450,453,459,463,465,470,476,483,488,494
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
369,Yemen,Recovered,0,0,0,0,0,0,0,0,...,1,1,1,1,1,1,1,1,1,1
370,Zambia,Deaths,0,0,0,0,0,0,0,0,...,3,3,3,3,3,3,4,4,4,7
371,Zambia,Recovered,0,0,0,0,0,0,0,0,...,55,74,75,78,78,92,101,103,111,112
372,Zimbabwe,Deaths,0,0,0,0,0,0,0,0,...,4,4,4,4,4,4,4,4,4,4


In [279]:
covid_deaths.describe()

Unnamed: 0,1/22/20,1/23/20,1/24/20,1/25/20,1/26/20,1/27/20,1/28/20,1/29/20,1/30/20,1/31/20,...,4/30/20,5/1/20,5/2/20,5/3/20,5/4/20,5/5/20,5/6/20,5/7/20,5/8/20,5/9/20
count,266.0,266.0,266.0,266.0,266.0,266.0,266.0,266.0,266.0,266.0,...,266.0,266.0,266.0,266.0,266.0,266.0,266.0,266.0,266.0,266.0
mean,0.06391,0.067669,0.097744,0.157895,0.210526,0.308271,0.492481,0.5,0.642857,0.800752,...,877.293233,897.06391,916.590226,930.338346,945.62782,967.06391,991.93609,1013.409774,1033.451128,1050.041353
std,1.042337,1.043908,1.473615,2.453621,3.18973,4.660845,7.664297,7.664793,9.933187,12.507875,...,5033.523652,5151.692272,5255.654602,5334.73732,5413.828455,5546.438359,5695.861834,5829.167335,5933.342912,6031.114726
min,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
50%,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,8.0,8.0,8.0,9.0,9.0,9.0,9.0,9.0,10.0,10.0
75%,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,61.75,67.5,71.75,75.25,78.0,79.75,85.75,88.25,89.0,90.75
max,17.0,17.0,24.0,40.0,52.0,76.0,125.0,125.0,162.0,204.0,...,62996.0,64943.0,66369.0,67682.0,68922.0,71064.0,73455.0,75662.0,77180.0,78795.0


In [243]:
covid_recovered.describe()

Unnamed: 0,Lat,Long,1/22/20,1/23/20,1/24/20,1/25/20,1/26/20,1/27/20,1/28/20,1/29/20,...,4/30/20,5/1/20,5/2/20,5/3/20,5/4/20,5/5/20,5/6/20,5/7/20,5/8/20,5/9/20
count,252.0,252.0,252.0,252.0,252.0,252.0,252.0,252.0,252.0,252.0,...,252.0,252.0,252.0,252.0,252.0,252.0,252.0,252.0,252.0,252.0
mean,19.997457,28.167963,0.111111,0.119048,0.142857,0.154762,0.206349,0.242063,0.424603,0.5,...,4023.357143,4176.25,4337.845238,4465.222222,4613.984127,4757.269841,4942.115079,5098.178571,5246.230159,5458.825397
std,24.40824,67.225277,1.763834,1.767827,1.958592,2.024712,2.654973,2.858017,5.069858,5.577059,...,16859.790147,17468.700495,18210.897417,18611.712422,19134.155254,19528.619764,20013.875983,20519.43001,20919.114229,21734.953475
min,-51.7963,-106.3468,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,6.56535,-7.8252,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,24.75,25.75,25.75,27.0,28.5,30.0,30.0,31.5,36.5,37.0
50%,21.8051,23.4094,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,198.0,209.5,213.0,226.0,232.0,242.0,256.5,258.5,262.0,272.5
75%,39.329025,85.953175,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,1022.0,1083.25,1156.5,1224.0,1275.75,1317.25,1342.5,1353.75,1369.5,1424.5
max,71.7069,178.065,28.0,28.0,31.0,32.0,42.0,45.0,80.0,88.0,...,153947.0,164015.0,175382.0,180152.0,187180.0,189791.0,189910.0,195036.0,198993.0,212534.0


In [133]:
covid_deaths.max()

Country/Region    Zimbabwe
1/22/20                 17
1/23/20                 17
1/24/20                 24
1/25/20                 40
                    ...   
5/6/20               73455
5/7/20               75662
5/8/20               77180
5/9/20               78795
Case_Type           Deaths
Length: 111, dtype: object

In [134]:
covid_deaths['Country/Region'].value_counts()

China             33
Canada            15
United Kingdom    11
France            11
Australia          8
                  ..
Cameroon           1
Burundi            1
Grenada            1
Kuwait             1
Armenia            1
Name: Country/Region, Length: 187, dtype: int64