<a href="https://colab.research.google.com/github/terrencekwon/data-science-portfolio/blob/master/covid19_visualization.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

#Visualizing the COVID-19 Pandemic

In [36]:
import numpy as np
import pandas as pd
%matplotlib inline
import matplotlib.pyplot as plt


In [42]:
path = 'https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_daily_reports/08-24-2020.csv'
df = pd.read_csv(path)

df.info()
df.head(20)


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3950 entries, 0 to 3949
Data columns (total 14 columns):
 #   Column               Non-Null Count  Dtype  
---  ------               --------------  -----  
 0   FIPS                 3255 non-null   float64
 1   Admin2               3260 non-null   object 
 2   Province_State       3782 non-null   object 
 3   Country_Region       3950 non-null   object 
 4   Last_Update          3950 non-null   object 
 5   Lat                  3870 non-null   float64
 6   Long_                3870 non-null   float64
 7   Confirmed            3950 non-null   int64  
 8   Deaths               3950 non-null   int64  
 9   Recovered            3950 non-null   int64  
 10  Active               3949 non-null   float64
 11  Combined_Key         3950 non-null   object 
 12  Incidence_Rate       3870 non-null   float64
 13  Case-Fatality_Ratio  3898 non-null   float64
dtypes: float64(6), int64(3), object(5)
memory usage: 432.2+ KB


Unnamed: 0,FIPS,Admin2,Province_State,Country_Region,Last_Update,Lat,Long_,Confirmed,Deaths,Recovered,Active,Combined_Key,Incidence_Rate,Case-Fatality_Ratio
0,,,,Afghanistan,2020-08-25 04:28:02,33.93911,67.709953,38054,1389,28360,8305.0,Afghanistan,97.753973,3.650076
1,,,,Albania,2020-08-25 04:28:02,41.1533,20.1683,8605,254,4413,3938.0,Albania,299.013135,2.951772
2,,,,Algeria,2020-08-25 04:28:02,28.0339,1.6596,41858,1446,29369,11043.0,Algeria,95.45497,3.454537
3,,,,Andorra,2020-08-25 04:28:02,42.5063,1.5218,1060,53,877,130.0,Andorra,1371.901896,5.0
4,,,,Angola,2020-08-25 04:28:02,-11.2027,17.8739,2222,100,877,1245.0,Angola,6.760731,4.50045
5,,,,Antigua and Barbuda,2020-08-25 04:28:02,17.0608,-61.7964,94,3,89,2.0,Antigua and Barbuda,95.98889,3.191489
6,,,,Argentina,2020-08-25 04:28:02,-38.4161,-63.6167,350867,7366,256789,86712.0,Argentina,776.326956,2.099371
7,,,,Armenia,2020-08-25 04:28:02,40.0691,45.0382,42825,854,36049,5922.0,Armenia,1445.211549,1.994162
8,,,Australian Capital Territory,Australia,2020-08-25 04:28:02,-35.4735,149.0124,113,3,110,0.0,"Australian Capital Territory, Australia",26.395702,2.654867
9,,,New South Wales,Australia,2020-08-25 04:28:02,-33.8688,151.2093,3991,52,2994,945.0,"New South Wales, Australia",49.162355,1.302932


Deleting unwanted columns

In [43]:
df = df.drop(columns=['FIPS', 'Admin2', 'Province_State', 'Last_Update', 'Combined_Key'])
df.head()

Unnamed: 0,Country_Region,Lat,Long_,Confirmed,Deaths,Recovered,Active,Incidence_Rate,Case-Fatality_Ratio
0,Afghanistan,33.93911,67.709953,38054,1389,28360,8305.0,97.753973,3.650076
1,Albania,41.1533,20.1683,8605,254,4413,3938.0,299.013135,2.951772
2,Algeria,28.0339,1.6596,41858,1446,29369,11043.0,95.45497,3.454537
3,Andorra,42.5063,1.5218,1060,53,877,130.0,1371.901896,5.0
4,Angola,-11.2027,17.8739,2222,100,877,1245.0,6.760731,4.50045


We are only interested in the total cases in each country, but our data set has the cases split up within each province/state. So we want to group the case counts by country.

In [52]:
country_totals = df.groupby('Country_Region').sum()
country_totals.head(20)

Unnamed: 0_level_0,Lat,Long_,Confirmed,Deaths,Recovered,Active,Incidence_Rate,Case-Fatality_Ratio
Country_Region,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
Afghanistan,33.93911,67.709953,38054,1389,28360,8305.0,97.753973,3.650076
Albania,41.1533,20.1683,8605,254,4413,3938.0,299.013135,2.951772
Algeria,28.0339,1.6596,41858,1446,29369,11043.0,95.45497,3.454537
Andorra,42.5063,1.5218,1060,53,877,130.0,1371.901896,5.0
Angola,-11.2027,17.8739,2222,100,877,1245.0,6.760731,4.50045
Antigua and Barbuda,17.0608,-61.7964,94,3,89,2.0,95.98889,3.191489
Argentina,-38.4161,-63.6167,350867,7366,256789,86712.0,776.326956,2.099371
Armenia,40.0691,45.0382,42825,854,36049,5922.0,1445.211549,1.994162
Australia,-256.8502,1130.8439,25053,525,19601,4927.0,483.243994,14.766837
Austria,47.5162,14.5501,25495,733,21657,3105.0,283.076479,2.875074


In [39]:
df.sort_values(by='Confirmed', ascending=False).head(20)

Unnamed: 0,Country_Region,Lat,Long_,Confirmed,Deaths,Recovered,Active,Incidence_Rate,Case-Fatality_Ratio
54,Brazil,-23.5505,-46.6333,756480,28505,570165,157810.0,1647.420878,3.76811
253,India,19.449759,76.108221,693398,22465,502490,168443.0,563.077977,3.239842
574,South Africa,-30.5595,22.9375,611450,13159,516494,81797.0,1030.961905,2.152097
263,India,11.006091,78.400624,385352,6614,325456,53282.0,495.048468,1.716353
234,India,15.9129,79.74,361712,3368,268828,89516.0,671.037536,0.931128
271,Iran,32.427908,53.688046,361150,20776,311365,29009.0,429.976548,5.752734
6,Argentina,-38.4161,-63.6167,350867,7366,256789,86712.0,776.326956,2.099371
565,Saudi Arabia,23.885942,45.079162,308654,3691,282888,22075.0,886.583498,1.195837
20,Bangladesh,23.685,90.3563,297083,3983,182875,110225.0,180.389892,1.340703
459,Peru,-11.766533,-76.604498,288274,12212,0,276062.0,2712.273604,4.236247
