In [1]:
# Dependencies
import matplotlib.pyplot as plt
import seaborn as sns
import requests
import pandas as pd
import numpy as np
from scipy.stats import linregress
from scipy.stats import stats
%matplotlib inline


# Import API key
from api_keys import g_key

In [2]:
# Convert dataset from below link into DataFrame
#  https://www.kaggle.com/datasets/georgesaavedra/covid19-dataset

# Load and Read csv file
load_covid_data = 'input_data/owid-covid-data.csv'
covid_world = pd.read_csv(load_covid_data)
covid_world

Unnamed: 0,iso_code,continent,location,date,total_cases,new_cases,new_cases_smoothed,total_deaths,new_deaths,new_deaths_smoothed,...,female_smokers,male_smokers,handwashing_facilities,hospital_beds_per_thousand,life_expectancy,human_development_index,excess_mortality_cumulative_absolute,excess_mortality_cumulative,excess_mortality,excess_mortality_cumulative_per_million
0,SGP,Asia,Singapore,23/01/2020,1.0,1.0,,,,,...,5.2,28.3,,2.4,83.62,0.938,,,,
1,SGP,Asia,Singapore,24/01/2020,3.0,2.0,,,,,...,5.2,28.3,,2.4,83.62,0.938,,,,
2,SGP,Asia,Singapore,25/01/2020,3.0,0.0,,,,,...,5.2,28.3,,2.4,83.62,0.938,,,,
3,SGP,Asia,Singapore,26/01/2020,4.0,1.0,,,,,...,5.2,28.3,,2.4,83.62,0.938,,,,
4,SGP,Asia,Singapore,27/01/2020,5.0,1.0,,,,,...,5.2,28.3,,2.4,83.62,0.938,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
153816,WLF,Oceania,Wallis and Futuna,01/03/2022,454.0,0.0,0.0,7.0,0.0,0.0,...,,,,,79.94,,,,,
153817,WLF,Oceania,Wallis and Futuna,02/03/2022,454.0,0.0,0.0,7.0,0.0,0.0,...,,,,,79.94,,,,,
153818,WLF,Oceania,Wallis and Futuna,03/03/2022,454.0,0.0,0.0,7.0,0.0,0.0,...,,,,,79.94,,,,,
153819,WLF,Oceania,Wallis and Futuna,04/03/2022,454.0,0.0,0.0,7.0,0.0,0.0,...,,,,,79.94,,,,,


In [3]:
# Dropping duplicates, keeping only latest date/update
covid_world['date'] = pd.to_datetime(covid_world['date'])


covid_world = covid_world.drop_duplicates(subset =['location'], keep='last')
covid_world

Unnamed: 0,iso_code,continent,location,date,total_cases,new_cases,new_cases_smoothed,total_deaths,new_deaths,new_deaths_smoothed,...,female_smokers,male_smokers,handwashing_facilities,hospital_beds_per_thousand,life_expectancy,human_development_index,excess_mortality_cumulative_absolute,excess_mortality_cumulative,excess_mortality,excess_mortality_cumulative_per_million
772,SGP,Asia,Singapore,2022-05-03,819663.0,16274.0,17573.000,1073.0,6.0,10.571,...,5.2,28.3,,2.4,83.62,0.938,,,,
1545,HKG,Asia,Hong Kong,2022-05-03,440609.0,37529.0,43093.429,1774.0,220.0,171.000,...,,,,,84.86,0.949,,,,
2277,GIB,Europe,Gibraltar,2022-05-03,15609.0,,,101.0,0.0,0.000,...,,,,,79.93,,,,,
3018,BHR,Asia,Bahrain,2022-05-03,525171.0,1517.0,2254.429,1458.0,0.0,1.143,...,5.8,37.6,,2.0,77.29,0.852,,,,
3746,MDV,Asia,Maldives,2022-05-03,172270.0,323.0,446.000,297.0,0.0,0.143,...,2.1,55.0,95.803,,78.92,0.740,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
151693,SYR,Asia,Syria,2022-05-03,54963.0,69.0,83.286,3092.0,3.0,3.429,...,,,70.598,1.5,72.70,0.567,,,,
151807,TKL,Oceania,Tokelau,2021-12-10,,,,,,,...,,,,,81.86,,,,,
152587,TWN,Asia,Taiwan,2022-05-03,20797.0,80.0,60.571,853.0,0.0,0.000,...,,,,,80.46,,,,,
153317,VAT,Europe,Vatican,2022-05-03,29.0,0.0,0.000,,,,...,,,,,75.12,,,,,


In [4]:
# Rename column 
covid_world = covid_world.rename(columns={'location': 'country'})
covid_world

Unnamed: 0,iso_code,continent,country,date,total_cases,new_cases,new_cases_smoothed,total_deaths,new_deaths,new_deaths_smoothed,...,female_smokers,male_smokers,handwashing_facilities,hospital_beds_per_thousand,life_expectancy,human_development_index,excess_mortality_cumulative_absolute,excess_mortality_cumulative,excess_mortality,excess_mortality_cumulative_per_million
772,SGP,Asia,Singapore,2022-05-03,819663.0,16274.0,17573.000,1073.0,6.0,10.571,...,5.2,28.3,,2.4,83.62,0.938,,,,
1545,HKG,Asia,Hong Kong,2022-05-03,440609.0,37529.0,43093.429,1774.0,220.0,171.000,...,,,,,84.86,0.949,,,,
2277,GIB,Europe,Gibraltar,2022-05-03,15609.0,,,101.0,0.0,0.000,...,,,,,79.93,,,,,
3018,BHR,Asia,Bahrain,2022-05-03,525171.0,1517.0,2254.429,1458.0,0.0,1.143,...,5.8,37.6,,2.0,77.29,0.852,,,,
3746,MDV,Asia,Maldives,2022-05-03,172270.0,323.0,446.000,297.0,0.0,0.143,...,2.1,55.0,95.803,,78.92,0.740,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
151693,SYR,Asia,Syria,2022-05-03,54963.0,69.0,83.286,3092.0,3.0,3.429,...,,,70.598,1.5,72.70,0.567,,,,
151807,TKL,Oceania,Tokelau,2021-12-10,,,,,,,...,,,,,81.86,,,,,
152587,TWN,Asia,Taiwan,2022-05-03,20797.0,80.0,60.571,853.0,0.0,0.000,...,,,,,80.46,,,,,
153317,VAT,Europe,Vatican,2022-05-03,29.0,0.0,0.000,,,,...,,,,,75.12,,,,,


In [5]:
# Resetting DataFrame index after drop
covid_world = covid_world.reset_index(drop=True)
covid_world

Unnamed: 0,iso_code,continent,country,date,total_cases,new_cases,new_cases_smoothed,total_deaths,new_deaths,new_deaths_smoothed,...,female_smokers,male_smokers,handwashing_facilities,hospital_beds_per_thousand,life_expectancy,human_development_index,excess_mortality_cumulative_absolute,excess_mortality_cumulative,excess_mortality,excess_mortality_cumulative_per_million
0,SGP,Asia,Singapore,2022-05-03,819663.0,16274.0,17573.000,1073.0,6.0,10.571,...,5.2,28.3,,2.4,83.62,0.938,,,,
1,HKG,Asia,Hong Kong,2022-05-03,440609.0,37529.0,43093.429,1774.0,220.0,171.000,...,,,,,84.86,0.949,,,,
2,GIB,Europe,Gibraltar,2022-05-03,15609.0,,,101.0,0.0,0.000,...,,,,,79.93,,,,,
3,BHR,Asia,Bahrain,2022-05-03,525171.0,1517.0,2254.429,1458.0,0.0,1.143,...,5.8,37.6,,2.0,77.29,0.852,,,,
4,MDV,Asia,Maldives,2022-05-03,172270.0,323.0,446.000,297.0,0.0,0.143,...,2.1,55.0,95.803,,78.92,0.740,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
216,SYR,Asia,Syria,2022-05-03,54963.0,69.0,83.286,3092.0,3.0,3.429,...,,,70.598,1.5,72.70,0.567,,,,
217,TKL,Oceania,Tokelau,2021-12-10,,,,,,,...,,,,,81.86,,,,,
218,TWN,Asia,Taiwan,2022-05-03,20797.0,80.0,60.571,853.0,0.0,0.000,...,,,,,80.46,,,,,
219,VAT,Europe,Vatican,2022-05-03,29.0,0.0,0.000,,,,...,,,,,75.12,,,,,


In [6]:
# Getting all column names

list(covid_world.columns.values.tolist())

['iso_code',
 'continent',
 'country',
 'date',
 'total_cases',
 'new_cases',
 'new_cases_smoothed',
 'total_deaths',
 'new_deaths',
 'new_deaths_smoothed',
 'total_cases_per_million',
 'new_cases_per_million',
 'new_cases_smoothed_per_million',
 'total_deaths_per_million',
 'new_deaths_per_million',
 'new_deaths_smoothed_per_million',
 'reproduction_rate',
 'icu_patients',
 'icu_patients_per_million',
 'hosp_patients',
 'hosp_patients_per_million',
 'weekly_icu_admissions',
 'weekly_icu_admissions_per_million',
 'weekly_hosp_admissions',
 'weekly_hosp_admissions_per_million',
 'new_tests',
 'total_tests',
 'total_tests_per_thousand',
 'new_tests_per_thousand',
 'new_tests_smoothed',
 'new_tests_smoothed_per_thousand',
 'positive_rate',
 'tests_per_case',
 'tests_units',
 'total_vaccinations',
 'people_vaccinated',
 'people_fully_vaccinated',
 'total_boosters',
 'new_vaccinations',
 'new_vaccinations_smoothed',
 'total_vaccinations_per_hundred',
 'people_vaccinated_per_hundred',
 'peop

In [7]:
# Filtering columns to keep only the ones needed for analysis

world_filtered = covid_world.filter(['iso_code',
 'continent',
 'country',
 'date',
 'total_cases',
 'total_deaths',                                  
 'total_cases_per_million',
 'total_deaths_per_million',
 'population',
 'population_density',
 'median_age',
 'aged_65_older',
 'aged_70_older',
 'hospital_beds_per_thousand',
 'life_expectancy'])
world_filtered.head(221)

Unnamed: 0,iso_code,continent,country,date,total_cases,total_deaths,total_cases_per_million,total_deaths_per_million,population,population_density,median_age,aged_65_older,aged_70_older,hospital_beds_per_thousand,life_expectancy
0,SGP,Asia,Singapore,2022-05-03,819663.0,1073.0,150297.602,196.751,5453600,7915.731,42.4,12.922,7.049,2.4,83.62
1,HKG,Asia,Hong Kong,2022-05-03,440609.0,1774.0,58337.173,234.880,7552800,7039.714,44.8,16.303,10.158,,84.86
2,GIB,Europe,Gibraltar,2022-05-03,15609.0,101.0,463298.804,2997.833,33691,3457.100,,,,,79.93
3,BHR,Asia,Bahrain,2022-05-03,525171.0,1458.0,300390.380,833.955,1748295,1935.907,32.4,2.372,1.387,2.0,77.29
4,MDV,Asia,Maldives,2022-05-03,172270.0,297.0,316894.154,546.338,543620,1454.433,30.6,4.120,2.875,,78.92
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
216,SYR,Asia,Syria,2022-05-03,54963.0,3092.0,3007.435,169.186,18275704,,21.7,,2.577,1.5,72.70
217,TKL,Oceania,Tokelau,2021-12-10,,,,,1368,,,,,,81.86
218,TWN,Asia,Taiwan,2022-05-03,20797.0,853.0,871.809,35.758,23855008,,42.2,,8.353,,80.46
219,VAT,Europe,Vatican,2022-05-03,29.0,,35714.286,,812,,,,,,75.12


In [8]:
# Convert dataset from below link into DataFrame
#  https://www.kaggle.com/datasets/paultimothymooney/latitude-and-longitude-for-every-country-and-state
# Load and Read csv file
load_lat_lng = 'input_data/world_country_and_usa_states_latitude_and_longitude_values.csv'
lat_lng = pd.read_csv(load_lat_lng)
lat_lng_filtered = lat_lng.filter(['latitude', 'longitude', 'country'])
lat_lng_filtered.sort_values('country')
lat_lng_df = lat_lng_filtered.reset_index(drop=True)
lat_lng_df

Unnamed: 0,latitude,longitude,country
0,42.546245,1.601554,Andorra
1,23.424076,53.847818,United Arab Emirates
2,33.939110,67.709953,Afghanistan
3,17.060816,-61.796428,Antigua and Barbuda
4,18.220554,-63.068615,Anguilla
...,...,...,...
240,15.552727,48.516388,Yemen
241,-12.827500,45.166244,Mayotte
242,-30.559482,22.937506,South Africa
243,-13.133897,27.849332,Zambia


In [9]:
# Merging the two dataframes
final_covid = pd.merge(world_filtered, lat_lng_df, on = 'country', how = 'left' )
world_covid = final_covid.reset_index(drop=True)
world_covid

Unnamed: 0,iso_code,continent,country,date,total_cases,total_deaths,total_cases_per_million,total_deaths_per_million,population,population_density,median_age,aged_65_older,aged_70_older,hospital_beds_per_thousand,life_expectancy,latitude,longitude
0,SGP,Asia,Singapore,2022-05-03,819663.0,1073.0,150297.602,196.751,5453600,7915.731,42.4,12.922,7.049,2.4,83.62,1.352083,103.819836
1,HKG,Asia,Hong Kong,2022-05-03,440609.0,1774.0,58337.173,234.880,7552800,7039.714,44.8,16.303,10.158,,84.86,22.396428,114.109497
2,GIB,Europe,Gibraltar,2022-05-03,15609.0,101.0,463298.804,2997.833,33691,3457.100,,,,,79.93,36.137741,-5.345374
3,BHR,Asia,Bahrain,2022-05-03,525171.0,1458.0,300390.380,833.955,1748295,1935.907,32.4,2.372,1.387,2.0,77.29,25.930414,50.637772
4,MDV,Asia,Maldives,2022-05-03,172270.0,297.0,316894.154,546.338,543620,1454.433,30.6,4.120,2.875,,78.92,3.202778,73.220680
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
216,SYR,Asia,Syria,2022-05-03,54963.0,3092.0,3007.435,169.186,18275704,,21.7,,2.577,1.5,72.70,34.802075,38.996815
217,TKL,Oceania,Tokelau,2021-12-10,,,,,1368,,,,,,81.86,-8.967363,-171.855881
218,TWN,Asia,Taiwan,2022-05-03,20797.0,853.0,871.809,35.758,23855008,,42.2,,8.353,,80.46,23.697810,120.960515
219,VAT,Europe,Vatican,2022-05-03,29.0,,35714.286,,812,,,,,,75.12,,


In [10]:
# Adding new columns for summary statistical analysis
world_covid['% of World Total Deaths']=''
world_covid['% of World Total Cases']=''
world_covid['% of World Population']=''
world_covid

Unnamed: 0,iso_code,continent,country,date,total_cases,total_deaths,total_cases_per_million,total_deaths_per_million,population,population_density,median_age,aged_65_older,aged_70_older,hospital_beds_per_thousand,life_expectancy,latitude,longitude,% of World Total Deaths,% of World Total Cases,% of World Population
0,SGP,Asia,Singapore,2022-05-03,819663.0,1073.0,150297.602,196.751,5453600,7915.731,42.4,12.922,7.049,2.4,83.62,1.352083,103.819836,,,
1,HKG,Asia,Hong Kong,2022-05-03,440609.0,1774.0,58337.173,234.880,7552800,7039.714,44.8,16.303,10.158,,84.86,22.396428,114.109497,,,
2,GIB,Europe,Gibraltar,2022-05-03,15609.0,101.0,463298.804,2997.833,33691,3457.100,,,,,79.93,36.137741,-5.345374,,,
3,BHR,Asia,Bahrain,2022-05-03,525171.0,1458.0,300390.380,833.955,1748295,1935.907,32.4,2.372,1.387,2.0,77.29,25.930414,50.637772,,,
4,MDV,Asia,Maldives,2022-05-03,172270.0,297.0,316894.154,546.338,543620,1454.433,30.6,4.120,2.875,,78.92,3.202778,73.220680,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
216,SYR,Asia,Syria,2022-05-03,54963.0,3092.0,3007.435,169.186,18275704,,21.7,,2.577,1.5,72.70,34.802075,38.996815,,,
217,TKL,Oceania,Tokelau,2021-12-10,,,,,1368,,,,,,81.86,-8.967363,-171.855881,,,
218,TWN,Asia,Taiwan,2022-05-03,20797.0,853.0,871.809,35.758,23855008,,42.2,,8.353,,80.46,23.697810,120.960515,,,
219,VAT,Europe,Vatican,2022-05-03,29.0,,35714.286,,812,,,,,,75.12,,,,,


In [11]:
# Getting Percentages for new columns added
world_covid['% of World Total Deaths']= (world_covid['total_deaths']/world_covid['total_deaths'].sum() * 100).round(3)
world_covid['% of World Total Cases']= (world_covid['total_cases']/world_covid['total_cases'].sum() * 100).round(3)
world_covid['% of World Population']= (world_covid['population']/world_covid['population'].sum() * 100).round(3)

world_covid

Unnamed: 0,iso_code,continent,country,date,total_cases,total_deaths,total_cases_per_million,total_deaths_per_million,population,population_density,median_age,aged_65_older,aged_70_older,hospital_beds_per_thousand,life_expectancy,latitude,longitude,% of World Total Deaths,% of World Total Cases,% of World Population
0,SGP,Asia,Singapore,2022-05-03,819663.0,1073.0,150297.602,196.751,5453600,7915.731,42.4,12.922,7.049,2.4,83.62,1.352083,103.819836,0.018,0.184,0.070
1,HKG,Asia,Hong Kong,2022-05-03,440609.0,1774.0,58337.173,234.880,7552800,7039.714,44.8,16.303,10.158,,84.86,22.396428,114.109497,0.030,0.099,0.096
2,GIB,Europe,Gibraltar,2022-05-03,15609.0,101.0,463298.804,2997.833,33691,3457.100,,,,,79.93,36.137741,-5.345374,0.002,0.004,0.000
3,BHR,Asia,Bahrain,2022-05-03,525171.0,1458.0,300390.380,833.955,1748295,1935.907,32.4,2.372,1.387,2.0,77.29,25.930414,50.637772,0.024,0.118,0.022
4,MDV,Asia,Maldives,2022-05-03,172270.0,297.0,316894.154,546.338,543620,1454.433,30.6,4.120,2.875,,78.92,3.202778,73.220680,0.005,0.039,0.007
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
216,SYR,Asia,Syria,2022-05-03,54963.0,3092.0,3007.435,169.186,18275704,,21.7,,2.577,1.5,72.70,34.802075,38.996815,0.052,0.012,0.233
217,TKL,Oceania,Tokelau,2021-12-10,,,,,1368,,,,,,81.86,-8.967363,-171.855881,,,0.000
218,TWN,Asia,Taiwan,2022-05-03,20797.0,853.0,871.809,35.758,23855008,,42.2,,8.353,,80.46,23.697810,120.960515,0.014,0.005,0.304
219,VAT,Europe,Vatican,2022-05-03,29.0,,35714.286,,812,,,,,,75.12,,,,0.000,0.000


In [12]:
# Getting the Top 20 Countries with highest total Confirmed Covid Deaths
highest_deaths = world_covid.sort_values(by=['total_deaths'],ascending = False).head(20).reset_index(drop=True)
highest_deaths.to_csv('output_data/highest_deaths.csv', encoding='utf-8', index=False)
highest_deaths


Unnamed: 0,iso_code,continent,country,date,total_cases,total_deaths,total_cases_per_million,total_deaths_per_million,population,population_density,median_age,aged_65_older,aged_70_older,hospital_beds_per_thousand,life_expectancy,latitude,longitude,% of World Total Deaths,% of World Total Cases,% of World Population
0,USA,North America,United States,2022-05-03,79265726.0,958437.0,238095.936,2878.923,332915074,35.608,38.3,15.413,9.732,2.77,78.86,37.09024,-95.712891,15.995,17.817,4.245
1,BRA,South America,Brazil,2022-05-03,29040800.0,652216.0,135708.832,3047.832,213993441,25.04,33.5,8.552,5.06,2.2,75.88,-14.235004,-51.92528,10.885,6.528,2.729
2,IND,Asia,India,2022-05-03,42962953.0,515036.0,30832.98,369.623,1393409033,450.419,28.2,5.989,3.414,0.53,69.66,20.593684,78.96288,8.595,9.657,17.767
3,RUS,Europe,Russia,2022-05-03,16619507.0,348467.0,113900.875,2388.199,145912022,8.823,39.6,14.178,9.393,8.05,72.58,61.52401,105.318756,5.815,3.736,1.861
4,MEX,North America,Mexico,2022-05-03,5554392.0,319604.0,42640.084,2453.543,130262220,66.444,29.3,6.857,4.321,1.38,75.05,23.634501,-102.552784,5.334,1.248,1.661
5,PER,South America,Peru,2022-05-03,3522484.0,210907.0,105591.9,6322.263,33359415,25.129,29.1,7.151,4.455,1.6,76.74,-9.189967,-75.015152,3.52,0.792,0.425
6,GBR,Europe,United Kingdom,2022-05-03,19172095.0,162152.0,281086.442,2377.347,68207114,272.898,40.8,18.517,12.527,2.54,81.32,55.378051,-3.435973,2.706,4.309,0.87
7,ITA,Europe,Italy,2022-05-03,12990223.0,155782.0,215185.808,2580.562,60367471,205.859,47.9,23.021,16.24,3.18,83.51,41.87194,12.56738,2.6,2.92,0.77
8,IDN,Asia,Indonesia,2022-05-03,5723858.0,149918.0,20711.467,542.47,276361788,145.725,29.3,5.319,3.053,1.04,71.72,-0.789275,113.921327,2.502,1.287,3.524
9,FRA,Europe,France,2022-05-03,23064766.0,139312.0,342095.547,2066.269,67422000,122.578,42.0,19.718,13.079,5.98,82.66,46.227638,2.213749,2.325,5.184,0.86


In [13]:
# Getting the Top 20 Countries with highest total Confirmed Covid Cases
highest_cases = world_covid.sort_values(by=['total_cases'],ascending = False).head(20).reset_index(drop=True)
highest_cases.to_csv('output_data/highest_cases.csv', encoding='utf-8', index=False)
highest_cases

Unnamed: 0,iso_code,continent,country,date,total_cases,total_deaths,total_cases_per_million,total_deaths_per_million,population,population_density,median_age,aged_65_older,aged_70_older,hospital_beds_per_thousand,life_expectancy,latitude,longitude,% of World Total Deaths,% of World Total Cases,% of World Population
0,USA,North America,United States,2022-05-03,79265726.0,958437.0,238095.936,2878.923,332915074,35.608,38.3,15.413,9.732,2.77,78.86,37.09024,-95.712891,15.995,17.817,4.245
1,IND,Asia,India,2022-05-03,42962953.0,515036.0,30832.98,369.623,1393409033,450.419,28.2,5.989,3.414,0.53,69.66,20.593684,78.96288,8.595,9.657,17.767
2,BRA,South America,Brazil,2022-05-03,29040800.0,652216.0,135708.832,3047.832,213993441,25.04,33.5,8.552,5.06,2.2,75.88,-14.235004,-51.92528,10.885,6.528,2.729
3,FRA,Europe,France,2022-05-03,23064766.0,139312.0,342095.547,2066.269,67422000,122.578,42.0,19.718,13.079,5.98,82.66,46.227638,2.213749,2.325,5.184,0.86
4,GBR,Europe,United Kingdom,2022-05-03,19172095.0,162152.0,281086.442,2377.347,68207114,272.898,40.8,18.517,12.527,2.54,81.32,55.378051,-3.435973,2.706,4.309,0.87
5,RUS,Europe,Russia,2022-05-03,16619507.0,348467.0,113900.875,2388.199,145912022,8.823,39.6,14.178,9.393,8.05,72.58,61.52401,105.318756,5.815,3.736,1.861
6,DEU,Europe,Germany,2022-05-03,15830112.0,124107.0,188677.272,1479.217,83900471,237.016,46.6,21.453,15.957,8.0,81.33,51.165691,10.451526,2.071,3.558,1.07
7,TUR,Asia,Turkey,2022-05-03,14326217.0,95379.0,168459.032,1121.542,85042736,104.914,31.6,8.153,5.061,2.81,77.69,38.963745,35.243322,1.592,3.22,1.084
8,ITA,Europe,Italy,2022-05-03,12990223.0,155782.0,215185.808,2580.562,60367471,205.859,47.9,23.021,16.24,3.18,83.51,41.87194,12.56738,2.6,2.92,0.77
9,ESP,Europe,Spain,2022-05-03,11100428.0,100413.0,237466.636,2148.092,46745211,93.105,45.5,19.436,13.799,2.97,83.56,40.463667,-3.74922,1.676,2.495,0.596


In [14]:
# Getting the Top 20 Countries with lowest total Confirmed Covid Cases
lowest_cases = world_covid.sort_values(by=['total_cases'],ascending = True).head(20).reset_index(drop=True)
lowest_cases.to_csv('output_data/lowest_cases.csv', encoding='utf-8', index=False)
lowest_cases

Unnamed: 0,iso_code,continent,country,date,total_cases,total_deaths,total_cases_per_million,total_deaths_per_million,population,population_density,median_age,aged_65_older,aged_70_older,hospital_beds_per_thousand,life_expectancy,latitude,longitude,% of World Total Deaths,% of World Total Cases,% of World Population
0,FSM,Oceania,Micronesia (country),2022-05-03,1.0,,8.602,,116255,150.777,23.0,4.81,2.392,,67.88,,,,0.0,0.001
1,COK,Oceania,Cook Islands,2022-05-03,2.0,,113.817,,17572,,,,,,76.25,-21.236736,-159.777671,,0.0,0.0
2,SHN,Africa,Saint Helena,2022-05-03,4.0,,656.276,,6095,,,,,,80.56,-24.143474,-10.030696,,0.0,0.0
3,MHL,Oceania,Marshall Islands,2022-05-03,7.0,,117.414,,59618,295.15,,,,2.7,73.7,7.131474,171.184478,,0.0,0.001
4,VUT,Oceania,Vanuatu,2022-05-03,18.0,1.0,57.24,3.18,314464,22.662,23.1,4.394,2.62,,70.47,-15.376706,166.959158,0.0,0.0,0.004
5,VAT,Europe,Vatican,2022-05-03,29.0,,35714.286,,812,,,,,,75.12,,,,0.0,0.0
6,WSM,Oceania,Samoa,2022-05-03,33.0,,164.881,,200144,69.413,22.0,5.606,3.564,,73.32,-13.759029,-172.104629,,0.0,0.003
7,FLK,South America,Falkland Islands,2022-05-03,115.0,,32596.372,,3528,,,,,,81.44,,,,0.0,0.0
8,MSR,North America,Montserrat,2022-05-03,164.0,2.0,32925.115,401.526,4981,,,,,,74.16,16.742498,-62.187366,0.0,0.0,0.0
9,WLF,Oceania,Wallis and Futuna,2022-05-03,454.0,7.0,40923.021,630.972,11094,,,,,,79.94,-13.768752,-177.156097,0.0,0.0,0.0


In [15]:
# Getting the Top 20 Countries with lowest total Confirmed Covid Deaths
lowest_deaths = world_covid.sort_values(by=['total_deaths'],ascending = True).head(20).reset_index(drop=True)
lowest_deaths.to_csv('output_data/lowest_deaths.csv', encoding='utf-8', index=False)
lowest_deaths

Unnamed: 0,iso_code,continent,country,date,total_cases,total_deaths,total_cases_per_million,total_deaths_per_million,population,population_density,median_age,aged_65_older,aged_70_older,hospital_beds_per_thousand,life_expectancy,latitude,longitude,% of World Total Deaths,% of World Total Cases,% of World Population
0,SPM,North America,Saint Pierre and Miquelon,2022-05-03,1102.0,1.0,190954.774,173.28,5771,,,,,,81.07,46.941936,-56.27111,0.0,0.0,0.0
1,VUT,Oceania,Vanuatu,2022-05-03,18.0,1.0,57.24,3.18,314464,22.662,23.1,4.394,2.62,,70.47,-15.376706,166.959158,0.0,0.0,0.004
2,MSR,North America,Montserrat,2022-05-03,164.0,2.0,32925.115,401.526,4981,,,,,,74.16,16.742498,-62.187366,0.0,0.0,0.0
3,PLW,Oceania,Palau,2022-05-03,3844.0,6.0,211510.95,330.142,18174,47.237,,,,4.8,73.7,7.51498,134.58252,0.0,0.001,0.0
4,WLF,Oceania,Wallis and Futuna,2022-05-03,454.0,7.0,40923.021,630.972,11094,,,,,,79.94,-13.768752,-177.156097,0.0,0.0,0.0
5,BTN,Asia,Bhutan,2022-05-03,14451.0,7.0,18529.299,8.976,779900,21.188,28.6,4.885,2.977,1.7,71.78,27.514162,90.433601,0.0,0.003,0.01
6,AIA,North America,Anguilla,2022-05-03,2555.0,9.0,168925.62,595.041,15125,,,,,,81.88,18.220554,-63.068615,0.0,0.001,0.0
7,KIR,Oceania,Kiribati,2022-05-03,2985.0,11.0,24590.569,90.619,121388,143.701,23.2,3.895,2.21,1.9,68.37,-3.370417,-168.734039,0.0,0.001,0.002
8,CYM,North America,Cayman Islands,2022-05-03,19373.0,17.0,291332.07,255.647,66498,256.496,,,,,83.92,19.513469,-80.566956,0.0,0.004,0.001
9,GRL,North America,Greenland,2022-05-03,11806.0,20.0,207603.573,351.692,56868,0.137,,,,,71.7,71.706936,-42.604303,0.0,0.003,0.001


In [16]:
# Grouping Data by Continent
grouped_covid = world_covid.copy().groupby(['continent']).sum()
continent_covid = grouped_covid[['total_cases','total_deaths','population',
                                 '% of World Total Deaths', '% of World Total Cases',
                                 '% of World Population']]
continent_covid = continent_covid.reset_index()
continent_covid

Unnamed: 0,continent,total_cases,total_deaths,population,% of World Total Deaths,% of World Total Cases,% of World Population
0,Africa,11268386.0,249206.0,1371693397,4.157,2.535,17.491
1,Asia,120758338.0,1363608.0,4651957696,22.754,27.142,59.316
2,Europe,161014546.0,1721972.0,748850203,28.737,36.194,9.551
3,North America,93397289.0,1387009.0,592834824,23.148,20.993,7.558
4,Oceania,3831682.0,8037.0,43202462,0.134,0.861,0.551
5,South America,54622339.0,1262230.0,433953687,21.065,12.279,5.534


In [17]:
# Saving Continent dataframe to csv file for later plotting
continent_covid.to_csv('output_data/continent_covid.csv', encoding='utf-8', index=False)

In [18]:
# Saving world Covid dataframe to csv file for later plottting
world_covid.to_csv('output_data/world_covid.csv', encoding='utf-8', index=False)