## Load Worldwide COVID-19 Dataset

In [1]:
# Import dependencies
import pandas as pd

In [2]:
# Location of the Our World in Data COVID-19 CSV on GitHub (master branch).
url = (
    'https://raw.githubusercontent.com/owid/covid-19-data/'
    'master/public/data/owid-covid-data.csv'
)

In [3]:
# Download the CSV at `url` and parse it into a DataFrame.
df = pd.read_csv(filepath_or_buffer=url)

In [4]:
# Display the freshly loaded frame (the notebook renders a truncated view).
df

Unnamed: 0,iso_code,continent,location,date,total_cases,new_cases,total_deaths,new_deaths,total_cases_per_million,new_cases_per_million,...,aged_70_older,gdp_per_capita,extreme_poverty,cvd_death_rate,diabetes_prevalence,female_smokers,male_smokers,handwashing_facilities,hospital_beds_per_thousand,life_expectancy
0,AFG,Asia,Afghanistan,2019-12-31,0.0,0.0,0.0,0.0,0.0,0.0,...,1.337,1803.987,,597.029,9.59,,,37.746,0.5,64.83
1,AFG,Asia,Afghanistan,2020-01-01,0.0,0.0,0.0,0.0,0.0,0.0,...,1.337,1803.987,,597.029,9.59,,,37.746,0.5,64.83
2,AFG,Asia,Afghanistan,2020-01-02,0.0,0.0,0.0,0.0,0.0,0.0,...,1.337,1803.987,,597.029,9.59,,,37.746,0.5,64.83
3,AFG,Asia,Afghanistan,2020-01-03,0.0,0.0,0.0,0.0,0.0,0.0,...,1.337,1803.987,,597.029,9.59,,,37.746,0.5,64.83
4,AFG,Asia,Afghanistan,2020-01-04,0.0,0.0,0.0,0.0,0.0,0.0,...,1.337,1803.987,,597.029,9.59,,,37.746,0.5,64.83
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
30649,,,International,2020-02-28,705.0,0.0,4.0,0.0,,,...,,,,,,,,,,
30650,,,International,2020-02-29,705.0,0.0,6.0,2.0,,,...,,,,,,,,,,
30651,,,International,2020-03-01,705.0,0.0,6.0,0.0,,,...,,,,,,,,,,
30652,,,International,2020-03-02,705.0,0.0,6.0,0.0,,,...,,,,,,,,,,


## Clean Up of Dataset

In [9]:
# Remove the world-wide aggregate (OWID_WRL) and Kosovo (OWID_KOS), the entry
# the original author flagged as having poor stats availability.
# A row must be excluded if it matches EITHER code; the previous
# `(!= A) | (!= B)` test was true for every row and therefore removed nothing.
excluded_iso_codes = ['OWID_WRL', 'OWID_KOS']
df = df[~df['iso_code'].isin(excluded_iso_codes)]
# Remove rows without an ISO code (the "International" rows in the raw data).
df = df[df['iso_code'].notna()]

In [10]:
# Changing Data Type
# Parse the date strings into datetime values. `assign` returns a new frame
# instead of writing into `df` in place, which avoids pandas'
# SettingWithCopyWarning when `df` is itself the result of a row filter.
df = df.assign(date=pd.to_datetime(df['date']))

In [11]:
# Display the frame again to inspect the result of the clean-up steps.
df

Unnamed: 0,iso_code,continent,location,date,total_cases,new_cases,total_deaths,new_deaths,total_cases_per_million,new_cases_per_million,...,aged_70_older,gdp_per_capita,extreme_poverty,cvd_death_rate,diabetes_prevalence,female_smokers,male_smokers,handwashing_facilities,hospital_beds_per_thousand,life_expectancy
0,AFG,Asia,Afghanistan,2019-12-31,0.0,0.0,0.0,0.0,0.000,0.000,...,1.337,1803.987,,597.029,9.59,,,37.746,0.5,64.83
1,AFG,Asia,Afghanistan,2020-01-01,0.0,0.0,0.0,0.0,0.000,0.000,...,1.337,1803.987,,597.029,9.59,,,37.746,0.5,64.83
2,AFG,Asia,Afghanistan,2020-01-02,0.0,0.0,0.0,0.0,0.000,0.000,...,1.337,1803.987,,597.029,9.59,,,37.746,0.5,64.83
3,AFG,Asia,Afghanistan,2020-01-03,0.0,0.0,0.0,0.0,0.000,0.000,...,1.337,1803.987,,597.029,9.59,,,37.746,0.5,64.83
4,AFG,Asia,Afghanistan,2020-01-04,0.0,0.0,0.0,0.0,0.000,0.000,...,1.337,1803.987,,597.029,9.59,,,37.746,0.5,64.83
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
30386,ZWE,Africa,Zimbabwe,2020-07-12,982.0,40.0,18.0,5.0,66.070,2.691,...,1.882,1899.775,21.4,307.846,1.82,1.6,30.7,36.791,1.7,61.49
30387,ZWE,Africa,Zimbabwe,2020-07-13,985.0,3.0,18.0,0.0,66.272,0.202,...,1.882,1899.775,21.4,307.846,1.82,1.6,30.7,36.791,1.7,61.49
30388,ZWE,Africa,Zimbabwe,2020-07-14,1034.0,49.0,19.0,1.0,69.569,3.297,...,1.882,1899.775,21.4,307.846,1.82,1.6,30.7,36.791,1.7,61.49
30389,ZWE,Africa,Zimbabwe,2020-07-15,1034.0,0.0,19.0,0.0,69.569,0.000,...,1.882,1899.775,21.4,307.846,1.82,1.6,30.7,36.791,1.7,61.49


In [25]:
# Create DataFrame with First Date of Covid-19 Case by Country
# Only rows with at least one reported case are considered. The data contains
# rows that predate any case (e.g. Afghanistan on 2019-12-31 with
# total_cases == 0), so taking the minimum date over all rows would return the
# first day a country appears in the data rather than its first case.
# Rows with a missing total_cases compare False and are excluded as well;
# countries that never report a case do not appear in the result.
has_reported_case = df['total_cases'] > 0
first_case = (
    df.loc[has_reported_case, ['iso_code', 'date']]
    .groupby('iso_code')
    .min()
    .rename(columns={'date': 'date_first_case'})
)

In [13]:
# Build one row of demographic indicators per country.
# `columns` holds the grouping key first, followed by the demographic fields.
columns = [
    'iso_code',
    'population',
    'population_density',
    'median_age',
    'aged_65_older',
    'aged_70_older',
    'gdp_per_capita',
    'extreme_poverty',
    'cvd_death_rate',
    'diabetes_prevalence',
    'female_smokers',
    'male_smokers',
    'handwashing_facilities',
    'hospital_beds_per_thousand',
    'life_expectancy',
]

# Average every demographic field within each iso_code group.
demographics_by_country = df[columns].groupby('iso_code')
country_stats = demographics_by_country.mean()

In [14]:
# The demographic fields are no longer needed on the case-level frame, so
# remove them column-wise and keep the remaining columns.
df = df.drop(
    columns=[
        'population',
        'population_density',
        'median_age',
        'aged_65_older',
        'aged_70_older',
        'gdp_per_capita',
        'extreme_poverty',
        'cvd_death_rate',
        'diabetes_prevalence',
        'female_smokers',
        'male_smokers',
        'handwashing_facilities',
        'hospital_beds_per_thousand',
        'life_expectancy',
    ]
)

## Write Data to PostgreSQL DB

In [17]:
# SQLAlchemy supplies the engine used for the database writes below.
from sqlalchemy import create_engine
import sys
# Add the parent directory to the module search path so that `config` can be
# imported (presumably config.py lives one level up - confirm).
sys.path.append('../')
# The database password is read from the local config module rather than
# being written into the notebook.
from config import db_password

In [18]:
# Connection URL for the PostgreSQL instance on AWS RDS.
# The scheme must be "postgresql": the legacy "postgres" alias is rejected by
# SQLAlchemy 1.4 and later, while "postgresql" is accepted by every version.
db_string = f"postgresql://postgres:{db_password}@module20covid.cgcfmenzscpu.us-east-2.rds.amazonaws.com:5432/postgres"
# Engine object passed as `con` to the DataFrame.to_sql calls.
db = create_engine(db_string)

In [19]:
# Write the per-country demographics to the 'Country_Stats' table,
# replacing the table if it already exists.
country_stats.to_sql(
    name='Country_Stats',
    con=db,
    if_exists='replace',
)

In [28]:
# Write the per-country first-case dates to the 'Country_FirstCase' table,
# replacing the table if it already exists.
first_case.to_sql(
    name='Country_FirstCase',
    con=db,
    if_exists='replace',
)

In [21]:
# Write the case-level frame to the 'WorldWide_Cases' table, replacing the
# table if it already exists. (A stray trailing `first_case` token that made
# this line a syntax error has been removed.)
df.to_sql(name='WorldWide_Cases', con=db, if_exists='replace')