## Load World Wide Covid-19 Dataset

In [1]:
# Import dependencies
import pandas as pd

In [2]:
url = 'https://raw.githubusercontent.com/owid/covid-19-data/master/public/data/owid-covid-data.csv'

In [3]:
df = pd.read_csv(url)

In [4]:
df

Unnamed: 0,iso_code,continent,location,date,total_cases,new_cases,total_deaths,new_deaths,total_cases_per_million,new_cases_per_million,...,aged_70_older,gdp_per_capita,extreme_poverty,cvd_death_rate,diabetes_prevalence,female_smokers,male_smokers,handwashing_facilities,hospital_beds_per_thousand,life_expectancy
0,AFG,Asia,Afghanistan,2019-12-31,0.0,0.0,0.0,0.0,0.0,0.0,...,1.337,1803.987,,597.029,9.59,,,37.746,0.5,64.83
1,AFG,Asia,Afghanistan,2020-01-01,0.0,0.0,0.0,0.0,0.0,0.0,...,1.337,1803.987,,597.029,9.59,,,37.746,0.5,64.83
2,AFG,Asia,Afghanistan,2020-01-02,0.0,0.0,0.0,0.0,0.0,0.0,...,1.337,1803.987,,597.029,9.59,,,37.746,0.5,64.83
3,AFG,Asia,Afghanistan,2020-01-03,0.0,0.0,0.0,0.0,0.0,0.0,...,1.337,1803.987,,597.029,9.59,,,37.746,0.5,64.83
4,AFG,Asia,Afghanistan,2020-01-04,0.0,0.0,0.0,0.0,0.0,0.0,...,1.337,1803.987,,597.029,9.59,,,37.746,0.5,64.83
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
31279,,,International,2020-02-28,705.0,0.0,4.0,0.0,,,...,,,,,,,,,,
31280,,,International,2020-02-29,705.0,0.0,6.0,2.0,,,...,,,,,,,,,,
31281,,,International,2020-03-01,705.0,0.0,6.0,0.0,,,...,,,,,,,,,,
31282,,,International,2020-03-02,705.0,0.0,6.0,0.0,,,...,,,,,,,,,,


## Clean Up of Dataset

In [5]:
# Remove International Cases and Country with Poor Stats Availability
df = df[(df['iso_code'] != 'OWID_WRL') | (df['iso_code'] != 'OWID_KOS')]
# Remove Aggregated Data for World Wide
df = df[df['iso_code'].isna() != True]

In [6]:
# Changing Data Type
df['date']= pd.to_datetime(df['date'])

In [7]:
df

Unnamed: 0,iso_code,continent,location,date,total_cases,new_cases,total_deaths,new_deaths,total_cases_per_million,new_cases_per_million,...,aged_70_older,gdp_per_capita,extreme_poverty,cvd_death_rate,diabetes_prevalence,female_smokers,male_smokers,handwashing_facilities,hospital_beds_per_thousand,life_expectancy
0,AFG,Asia,Afghanistan,2019-12-31,0.0,0.0,0.0,0.0,0.000,0.000,...,1.337,1803.987,,597.029,9.59,,,37.746,0.500,64.83
1,AFG,Asia,Afghanistan,2020-01-01,0.0,0.0,0.0,0.0,0.000,0.000,...,1.337,1803.987,,597.029,9.59,,,37.746,0.500,64.83
2,AFG,Asia,Afghanistan,2020-01-02,0.0,0.0,0.0,0.0,0.000,0.000,...,1.337,1803.987,,597.029,9.59,,,37.746,0.500,64.83
3,AFG,Asia,Afghanistan,2020-01-03,0.0,0.0,0.0,0.0,0.000,0.000,...,1.337,1803.987,,597.029,9.59,,,37.746,0.500,64.83
4,AFG,Asia,Afghanistan,2020-01-04,0.0,0.0,0.0,0.0,0.000,0.000,...,1.337,1803.987,,597.029,9.59,,,37.746,0.500,64.83
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
31215,OWID_WRL,,World,2020-07-15,13300560.0,220512.0,578321.0,5657.0,1706.338,28.290,...,5.355,15469.207,10.0,233.070,8.51,6.434,34.635,60.130,2.705,72.58
31216,OWID_WRL,,World,2020-07-16,13532281.0,231721.0,583897.0,5576.0,1736.065,29.728,...,5.355,15469.207,10.0,233.070,8.51,6.434,34.635,60.130,2.705,72.58
31217,OWID_WRL,,World,2020-07-17,13790342.0,258061.0,589700.0,5803.0,1769.172,33.107,...,5.355,15469.207,10.0,233.070,8.51,6.434,34.635,60.130,2.705,72.58
31218,OWID_WRL,,World,2020-07-18,14044869.0,254527.0,597148.0,7448.0,1801.826,32.653,...,5.355,15469.207,10.0,233.070,8.51,6.434,34.635,60.130,2.705,72.58


In [8]:
# Create DataFrame with First Date of Covid-19 Case by County
first_case = df[['iso_code','date']].groupby(['iso_code']).min()
first_case.columns = ['date_first_case']

In [9]:
# Create DataFrame with Country Demographics
columns = ['iso_code','population', 'population_density', 'median_age',
       'aged_65_older', 'aged_70_older', 'gdp_per_capita', 'extreme_poverty',
       'cvd_death_rate', 'diabetes_prevalence', 'female_smokers',
       'male_smokers', 'handwashing_facilities', 'hospital_beds_per_thousand',
       'life_expectancy']

country_stats = df[columns].groupby(['iso_code']).mean()

In [10]:
# Drop Country Demographics from Original DataFrame
df = df.drop(labels=['population', 'population_density', 'median_age',
       'aged_65_older', 'aged_70_older', 'gdp_per_capita', 'extreme_poverty',
       'cvd_death_rate', 'diabetes_prevalence', 'female_smokers',
       'male_smokers', 'handwashing_facilities', 'hospital_beds_per_thousand',
       'life_expectancy'], axis =1 )

## Write Data to PostgreSQL DB

In [11]:
from sqlalchemy import create_engine
import sys
sys.path.append('../')
from config import db_password

In [12]:
db_string = f"postgres://postgres:{db_password}@module20covid.cgcfmenzscpu.us-east-2.rds.amazonaws.com:5432/postgres"
db = create_engine(db_string)

In [13]:
country_stats.to_sql(name='Country_Stats', con=db, if_exists='replace')

In [14]:
first_case.to_sql(name='Country_FirstCase', con=db, if_exists='replace')

In [None]:
df.to_sql(name='WorldWide_Cases', con=db, if_exists='replace', index = False)