In [1]:
# Import modules
from urllib.parse import quote

import pandas as pd
from sqlalchemy import create_engine

from config import postgresql_pass

### Extract CSVs into DataFrames
 

In [2]:
# Read the combined wealth indicators CSV into a DataFrame and preview it
wealth_file = "Resources/combined_final_last_10_years.csv"
wealth_df = pd.read_csv(filepath_or_buffer=wealth_file)
wealth_df.head(n=5)

Unnamed: 0,continent,country,year,demox_eiu,income_per_person,invest_%_gdp,tax_%_gdp,gini_index
0,Africa,"Congo, Dem. Rep.",2006,27.6,605,14.6,6.83,42.2
1,Africa,"Congo, Dem. Rep.",2007,25.2,623,13.7,6.99,42.1
2,Africa,"Congo, Dem. Rep.",2008,22.8,640,10.9,8.97,42.1
3,Africa,"Congo, Dem. Rep.",2009,22.1,637,14.6,7.89,42.1
4,Africa,"Congo, Dem. Rep.",2010,21.5,660,28.8,8.35,42.1


In [3]:
# Read the COVID-19 mortality CSV into a DataFrame and preview it
covid_file = "Resources/global_covid19_mortality_rates.csv"
covid_df = pd.read_csv(filepath_or_buffer=covid_file)
covid_df.head(n=5)

Unnamed: 0.1,Unnamed: 0,Country,Confirmed,Deaths,Mortality Ratio,Latitude,Longitude,Country_Code
0,0,Afghanistan,39145.0,1446.0,3.69,33.93911,67.709953,AF
1,1,Albania,12787.0,370.0,2.89,41.153332,20.168331,AL
2,2,Algeria,50400.0,1698.0,3.37,28.033886,1.659626,DZ
3,3,Andorra,1753.0,53.0,3.02,42.546245,1.601554,AD
4,4,Angola,4363.0,159.0,3.64,-11.202692,17.873887,AO


### Transform wealth DataFrame

In [8]:
# Keep only the columns needed for the wealth table
wealth_cols = ["country", "year", "income_per_person"]

# Build the cleaned frame as a single chain (no inplace mutation), so the
# cell can be re-run safely and intermediate state is never half-modified:
#   1. keep only the 2016 rows and the selected columns
#   2. drop incomplete rows BEFORE de-duplicating, so a country whose first
#      row has a missing value is not lost when a later row is complete
#   3. keep the first remaining row per country
#   4. index by country
wealth_trans_df = (
    wealth_df.loc[wealth_df["year"] == 2016, wealth_cols]
    .dropna()
    .drop_duplicates(subset="country")
    .set_index("country")
)
wealth_trans_df.head()

Unnamed: 0_level_0,year,income_per_person
country,Unnamed: 1_level_1,Unnamed: 2_level_1
Angola,2016,6260
Botswana,2016,16000
Burkina Faso,2016,1640
Cote d'Ivoire,2016,3400
Kenya,2016,2890


### Transform covid DataFrame

In [10]:
# Keep only the columns needed for the covid table
covid_cols = ["Country", "Deaths", "Mortality Ratio"]

# Map the source column names to the snake_case names used after the rename
covid_rename = {
    "Country": "country",
    "Deaths": "deaths",
    "Mortality Ratio": "mortality_rate",
}

# Build the cleaned frame as a single chain (no inplace mutation), so the
# cell can be re-run safely:
#   1. select and rename the columns
#   2. drop incomplete rows BEFORE de-duplicating, so a country whose first
#      row has a missing value is not lost when a later row is complete
#   3. keep the first remaining row per country
#   4. index by country
covid_trans_df = (
    covid_df[covid_cols]
    .rename(columns=covid_rename)
    .dropna()
    .drop_duplicates(subset="country")
    .set_index("country")
)
covid_trans_df.head()

Unnamed: 0_level_0,deaths,mortality_rate
country,Unnamed: 1_level_1,Unnamed: 2_level_1
Afghanistan,1446.0,3.69
Albania,370.0,2.89
Algeria,1698.0,3.37
Andorra,53.0,3.02
Angola,159.0,3.64


### Create database connection

In [None]:
# Set up the Database variables
username = "postgres"
password = postgresql_pass
database = "etl-project_db"

# Percent-encode the password so characters such as "@", ":" or "/" are not
# misread as URL delimiters when SQLAlchemy parses the connection string.
encoded_password = quote(str(password), safe="")

# Use the "postgresql" dialect name: the legacy "postgres" alias is rejected
# by SQLAlchemy 1.4+ (NoSuchModuleError), while "postgresql" works on all
# versions.
DATABASE_URI = f"postgresql://{username}:{encoded_password}@localhost/{database}"

# Connect to the database
engine = create_engine(DATABASE_URI)

# Opening a connection here makes bad credentials or an unreachable server
# fail in this cell rather than during the load.
# NOTE(review): the load cells pass `engine`, not `connection`, and nothing
# visible closes this connection -- call connection.close() when finished.
connection = engine.connect()

### Load DataFrames into database

In [None]:
# Append the cleaned wealth rows to the `wealth` table; the `country` index
# is written as a column because index=True.
wealth_trans_df.to_sql(
    name="wealth",
    con=engine,
    if_exists="append",
    index=True,
)

In [None]:
# Append the cleaned covid rows to the `covid` table; the `country` index
# is written as a column because index=True.
covid_trans_df.to_sql(
    name="covid",
    con=engine,
    if_exists="append",
    index=True,
)