In [1]:
# import dependencies
from urllib.parse import quote_plus

import pandas as pd
from sqlalchemy import create_engine

from config import postgresql_pass

In [2]:
# Source file: infantMortalityRate.csv
# Measure: probability of dying between birth and age 1, per 1000 live births.
infant_file = "Resources/infantMortalityRate.csv"
infant_df = pd.read_csv(filepath_or_buffer=infant_file)
infant_df

Unnamed: 0,Location,Period,Indicator,Dim1,First Tooltip
0,Afghanistan,2019,Infant mortality rate (probability of dying be...,Both sexes,46.51 [37.71-55.82]
1,Afghanistan,2019,Infant mortality rate (probability of dying be...,Male,49.8 [40.2-59.95]
2,Afghanistan,2019,Infant mortality rate (probability of dying be...,Female,43.05 [34.82-51.96]
3,Afghanistan,2018,Infant mortality rate (probability of dying be...,Both sexes,48.04 [39.94-56.32]
4,Afghanistan,2018,Infant mortality rate (probability of dying be...,Male,51.35 [42.56-60.4]
...,...,...,...,...,...
29994,Timor-Leste,2002,Infant mortality rate (probability of dying be...,Both sexes,77.08 [70.29-84.49]
29995,Timor-Leste,2002,Infant mortality rate (probability of dying be...,Male,82.53 [74.86-90.67]
29996,Timor-Leste,2002,Infant mortality rate (probability of dying be...,Female,71.37 [64.81-78.76]
29997,Timor-Leste,2001,Infant mortality rate (probability of dying be...,Both sexes,80.96 [74.1-88.4]


In [3]:
# Columns to keep from the raw table, and the names they are given afterwards.
infant_cols = ["Location", "Period", "First Tooltip"]
infant_rename = {"Location": "country", "Period": "year", "First Tooltip": "mortality_rate"}

# Build the transformed frame in a single chain:
#   1. keep only the columns listed above,
#   2. rename them,
#   3. parse the rate: raw values look like "46.51 [37.71-55.82]", so keep the
#      text before the first space and convert it to float,
#   4. keep only rows with year > 2016 (2016 itself is excluded),
#   5. index the result by country.
# NOTE(review): "Dim1" (Both sexes / Male / Female in the displayed sample) is
# dropped in step 1, so a country/year pair can appear several times with
# different rates and no way to tell them apart — confirm this is intended.
infant_trans_df = (
    infant_df[infant_cols]
    .rename(columns=infant_rename)
    .assign(
        mortality_rate=lambda frame: frame["mortality_rate"]
        .str.split(" ")
        .str[0]
        .astype(float)
    )
    .loc[lambda frame: frame["year"] > 2016]
    .set_index("country")
)
infant_trans_df

Unnamed: 0_level_0,year,mortality_rate
country,Unnamed: 1_level_1,Unnamed: 2_level_1
Afghanistan,2019,46.51
Afghanistan,2019,49.80
Afghanistan,2019,43.05
Afghanistan,2018,48.04
Afghanistan,2018,51.35
...,...,...
Timor-Leste,2018,42.65
Timor-Leste,2018,35.62
Timor-Leste,2017,40.49
Timor-Leste,2017,43.94


In [4]:
# Source file: adolescentBirthRate.csv
# Measure: adolescent birth rate per 1000 women aged 15-19 years.
adolescent_file = "Resources/adolescentBirthRate.csv"
adolescent_df = pd.read_csv(filepath_or_buffer=adolescent_file)
adolescent_df

Unnamed: 0,Location,Period,Indicator,First Tooltip
0,Afghanistan,2017,Adolescent birth rate (per 1000 women aged 15-...,62.0
1,Afghanistan,2014,Adolescent birth rate (per 1000 women aged 15-...,77.2
2,Afghanistan,2013,Adolescent birth rate (per 1000 women aged 15-...,87.0
3,Afghanistan,2011,Adolescent birth rate (per 1000 women aged 15-...,125.7
4,Afghanistan,2009,Adolescent birth rate (per 1000 women aged 15-...,80.0
...,...,...,...,...
2184,Zimbabwe,2008,Adolescent birth rate (per 1000 women aged 15-...,112.0
2185,Zimbabwe,2007,Adolescent birth rate (per 1000 women aged 15-...,102.6
2186,Zimbabwe,2004,Adolescent birth rate (per 1000 women aged 15-...,98.7
2187,Zimbabwe,2003,Adolescent birth rate (per 1000 women aged 15-...,101.0


In [5]:
# Columns to keep from the raw table, and the names they are given afterwards.
adolescent_cols = ["Location", "Period", "First Tooltip"]
adolescent_rename = {"Location": "country", "Period": "year", "First Tooltip": "birth_rate"}

# Select the columns, rename them, keep only rows with year > 2016
# (2016 itself is excluded), and index the result by country.
adolescent_trans_df = (
    adolescent_df[adolescent_cols]
    .rename(columns=adolescent_rename)
    .loc[lambda frame: frame["year"] > 2016]
    .set_index("country")
)
adolescent_trans_df

Unnamed: 0_level_0,year,birth_rate
country,Unnamed: 1_level_1,Unnamed: 2_level_1
Afghanistan,2017,62.0
Albania,2017,15.9
Andorra,2018,2.7
Andorra,2017,3.6
Antigua and Barbuda,2018,27.7
...,...,...
Uruguay,2018,35.8
Uruguay,2017,41.6
Uzbekistan,2017,18.9
Zambia,2017,135.0


In [6]:
# Set up the Database variables
username = "postgres"
password = postgresql_pass
database = "etl-project_db"

# Build the connection URL.
# - The dialect name must be "postgresql": SQLAlchemy 1.4+ no longer accepts
#   the legacy "postgres" alias and raises NoSuchModuleError for it.
# - The password is percent-encoded so that characters such as "@", ":" or "/"
#   cannot be misread as URL delimiters.
DATABASE_URL = f"postgresql://{username}:{quote_plus(str(password))}@localhost/{database}"

# Connect to the database
engine = create_engine(DATABASE_URL)
# NOTE(review): `connection` is not used by the cells visible here (they pass
# `engine` to to_sql) and is never closed; it is kept in case other cells
# rely on it — confirm, and close it when the load is finished.
connection = engine.connect()

In [7]:
# Append the filtered adolescent birth-rate rows to the "adolescent" table.
# index=True writes the "country" index out as a column.
# NOTE: with if_exists='append', re-running this cell attempts to insert the
# same rows again.
adolescent_trans_df.to_sql(
    name='adolescent',
    con=engine,
    if_exists='append',
    index=True,
)

In [8]:
# Append the filtered infant mortality rows to the "infant" table.
# index=True writes the "country" index out as a column.
# NOTE: with if_exists='append', re-running this cell attempts to insert the
# same rows again.
infant_trans_df.to_sql(
    name='infant',
    con=engine,
    if_exists='append',
    index=True,
)