# Part 1
* Import dependencies
* Define source data, including
    * Resources/human_development_index.csv, a list of HDI scores for the world's countries put out by the UN
    * Resources/edu.csv, a list of education indicators put out by the World Bank and downloaded from Kaggle
    * Resources/countries_geo.csv, a list of countries by geographic coordinates
    * Resources/continent.csv, a list of countries by continent


In [1]:
import pandas as pd
import psycopg2
from sqlalchemy import create_engine
import numpy as np

#pycountry is a library that we'll use to standardize the names of the countries in our miscellaneous datasets.
#It must be pip-installed (pip install pycountry) before you can use it.
import pycountry

In [2]:
#HDI Spreadsheet: HDI scores for the world's countries, put out by the UN
csv_hdi = "Resources/human_development_index.csv"

#EDU Spreadsheet: education indicators put out by the World Bank (downloaded from Kaggle)
csv_edu = "Resources/edu.csv"

#Geographic Coordinates: countries with their latitude and longitude
csv_geo = "Resources/countries_geo.csv"

#Continents: countries by continent
csv_con = "Resources/continent.csv"

# Part 2
* Process geographic coordinate and country files
* Standardize country names within these two sheets and merge them as df_geo
* Set the standardized country name as the index
* Verify

In [3]:
#load the "csv_geo" file (country coordinates) into a dataframe
geo_df = pd.read_csv(csv_geo)
#preview the first rows to check which columns were read
geo_df.head()

Unnamed: 0,country,latitude,longitude,name
0,AD,42.546245,1.601554,Andorra
1,AE,23.424076,53.847818,United Arab Emirates
2,AF,33.93911,67.709953,Afghanistan
3,AG,17.060816,-61.796428,Antigua and Barbuda
4,AI,18.220554,-63.068615,Anguilla


In [4]:
#the existing "country" column only holds short codes and is not valuable here, so remove it;
#the readable country names are in the "name" column
geo_drop = geo_df.drop(labels=['country'], axis='columns')
geo_drop.head()

Unnamed: 0,latitude,longitude,name
0,42.546245,1.601554,Andorra
1,23.424076,53.847818,United Arab Emirates
2,33.93911,67.709953,Afghanistan
3,17.060816,-61.796428,Antigua and Barbuda
4,18.220554,-63.068615,Anguilla


In [5]:
#"name" becomes "country" so this frame shares its key column with the continent frame
geo_renamed = geo_drop.rename({"name": "country"}, axis="columns")
geo_renamed.head()

Unnamed: 0,latitude,longitude,country
0,42.546245,1.601554,Andorra
1,23.424076,53.847818,United Arab Emirates
2,33.93911,67.709953,Afghanistan
3,17.060816,-61.796428,Antigua and Barbuda
4,18.220554,-63.068615,Anguilla


In [6]:
#Load the csv_con file (countries by continent) into a dataframe

con_df = pd.read_csv(csv_con)
#preview the first rows to check which columns were read
con_df.head()

Unnamed: 0,country,code,country-code,sub-region,intermediate-region,region-code
0,Afghanistan,AF,AFG,Asia,Southern Asia,
1,Aland Islands,AX,ALA,Europe,Northern Europe,
2,Albania,AL,ALB,Europe,Southern Europe,
3,Algeria,DZ,DZA,Africa,Northern Africa,
4,American Samoa,AS,ASM,Oceania,Polynesia,


In [7]:
#change "name" to "country" to prepare for the matching ID
#NOTE(review): the con_df preview shows a "country" column and no "name" column, so this
#rename appears to leave the columns unchanged -- confirm against the CSV.
#con_renamed is still the frame that the merge on 'country' uses.
con_renamed = con_df.rename(columns={"name": "country"})
con_renamed.head()

Unnamed: 0,country,code,country-code,sub-region,intermediate-region,region-code
0,Afghanistan,AF,AFG,Asia,Southern Asia,
1,Aland Islands,AX,ALA,Europe,Northern Europe,
2,Albania,AL,ALB,Europe,Southern Europe,
3,Algeria,DZ,DZA,Africa,Northern Africa,
4,American Samoa,AS,ASM,Oceania,Polynesia,


In [8]:
#join the coordinates with the continent details, matching rows on the country name
#(no "how" is given, so pandas uses its default join type)
df_geo = geo_renamed.merge(con_renamed, on='country')
df_geo.head()

Unnamed: 0,latitude,longitude,country,code,country-code,sub-region,intermediate-region,region-code
0,42.546245,1.601554,Andorra,AD,AND,Europe,Southern Europe,
1,23.424076,53.847818,United Arab Emirates,AE,ARE,Asia,Western Asia,
2,33.93911,67.709953,Afghanistan,AF,AFG,Asia,Southern Asia,
3,17.060816,-61.796428,Antigua and Barbuda,AG,ATG,Americas,Latin America and the Caribbean,Caribbean
4,18.220554,-63.068615,Anguilla,AI,AIA,Americas,Latin America and the Caribbean,Caribbean


In [9]:
#rearrange the order of columns (this also leaves 'region-code' out).
#.copy() makes df_geo an independent frame, so adding the 'country_std' column to it
#later does not trigger pandas' SettingWithCopyWarning.
df_geo = df_geo[['country', 'latitude', 'longitude', 'code','country-code','sub-region','intermediate-region']].copy()
df_geo.head()

Unnamed: 0,country,latitude,longitude,code,country-code,sub-region,intermediate-region
0,Andorra,42.546245,1.601554,AD,AND,Europe,Southern Europe
1,United Arab Emirates,23.424076,53.847818,AE,ARE,Asia,Western Asia
2,Afghanistan,33.93911,67.709953,AF,AFG,Asia,Southern Asia
3,Antigua and Barbuda,17.060816,-61.796428,AG,ATG,Americas,Latin America and the Caribbean
4,Anguilla,18.220554,-63.068615,AI,AIA,Americas,Latin America and the Caribbean


In [10]:
#this is the code that will assign a standardized country name to each country in the list.
#it takes a second to run

country_std = []

for country in df_geo.country:
    try:
        #only the first fuzzy-search result is used; LookupError means nothing matched
        match = pycountry.countries.search_fuzzy(country)[0]
    except LookupError:
        #no match: leave the standardized name empty
        final = np.nan
    else:
        #Read the name from the record's attributes instead of slicing its printed form
        #(str(record).split("'")[5]): that position depends on which fields the record
        #carries and shifts when a name contains an apostrophe.
        #NOTE(review): common_name is preferred when present because that appears to be
        #what the old slicing returned for such records -- confirm against the installed pycountry.
        final = getattr(match, "common_name", None) or match.name

    country_std.append(final)

df_geo['country_std'] = country_std

df_geo.head()

Unnamed: 0,country,latitude,longitude,code,country-code,sub-region,intermediate-region,country_std
0,Andorra,42.546245,1.601554,AD,AND,Europe,Southern Europe,Andorra
1,United Arab Emirates,23.424076,53.847818,AE,ARE,Asia,Western Asia,United Arab Emirates
2,Afghanistan,33.93911,67.709953,AF,AFG,Asia,Southern Asia,Afghanistan
3,Antigua and Barbuda,17.060816,-61.796428,AG,ATG,Americas,Latin America and the Caribbean,Antigua and Barbuda
4,Anguilla,18.220554,-63.068615,AI,AIA,Americas,Latin America and the Caribbean,Anguilla


In [11]:
#fixing some faults in the Pycountry program that I can't fix automatically so I'm doing it by hand.
#Specifically PyCountry thinks that Niger is part of Nigeria (and is wrong)
#And while it normally lists island protectorates and overseas territories separately (e.g. Guernsey is Guernsey),
#For some of the French overseas protectorates they list them as part of France.
#These errors mean I can't use the standard country name as a Primary Key later so I'm fixing them.

#countries whose standardized name is forced to their own source name
manual_names = ("Niger", "Guadeloupe", "Mayotte")

#The name is read through the 'country' label. The previous row[0] relied on 'country'
#being the first column and on integer keys being treated as positions, which pandas
#has deprecated for Series with non-integer labels.
for index, name in df_geo['country'].items():
    if name in manual_names:
        print(name)
        df_geo.loc[index, 'country_std'] = name

Guadeloupe
Niger
Mayotte


### The next cell contains what should be the final Geographic dataframe for the purpose of configuring the SQL table.

In [12]:
#the standardized name becomes the index of the frame
df_geo = df_geo.set_index("country_std")

#Discovered the hard way that SQL doesn't care for hyphens much, so every hyphen in a
#column name is swapped for an underscore.
df_geo = df_geo.rename(columns=lambda label: label.replace("-", "_"))

df_geo.head()

Unnamed: 0_level_0,country,latitude,longitude,code,country_code,sub_region,intermediate_region
country_std,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
Andorra,Andorra,42.546245,1.601554,AD,AND,Europe,Southern Europe
United Arab Emirates,United Arab Emirates,23.424076,53.847818,AE,ARE,Asia,Western Asia
Afghanistan,Afghanistan,33.93911,67.709953,AF,AFG,Asia,Southern Asia
Antigua and Barbuda,Antigua and Barbuda,17.060816,-61.796428,AG,ATG,Americas,Latin America and the Caribbean
Anguilla,Anguilla,18.220554,-63.068615,AI,AIA,Americas,Latin America and the Caribbean


# Part 3
* Process HDI files as df_hdi
* Standardize country names within this sheet
* Set the standardized country name as the index
* Verify

In [13]:
#import the HDI spreadsheet into a dataframe

df_hdi = pd.read_csv(csv_hdi)

#preview the first rows to check which columns were read
df_hdi.head()

Unnamed: 0,country,hdi_rank_2018,1990,1991,1992,1993,1994,1995,1996,1997,...,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018
0,Afghanistan,170.0,0.298,0.304,0.312,0.308,0.303,0.327,0.331,0.335,...,0.447,0.464,0.465,0.479,0.485,0.488,0.49,0.491,0.493,0.496
1,Albania,69.0,0.644,0.625,0.608,0.611,0.617,0.629,0.639,0.639,...,0.729,0.74,0.759,0.771,0.781,0.787,0.788,0.788,0.789,0.791
2,Algeria,82.0,0.578,0.582,0.589,0.593,0.597,0.602,0.61,0.619,...,0.72,0.73,0.738,0.737,0.746,0.749,0.751,0.755,0.758,0.759
3,Andorra,36.0,,,,,,,,,...,0.83,0.828,0.827,0.849,0.846,0.853,0.85,0.854,0.852,0.857
4,Angola,149.0,,,,,,,,,...,0.508,0.51,0.525,0.537,0.547,0.557,0.565,0.57,0.576,0.574


In [14]:
#keep the country, its 2018 rank and the HDI scores for 2018, 2015, 2010 and 2005
df_hdi = df_hdi.loc[:, ["country", "hdi_rank_2018", "2018", "2015", "2010", "2005"]]

df_hdi.head()

Unnamed: 0,country,hdi_rank_2018,2018,2015,2010,2005
0,Afghanistan,170.0,0.496,0.49,0.464,0.41
1,Albania,69.0,0.791,0.788,0.74,0.702
2,Algeria,82.0,0.759,0.751,0.73,0.694
3,Andorra,36.0,0.857,0.85,0.828,0.819
4,Angola,149.0,0.574,0.565,0.51,0.453


In [15]:
#Alright, so the UN names ("Venezuela (Bolivarian Republic of)") are causing Pycountry to choke because it doesn't like the parentheses.
#This is my crack at solving it without resorting to doing it by hand

#The patterns are raw strings: "\(" and "\)" are not valid string escapes, so the plain
#literals only reached the regex engine intact by accident and raise an invalid-escape
#warning on current Python versions. The patterns themselves are unchanged:
#" (" becomes ", " and ")" is removed, giving e.g. "Venezuela, Bolivarian Republic of".
df_hdi = df_hdi.replace(to_replace=r" \(", value=", ", regex=True)
df_hdi = df_hdi.replace(to_replace=r"\)", value="", regex=True)

#this significantly improves though does not perfect output.

In [16]:
#careful, this cell takes a fair amount of time to run.
#this is the code that will assign a standardized country name to each country in the list.

country_std = []

for country in df_hdi.country:
    try:
        #only the first fuzzy-search result is used; LookupError means nothing matched
        match = pycountry.countries.search_fuzzy(country)[0]
    except LookupError:
        #no match: leave the standardized name empty (these rows are dropped further down)
        final = np.nan
    else:
        #Read the name from the record's attributes instead of slicing its printed form
        #(str(record).split("'")[5]): that position depends on which fields the record
        #carries and shifts when a name contains an apostrophe.
        #NOTE(review): common_name is preferred when present because that appears to be
        #what the old slicing returned for such records -- confirm against the installed pycountry.
        final = getattr(match, "common_name", None) or match.name

    country_std.append(final)

df_hdi['country_std'] = country_std

df_hdi.head()

Unnamed: 0,country,hdi_rank_2018,2018,2015,2010,2005,country_std
0,Afghanistan,170.0,0.496,0.49,0.464,0.41,Afghanistan
1,Albania,69.0,0.791,0.788,0.74,0.702,Albania
2,Algeria,82.0,0.759,0.751,0.73,0.694,Algeria
3,Andorra,36.0,0.857,0.85,0.828,0.819,Andorra
4,Angola,149.0,0.574,0.565,0.51,0.453,Angola


In [17]:
#fixing some faults in the Pycountry program that I can't fix automatically so I'm doing it by hand.
#Specifically PyCountry thinks that Niger is part of Nigeria (and is wrong)
#And while it normally lists island protectorates and overseas territories separately (e.g. Guernsey is Guernsey),
#For some of the French overseas protectorates they list them as part of France.
#These errors mean I can't use the standard country name as a Primary Key later so I'm fixing them.

#countries whose standardized name is forced to their own source name
manual_names = ("Niger", "Guadeloupe", "Mayotte")

#The name is read through the 'country' label. The previous row[0] relied on 'country'
#being the first column and on integer keys being treated as positions, which pandas
#has deprecated for Series with non-integer labels.
for index, name in df_hdi['country'].items():
    if name in manual_names:
        print(name)
        df_hdi.loc[index, 'country_std'] = name

Niger


### The next cell contains what should be the final HDI dataframe for the purpose of configuring the SQL table.

In [18]:
#"country_std" is going to be the primary key, so the countries that still don't have one
#sadly have to go (e.g. D.R. Congo for some reason)
df_hdi = df_hdi[df_hdi['country_std'].notna()]

df_hdi = df_hdi.set_index("country_std", drop=True)

#can't name columns with a number in SQL, apparently, so the year columns get an "hdi_" prefix.
df_hdi = df_hdi.rename(columns={year: "hdi_" + year for year in ("2018", "2015", "2010", "2005")})

df_hdi.head()

Unnamed: 0_level_0,country,hdi_rank_2018,hdi_2018,hdi_2015,hdi_2010,hdi_2005
country_std,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Afghanistan,Afghanistan,170.0,0.496,0.49,0.464,0.41
Albania,Albania,69.0,0.791,0.788,0.74,0.702
Algeria,Algeria,82.0,0.759,0.751,0.73,0.694
Andorra,Andorra,36.0,0.857,0.85,0.828,0.819
Angola,Angola,149.0,0.574,0.565,0.51,0.453


# Part 4
* Process EDU files as df_edu
* Transpose them so we can use the country name as the primary key like for the other dfs
* Generate a standardized country name and set it as the index
* Verify

In [34]:
#load the education indicators spreadsheet into a dataframe
df_edu = pd.read_csv(csv_edu)

#preview the first rows to check which columns were read
df_edu.head()

Unnamed: 0,country,c_codes,cal_year,gnp,population,edu_prime,edu_second,edu_post_second,edu_lower_second,unemp
0,ARAB WORLD,,2017,16997.50354,414491886.0,,,,,
1,ARAB WORLD,,2016,16794.06495,406452690.0,83.911949,,,,
2,ARAB WORLD,,2015,16366.5964,398304960.0,84.720306,,,,
3,ARAB WORLD,,2014,15895.25148,390043028.0,85.314812,,,,
4,ARAB WORLD,,2013,15453.22069,381702086.0,84.703484,,,,


In [35]:
# Reference years to keep: 2005, 2010 & 2015
years = (2005, 2010, 2015)

# Keep only the rows for those years and, in the same step, only the columns used later
df_edu = df_edu.loc[df_edu['cal_year'].isin(years),
                    ['country','cal_year', 'gnp', 'population', 'edu_prime']]

df_edu.head()

Unnamed: 0,country,cal_year,gnp,population,edu_prime
2,ARAB WORLD,2015,16366.5964,398304960.0,84.720306
7,ARAB WORLD,2010,13942.73328,356508908.0,85.171822
12,ARAB WORLD,2005,11537.5954,316264728.0,82.122704
60,CARIBBEAN SMALL STATES,2015,15516.023,7204948.0,
65,CARIBBEAN SMALL STATES,2010,13380.00937,6984096.0,91.29953


In [36]:
#disaggregating the dataframes by years so that we can use the country name as the primary key later.
#Each year's rows are filtered out of df_edu and renumbered from 0. This replaces the old
#"append the rows to an empty DataFrame" pattern: DataFrame.append no longer exists in
#pandas 2.0, and filtering directly keeps the dtypes the columns already have.

#2005
cond2005 = df_edu["cal_year"] == 2005
rows2005 = df_edu.loc[cond2005, :]
df_edu2005 = rows2005.reset_index(drop=True)

#2010
cond2010 = df_edu["cal_year"] == 2010
rows2010 = df_edu.loc[cond2010, :]
df_edu2010 = rows2010.reset_index(drop=True)

#2015
cond2015 = df_edu["cal_year"] == 2015
rows2015 = df_edu.loc[cond2015, :]
df_edu2015 = rows2015.reset_index(drop=True)

In [37]:
#inner-join the 2005 rows with the 2010 rows on the country; the columns they share
#are told apart with a "_2005" / "_2010" suffix
df_edu_combined = pd.merge(df_edu2005, df_edu2010, on='country', how='inner', suffixes=("_2005", "_2010"))

df_edu_combined.head()

Unnamed: 0,country,cal_year_2005,gnp_2005,population_2005,edu_prime_2005,cal_year_2010,gnp_2010,population_2010,edu_prime_2010
0,ARAB WORLD,2005,11537.5954,316264700.0,82.122704,2010,13942.73328,356508900.0,85.171822
1,CARIBBEAN SMALL STATES,2005,11182.94029,6761932.0,89.859917,2010,13380.00937,6984096.0,91.29953
2,CENTRAL EUROPE AND THE BALTICS,2005,13901.89366,106173800.0,96.948441,2010,20007.60054,104543800.0,96.926491
3,EARLY-DEMOGRAPHIC DIVIDEND,2005,5098.028384,2691528000.0,84.594681,2010,6859.388641,2909411000.0,89.960541
4,EAST ASIA & PACIFIC,2005,8010.371761,2131363000.0,91.439407,2010,11732.34576,2207155000.0,102.004013


In [38]:
#add the 2015 figures; their column names no longer clash with the suffixed ones,
#so they come through without a suffix
df_edu_final = pd.merge(df_edu_combined, df_edu2015, on='country', how='inner')

df_edu_final.head()

Unnamed: 0,country,cal_year_2005,gnp_2005,population_2005,edu_prime_2005,cal_year_2010,gnp_2010,population_2010,edu_prime_2010,cal_year,gnp,population,edu_prime
0,ARAB WORLD,2005,11537.5954,316264700.0,82.122704,2010,13942.73328,356508900.0,85.171822,2015,16366.5964,398305000.0,84.720306
1,CARIBBEAN SMALL STATES,2005,11182.94029,6761932.0,89.859917,2010,13380.00937,6984096.0,91.29953,2015,15516.023,7204948.0,
2,CENTRAL EUROPE AND THE BALTICS,2005,13901.89366,106173800.0,96.948441,2010,20007.60054,104543800.0,96.926491,2015,24919.1779,103257800.0,97.056419
3,EARLY-DEMOGRAPHIC DIVIDEND,2005,5098.028384,2691528000.0,84.594681,2010,6859.388641,2909411000.0,89.960541,2015,8725.901971,3127579000.0,93.682671
4,EAST ASIA & PACIFIC,2005,8010.371761,2131363000.0,91.439407,2010,11732.34576,2207155000.0,102.004013,2015,16135.57117,2283108000.0,93.585663


In [39]:
#give the 2015 columns the same "_<year>" suffix the 2005 and 2010 columns already carry
df_edu_final = df_edu_final.rename(
    columns={name: name + "_2015" for name in ("cal_year", "gnp", "population", "edu_prime")}
)

In [40]:
#the year is now part of every column name, so the cal_year_* columns are no longer needed
df_edu_final = df_edu_final.drop(columns=['cal_year_2005', 'cal_year_2010', 'cal_year_2015'])

df_edu_final.head()

Unnamed: 0,country,gnp_2005,population_2005,edu_prime_2005,gnp_2010,population_2010,edu_prime_2010,gnp_2015,population_2015,edu_prime_2015
0,ARAB WORLD,11537.5954,316264700.0,82.122704,13942.73328,356508900.0,85.171822,16366.5964,398305000.0,84.720306
1,CARIBBEAN SMALL STATES,11182.94029,6761932.0,89.859917,13380.00937,6984096.0,91.29953,15516.023,7204948.0,
2,CENTRAL EUROPE AND THE BALTICS,13901.89366,106173800.0,96.948441,20007.60054,104543800.0,96.926491,24919.1779,103257800.0,97.056419
3,EARLY-DEMOGRAPHIC DIVIDEND,5098.028384,2691528000.0,84.594681,6859.388641,2909411000.0,89.960541,8725.901971,3127579000.0,93.682671
4,EAST ASIA & PACIFIC,8010.371761,2131363000.0,91.439407,11732.34576,2207155000.0,102.004013,16135.57117,2283108000.0,93.585663


In [41]:
#careful, this cell takes a fair amount of time to run.
#df_edu is another name for df_edu_final (not a copy), so the new column shows up on both.
df_edu = df_edu_final

#this is the code that will assign a standardized country name to each country in the list.

country_std = []

for country in df_edu.country:
    try:
        #only the first fuzzy-search result is used; LookupError means nothing matched
        match = pycountry.countries.search_fuzzy(country)[0]
    except LookupError:
        #no match: leave the standardized name empty (these rows are dropped further down)
        final = np.nan
    else:
        #Read the name from the record's attributes instead of slicing its printed form
        #(str(record).split("'")[5]): that position depends on which fields the record
        #carries and shifts when a name contains an apostrophe.
        #NOTE(review): common_name is preferred when present because that appears to be
        #what the old slicing returned for such records -- confirm against the installed pycountry.
        final = getattr(match, "common_name", None) or match.name

    country_std.append(final)

df_edu['country_std'] = country_std

df_edu.head()

Unnamed: 0,country,gnp_2005,population_2005,edu_prime_2005,gnp_2010,population_2010,edu_prime_2010,gnp_2015,population_2015,edu_prime_2015,country_std
0,ARAB WORLD,11537.5954,316264700.0,82.122704,13942.73328,356508900.0,85.171822,16366.5964,398305000.0,84.720306,
1,CARIBBEAN SMALL STATES,11182.94029,6761932.0,89.859917,13380.00937,6984096.0,91.29953,15516.023,7204948.0,,
2,CENTRAL EUROPE AND THE BALTICS,13901.89366,106173800.0,96.948441,20007.60054,104543800.0,96.926491,24919.1779,103257800.0,97.056419,
3,EARLY-DEMOGRAPHIC DIVIDEND,5098.028384,2691528000.0,84.594681,6859.388641,2909411000.0,89.960541,8725.901971,3127579000.0,93.682671,
4,EAST ASIA & PACIFIC,8010.371761,2131363000.0,91.439407,11732.34576,2207155000.0,102.004013,16135.57117,2283108000.0,93.585663,


In [42]:
#fixing some faults in the Pycountry program that I can't fix automatically so I'm doing it by hand.
#Specifically PyCountry thinks that Niger is part of Nigeria (and is wrong)
#And while it normally lists island protectorates and overseas territories separately (e.g. Guernsey is Guernsey),
#For some of the French overseas protectorates they list them as part of France.
#These errors mean I can't use the standard country name as a Primary Key later so I'm fixing them.
#Curacao and Kosovo are also set by hand here (presumably the fuzzy search did not resolve them -- confirm).

#source name (upper case in this dataset) -> standardized name to force
manual_names = {
    "NIGER": "Niger",
    "GUADELOUPE": "Guadeloupe",
    "MAYOTTE": "Mayotte",
    "CURACAO": "Curacao",
    "KOSOVO": "Kosovo",
}

#The name is read through the 'country' label. The previous row[0] relied on 'country'
#being the first column and on integer keys being treated as positions, which pandas
#has deprecated for Series with non-integer labels.
for index, name in df_edu['country'].items():
    fixed = manual_names.get(name)
    if fixed is not None:
        print(fixed)
        df_edu.loc[index, 'country_std'] = fixed

Curacao
Kosovo
Niger


In [43]:
#keep only the rows that ended up with a standardized name

df_edu = df_edu[df_edu['country_std'].notna()]

df_edu.head()

Unnamed: 0,country,gnp_2005,population_2005,edu_prime_2005,gnp_2010,population_2010,edu_prime_2010,gnp_2015,population_2015,edu_prime_2015,country_std
47,AFGHANISTAN,1010.0,25070798.0,,1590.0,28803167.0,,1940.0,33736494.0,,Afghanistan
48,ALBANIA,6060.0,3011487.0,,9540.0,2913021.0,89.237343,11470.0,2880703.0,104.468193,Albania
49,ALGERIA,10430.0,33288437.0,93.185028,12580.0,36117637.0,95.538918,14220.0,39871528.0,105.436417,Algeria
50,AMERICAN SAMOA,,59118.0,,,55637.0,,,55537.0,,American Samoa
51,ANDORRA,,78867.0,,,84449.0,,,78014.0,,Andorra


In [44]:
#number of rows left once the rows without a standardized name are gone
df_edu.shape[0]

195

### The next cell should contain the final education table for the purposes of creating the SQL tables.

In [45]:
#use the standardized country name as the index
df_edu = df_edu.set_index("country_std")

df_edu.head()

Unnamed: 0_level_0,country,gnp_2005,population_2005,edu_prime_2005,gnp_2010,population_2010,edu_prime_2010,gnp_2015,population_2015,edu_prime_2015
country_std,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
Afghanistan,AFGHANISTAN,1010.0,25070798.0,,1590.0,28803167.0,,1940.0,33736494.0,
Albania,ALBANIA,6060.0,3011487.0,,9540.0,2913021.0,89.237343,11470.0,2880703.0,104.468193
Algeria,ALGERIA,10430.0,33288437.0,93.185028,12580.0,36117637.0,95.538918,14220.0,39871528.0,105.436417
American Samoa,AMERICAN SAMOA,,59118.0,,,55637.0,,,55537.0,
Andorra,ANDORRA,,78867.0,,,84449.0,,,78014.0,


# Part 5
* Export everything to postgres


In [46]:
#set up a postgres connection

from urllib.parse import quote

from login import username, password

#percent-encode the credentials so characters such as "@", ":" or "/" inside them
#cannot be mistaken for parts of the URL
rds_connection_string = f"{quote(username, safe='')}:{quote(password, safe='')}@localhost/Project_2"

#show where we are connecting without echoing the password into the notebook output
print(f"{username}:***@localhost/Project_2")

engine = create_engine(f'postgresql://{rds_connection_string}')

postgres:postgres@localhost/Project_2


In [47]:
#list the tables that already exist in the database.
#engine.table_names() was deprecated in SQLAlchemy 1.4 and removed in 2.0;
#the inspector provides the table names instead.
from sqlalchemy import inspect

inspect(engine).get_table_names()

['edu', 'hdi', 'geo']

In [49]:
#Each frame is written to the table of the same short name. if_exists='append' adds the
#rows to the table that is already there rather than replacing it.
#NOTE(review): running this cell a second time would append the same rows again -- confirm
#the tables are empty (or guarded by their primary key) before re-running.

#df_geo (the geography dataframe) to SQL

df_geo.to_sql(name='geo', con=engine, if_exists='append')

#df_hdi (the HDI dataframe) to SQL

df_hdi.to_sql(name='hdi', con=engine, if_exists='append')

#df_edu (the education dataframe) to SQL

df_edu.to_sql(name='edu', con=engine, if_exists='append')
