In [1]:
import os

import pandas as pd
import psycopg2
from sqlalchemy import create_engine, inspect

In [5]:
# Load the per-country location table (latitude / longitude) from the
# Resources folder and preview the first rows.
csv_file_geo = "Resources/countries_geo.csv"
geo_df = pd.read_csv(filepath_or_buffer=csv_file_geo)
geo_df.head(5)

Unnamed: 0,country,latitude,longitude,name
0,AD,42.546245,1.601554,Andorra
1,AE,23.424076,53.847818,United Arab Emirates
2,AF,33.93911,67.709953,Afghanistan
3,AG,17.060816,-61.796428,Antigua and Barbuda
4,AI,18.220554,-63.068615,Anguilla


In [21]:
# In this file "country" holds two-letter codes rather than names; remove it
# so that the "name" column can take over the "country" label afterwards.
geo_drop = geo_df.drop(labels='country', axis='columns')
geo_drop.head(5)

Unnamed: 0,latitude,longitude,name
0,42.546245,1.601554,Andorra
1,23.424076,53.847818,United Arab Emirates
2,33.93911,67.709953,Afghanistan
3,17.060816,-61.796428,Antigua and Barbuda
4,18.220554,-63.068615,Anguilla


In [24]:
# Relabel "name" as "country" so both tables share the same key column
# for the merge below.
geo_renamed = geo_drop.rename({"name": "country"}, axis="columns")
geo_renamed.head(5)

Unnamed: 0,latitude,longitude,country
0,42.546245,1.601554,Andorra
1,23.424076,53.847818,United Arab Emirates
2,33.93911,67.709953,Afghanistan
3,17.060816,-61.796428,Antigua and Barbuda
4,18.220554,-63.068615,Anguilla


In [8]:
# Import continent files
# File to load
csv_file_con = "Resources/countries_conti.csv"
# The "code" column holds two-letter country codes, and Namibia's code is
# "NA" -- one of the strings pandas treats as a missing value by default.
# Only treat empty fields as missing so such a code survives as text instead
# of silently becoming NaN (and later NULL in the database).
con_df = pd.read_csv(csv_file_con, keep_default_na=False, na_values=[""])
con_df.head()

Unnamed: 0,name,code,continent
0,Afghanistan,AF,Asia
1,Armenia,AM,Asia
2,Azerbaijan,AZ,Asia
3,Bahrain,BH,Asia
4,Bangladesh,BD,Asia


In [26]:
# Relabel "name" as "country" so the continent table exposes the same key
# column as the location table.
con_renamed = con_df.rename(columns=dict(name="country"))
con_renamed.head(5)

Unnamed: 0,country,code,continent
0,Afghanistan,AF,Asia
1,Armenia,AM,Asia
2,Azerbaijan,AZ,Asia
3,Bahrain,BH,Asia
4,Bangladesh,BD,Asia


In [33]:
# Combine the location and continent tables on the shared "country" column.
# This is an inner join: a country present in only one of the two tables
# (or spelled differently between them) does not appear in the result.
loc_info = geo_renamed.merge(con_renamed, how='inner', on='country')
loc_info.head(5)

Unnamed: 0,latitude,longitude,country,code,continent
0,23.424076,53.847818,United Arab Emirates,AE,Asia
1,33.93911,67.709953,Afghanistan,AF,Asia
2,40.069099,45.038189,Armenia,AM,Asia
3,40.143105,47.576927,Azerbaijan,AZ,Asia
4,23.684994,90.356331,Bangladesh,BD,Asia


In [34]:
# Put the columns in the order wanted for the database table.
loc_info = loc_info.loc[
    :, ['country', 'latitude', 'longitude', 'continent', 'code']
]
loc_info.head(5)

Unnamed: 0,country,latitude,longitude,continent,code
0,United Arab Emirates,23.424076,53.847818,Asia,AE
1,Afghanistan,33.93911,67.709953,Asia,AF
2,Armenia,40.069099,45.038189,Asia,AM
3,Azerbaijan,40.143105,47.576927,Asia,AZ
4,Bangladesh,23.684994,90.356331,Asia,BD


In [39]:
# Connect to local Database
# Credentials come from the standard libpq environment variables when they
# are set, so real secrets do not have to be saved in the notebook; the
# fallbacks are the original local-development values.
USERNAME = os.environ.get("PGUSER", "postgres")
PASSWORD = os.environ.get("PGPASSWORD", "postgres")
DATABASE = os.environ.get("PGDATABASE", "Project_2")
rds_connection_string = f"{USERNAME}:{PASSWORD}@localhost:5432/{DATABASE}"
# Show the connection target without echoing the password into the saved
# cell output.
print(f"{USERNAME}:***@localhost:5432/{DATABASE}")
engine = create_engine(f'postgresql://{rds_connection_string}')

postgres:postgres@localhost:5432/Project_2


In [None]:
# Check for tables
# Engine.table_names() was deprecated in SQLAlchemy 1.4 and removed in 2.0;
# the inspector API returns the same list of table names on both versions.
inspect(engine).get_table_names()

In [None]:
# Write the merged location DataFrame into the "loc" table.
# NOTE(review): if_exists='append' adds the rows again every time this cell
# runs -- presumably the table is created beforehand with its own schema;
# confirm before re-running.
loc_info.to_sql('loc', con=engine, index=False, if_exists='append')

# Read the table back to confirm the rows arrived.
pd.read_sql_query(sql='select * from loc', con=engine).head(5)