In [1]:
# Standard library
import sys
from pathlib import Path

# Python SQL toolkit and Object Relational Mapper
import pandas as pd
import sqlalchemy
from sqlalchemy import create_engine, MetaData
from sqlalchemy.ext.automap import automap_base
from sqlalchemy.orm import Session

# Import the uri from the scripts directory
sys.path.append('../scripts')
from config import dev_uri, test_uri, prod_uri

In [2]:
# pass chosen uri string into engine depending on environment
# (dev_uri, test_uri and prod_uri are all imported from config above).
# WARNING: the next cell deletes every row from every reflected table of the
# database this engine points at, so double-check the choice before running.
uri = prod_uri
engine = create_engine(uri)

In [3]:
# delete all table data before appending
meta = MetaData()
meta.reflect(bind=engine)

# Run every DELETE on one connection inside a single transaction:
# engine.begin() commits when the block finishes and rolls back if any
# statement raises, so a failure cannot leave the database half-emptied.
# (Connectionless engine.execute() is also unavailable in SQLAlchemy 2.0.)
with engine.begin() as conn:
    # sorted_tables is in dependency order; walking it in reverse empties
    # the tables that reference others before the tables they reference.
    for table in reversed(meta.sorted_tables):
        conn.execute(table.delete())

In [4]:
# reflect an existing database into a new model
Base = automap_base()
# reflect the tables: build the automap classes from the database behind `engine`
# NOTE(review): `prepare(engine, reflect=True)` looks like the legacy calling
# form; newer SQLAlchemy releases document `prepare(autoload_with=engine)` —
# confirm against the installed version before changing it.
Base.prepare(engine, reflect=True)

In [5]:
# We can view all of the classes that automap found, in sorted order.
# These names should match the table names passed to to_sql() further down.
sorted(Base.classes.keys())

['gdci_city_country',
 'gdci_expenses',
 'gdci_tourists',
 'un_city_population',
 'world_bank_country_classification',
 'world_bank_indicator_code',
 'world_bank_tourism',
 'world_happiness_report']

In [6]:
# grab all the CSVs containing data
# Pure-Python listing instead of `!ls`: it also runs outside IPython, yields an
# empty list (rather than the shell's error text) when nothing matches, and
# sorted() gives a deterministic order independent of shell or locale.
all_csvs = sorted(str(csv_path) for csv_path in Path("../../db/data").glob("*.csv"))
all_csvs

['../../db/data/GDCI_city_country.csv',
 '../../db/data/GDCI_expenses.csv',
 '../../db/data/GDCI_tourists.csv',
 '../../db/data/un_city_population_full.csv',
 '../../db/data/un_city_population_in_scope.csv',
 '../../db/data/world_bank_country_classification.csv',
 '../../db/data/world_bank_indicator_code.csv',
 '../../db/data/world_bank_tourism.csv',
 '../../db/data/world_happiness_report_2017_to_2019.csv']

In [7]:
# transform CSVs into DataFrames: one DataFrame per path, in the same order
# as the entries of all_csvs
csv_df = [pd.read_csv(csv_path) for csv_path in all_csvs]

In [8]:
# Key each DataFrame by its CSV file name without the ".csv" suffix, so the
# assignments below do not depend on where a file falls in the directory
# listing. A missing file raises KeyError instead of silently picking up a
# different CSV.
df_by_stem = {Path(csv_path).stem: df for csv_path, df in zip(all_csvs, csv_df)}

# save DataFrames into easier-to-read ones
# (un_city_population uses the "in_scope" file; the "full" file is not loaded)
GDCI_city_country_df                 = df_by_stem['GDCI_city_country']
GDCI_expenses_df                     = df_by_stem['GDCI_expenses']
GDCI_tourists_df                     = df_by_stem['GDCI_tourists']
un_city_population_df                = df_by_stem['un_city_population_in_scope']
world_bank_country_classification_df = df_by_stem['world_bank_country_classification']
world_bank_indicator_code_df         = df_by_stem['world_bank_indicator_code']
world_bank_tourism_df                = df_by_stem['world_bank_tourism']
world_happiness_report_df            = df_by_stem['world_happiness_report_2017_to_2019']

In [9]:
# write out rows in DataFrames to Postgres tables
# Each (table name, DataFrame) pair is appended in the order listed here.
table_frames = (
    ('gdci_city_country', GDCI_city_country_df),
    ('gdci_expenses', GDCI_expenses_df),
    ('gdci_tourists', GDCI_tourists_df),
    ('un_city_population', un_city_population_df),
    ('world_bank_country_classification', world_bank_country_classification_df),
    ('world_bank_indicator_code', world_bank_indicator_code_df),
    ('world_bank_tourism', world_bank_tourism_df),
    ('world_happiness_report', world_happiness_report_df),
)
for table_name, frame in table_frames:
    # index=False: the DataFrame index is not written as a column;
    # if_exists='append': rows are added to the existing table
    frame.to_sql(table_name, con=engine, index=False, if_exists='append')

In [10]:
# close all connections coming from engine now that the load is finished
engine.dispose()