Requirements:
<br>
ALL .csv files for this project
* resources/all_us_highways.csv
* resources/all_us_interstates.csv
* resources/blue_laws.csv
* resources/state_population_data_july2020.csv
* resources/US_Accidents_Dec20_Updated.csv
* resources/weed_legalization_raw_scraped.csv
* output_tables/data_grouped_by_state.csv
* output_tables/land_area_by_state_scraped.csv
* output_tables/road_distance_by_state.csv
<br>
Warning! This script will pull any and all .csv files in your "resources" and "output_tables" folders.
<br>
Be cautious of what files you have in there.

In [1]:
import pandas as pd
import sqlalchemy
import psycopg2
import os
import getpass
import re
import time

In [2]:
# WARNING: THIS SCRIPT WILL TAKE EVERYTHING
main_tables = ['resources/' + i for i in os.listdir('resources/') if i.endswith('.csv')]
output_tables = ['output_tables/' + i for i in os.listdir('output_tables')]

# Acquire Credentials
#### Enter DB Password and Username below.
#### By default, pgadmin sets your user to 'postgres'

In [3]:
print('Please Enter the AWS Endpoint and Press "Enter": ')
endpoint = getpass.getpass()

Please Enter the AWS Endpoint and Press "Enter": 
········


In [4]:
print('Please Enter the AWS RDS Password and Press "Enter":')
db_pass = getpass.getpass()

Please Enter the AWS RDS Password and Press "Enter":
········


In [5]:
username = 'postgres'

In [6]:
# Database Creation

def make_db(db_pass, username):

    db_conn = psycopg2.connect(
       database=username, user=username, password=db_pass, host=endpoint, port= '5432'
    )
    db_conn.autocommit = True
    cursor = db_conn.cursor()

    #Create New Database
    try:
        cursor.execute('''CREATE database BootcampFinalProject_Accidents;''')
        print('Database Created: "bootcampfinalproject_accidents"\n')
    except Exception as exc:
        print('Failed to Make Database, see Exception below:\n')
        print(exc)
        
    return cursor
    
    
def connect_db(db_pass):
    #Connect to new database
    try:
        db_string = f"postgresql://postgres:{db_pass}@{endpoint}:5432/bootcampfinalproject_accidents"
        engine = sqlalchemy.create_engine(db_string)
        print('Connected to the newly created database.')
    except Exception as exc:
        print('FAILED TO CONNECT to the newly created database')
        print('exc')
    
    return engine

In [7]:
# Table Creation - Input a Pandas Dataframe and send .to_sql
def create_table(filename, iteration, total):
    
    if filename.startswith('resources/non_freeway_roads/'):
        table_name = re.findall('\/\w*\/(.*).csv', i)[0]
    else:
        table_name = re.findall('\/(.*).csv', i)[0]
        
    try:
        print(f"{iteration}/{total}: -----{filename}-----")
        df = pd.read_csv(filename)
        print(f"Dataframe Created for: {filename}")
    except:
        print(f"FAILED TO CREATE DATAFRAME FOR: {filename}")
        
        
    print('\n')
        
    try:
        df.to_sql(name = table_name, con = engine, if_exists='replace', index=False)
        print("Successfully pushed to pgAdmin\n")
        print('-----------------------------------\n\n')
    except Exception as eff:
        print(f"FAILED TO BE PUSHED to pgAdmin\n")
        print(eff)
        print('-----------------------------------\n\n')


In [8]:
cursor = make_db(db_pass, username)
engine = connect_db(db_pass)

Failed to Make Database, see Exception below:

database "bootcampfinalproject_accidents" already exists

Connected to the newly created database.


In [10]:
total_num_main = len(main_tables)
total_num_outputs = len(output_tables)

start_time = time.perf_counter()
main_fails = []
for h, i in enumerate(main_tables):
    try:
        create_table(i, h+1, total_num_main)
    except:
        main_fails.append(i)
        
        
output_fails = []
for h, i in enumerate(output_tables):
    try:
        create_table(i, h+1, total_num_outputs)
    except:
        output_fails.append(i)

1/6: -----resources/all_us_highways.csv-----
Dataframe Created for: resources/all_us_highways.csv


Successfully pushed to pgAdmin

-----------------------------------


2/6: -----resources/all_us_interstates.csv-----
Dataframe Created for: resources/all_us_interstates.csv


Successfully pushed to pgAdmin

-----------------------------------


3/6: -----resources/blue_laws.csv-----
Dataframe Created for: resources/blue_laws.csv


Successfully pushed to pgAdmin

-----------------------------------


4/6: -----resources/state_population_data_july2020.csv-----
Dataframe Created for: resources/state_population_data_july2020.csv


Successfully pushed to pgAdmin

-----------------------------------


5/6: -----resources/US_Accidents_Dec20_Updated.csv-----
Dataframe Created for: resources/US_Accidents_Dec20_Updated.csv


Successfully pushed to pgAdmin

-----------------------------------


6/6: -----resources/weed_legalization_raw_scraped.csv-----
Dataframe Created for: resources/weed_legaliz

In [11]:
time_elapsed = round(time.perf_counter() - start_time, 3)

In [12]:
if len(main_fails) > 0:
    print('\n\nThe following files failed to push to pgAdmin:\n')

    for i in main_fails:
        print(i)
#     for i in state_fails:
#         print(i)
else:
    print('All Items Pushed Successfully!')
print('\n-------------------------------------------------\n')
print(f'Elapsed Time: {time_elapsed}')


All Items Pushed Successfully!

-------------------------------------------------

Elapsed Time: 879.769
