Requirements:
<br>
ALL .csv files for this project
* resources/all_us_highways.csv
* resources/all_us_interstates.csv
* resources/blue_laws.csv
* resources/state_population_data_july2020.csv
* resources/US_Accidents_Dec20_Updated.csv
* resources/non_freeway_roads/{ALL LOWER 48 STATES}.CSV
<br>
Warning! This script will pull any and all .csv files in your "resources" and "resources/non_freeway_roads" folders.
<br>
Be cautious of what files you have in there.

In [1]:
import pandas as pd
import sqlalchemy
import psycopg2
import os
import getpass
import re
import time

In [2]:
# WARNING: THIS SCRIPT WILL TAKE EVERYTHING
main_tables = ['resources/' + i for i in os.listdir('resources/') if i.endswith('.csv')]
state_roads = ['resources/non_freeway_roads/' + i for i in os.listdir('resources/non_freeway_roads/')]

# Acquire Credentials
#### Enter DB Password and Username below.
#### By default, pgadmin sets your user to 'postgres'

In [3]:
print('Please Enter Your pgAdmin Database Password and Press "Enter":')
db_pass = getpass.getpass()

Please Enter Your pgAdmin Database Password and Press "Enter":
········


In [4]:
username = input('If you have set a custom username in your pgAdmin instance, enter it here.\nElse, enter "postgres"\n')

If you have set a custom username in your pgAdmin instance, enter it here.
Else, enter "postgres"
postgres


In [5]:
# Database Creation

def make_db(db_pass, username):

    db_conn = psycopg2.connect(
       database="postgres", user=username, password=db_pass, host='127.0.0.1', port= '5432'
    )
    db_conn.autocommit = True
    cursor = db_conn.cursor()

    #Create New Database
    try:
        cursor.execute('''CREATE database BootcampFinalProject_Accidents;''')
        print('Database Created: "bootcampfinalproject_accidents"\n')
    except Exception as exc:
        print('Failed to Make Database, see Exception below:\n')
        print(exc)
        
    return cursor
    
    
def connect_db(db_pass):
    #Connect to new database
    try:
        db_string = f"postgresql://postgres:{db_pass}@127.0.0.1:5432/bootcampfinalproject_accidents"
        engine = sqlalchemy.create_engine(db_string)
        print('Connected to the newly created database.')
    except Exception as exc:
        print('FAILED TO CONNECT to the newly created database')
        print('exc')
    
    return engine

In [6]:
# Table Creation - Input a Pandas Dataframe and send .to_sql
def create_table(filename, iteration, total):
    
    if filename.startswith('resources/non_freeway_roads/'):
        table_name = re.findall('\/\w*\/(.*).csv', i)[0]
    else:
        table_name = re.findall('\/(.*).csv', i)[0]
        
    try:
        print(f"{iteration}/{total}: -----{filename}-----")
        df = pd.read_csv(filename)
        print(f"Dataframe Created for: {filename}")
    except:
        print(f"FAILED TO CREATE DATAFRAME FOR: {filename}")
        
        
    print('\n')
        
    try:
        df.to_sql(name = table_name, con = engine, if_exists='replace', index=False)
        print("Successfully pushed to pgAdmin\n")
        print('-----------------------------------\n\n')
    except Exception as eff:
        print(f"FAILED TO BE PUSHED to pgAdmin\n")
        print(eff)
        print('-----------------------------------\n\n')


In [7]:
cursor = make_db(db_pass, username)
engine = connect_db(db_pass)

Database Created: "bootcampfinalproject_accidents"

Connected to the newly created database.


In [8]:
total_num_main = len(main_tables)
total_num_states = len(state_roads)

start_time = time.perf_counter()
main_fails = []
for h, i in enumerate(main_tables):
    try:
        create_table(i, h+1, total_num_main)
    except:
        main_fails.append(i)
        
        
state_fails = []
for h, i in enumerate(state_roads):
    try:
        create_table(i, h+1, total_num_states)
    except:
        state_fails.append(i)

1/5: -----resources/all_us_highways.csv-----
Dataframe Created for: resources/all_us_highways.csv


Successfully pushed to pgAdmin

-----------------------------------


2/5: -----resources/all_us_interstates.csv-----
Dataframe Created for: resources/all_us_interstates.csv


Successfully pushed to pgAdmin

-----------------------------------


3/5: -----resources/blue_laws.csv-----
Dataframe Created for: resources/blue_laws.csv


Successfully pushed to pgAdmin

-----------------------------------


4/5: -----resources/state_population_data_july2020.csv-----
Dataframe Created for: resources/state_population_data_july2020.csv


Successfully pushed to pgAdmin

-----------------------------------


5/5: -----resources/US_Accidents_Dec20_Updated.csv-----
Dataframe Created for: resources/US_Accidents_Dec20_Updated.csv


Successfully pushed to pgAdmin

-----------------------------------


1/49: -----resources/non_freeway_roads/Alabama.csv-----
Dataframe Created for: resources/non_freeway_roa

Successfully pushed to pgAdmin

-----------------------------------


39/49: -----resources/non_freeway_roads/South Carolina.csv-----
Dataframe Created for: resources/non_freeway_roads/South Carolina.csv


Successfully pushed to pgAdmin

-----------------------------------


40/49: -----resources/non_freeway_roads/South Dakota.csv-----
Dataframe Created for: resources/non_freeway_roads/South Dakota.csv


Successfully pushed to pgAdmin

-----------------------------------


41/49: -----resources/non_freeway_roads/Tennessee.csv-----
Dataframe Created for: resources/non_freeway_roads/Tennessee.csv


Successfully pushed to pgAdmin

-----------------------------------


42/49: -----resources/non_freeway_roads/Texas.csv-----
Dataframe Created for: resources/non_freeway_roads/Texas.csv


Successfully pushed to pgAdmin

-----------------------------------


43/49: -----resources/non_freeway_roads/Utah.csv-----
Dataframe Created for: resources/non_freeway_roads/Utah.csv


Successfully pushed to

In [9]:
time_elapsed = round(time.perf_counter() - start_time, 3)

In [14]:
if len(main_fails) > 0 or len(state_fails) > 0:
    print('\n\nThe following files failed to push to pgAdmin:\n')

    for i in main_fails:
        print(i)
    for i in state_fails:
        print(i)
else:
    print('All Items Pushed Successfully!')
print('\n-------------------------------------------------\n')
print(f'Elapsed Time: {time_elapsed}')


All Items Pushed Successfully!

-------------------------------------------------

Elapsed Time: 703.978
