In [1]:
# One-time setup: uncomment to install the database libraries imported by this notebook.
# !pip install sqlalchemy
# !pip install pymysql

In [2]:
# SQL Alchemy
from sqlalchemy import create_engine

# PyMySQL
# install_as_MySQLdb() registers PyMySQL under the MySQLdb module name, so the
# driver-less "mysql://" URL used when the engine is created resolves to PyMySQL.
import pymysql
pymysql.install_as_MySQLdb()

# Config variables
# The parent directory is appended to the import path before importing `config`
# (presumably config.py lives one level above this notebook -- verify).
import sys
sys.path.append('..')
from config import remote_gwsis_dbuser, remote_gwsis_dbpwd, remote_db_host, remote_db_port, remote_gwsis_dbname

# Import Pandas
import pandas as pd

In [3]:
# Cloud MySQL Database Connection on AWS
# The user name and password are URL-escaped before being placed in the
# connection string: characters such as "@", ":", "/" or "#" would otherwise
# be parsed as URL delimiters and the connection would fail or target the
# wrong host. Plain alphanumeric credentials are unaffected by the escaping.
# NOTE(review): if config already stores a percent-encoded password, remove
# the escaping here to avoid encoding it twice.
from urllib.parse import quote_plus

engine = create_engine(
    f"mysql://{quote_plus(str(remote_gwsis_dbuser))}:{quote_plus(str(remote_gwsis_dbpwd))}"
    f"@{remote_db_host}:{remote_db_port}/{remote_gwsis_dbname}"
)

In [4]:
# Open a connection on the remote database engine; the upload cell passes it
# to pandas as `con=`.
# NOTE(review): no visible cell closes this connection -- consider calling
# conn.close() once the upload has finished.
conn = engine.connect()

In [5]:
# Lookup table mapping each state / district name to its postal code.
state_codes_path = 'geojson/state_code.csv'
state_codes = pd.read_csv(state_codes_path)
state_codes.head()

Unnamed: 0,State/District,Postal Code
0,Alabama,AL
1,Alaska,AK
2,Arizona,AZ
3,Arkansas,AR
4,California,CA


In [6]:
# Bridge condition records per year and state: counts and areas, split into
# good / fair / poor.
condition_data_path = 'bridge/condition_by-hwy-sys_all.csv'
condition_data = pd.read_csv(condition_data_path)
condition_data.head()

Unnamed: 0,ID,Year,State,Count_All,Count_Good,Count_Fair,Count_Poor,Area_All,Area_Good,Area_Fair,Area_Poor
0,1,2009,Alabama,15959,7637,7384,934,9176079,3991613,4892783,290431
1,2,2009,Alaska,1151,518,496,135,657173,284249,314174,58564
2,3,2009,Arizona,7494,5274,2030,189,4685761,2826369,1697404,161735
3,4,2009,Arkansas,12560,7113,4809,638,5927762,3211634,2353584,362545
4,5,2009,California,24463,10275,10833,3258,27743006,8596833,13556904,5550592


In [7]:
# Build the summary table: bridge condition rows enriched with each state's
# postal abbreviation and renumbered with a fresh sequential ID.

# Columns carried over from the merged frame. The source "ID" column is left
# out on purpose: it is replaced below by a new 1..N sequence. (The previous
# version discarded it via set_index("ID") / `del index.name` /
# reset_index(drop=True); deleting `Index.name` is not supported on
# pandas 1.0+ and raises AttributeError, which broke this cell.)
summary_columns = [
    "Year",
    "State",
    "Count_All",
    "Count_Good",
    "Count_Fair",
    "Count_Poor",
    "Area_All",
    "Area_Good",
    "Area_Fair",
    "Area_Poor",
    "Postal Code",
]

bridge_summary = (
    condition_data
    # Default (inner) merge: rows whose State has no match in
    # state_codes["State/District"] are dropped.
    .merge(state_codes, left_on=["State"], right_on=["State/District"])
    .loc[:, summary_columns]
    .rename(columns={"Postal Code": "State_Abbreviation"})
    # Guarantee a clean 0..N-1 index so the new ID is a gap-free sequence.
    .reset_index(drop=True)
)

# New ID numbers the rows 1..N in merged order; it is appended as the last column.
bridge_summary["ID"] = bridge_summary.index + 1
bridge_summary.head()

Unnamed: 0,Year,State,Count_All,Count_Good,Count_Fair,Count_Poor,Area_All,Area_Good,Area_Fair,Area_Poor,State_Abbreviation,ID
0,2009,Alabama,15959,7637,7384,934,9176079,3991613,4892783,290431,AL,1
1,2010,Alabama,16018,7599,7506,912,9168322,3963495,4921237,283382,AL,2
2,2011,Alabama,16061,7435,7738,887,8951183,3766050,4914047,270877,AL,3
3,2016,Alabama,16098,7040,8338,720,9238041,3655465,5329991,252585,AL,4
4,2017,Alabama,16129,6899,8497,733,9207383,3498240,5458713,250430,AL,5


In [8]:
# Upload the summary to the remote database through the open connection.
# An existing table with this name is replaced; rows are written in batches
# of 500, and the DataFrame index is not stored as a column.
bridge_summary.to_sql(
    name='bridge_condition_summary',
    con=conn,
    if_exists='replace',
    index=False,
    chunksize=500,
)