# Hospital cleanup
We already scraped the google maps ratings and did some pre-cleanup on the hospital data in the scrapeRatings.ipynb notebook. Now we just need to finalize the data for our database

In [1]:
import pandas as pd
from sqlalchemy import create_engine
from sqlalchemy.orm import Session
from sqlalchemy.ext.automap import automap_base

In [2]:
df = pd.read_csv('Resources/hospitalRatings.csv')
del df['Unnamed: 0']
df.head()

Unnamed: 0,NAME,ADDRESS,CITY,STATE,ZIP,OWNER,BEDS,RATING
0,HOUSTON METHODIST SAN JACINTO HOSPITAL ALEXAND...,1700 JAMES BOWIE DRIVE,BAYTOWN,TX,77520,NON-PROFIT,182.0,4.5
1,"WOODS AT PARKSIDE,THE",349 OLDE RIDENOUR ROAD,COLUMBUS,OH,43230,PROPRIETARY,50.0,4.1
2,DAYTON CHILDREN'S HOSPITAL,ONE CHILDRENS PLAZA,DAYTON,OH,45404,NON-PROFIT,155.0,4.0
3,VIBRA HOSPITAL OF MAHONING VALLEY,8049 SOUTH AVENUE,BOARDMAN,OH,44512,PROPRIETARY,45.0,4.6
4,HAVEN BEHAVIORAL SENIOR CARE OF DAYTON,"ONE ELIZABETH PLACE,E3 SUITE A",DAYTON,OH,45417,PROPRIETARY,32.0,2.5


In [3]:
# Looks like we're missing the website column. I must have deleted it by mistake in the original cleanup
# Let's grab it from the original data
hosp = pd.read_csv('Resources/Hospitals.csv')
hosp.head()

Unnamed: 0,X,Y,OBJECTID,ID,NAME,ADDRESS,CITY,STATE,ZIP,ZIP4,...,VAL_DATE,WEBSITE,STATE_ID,ALT_NAME,ST_FIPS,OWNER,TTL_STAFF,BEDS,TRAUMA,HELIPAD
0,-94.945477,29.74762,8497,76777520,HOUSTON METHODIST SAN JACINTO HOSPITAL ALEXAND...,1700 JAMES BOWIE DRIVE,BAYTOWN,TX,77520,NOT AVAILABLE,...,2017-12-18T00:00:00.000Z,http://www.houstonmethodist.org/locations/san-...,NOT AVAILABLE,NOT AVAILABLE,48.0,NON-PROFIT,-999.0,182.0,NOT AVAILABLE,Y
1,-82.881843,40.027143,8498,129043230,"WOODS AT PARKSIDE,THE",349 OLDE RIDENOUR ROAD,COLUMBUS,OH,43230,NOT AVAILABLE,...,2018-04-26T00:00:00.000Z,http://www.thewoodsatparkside.com/,1815,NOT AVAILABLE,39.0,PROPRIETARY,-999.0,50.0,NOT AVAILABLE,NOT AVAILABLE
2,-84.168027,39.774242,8499,130045404,DAYTON CHILDREN'S HOSPITAL,ONE CHILDRENS PLAZA,DAYTON,OH,45404,NOT AVAILABLE,...,2018-04-26T00:00:00.000Z,http://www.childrensdayton.org/cms/home/index....,1411,NOT AVAILABLE,39.0,NON-PROFIT,-999.0,155.0,PEDIATRIC LEVEL II,Y
3,-80.632972,41.005169,8500,128844512,VIBRA HOSPITAL OF MAHONING VALLEY,8049 SOUTH AVENUE,BOARDMAN,OH,44512,NOT AVAILABLE,...,2018-04-26T00:00:00.000Z,http://www.mahoningvalleyhospital.com/,1428,MAHONING VALLEY HOSPITAL BOARDMAN CAMPUS,39.0,PROPRIETARY,-999.0,45.0,NOT AVAILABLE,NOT AVAILABLE
4,-84.199398,39.74774,8501,129845417,HAVEN BEHAVIORAL SENIOR CARE OF DAYTON,"ONE ELIZABETH PLACE,E3 SUITE A",DAYTON,OH,45417,NOT AVAILABLE,...,2018-04-26T00:00:00.000Z,https://dayton.havenbehavioral.com/,1506,NOT AVAILABLE,39.0,PROPRIETARY,-999.0,32.0,NOT AVAILABLE,NOT AVAILABLE


In [4]:
df = pd.merge(df, hosp[['NAME', 'WEBSITE']], on='NAME')
df.head()

Unnamed: 0,NAME,ADDRESS,CITY,STATE,ZIP,OWNER,BEDS,RATING,WEBSITE
0,HOUSTON METHODIST SAN JACINTO HOSPITAL ALEXAND...,1700 JAMES BOWIE DRIVE,BAYTOWN,TX,77520,NON-PROFIT,182.0,4.5,http://www.houstonmethodist.org/locations/san-...
1,"WOODS AT PARKSIDE,THE",349 OLDE RIDENOUR ROAD,COLUMBUS,OH,43230,PROPRIETARY,50.0,4.1,http://www.thewoodsatparkside.com/
2,DAYTON CHILDREN'S HOSPITAL,ONE CHILDRENS PLAZA,DAYTON,OH,45404,NON-PROFIT,155.0,4.0,http://www.childrensdayton.org/cms/home/index....
3,VIBRA HOSPITAL OF MAHONING VALLEY,8049 SOUTH AVENUE,BOARDMAN,OH,44512,PROPRIETARY,45.0,4.6,http://www.mahoningvalleyhospital.com/
4,HAVEN BEHAVIORAL SENIOR CARE OF DAYTON,"ONE ELIZABETH PLACE,E3 SUITE A",DAYTON,OH,45417,PROPRIETARY,32.0,2.5,https://dayton.havenbehavioral.com/


In [5]:
# Join with state_id table
states = pd.read_csv('Resources/Clean_data/state.csv')
df = pd.merge(df, states[['state_id', 'state_abr']], left_on='STATE', right_on = 'state_abr')[['state_id', 'NAME', 'ADDRESS', 'CITY', 'ZIP', 'OWNER', 'BEDS', 'RATING', 'WEBSITE']]
df

Unnamed: 0,state_id,NAME,ADDRESS,CITY,ZIP,OWNER,BEDS,RATING,WEBSITE
0,42,HOUSTON METHODIST SAN JACINTO HOSPITAL ALEXAND...,1700 JAMES BOWIE DRIVE,BAYTOWN,77520,NON-PROFIT,182.0,4.5,http://www.houstonmethodist.org/locations/san-...
1,42,PROVIDENCE HOSPITAL OF NORTH HOUSTON LLC,16750 RED OAK DR,HOUSTON,77090,PROPRIETARY,16.0,4.0,http://phnh.net/
2,42,MCCALLEN MEDICAL CENTER,301 WEST EXPRESSWAY 83,MCALLEN,78503,PROPRIETARY,441.0,4.0,http://www.mcallenmedicalcenter.com/
3,42,FAITH COMMUNITY HOSPITAL,215 CHISHOLM TRAIL,JACKSBORO,76458,PROPRIETARY,17.0,3.6,https://www.fchtexas.com
4,42,FAITH COMMUNITY HOSPITAL,215 CHISHOLM TRAIL,JACKSBORO,76458,PROPRIETARY,17.0,3.6,http://www.faithcommunityhospital.com
...,...,...,...,...,...,...,...,...,...
8403,7,ROCKFORD CENTER,100 ROCKFORD DR,NEWARK,19713,PROPRIETARY,92.0,2.0,http://www.rockfordcenter.com/
8404,7,SELECT SPECIALTY HOSPITAL OF WILMINGTON,701 N CLAYTON ST,WILMINGTON,19805,PROPRIETARY,35.0,4.2,http://wilmington.selectspecialtyhospitals.com/
8405,7,BEEBE MEDICAL CENTER,424 SAVANNAH RD,LEWES,19958,NON-PROFIT,210.0,2.8,http://www.beebemed.org/index.cfm
8406,7,BAYHEALTH KENT GENERAL HOSPITAL,640 S STATE ST,DOVER,19901,NON-PROFIT,236.0,2.6,http://www.bayhealth.org/Home/Facilities/tabid...


In [6]:
# Save data
df.to_csv('Resources/Clean_data/hospitals.csv')

In [27]:
# Connect to database
from secrets import username, password
Base = automap_base()
engine = create_engine(f'postgresql://{username}:{password}@localhost:5432/ETL')
conn = engine.connect()
Base.prepare(engine, reflect= True)
Row = Base.classes.hospitals

In [28]:
# Insert into database
rows = []
columns = df.columns
for index, row in df.iterrows():
    object = Row(
    state_id= row['state_id'],
    name= row['NAME'],
    address= row['ADDRESS'],
    city= row['CITY'],
    zip= row['ZIP'],
    owner= row['OWNER'],
    rating= row['RATING'],
    website= row['WEBSITE']
    )
    rows.append(object)

In [26]:
# Commit changes
sesh = Session(engine)
sesh.add_all(rows)
sesh.commit()
sesh.close()