In [83]:
# Import Dependencies
import pandas as pd
import numpy as np
import os

In [84]:
# Import CSV and convert to dataframes
hospital_df = pd.read_csv(os.path.join("Resources", "Hospital_General_Information.csv"))

In [85]:
# Drop unnecessary columns
hospital_df = hospital_df[["Hospital Name","Address","City","State","ZIP Code","County Name","Phone Number","Hospital Type","Emergency Services",
"Hospital overall rating","Safety of care national comparison","Patient experience national comparison",
"Effectiveness of care national comparison"]]

In [86]:
# Count Number of null data
hospital_df.isnull().sum() 

Hospital Name                                 0
Address                                       0
City                                          0
State                                         0
ZIP Code                                      0
County Name                                  11
Phone Number                                  0
Hospital Type                                 0
Emergency Services                            0
Hospital overall rating                       0
Safety of care national comparison            0
Patient experience national comparison        0
Effectiveness of care national comparison     0
dtype: int64

In [87]:
# Verify data are of correct type
# hospital_df.dtypes
# hospital_df.infer_objects().dtypes
hospital_df.dtypes


Hospital Name                                object
Address                                      object
City                                         object
State                                        object
ZIP Code                                      int64
County Name                                  object
Phone Number                                  int64
Hospital Type                                object
Emergency Services                             bool
Hospital overall rating                      object
Safety of care national comparison           object
Patient experience national comparison       object
Effectiveness of care national comparison    object
dtype: object

In [88]:
# # Create unique key column from address and zip code
# str_address = hospital_df["Address"].map(str)
# words= str_address.split(" ");
# print(words)
# # hospital_df["Loc_code"] = hospital_df["Address"].map(str) + "-" + contact_df["ZIP Code"].map(str)

In [89]:
total_rows =hospital_df["Hospital Name"].count


In [90]:
# Drop any duplicate rows
hospital_df.drop_duplicates()

Unnamed: 0,Hospital Name,Address,City,State,ZIP Code,County Name,Phone Number,Hospital Type,Emergency Services,Hospital overall rating,Safety of care national comparison,Patient experience national comparison,Effectiveness of care national comparison
0,MARSHALL MEDICAL CENTER SOUTH,2505 U S HIGHWAY 431 NORTH,BOAZ,AL,35957,MARSHALL,2565938310,Acute Care Hospitals,True,1,Below the national average,Same as the national average,Above the national average
1,DEKALB REGIONAL MEDICAL CENTER,200 MED CENTER DRIVE,FORT PAYNE,AL,35968,DE KALB,2568453150,Acute Care Hospitals,True,2,Above the national average,Below the national average,Same as the national average
2,CRESTWOOD MEDICAL CENTER,ONE HOSPITAL DR SE,HUNTSVILLE,AL,35801,MADISON,2568823100,Acute Care Hospitals,True,2,Below the national average,Below the national average,Same as the national average
3,CHOCTAW GENERAL HOSPITAL,"401 VANITY FAIR LANE, PO BOX 618",BUTLER,AL,36904,CHOCTAW,2054599100,Critical Access Hospitals,True,Not Available,Not Available,Not Available,Same as the national average
4,PROVIDENCE VALDEZ MEDICAL CENTER,PO BOX 550,VALDEZ,AK,99686,,9078352249,Critical Access Hospitals,True,Not Available,Not Available,Not Available,Not Available
5,YUKON KUSKOKWIM DELTA REG HOSPITAL,PO BOX 287,BETHEL,AK,99559,BETHEL,9075436300,Acute Care Hospitals,True,3,Not Available,Below the national average,Same as the national average
6,PETERSBURG MEDICAL CENTER,PO BOX 589,PETERSBURG,AK,99833,,9077724291,Critical Access Hospitals,True,Not Available,Not Available,Not Available,Not Available
7,CORDOVA COMMUNITY MEDICAL CENTER,PO BOX 160 - 602 CHASE AVENUE,CORDOVA,AK,99574,,9074248000,Critical Access Hospitals,True,Not Available,Not Available,Not Available,Not Available
8,NORTON SOUND REGIONAL HOSPITAL,1000 GREG KRUSCHEK AVENUE (P O BOX 966),NOME,AK,99762,NOME,9074433311,Critical Access Hospitals,True,Not Available,Not Available,Not Available,Not Available
9,KANAKANAK HOSPITAL,P O BOX 130,DILLINGHAM,AK,99576,DILLINGHAM,9078425201,Critical Access Hospitals,True,Not Available,Not Available,Not Available,Not Available


In [91]:
hospital_df.to_csv(os.path.join('Resources', 'hospital_cleaned.csv'))

In [92]:
# hospital_nj_df=hospital_df.loc[hospital_df['State'].isin("NJ")]
hospital_nj_df= hospital_df[hospital_df['State'] == "NJ"]

In [93]:
total_rows =hospital_nj_df["Hospital Name"].count
total_rows

<bound method Series.count of 160                            CLARA MAASS MEDICAL CENTER
161                PENN MEDICINE PRINCETON MEDICAL CENTER
162                      CAPE REGIONAL MEDICAL CENTER INC
163                              RIVERVIEW MEDICAL CENTER
164               ROBERT WOOD JOHNSON UNIVERSITY HOSPITAL
165              CAPITAL HEALTH MEDICAL CENTER - HOPEWELL
166                           HACKENSACK-UMC MOUNTAINSIDE
167                JERSEY SHORE UNIVERSITY MEDICAL CENTER
168      ROBERT WOOD JOHNSON UNIVERSITY HOSPITAL HAMILTON
2273                             COMMUNITY MEDICAL CENTER
2277                            ST LUKE'S WARREN HOSPITAL
2325                           COOPER UNIVERSITY HOSPITAL
2344                             HUDSON REGIONAL HOSPITAL
2356              MONMOUTH MEDICAL CENTER-SOUTHERN CAMPUS
2363                               CHILTON MEDICAL CENTER
2370                           JERSEY CITY MEDICAL CENTER
2419    JFK MEDICAL CTR - ANTHONY M. YELEN

In [94]:
# hospital_nj_df.to_csv(os.path.join('Resources', 'hospital_cleaned_nj.csv'))

In [95]:
# Drop and rename columns
hospital_nj_df= hospital_nj_df[["Hospital Name","City","ZIP Code","County Name","Hospital overall rating",
                                "Effectiveness of care national comparison"]]
hospital_nj_df= hospital_nj_df.rename(columns={"Hospital Name":"NAME","City":"CITY","ZIP Code":"ZIP",
                                               "County Name":"COUNTY","Hospital overall rating":"RATE",
                                               "Effectiveness of care national comparison":"CARE_EFF"})

In [96]:
hospital_nj_df.loc[hospital_nj_df.CARE_EFF=="Same as the national average", 'CARE_EFF'] = "0"

In [97]:
hospital_nj_df.loc[hospital_nj_df.CARE_EFF=="Above the national average", 'CARE_EFF'] = "1"

In [98]:
hospital_nj_df.loc[hospital_nj_df.CARE_EFF=="Below the national average", 'CARE_EFF'] = "-1"

In [99]:
hospital_nj_df=hospital_nj_df[hospital_nj_df['CARE_EFF'] != "Not Available"]

In [101]:
hospital_nj_df.to_csv(os.path.join('Resources', 'hospital_cleaned_nj.csv'))