# hospital info extract

## imports

In [100]:
import pandas as pd
import numpy as np
import json
import glob

from collections import defaultdict

## load data

### create ZIP to FIPS map

In [54]:
# Load the state table; the `state_abbr` and `fips` columns are read from it
# when the state -> FIPS lookup is built.
statefips = pd.read_csv("./state_fips.csv")

In [74]:
# State abbreviation -> zero-padded two-digit FIPS code, one entry per row
# of the state table.
state2fip = {
    rec["state_abbr"]: f"{rec['fips']:02d}"
    for rec in statefips.to_dict(orient="records")
}

# Codes set explicitly by hand; these override any value loaded from the CSV.
state2fip.update({
    "DC": "11",  # District of Columbia
    "PR": "72",  # Puerto Rico
    "VI": "78",  # Virgin Islands
    "AS": "60",  # American Samoa
    "GU": "66",  # Guam
    "MP": "69",  # Northern Mariana Islands
    "FM": "64",  # Federated States of Micronesia
    "PW": "70",  # Palau
    "MH": "68",  # Marshall Islands
})

In [75]:
# ZIP code -> five-character county FIPS (two-digit state FIPS + three-char
# county code), parsed from the fixed-width zipcty files.
zip2fips = {}

# Example record: ZIP at [0:5], state abbreviation at [23:25], county code
# at [25:28]:
# 00401000000000200020002NY119WESTCHESTER

for fname in glob.glob("./zipcty*"):
    with open(fname) as f:
        f.readline()    # skip the first line
        for line in f:
            line = line.strip()
            zipcode = line[0:5]
            state = line[23:25]
            if state == "  ":
                # Records with a blank state field are assigned to Georgia.
                state = "GA"
            county = line[25:28]
            try:
                state_fips = state2fip[state]
            except KeyError:
                # Unknown state abbreviation (or a line too short to hold
                # one): report the record and keep going.  Only the lookup is
                # guarded so that real errors and Ctrl-C are not swallowed.
                print(line)
                continue
            fips = state_fips + county
            zip2fips[zipcode] = fips

In [79]:
# Second ZIP -> county source; its `ZIP` and `STCOUNTYFP` columns are used
# to add to / overwrite entries in zip2fips.
morezips = pd.read_csv("./ZIP-COUNTY-FIPS_2018-03.csv")

In [81]:
# Overlay the crosswalk onto zip2fips (existing ZIPs are overwritten, and a
# ZIP listed more than once keeps its last row).  The `:05d` format restores
# the leading zeros of both codes.
for zip_int, county_int in zip(morezips["ZIP"], morezips["STCOUNTYFP"]):
    zip2fips[f"{zip_int:05d}"] = f"{county_int:05d}"

In [83]:
# Hand-entered ZIP -> county FIPS pairs.
# NOTE(review): presumably hospital ZIPs missing from both source files - confirm.
zip2fips.update({
    "34265": "12027",
    "42135": "21073",
    "98415": "53053",
    "35161": "01121",
    "38935": "28083",
})

In [4]:
# Load the hospital general-information table (one row per facility,
# judging by the `Facility ID` column - TODO confirm).
df = pd.read_csv("./hospital_general_info.csv")

In [6]:
# Display the available column names before choosing which ones to keep.
df.columns

Index(['Facility ID', 'Facility Name', 'Address', 'City', 'State', 'ZIP Code',
       'County Name', 'Phone Number', 'Hospital Type', 'Hospital Ownership',
       'Emergency Services',
       'Meets criteria for promoting interoperability of EHRs',
       'Hospital overall rating', 'Hospital overall rating footnote',
       'Mortality national comparison',
       'Mortality national comparison footnote',
       'Safety of care national comparison',
       'Safety of care national comparison footnote',
       'Readmission national comparison',
       'Readmission national comparison footnote',
       'Patient experience national comparison',
       'Patient experience national comparison footnote',
       'Effectiveness of care national comparison',
       'Effectiveness of care national comparison footnote',
       'Timeliness of care national comparison',
       'Timeliness of care national comparison footnote',
       'Efficient use of medical imaging national comparison',
       'Ef

In [94]:
# Give the columns used downstream short names.
df2 = df.rename(columns={
    "Facility ID": "id",
    "Facility Name": "name",
    "Address": "addr",
    "City": "city",
    "State": "state",
    "ZIP Code": "zip",
    "Hospital Type": "type",
    "Hospital Ownership": "ownership",
    "Hospital overall rating": "rating"
})

# Drop hospitals without an overall rating.  Take an explicit copy so the
# column assignments that follow write to an independent frame rather than
# to a slice of the renamed one (avoids SettingWithCopyWarning).
df2 = df2[df2["rating"] != "Not Available"].copy()
# The remaining ratings are numeric strings; store them as floats.
df2["rating"] = df2["rating"].astype(np.float32)

In [96]:
def _county_fips_for_zip(zip_code):
    """Return the county FIPS for an integer ZIP code (KeyError if unmapped)."""
    return zip2fips[f"{zip_code:05d}"]


# Attach each hospital's county FIPS via its zero-padded ZIP code.
df2["fips"] = df2["zip"].apply(_county_fips_for_zip)

In [98]:
# Summary statistics of the mean hospital rating per county FIPS.
df2.groupby("fips")["rating"].mean().describe()

count    1707.000000
mean        3.299917
std         0.887425
min         1.000000
25%         3.000000
50%         3.250000
75%         4.000000
max         5.000000
Name: rating, dtype: float64

In [101]:
# County FIPS -> list of (lower-cased hospital name, rating) pairs, in the
# row order of df2.
fips_hospital_data = defaultdict(list)

for county, hospital, score in zip(df2["fips"], df2["name"], df2["rating"]):
    fips_hospital_data[county].append((hospital.lower(), score))

In [104]:
# Write the per-county hospital lists; the context manager guarantees the
# file is flushed and closed even if serialization fails.
with open("./fips-hospital-data.json", 'w') as out:
    json.dump(fips_hospital_data, out)

In [105]:
# Write the state abbreviation -> FIPS lookup; the context manager
# guarantees the file is flushed and closed.
with open("state_fip_codes.json", 'w') as out:
    json.dump(state2fip, out)

In [106]:
# Write the ZIP -> county FIPS lookup; the context manager guarantees the
# file is flushed and closed.
with open("zipcode_fip_codes.json", 'w') as out:
    json.dump(zip2fips, out)