### Hospital Geocoder

In [1]:
import pandas as pd
import numpy as np
import requests
from geopy.geocoders import Nominatim 
import json
import re
import time

In [2]:
filename = 'reported_hospital_capacity_admissions_facility_level_weekly_average_timeseries_20210103'
df = pd.read_csv(filename + '.csv')

In [3]:
df['zip'] = df['zip'].apply(lambda x: int(x) if x == x else "")

In [4]:
df['zip'].isna().any()

False

In [5]:
df['state'].isna().any()

False

In [6]:
df.shape

(106959, 93)

In [7]:
df_hospitals = df.drop_duplicates(['hospital_name', 'address', 'city', 'zip'])
df_hospitals.shape

(4924, 93)

In [8]:
df_hospitals = df.drop_duplicates(['hospital_name', 'state', 'zip'])
df_hospitals.shape

(4924, 93)

`Address`, `city`, `zip` are key identifiers. Use these to find address and city

In [9]:
with open('./keys.json', 'r') as f:
    API_KEY = json.load(f)['google']

In [10]:
def gmaps_geocoder(query_props, API_KEY):
    query = ','.join(query_props)
    api_url = f'https://maps.googleapis.com/maps/api/geocode/json?address={query}&region=us&key={API_KEY}'
    adjusted_query = False
    #Use exponential backoff API querying per Google API best practices
    current_delay = 0.1  # Set the initial retry delay to 100ms.
    max_delay = 5  
    while current_delay < max_delay:
        r = requests.get(api_url)
        res = json.loads(r.content)
        if res['status'] == 'OK':
            location = res['results'][0]['geometry']['location']
            address_components = res['results'][0]['address_components']
            number = next((component['short_name'] for component in address_components if any(item in component['types'] for item in ["street_number", "establishment", "point_of_interest"])), '')
            street = next((component['short_name'] for component in address_components if any(item in component['types'] for item in ["route", "intersection"])), '')
            address = ' '.join([number, street]) if (number and street) else street
            if not address and not adjusted_query:
                adjusted_query = True
                print(f'Using only hospital name: {query_props[0]}')
                api_url = f'https://maps.googleapis.com/maps/api/geocode/json?address={query_props[0]}&region=us&key={API_KEY}'
                continue
            city = next((component['long_name'] for component in address_components if any(item in component['types'] for item in ["locality"])), None)
            return {
                'address': address,
                'city': city,
                'lat': location['lat'],
                'lng': location['lng']
            }
        elif res['status'] == 'ZERO_RESULTS':
            break      
        time.sleep(current_delay)
        current_delay *= 2
    print(f'Google maps could not find based on {query}')
    print(f'See {api_url}')
    return None  
            

print(gmaps_geocoder(['Hospital San Antonio', 'PR'], API_KEY))
print(gmaps_geocoder(['CDT Susana Centeno','PR997','Vieques','PR','765'], API_KEY))
print(gmaps_geocoder(['Centro Medico Correccional de Bayamon', 'PR', '960'], API_KEY))   

{'address': '#18 Norte Calle Ramón Emeterio Betances', 'city': 'Mayagüez', 'lat': 18.2031146, 'lng': -67.1411964}
{'address': 'PR-997', 'city': 'Vieques', 'lat': 18.1420344, 'lng': -65.43991799999999}
Using only hospital name: Centro Medico Correccional de Bayamon
{'address': 'Expreso Río Hondo', 'city': 'Bayamón', 'lat': 18.4176894, 'lng': -66.1480701}


In [11]:
# Because OSM loves aliases
def osm_address_parse(address_dict):
    number_aliases = ['house_number','street_number']
    street_aliases = ['road','footway','street','street_name','residential','path','pedestrian','road_reference','road_reference_intl','square','place']
    city_aliases = ['city','town','village', 'hamlet', 'locality', 'croft']
    number = next((item for item in map(address_dict.get, number_aliases) if item), None)
    street = next((item for item in map(address_dict.get, street_aliases) if item), None)
    city = next((item for item in map(address_dict.get, city_aliases) if item), None)
    return (f'{number} {street}', city) if (number and street and city) else None

In [12]:
locator = Nominatim(user_agent="explorer")
#url_sanitize = re.compile('[^ ,A-Za-zÀ-ÖØ-öø-ÿ0-9]')
url_sanitize = re.compile(r"[!*'();:@&=+$/?%#\[\]]")
query = 'Hospital San Antonio,#18 Norte, Calle Dr,Mayagüez,PR,680'
url_sanitize.sub('', query)

'Hospital San Antonio,18 Norte, Calle Dr,Mayagüez,PR,680'

In [13]:
#This really should be a class
def hospital_search(row):
    query_props = [row['hospital_name'], row['address'], row['city'], row['state'], row['zip']]
    query_props = [url_sanitize.sub('', str(item)) for item in query_props if item == item]
    if not row[['address', 'city', 'state', 'zip']].isnull().values.any():
        #I don't trust OSM with hospital names
        q = ', '.join(query_props[1:])
        res = locator.geocode(q, country_codes='us', addressdetails=True)
        if res:
            address_components = osm_address_parse(res.raw['address'])
            if address_components:
                lat = res[1][0]
                lng = res[1][1]
                return pd.Series({'address': address_components[0], 'city': address_components[1], 'lat': lat, 'lng': lng})
    #if using hospital name, better trust google
    return pd.Series(gmaps_geocoder(query_props, API_KEY))

hospital_search(pd.Series({'hospital_name': 'LANDMARK HOSPITAL OF SOUTHWEST FLORIDA', 
                           'address': '1285 CREEKSIDE BLVD E', 
                           'city': 'NAPLES', 
                           'state': 'FL', 
                           'zip': '12021'}))

address    1285 Creekside Blvd
city                    Naples
lat                      26.27
lng                   -81.7891
dtype: object

## Perform tests

In [14]:
df_test = df.head(30)
df_test_2 = df_test.replace('2020-12-04', '2020-12-11')
df_test = pd.concat([df_test, df_test_2]).reset_index(drop=True)
df_test

Unnamed: 0,hospital_pk,collection_week,state,ccn,hospital_name,address,city,zip,hospital_subtype,fips_code,...,previous_day_admission_adult_covid_suspected_30-39_7_day_sum,previous_day_admission_adult_covid_suspected_40-49_7_day_sum,previous_day_admission_adult_covid_suspected_50-59_7_day_sum,previous_day_admission_adult_covid_suspected_60-69_7_day_sum,previous_day_admission_adult_covid_suspected_70-79_7_day_sum,previous_day_admission_adult_covid_suspected_80+_7_day_sum,previous_day_admission_adult_covid_suspected_unknown_7_day_sum,previous_day_admission_pediatric_covid_suspected_7_day_sum,previous_day_total_ED_visits_7_day_sum,previous_day_admission_influenza_confirmed_7_day_sum
0,df2363988746840134806aa7b2accf9c89259601776986...,2020-12-25,PR,,Hospital San Antonio,"#18 Norte, Calle Dr",Mayagüez,680,Short Term,,...,-999999.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,166.0,0.0
1,3b081d5ef1c552538e4af4aa593a857bb922a4f364a412...,2020-12-25,LA,,Surgery Center of Zachary,,,70791,Short Term,,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,452193,2020-12-25,TX,452193.0,El Paso LTAC Hospital,,,79902,Long Term,,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,aab2bb3ab769da90baf57242c96ec481afb5ec6a233784...,2020-12-25,LA,,Crescent City Surgical Centre,,,70118,Short Term,,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,9922a3f069ff4686fe467885206fb1172f609233cfe517...,2020-12-25,PR,,Centro Medico Correccional de Bayamon,,,960,Short Term,,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,976b4ce7c95e1cd394112ce7762ed040f18a141e56a52e...,2020-12-25,PR,,Hospital Industrial C.F.S.E,Paseo Dr. Jose Celso Barbosa,San Juan,935,Short Term,,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,10.0,0.0
6,ff9032c172057190bf52f523d253c79af2d5f74d007084...,2020-12-25,PR,,HealthproMed,,,775,Short Term,,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,12.0,0.0
7,36db6bad3679dfdcccc301fc608438109ad907943c702f...,2020-12-25,PR,,CDT Susana Centeno,,,765,Short Term,,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,49.0,0.0
8,f70d7abf93c78280583ac18e896e6737cca8212b017513...,2020-12-25,NV,,Elite Medical Center,150 E Harmon Ave,Las Vegas,89109,Short Term,,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,100.0,0.0
9,ee04edd185865c38c839812cb2eb5ae5d3f8922e3b629e...,2020-12-25,LA,,Alexandria Emergency Hospital,5900 Coliseum Blvd,Alexandria,71303,Short Term,,...,,,,,,,,,,


In [15]:
# Address, city, zip key identifiers
df_test_hospitals = df_test.drop_duplicates(['address', 'city', 'zip'])[['hospital_name', 'address', 'city', 'state', 'zip']]
df_test_hospitals

Unnamed: 0,hospital_name,address,city,state,zip
0,Hospital San Antonio,"#18 Norte, Calle Dr",Mayagüez,PR,680
1,Surgery Center of Zachary,,,LA,70791
2,El Paso LTAC Hospital,,,TX,79902
3,Crescent City Surgical Centre,,,LA,70118
4,Centro Medico Correccional de Bayamon,,,PR,960
5,Hospital Industrial C.F.S.E,Paseo Dr. Jose Celso Barbosa,San Juan,PR,935
6,HealthproMed,,,PR,775
7,CDT Susana Centeno,,,PR,765
8,Elite Medical Center,150 E Harmon Ave,Las Vegas,NV,89109
9,Alexandria Emergency Hospital,5900 Coliseum Blvd,Alexandria,LA,71303


In [16]:
df_test_hospitals[['address', 'city', 'lat', 'lng']] = df_test_hospitals.apply(hospital_search, axis=1)
df_test_hospitals

Using only hospital name: Centro Medico Correccional de Bayamon


Unnamed: 0,hospital_name,address,city,state,zip,lat,lng
0,Hospital San Antonio,#18 Norte Calle Ramón Emeterio Betances,Mayagüez,PR,680,18.203115,-67.141196
1,Surgery Center of Zachary,4845 Main St,Zachary,LA,70791,30.648624,-91.152534
2,El Paso LTAC Hospital,1221 N Cotton St,El Paso,TX,79902,31.783761,-106.474257
3,Crescent City Surgical Centre,3017 Galleria,Metairie,LA,70118,29.99326,-90.15302
4,Centro Medico Correccional de Bayamon,Expreso Río Hondo,Bayamón,PR,960,18.417689,-66.14807
5,Hospital Industrial C.F.S.E,Paseo Dr. Jose Celso Barbosa,San Juan,PR,935,18.394457,-66.073083
6,HealthproMed,Avenida Borinquen,San Juan,PR,775,18.437672,-66.056066
7,CDT Susana Centeno,PR-997,Vieques,PR,765,18.142034,-65.439918
8,Elite Medical Center,150 E Harmon Ave,Las Vegas,NV,89109,36.10864,-115.165903
9,Alexandria Emergency Hospital,5900 Coliseum Boulevard,Alexandria,LA,71303,31.29505,-92.494976


Replace NaN values?

In [17]:
col_order = df_test.columns
df_updated = df_test.combine_first(df_test_hospitals)
df_updated = df_updated.reindex(columns=col_order.tolist())
df_updated

Unnamed: 0,hospital_pk,collection_week,state,ccn,hospital_name,address,city,zip,hospital_subtype,fips_code,...,previous_day_admission_adult_covid_suspected_30-39_7_day_sum,previous_day_admission_adult_covid_suspected_40-49_7_day_sum,previous_day_admission_adult_covid_suspected_50-59_7_day_sum,previous_day_admission_adult_covid_suspected_60-69_7_day_sum,previous_day_admission_adult_covid_suspected_70-79_7_day_sum,previous_day_admission_adult_covid_suspected_80+_7_day_sum,previous_day_admission_adult_covid_suspected_unknown_7_day_sum,previous_day_admission_pediatric_covid_suspected_7_day_sum,previous_day_total_ED_visits_7_day_sum,previous_day_admission_influenza_confirmed_7_day_sum
0,df2363988746840134806aa7b2accf9c89259601776986...,2020-12-25,PR,,Hospital San Antonio,"#18 Norte, Calle Dr",Mayagüez,680,Short Term,,...,-999999.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,166.0,0.0
1,3b081d5ef1c552538e4af4aa593a857bb922a4f364a412...,2020-12-25,LA,,Surgery Center of Zachary,4845 Main St,Zachary,70791,Short Term,,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,452193,2020-12-25,TX,452193.0,El Paso LTAC Hospital,1221 N Cotton St,El Paso,79902,Long Term,,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,aab2bb3ab769da90baf57242c96ec481afb5ec6a233784...,2020-12-25,LA,,Crescent City Surgical Centre,3017 Galleria,Metairie,70118,Short Term,,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,9922a3f069ff4686fe467885206fb1172f609233cfe517...,2020-12-25,PR,,Centro Medico Correccional de Bayamon,Expreso Río Hondo,Bayamón,960,Short Term,,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,976b4ce7c95e1cd394112ce7762ed040f18a141e56a52e...,2020-12-25,PR,,Hospital Industrial C.F.S.E,Paseo Dr. Jose Celso Barbosa,San Juan,935,Short Term,,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,10.0,0.0
6,ff9032c172057190bf52f523d253c79af2d5f74d007084...,2020-12-25,PR,,HealthproMed,Avenida Borinquen,San Juan,775,Short Term,,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,12.0,0.0
7,36db6bad3679dfdcccc301fc608438109ad907943c702f...,2020-12-25,PR,,CDT Susana Centeno,PR-997,Vieques,765,Short Term,,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,49.0,0.0
8,f70d7abf93c78280583ac18e896e6737cca8212b017513...,2020-12-25,NV,,Elite Medical Center,150 E Harmon Ave,Las Vegas,89109,Short Term,,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,100.0,0.0
9,ee04edd185865c38c839812cb2eb5ae5d3f8922e3b629e...,2020-12-25,LA,,Alexandria Emergency Hospital,5900 Coliseum Blvd,Alexandria,71303,Short Term,,...,,,,,,,,,,


In [18]:
df_updated = df_test.fillna(pd.concat([df_test_hospitals, df_test_hospitals]).reset_index(drop=True))
df_updated

Unnamed: 0,hospital_pk,collection_week,state,ccn,hospital_name,address,city,zip,hospital_subtype,fips_code,...,previous_day_admission_adult_covid_suspected_30-39_7_day_sum,previous_day_admission_adult_covid_suspected_40-49_7_day_sum,previous_day_admission_adult_covid_suspected_50-59_7_day_sum,previous_day_admission_adult_covid_suspected_60-69_7_day_sum,previous_day_admission_adult_covid_suspected_70-79_7_day_sum,previous_day_admission_adult_covid_suspected_80+_7_day_sum,previous_day_admission_adult_covid_suspected_unknown_7_day_sum,previous_day_admission_pediatric_covid_suspected_7_day_sum,previous_day_total_ED_visits_7_day_sum,previous_day_admission_influenza_confirmed_7_day_sum
0,df2363988746840134806aa7b2accf9c89259601776986...,2020-12-25,PR,,Hospital San Antonio,"#18 Norte, Calle Dr",Mayagüez,680,Short Term,,...,-999999.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,166.0,0.0
1,3b081d5ef1c552538e4af4aa593a857bb922a4f364a412...,2020-12-25,LA,,Surgery Center of Zachary,4845 Main St,Zachary,70791,Short Term,,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,452193,2020-12-25,TX,452193.0,El Paso LTAC Hospital,1221 N Cotton St,El Paso,79902,Long Term,,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,aab2bb3ab769da90baf57242c96ec481afb5ec6a233784...,2020-12-25,LA,,Crescent City Surgical Centre,3017 Galleria,Metairie,70118,Short Term,,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,9922a3f069ff4686fe467885206fb1172f609233cfe517...,2020-12-25,PR,,Centro Medico Correccional de Bayamon,Expreso Río Hondo,Bayamón,960,Short Term,,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,976b4ce7c95e1cd394112ce7762ed040f18a141e56a52e...,2020-12-25,PR,,Hospital Industrial C.F.S.E,Paseo Dr. Jose Celso Barbosa,San Juan,935,Short Term,,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,10.0,0.0
6,ff9032c172057190bf52f523d253c79af2d5f74d007084...,2020-12-25,PR,,HealthproMed,Avenida Borinquen,San Juan,775,Short Term,,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,12.0,0.0
7,36db6bad3679dfdcccc301fc608438109ad907943c702f...,2020-12-25,PR,,CDT Susana Centeno,PR-997,Vieques,765,Short Term,,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,49.0,0.0
8,f70d7abf93c78280583ac18e896e6737cca8212b017513...,2020-12-25,NV,,Elite Medical Center,150 E Harmon Ave,Las Vegas,89109,Short Term,,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,100.0,0.0
9,ee04edd185865c38c839812cb2eb5ae5d3f8922e3b629e...,2020-12-25,LA,,Alexandria Emergency Hospital,5900 Coliseum Blvd,Alexandria,71303,Short Term,,...,,,,,,,,,,


It works I guess, but I don't like it

Writing custom mapper object

In [19]:
def hospital_mapper(row):
    index_map = (df_test_hospitals[['hospital_name', 'state', 'zip']] == row[['hospital_name', 'state', 'zip']]).all(axis=1)
    i = df_test_hospitals.index[index_map]
    series = df_test_hospitals.iloc[i]
    return series.iloc[0][['address', 'city', 'lat', 'lng']]
    
df_updated = df_test
df_updated[['address', 'city', 'lat', 'lng']] = df_updated.apply(hospital_mapper, axis=1)
df_updated

Unnamed: 0,hospital_pk,collection_week,state,ccn,hospital_name,address,city,zip,hospital_subtype,fips_code,...,previous_day_admission_adult_covid_suspected_50-59_7_day_sum,previous_day_admission_adult_covid_suspected_60-69_7_day_sum,previous_day_admission_adult_covid_suspected_70-79_7_day_sum,previous_day_admission_adult_covid_suspected_80+_7_day_sum,previous_day_admission_adult_covid_suspected_unknown_7_day_sum,previous_day_admission_pediatric_covid_suspected_7_day_sum,previous_day_total_ED_visits_7_day_sum,previous_day_admission_influenza_confirmed_7_day_sum,lat,lng
0,df2363988746840134806aa7b2accf9c89259601776986...,2020-12-25,PR,,Hospital San Antonio,#18 Norte Calle Ramón Emeterio Betances,Mayagüez,680,Short Term,,...,0.0,0.0,0.0,0.0,0.0,0.0,166.0,0.0,18.203115,-67.141196
1,3b081d5ef1c552538e4af4aa593a857bb922a4f364a412...,2020-12-25,LA,,Surgery Center of Zachary,4845 Main St,Zachary,70791,Short Term,,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,30.648624,-91.152534
2,452193,2020-12-25,TX,452193.0,El Paso LTAC Hospital,1221 N Cotton St,El Paso,79902,Long Term,,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,31.783761,-106.474257
3,aab2bb3ab769da90baf57242c96ec481afb5ec6a233784...,2020-12-25,LA,,Crescent City Surgical Centre,3017 Galleria,Metairie,70118,Short Term,,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,29.99326,-90.15302
4,9922a3f069ff4686fe467885206fb1172f609233cfe517...,2020-12-25,PR,,Centro Medico Correccional de Bayamon,Expreso Río Hondo,Bayamón,960,Short Term,,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,18.417689,-66.14807
5,976b4ce7c95e1cd394112ce7762ed040f18a141e56a52e...,2020-12-25,PR,,Hospital Industrial C.F.S.E,Paseo Dr. Jose Celso Barbosa,San Juan,935,Short Term,,...,0.0,0.0,0.0,0.0,0.0,0.0,10.0,0.0,18.394457,-66.073083
6,ff9032c172057190bf52f523d253c79af2d5f74d007084...,2020-12-25,PR,,HealthproMed,Avenida Borinquen,San Juan,775,Short Term,,...,0.0,0.0,0.0,0.0,0.0,0.0,12.0,0.0,18.437672,-66.056066
7,36db6bad3679dfdcccc301fc608438109ad907943c702f...,2020-12-25,PR,,CDT Susana Centeno,PR-997,Vieques,765,Short Term,,...,0.0,0.0,0.0,0.0,0.0,0.0,49.0,0.0,18.142034,-65.439918
8,f70d7abf93c78280583ac18e896e6737cca8212b017513...,2020-12-25,NV,,Elite Medical Center,150 E Harmon Ave,Las Vegas,89109,Short Term,,...,0.0,0.0,0.0,0.0,0.0,0.0,100.0,0.0,36.10864,-115.165903
9,ee04edd185865c38c839812cb2eb5ae5d3f8922e3b629e...,2020-12-25,LA,,Alexandria Emergency Hospital,5900 Coliseum Boulevard,Alexandria,71303,Short Term,,...,,,,,,,,,31.29505,-92.494976


Better. Needs to be wrapped

## Implement same procedure

In [20]:
df_hospitals = df_hospitals[['hospital_name', 'address', 'city', 'state', 'zip']]
df_hospitals.shape

(4924, 5)

In [21]:
THE FOLLOWING PROCEDURE IS EXPENSIVE

SyntaxError: invalid syntax (<ipython-input-21-f0b4aa86e3c9>, line 1)

In [22]:
df_hospitals[['address', 'city', 'lat', 'lng']] = df_hospitals.apply(hospital_search, axis=1)
print('Done')

Using only hospital name: Centro Medico Correccional de Bayamon
Using only hospital name: ST VINCENTS EAST
Using only hospital name: NORTH ALABAMA SPECIALTY HOSPITAL
Using only hospital name: ST ALEXIUS MEDICAL CENTER
Using only hospital name: PARKVIEW REGIONAL MEDICAL CENTER
Using only hospital name: CHARLES A DEAN MEMORIAL HOSPITAL
Using only hospital name: ESSENTIA HEALTH HOLY TRINITY HOSPITAL
Using only hospital name: MAYO CLINIC HEALTH SYSTEM - ST JAMES
Using only hospital name: KPC PROMISE HOSPITAL OF VICKSBURG
Using only hospital name: SAINT LUKES EAST HOSPITAL
Using only hospital name: CARSON VALLEY MEDICAL CENTER
Using only hospital name: SOCORRO GENERAL HOSPITAL
Using only hospital name: ROSWELL PARK CANCER INSTITUTE
Using only hospital name: MORROW COUNTY HOSPITAL
Using only hospital name: SELECT SPECIALTY HOSPITAL - PITTSBURGHUPMC
Using only hospital name: MAIN LINE HOSPITAL LANKENAU
Using only hospital name: PLATTE HEALTH CENTER - CAH
Using only hospital name: FREEMAN MEDI

In [23]:
df_hospitals.shape

(4924, 7)

In [24]:
df_hospitals

Unnamed: 0,hospital_name,address,city,state,zip,lat,lng
0,Hospital San Antonio,#18 Norte Calle Ramón Emeterio Betances,Mayagüez,PR,680,18.203115,-67.141196
1,Surgery Center of Zachary,4845 Main St,Zachary,LA,70791,30.648624,-91.152534
2,El Paso LTAC Hospital,1221 N Cotton St,El Paso,TX,79902,31.783761,-106.474257
3,Crescent City Surgical Centre,3017 Galleria,Metairie,LA,70118,29.993260,-90.153020
4,Centro Medico Correccional de Bayamon,Expreso Río Hondo,Bayamón,PR,960,18.417689,-66.148070
...,...,...,...,...,...,...,...
83275,GLENDORA OAKS BEHAVIORAL HEALTH HOSPITAL,150 Route 66,Glendora,CA,91740,34.128893,-117.863118
83980,SOUTHWEST GEORGIA REGIONAL MEDICAL CENTER,361 Randolph Street,Cuthbert,GA,39840,31.775452,-84.793617
87751,Encompass Health Rehabilitation Center,6401 Directors Pkwy,Abilene,TX,79606,32.371281,-99.743929
97324,Knox Co. Hospital District,701 S E 5th St,Knox City,TX,79529,33.413768,-99.811273


In [25]:
df_hospitals.to_csv('hospitals_geocoded.csv')

### Map geocode data to original dataframe

In [26]:
df_hospitals = df_hospitals.reset_index(drop=True)
df_hospitals

Unnamed: 0,hospital_name,address,city,state,zip,lat,lng
0,Hospital San Antonio,#18 Norte Calle Ramón Emeterio Betances,Mayagüez,PR,680,18.203115,-67.141196
1,Surgery Center of Zachary,4845 Main St,Zachary,LA,70791,30.648624,-91.152534
2,El Paso LTAC Hospital,1221 N Cotton St,El Paso,TX,79902,31.783761,-106.474257
3,Crescent City Surgical Centre,3017 Galleria,Metairie,LA,70118,29.993260,-90.153020
4,Centro Medico Correccional de Bayamon,Expreso Río Hondo,Bayamón,PR,960,18.417689,-66.148070
...,...,...,...,...,...,...,...
4919,GLENDORA OAKS BEHAVIORAL HEALTH HOSPITAL,150 Route 66,Glendora,CA,91740,34.128893,-117.863118
4920,SOUTHWEST GEORGIA REGIONAL MEDICAL CENTER,361 Randolph Street,Cuthbert,GA,39840,31.775452,-84.793617
4921,Encompass Health Rehabilitation Center,6401 Directors Pkwy,Abilene,TX,79606,32.371281,-99.743929
4922,Knox Co. Hospital District,701 S E 5th St,Knox City,TX,79529,33.413768,-99.811273


In [27]:
def mapper_gen(df):
    def mapper(row):
        index_map = (df[['hospital_name', 'state', 'zip']] == row[['hospital_name', 'state', 'zip']]).all(axis=1)
        i = df.index[index_map]
        #Got thrown an index error. Why does this happen, if it found a match
        try:
            series = df.iloc[i]
        except IndexError:
            print(f"Could not match {row['hospital_name']}, {row['address']}, {row['city']}, {row['state']}, {row['zip']}")
            print(i)
            return None
        if not series.empty:
            return series.iloc[0][['address', 'city', 'lat', 'lng']]
        else:
            print(f"Could not match {row['hospital_name']}, {row['address']}, {row['city']}, {row['state']}, {row['zip']}")
            print(series)
            return None
    return mapper

In [28]:
df_geocoded = df
hospital_mapper = mapper_gen(df_hospitals)
df_geocoded[['address', 'city', 'lat', 'lng']] = df_geocoded.apply(hospital_mapper, axis=1)
df_geocoded

Unnamed: 0,hospital_pk,collection_week,state,ccn,hospital_name,address,city,zip,hospital_subtype,fips_code,...,previous_day_admission_adult_covid_suspected_50-59_7_day_sum,previous_day_admission_adult_covid_suspected_60-69_7_day_sum,previous_day_admission_adult_covid_suspected_70-79_7_day_sum,previous_day_admission_adult_covid_suspected_80+_7_day_sum,previous_day_admission_adult_covid_suspected_unknown_7_day_sum,previous_day_admission_pediatric_covid_suspected_7_day_sum,previous_day_total_ED_visits_7_day_sum,previous_day_admission_influenza_confirmed_7_day_sum,lat,lng
0,df2363988746840134806aa7b2accf9c89259601776986...,2020-12-25,PR,,Hospital San Antonio,#18 Norte Calle Ramón Emeterio Betances,Mayagüez,680,Short Term,,...,0.0,0.0,0.0,0.0,0.0,0.0,166.0,0.0,18.203115,-67.141196
1,3b081d5ef1c552538e4af4aa593a857bb922a4f364a412...,2020-12-25,LA,,Surgery Center of Zachary,4845 Main St,Zachary,70791,Short Term,,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,30.648624,-91.152534
2,452193,2020-12-25,TX,452193,El Paso LTAC Hospital,1221 N Cotton St,El Paso,79902,Long Term,,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,31.783761,-106.474257
3,aab2bb3ab769da90baf57242c96ec481afb5ec6a233784...,2020-12-25,LA,,Crescent City Surgical Centre,3017 Galleria,Metairie,70118,Short Term,,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,29.993260,-90.153020
4,9922a3f069ff4686fe467885206fb1172f609233cfe517...,2020-12-25,PR,,Centro Medico Correccional de Bayamon,Expreso Río Hondo,Bayamón,960,Short Term,,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,18.417689,-66.148070
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
106954,400130,2020-07-31,PR,400130,HOSPITAL METROPOLITANO DE LA MONTANA,,Utuado,641,Short Term,72141.0,...,0.0,0.0,0.0,0.0,0.0,0.0,35.0,,18.267567,-66.701342
106955,400115,2020-07-31,PR,400115,CENTRO MEDICO WILMA N VAZQUEZ,Carretera #2 Km,Vega Baja,693,Short Term,72145.0,...,9.0,12.0,9.0,10.0,0.0,4.0,170.0,,18.445622,-66.398985
106956,400110,2020-07-31,PR,400110,HOSPITAL METROPOLITANO DR TITO MATTEI,,Yauco,698,Short Term,72153.0,...,0.0,4.0,0.0,0.0,0.0,0.0,274.0,,18.031410,-66.857723
106957,480002,2020-07-31,VI,480002,GOV JUAN F LUIS HOSPITAL & MEDICAL CTR,4007 Estate Diamond Ruby,Christiansted,820,Short Term,78010.0,...,0.0,0.0,0.0,-999999.0,0.0,0.0,193.0,,17.733683,-64.751426


In [29]:
df_geocoded.to_csv(filename + '_geocoded.csv')