In [1]:
import re
import os
import pandas as pd
import requests
# Google Maps API key used by the geocoding requests below.
# Prefer the GOOGLE_MAPS_API_KEY environment variable so the real key never has
# to be saved inside the notebook; the placeholder is kept as a fallback so the
# old "paste your key here" workflow still works.
api_key = os.environ.get("GOOGLE_MAPS_API_KEY", "MY API KEY")
from tqdm import tqdm



In [5]:
# Load the clinic site list, reading the ZIP code column as text rather than numbers.
df = pd.read_csv(
    "nyc_child_adolescent_clinics_addresses.csv",
    dtype={'site_zip_code': 'str'},
)
# Keep only the first five characters of each ZIP code
# (presumably to drop ZIP+4 suffixes -- confirm against the raw file).
df['site_zip_code'] = df['site_zip_code'].str.slice(stop=5)
df.head()

Unnamed: 0,program_region,program_county,sponsor_name,agency_name,site_name,main_site,site_address,site_city,site_state,site_zip_code,site_county,population_simple,type
0,New York City,Bronx,"Acacia Network, Inc.","PROMESA, Inc.",Westchester Center of Excellence,Y,915 Westchester Avenue 2nd Floor,Bronx,NY,10459,Bronx,Children & Adolescents,Community
1,New York City,Bronx,Association to Benefit Children,Association to Benefit Children,Fast Break - The Bronx,Y,1366 Inwood Avenue,Bronx,NY,10452,Bronx,Children & Adolescents,Community
2,New York City,Bronx,Astor Services for Children & Families,Astor Services for Children & Families,Astor @ Mott Hall V,N,1551 East 172nd Street Room 216C,Bronx,NY,10472,Bronx,Children & Adolescents,Community
3,New York City,Bronx,Astor Services for Children & Families,Astor Services for Children & Families,Astor Clinic Treatment Program - Tilden,Y,750 Tilden Street,Bronx,NY,10467,Bronx,Children & Adolescents,Community
4,New York City,Bronx,Astor Services for Children & Families,Astor Services for Children & Families,"Astor Services @ MS 80, The Isabel Rooney School",N,149 East Mosholu Parkway Room 414C,Bronx,NY,10467,Bronx,Adolescents,School


In [6]:
# Assemble one geocodable string per site: "<street>, <city>, <state> <zip>".
street_and_city = df['site_address'] + ", " + df['site_city']
state_and_zip = df['site_state'] + " " + df['site_zip_code']
df['full_address'] = street_and_city + ", " + state_and_zip
df.head()

Unnamed: 0,program_region,program_county,sponsor_name,agency_name,site_name,main_site,site_address,site_city,site_state,site_zip_code,site_county,population_simple,type,full_address
0,New York City,Bronx,"Acacia Network, Inc.","PROMESA, Inc.",Westchester Center of Excellence,Y,915 Westchester Avenue 2nd Floor,Bronx,NY,10459,Bronx,Children & Adolescents,Community,"915 Westchester Avenue 2nd Floor, Bronx, NY 10459"
1,New York City,Bronx,Association to Benefit Children,Association to Benefit Children,Fast Break - The Bronx,Y,1366 Inwood Avenue,Bronx,NY,10452,Bronx,Children & Adolescents,Community,"1366 Inwood Avenue, Bronx, NY 10452"
2,New York City,Bronx,Astor Services for Children & Families,Astor Services for Children & Families,Astor @ Mott Hall V,N,1551 East 172nd Street Room 216C,Bronx,NY,10472,Bronx,Children & Adolescents,Community,"1551 East 172nd Street Room 216C, Bronx, NY 10472"
3,New York City,Bronx,Astor Services for Children & Families,Astor Services for Children & Families,Astor Clinic Treatment Program - Tilden,Y,750 Tilden Street,Bronx,NY,10467,Bronx,Children & Adolescents,Community,"750 Tilden Street, Bronx, NY 10467"
4,New York City,Bronx,Astor Services for Children & Families,Astor Services for Children & Families,"Astor Services @ MS 80, The Isabel Rooney School",N,149 East Mosholu Parkway Room 414C,Bronx,NY,10467,Bronx,Adolescents,School,"149 East Mosholu Parkway Room 414C, Bronx, NY ..."


In [8]:
# Sanity check: (rows, columns) of the clinic table after adding full_address.
row_count, column_count = df.shape
print((row_count, column_count))

(487, 14)

In [9]:
# Several sites can share one address, so geocode each distinct address only once.
unique_full_addresses = df['full_address'].drop_duplicates()
addresses = unique_full_addresses.tolist()
print(len(addresses))
addresses[:5]

483


['915 Westchester Avenue 2nd Floor, Bronx, NY 10459',
 '1366 Inwood Avenue, Bronx, NY 10452',
 '1551 East 172nd Street Room 216C, Bronx, NY 10472',
 '750 Tilden Street, Bronx, NY 10467',
 '149 East Mosholu Parkway Room 414C, Bronx, NY 10467']

In [11]:
# Geocode every unique address with the Google Geocoding API.
#
# Each address is sent as one request; the latitude/longitude of the first
# match is collected in `results_array`, and addresses that could not be
# geocoded are collected in `bad_addresses`.
# We pause for one second after every request so that we don't overload
# Google's server -- APIs often limit how many requests you can send and how often.
import time

GEOCODE_URL = 'https://maps.googleapis.com/maps/api/geocode/json'
REQUEST_TIMEOUT_SECONDS = 30  # never let a single stalled request hang the whole run
PAUSE_SECONDS = 1             # pause between consecutive requests


def geocode_address(place):
    """Geocode one address and return its result row.

    Parameters
    ----------
    place : str
        Full address string sent as the `address` query parameter.

    Returns
    -------
    dict
        Keys `address`, `lat`, `long` and `geometry`, where `geometry` is
        `[lng, lat]` (GeoJSON coordinate order) of the first result.

    Raises
    ------
    requests.RequestException
        On connection problems, timeouts or a non-success HTTP status.
    LookupError
        When the response contains no results; the message carries the
        `status` field of the response so the cause is visible.
    """
    r = requests.get(
        GEOCODE_URL,
        params={'address': place, 'key': api_key},
        timeout=REQUEST_TIMEOUT_SECONDS,
    )
    r.raise_for_status()
    result_dic = r.json()

    matches = result_dic.get('results')
    if not matches:
        raise LookupError(f"no geocoding results (status={result_dic.get('status')!r})")

    # Only the first (best) match is used.
    location = matches[0]['geometry']['location']
    return {
        'address': place,
        'lat': location['lat'],
        'long': location['lng'],
        # make geometry geojson friendly: [longitude, latitude]
        'geometry': [location['lng'], location['lat']],
    }


# these lists (arrays are called lists in Python) gather the results
results_array = []
bad_addresses = []

try:
    for place in tqdm(addresses):
        try:
            results_array.append(geocode_address(place))
        except Exception as err:
            # Best effort: any failure marks the address as bad and the loop goes on.
            # `Exception` (not a bare `except:`) lets Ctrl-C / kernel interrupt through.
            bad_addresses.append(place)
            print(f"Couldn't find {place}: {err!r}")
        # sleep after every request, successful or not
        time.sleep(PAUSE_SECONDS)
finally:
    # Save whatever was collected, even if the run was interrupted part-way.
    # Both frames are always defined (and both files always rewritten) so later
    # cells work and no stale file from a previous run is left behind.
    df_good = pd.DataFrame(results_array, columns=['address', 'lat', 'long', 'geometry'])
    df_good.to_csv('nyc-clinics-geocoded.csv', index=False)
    df_bad = pd.DataFrame(bad_addresses, columns=[0])
    df_bad.to_csv('nyc-clinics-bad-addresses.csv', index=False)

100%|█████████████████████████████████████████| 483/483 [10:50<00:00,  1.35s/it]


In [16]:
# Attach the coordinates to every site by matching on the address string.
# This is an inner join (the merge default), so only sites whose address has a
# row in df_good are kept; the duplicate `address` key column is then dropped.
sites_with_coordinates = df.merge(
    df_good,
    left_on='full_address',
    right_on='address',
)
df_geocoded = sites_with_coordinates.drop(columns='address')
df_geocoded

Unnamed: 0,program_region,program_county,sponsor_name,agency_name,site_name,main_site,site_address,site_city,site_state,site_zip_code,site_county,population_simple,type,full_address,lat,long,geometry
0,New York City,Bronx,"Acacia Network, Inc.","PROMESA, Inc.",Westchester Center of Excellence,Y,915 Westchester Avenue 2nd Floor,Bronx,NY,10459,Bronx,Children & Adolescents,Community,"915 Westchester Avenue 2nd Floor, Bronx, NY 10459",40.821566,-73.898408,"[-73.8984076, 40.8215662]"
1,New York City,Bronx,Association to Benefit Children,Association to Benefit Children,Fast Break - The Bronx,Y,1366 Inwood Avenue,Bronx,NY,10452,Bronx,Children & Adolescents,Community,"1366 Inwood Avenue, Bronx, NY 10452",40.839864,-73.918820,"[-73.9188205, 40.8398641]"
2,New York City,Bronx,Astor Services for Children & Families,Astor Services for Children & Families,Astor @ Mott Hall V,N,1551 East 172nd Street Room 216C,Bronx,NY,10472,Bronx,Children & Adolescents,Community,"1551 East 172nd Street Room 216C, Bronx, NY 10472",40.831887,-73.877282,"[-73.8772819, 40.8318874]"
3,New York City,Bronx,Astor Services for Children & Families,Astor Services for Children & Families,Astor Clinic Treatment Program - Tilden,Y,750 Tilden Street,Bronx,NY,10467,Bronx,Children & Adolescents,Community,"750 Tilden Street, Bronx, NY 10467",40.876675,-73.862719,"[-73.862719, 40.8766745]"
4,New York City,Bronx,Astor Services for Children & Families,Astor Services for Children & Families,"Astor Services @ MS 80, The Isabel Rooney School",N,149 East Mosholu Parkway Room 414C,Bronx,NY,10467,Bronx,Adolescents,School,"149 East Mosholu Parkway Room 414C, Bronx, NY ...",40.876802,-73.881370,"[-73.8813704, 40.8768024]"
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
482,New York City,Richmond,Richmond Medical Center dba Richmond Universit...,Richmond Medical Center dba Richmond UMC,Port Richmond Satellite,N,166 Lockman Avenue Room 16,Staten Island,NY,10303,Richmond,Children,Community,"166 Lockman Avenue Room 16, Staten Island, NY ...",40.633147,-74.162037,"[-74.16203709999999, 40.6331468]"
483,New York City,Richmond,Richmond Medical Center dba Richmond Universit...,Richmond Medical Center dba Richmond UMC,Pouch Satellite,N,657 Castleton Avenue Administrative Building,Staten Island,NY,10301,Richmond,Children,Community,"657 Castleton Avenue Administrative Building, ...",40.635312,-74.103344,"[-74.1033443, 40.6353117]"
484,New York City,Richmond,Richmond Medical Center dba Richmond Universit...,Richmond Medical Center dba Richmond UMC,SIMHS Chait Clinic of RUMC @ Pediat,N,800 Castleton Avenue 2nd Floor,Staten Island,NY,10310,Richmond,Children & Adolescents,School,"800 Castleton Avenue 2nd Floor, Staten Island,...",40.634491,-74.109268,"[-74.1092682, 40.63449079999999]"
485,New York City,Richmond,Richmond Medical Center dba Richmond Universit...,Richmond Medical Center dba Richmond UMC,Staten Island MH Society CHAIT Clinic of RUMC,Y,669 Castleton Avenue,Staten Island,NY,10301,Richmond,Children & Adolescents,School,"669 Castleton Avenue, Staten Island, NY 10301",40.635264,-74.103762,"[-74.10376219999999, 40.6352635]"


In [17]:
# Write the merged table (site details plus lat/long/geometry) to disk without the index column.
geocoded_csv_path = 'nyc_child_adolescent_clinics_geocoded.csv'
df_geocoded.to_csv(geocoded_csv_path, index=False)