In [None]:
# 1. Find the unique zip codes
# 2. Get their lat/lng coordinates
# 3. Acquire the list of parks near each zip code

In [251]:
import numpy as np
import pandas as pd
import requests
from time import sleep
import utility
import matplotlib.pyplot as plt

In [370]:
# Load the per-park feature table and clean it for the rest of the notebook.
info_df_wt_fsq_type = pd.read_parquet('park_feature_with_predpop.parquet')

# Keep only parks that have coordinates. `.copy()` gives an independent frame
# so the column assignment below does not write into a filtered slice.
info_df_wt_fsq_type = info_df_wt_fsq_type[info_df_wt_fsq_type['lat'].notna()].copy()

# Clamp the residual to [-150, 150]. The clamp is applied by value rather than
# through `np.where(...)[0]` + `.loc`: np.where yields *positions*, while .loc
# expects index *labels*, and the two no longer agree once rows have been
# filtered out above. NaN residuals are left as NaN.
info_df_wt_fsq_type['pred_pop_residual'] = (
    info_df_wt_fsq_type['pred_pop_residual'].clip(lower=-150, upper=150)
)

# Drop places categorised as 'Shop & Service'.
info_df_wt_fsq_type = info_df_wt_fsq_type[info_df_wt_fsq_type['fsq_type_name_simplified'] != 'Shop & Service']

info_df_wt_fsq_type.head()

# info_df_wt_fsq_type.to_parquet('park_feature_with_predpop_clean.parquet')

## Find unique zip

In [110]:
import re
def get_zip(address):
    """Extract the 5-digit zip code from a US postal address string.

    The zip code is expected right before the trailing country name, e.g.
    ``'..., PA 19103, USA'`` or ``'..., PA 19103, United States'``. A ZIP+4
    suffix (``19103-1234``) is accepted and ignored.

    Parameters
    ----------
    address : str or None
        Full address. ``None`` and non-string values (e.g. NaN) yield ``None``.

    Returns
    -------
    str or None
        The 5-digit zip code, or ``None`` when the address has none. A message
        is printed for string addresses in which no zip code was found.
    """
    if not isinstance(address, str):
        return None
    # The country alternatives must be grouped: without the group the pattern
    # reads as "(zip), USA" OR "United States", and the second alternative
    # matches without capturing any zip code.
    add_regex = r'\b(\d{5})(?:-\d{4})?, (?:USA|United States)'
    match = re.search(add_regex, address)
    if match is None:
        print(f"Didn't find zip for: {address}")
        return None
    return match[1]

In [159]:
# Zip code parsed from every park address (None where no zip was found).
zipcode_all = info_df_wt_fsq_type['address'].apply(get_zip)

# One row per distinct (zipcode, city) pair. De-duplication looks at both
# columns, so a zip that maps to several cities keeps one row per city.
zipcode_geo = (
    pd.DataFrame({
        'zipcode': zipcode_all,
        'address_city': info_df_wt_fsq_type['address_city'],
    })
    .drop_duplicates()
    .dropna()          # discard rows lacking a zip code or a city
    .reset_index()     # the previous row label is kept in an 'index' column
)

Didn't find zip for: Gifford Pinchot State Park, Warrington Township, PA, USA
Didn't find zip for: Brooklyn, NY, USA
Didn't find zip for: New York, NY, USA
Didn't find zip for: New York, NY, USA
Didn't find zip for: Babylon, NY, USA


In [201]:
def geocode_zip_us(zipcode, pause=1.0, timeout=10):
    """Look up the coordinates of a US zip code with the Nominatim search API.

    Parameters
    ----------
    zipcode : str
        Postal code to search for (country fixed to 'US').
    pause : float, optional
        Seconds to wait before sending the request. Defaults to 1 second,
        the maximum request rate allowed by the public Nominatim server.
    timeout : float, optional
        Seconds to wait for the server before giving up.

    Returns
    -------
    tuple or None
        ``(lat, lon)`` of the first match, exactly as given in the JSON
        response (no numeric conversion is done here). ``None`` when the
        search has no match, in which case the zip code is printed.

    Raises
    ------
    requests.HTTPError
        If the server answers with an error status.
    """
    params = {'format': 'json',    # API specific
              'addressdetails': 0,
              'country': 'US',
              'postalcode': zipcode}
    # A user agent other than the default provided by requests is needed
    # for the API to accept the query.
    headers = {'user-agent': 'parkFinder'}

    sleep(pause)
    result = requests.get('https://nominatim.openstreetmap.org/search',
                          params=params, headers=headers, timeout=timeout)
    result.raise_for_status()

    matches = result.json()    # parse the body once
    if not matches:
        print(zipcode)
        return None
    best = matches[0]
    return best['lat'], best['lon']

In [218]:
# Extra zip codes to geocode by postal code alone. Each code appears once
# ('08085' used to be listed twice, which cost a redundant API request and
# produced a duplicate row downstream).
zip_more = ['21402', '21220', '18031', '19060', '08098', '08085', '19462', '19444', '08066', '08086',
            '08093', '15677', '16901', '07075', '10911', '07732', '11694', '11375', '11435', '11422', '11581']

# One entry per zip code, in order: a (lat, lon) pair, or None when the lookup found nothing.
zip_geo_more = [geocode_zip_us(zipcode) for zipcode in zip_more]

15677


In [219]:
# Table of the extra zip codes with their coordinates; a failed lookup (None)
# becomes a missing lat/lng, and those rows are then dropped.
zipcode_geo_2 = pd.DataFrame({
    'zipcode': zip_more,
    'lat': [None if hit is None else hit[0] for hit in zip_geo_more],
    'lng': [None if hit is None else hit[1] for hit in zip_geo_more],
}).dropna()

In [164]:
def geocode_zip(zip_info, pause=1.0, timeout=10):
    """Look up the coordinates of a (zip code, city) pair with the Nominatim search API.

    Parameters
    ----------
    zip_info : sequence of two items, or None
        ``(zipcode, city)`` where ``city`` looks like ``'Annapolis,MD'``
        (city name, comma, state). The state part may be absent; it is then
        left out of the query instead of raising an IndexError.
    pause : float, optional
        Seconds to wait before sending the request. Defaults to 1 second,
        the maximum request rate allowed by the public Nominatim server.
    timeout : float, optional
        Seconds to wait for the server before giving up.

    Returns
    -------
    tuple or None
        ``(lat, lon)`` of the first match, exactly as given in the JSON
        response (no numeric conversion is done here). ``None`` when
        ``zip_info`` is None or the search has no match; in the latter case
        the zip code is printed.

    Raises
    ------
    requests.HTTPError
        If the server answers with an error status.
    """
    if zip_info is None:
        return None

    zipcode, city = zip_info                   # get info about the address
    params = {'format': 'json',    # API specific
              'addressdetails': 0,
              'country': 'US',
              'postalcode': zipcode}
    if isinstance(city, str):
        parts = city.split(',')
        params['city'] = parts[0].strip()
        # Only send a state when the "city,state" string actually has one.
        if len(parts) > 1 and parts[1].strip():
            params['state'] = parts[1].strip()

    # A user agent other than the default provided by requests is needed
    # for the API to accept the query.
    headers = {'user-agent': 'parkFinder'}

    sleep(pause)
    result = requests.get('https://nominatim.openstreetmap.org/search',
                          params=params, headers=headers, timeout=timeout)
    result.raise_for_status()

    matches = result.json()    # parse the body once
    if not matches:
        print(zipcode)
        return None
    best = matches[0]
    return best['lat'], best['lon']

In [211]:
# Geocode every (zipcode, city) row; each result is whatever geocode_zip returns for that row.
zip_geo = zipcode_geo.loc[:, ['zipcode', 'address_city']].apply(geocode_zip, axis=1)

In [166]:
# Unpack the geocoding results into two columns; a missing result gives a missing coordinate.
zipcode_geo['lat'] = [None if pair is None else pair[0] for pair in zip_geo]
zipcode_geo['lng'] = [None if pair is None else pair[1] for pair in zip_geo]

In [168]:
# Discard rows with any missing value (e.g. zip codes whose lat/lng is missing).
zipcode_geo = zipcode_geo.dropna()

In [368]:
#zipcode_geo.to_parquet('zipcode_geo.parquet')

## Acquire the list of parks that are nearby


In [303]:
# Parks closer than this are considered "nearby".
distance_threshold_miles = 40


def get_nearby_parks(geo_lat_lng):
    """Return the ids of the parks lying close to a given point.

    Parameters
    ----------
    geo_lat_lng : iterable of two values
        Latitude and longitude of the reference point; both are converted
        with ``float``.

    Returns
    -------
    list
        ``id`` of every row of ``info_df_wt_fsq_type`` whose distance to the
        point, as computed by ``utility.lat_lng_dist``, is below
        ``distance_threshold_miles`` and whose ``pred_pop_residual`` is not
        NaN. (``utility.lat_lng_dist`` is presumably in miles, going by the
        threshold's name; verify against the helper.)
    """
    origin_lat, origin_lng = (float(value) for value in geo_lat_lng)

    park_coords = info_df_wt_fsq_type[['lat', 'lng']]
    distance_to_origin = park_coords.apply(
        utility.lat_lng_dist, axis=1, args=((origin_lat, origin_lng),))

    is_close = distance_to_origin < distance_threshold_miles
    has_residual = ~np.isnan(info_df_wt_fsq_type['pred_pop_residual'])

    return list(info_df_wt_fsq_type.loc[is_close & has_residual, 'id'])

In [305]:
# For every zip code, collect the list of park ids returned by get_nearby_parks for its coordinates.
zipcode_geo['nearby_parks'] = zipcode_geo.loc[:, ['lat', 'lng']].apply(get_nearby_parks, axis=1)

# zipcode_geo_2['nearby_parks'] = zipcode_geo_2[['lat', 'lng']].apply(get_nearby_parks, axis=1)

In [306]:
# zipcode_geo = zipcode_geo.append(zipcode_geo_2)
# NOTE(review): the disabled line above would add the extra zip codes from
# zipcode_geo_2. DataFrame.append no longer exists in pandas 2.x; if this is
# re-enabled, use pd.concat([zipcode_geo, zipcode_geo_2]) instead.
# Renumber the rows 0..n-1 and discard the old index.
zipcode_geo = zipcode_geo.reset_index(drop=True)

In [307]:
# Size of each zip code's nearby-park list.
zipcode_geo['number_nearby_parks'] = zipcode_geo['nearby_parks'].map(len)

In [310]:
# Show the zip code table with its nearby-park lists and counts.
zipcode_geo

Unnamed: 0,index,zipcode,address_city,lat,lng,nearby_parks,number_nearby_parks
0,0.0,21403,"Annapolis,MD",38.9786401,-76.492786,"[ChIJAWkAqNL1t4kRlm4slspOSXo, ChIJ_bk5K1z1t4kR...",43
1,7.0,21401,"Annapolis,MD",38.9786401,-76.492786,"[ChIJAWkAqNL1t4kRlm4slspOSXo, ChIJ_bk5K1z1t4kR...",43
2,10.0,21409,"Annapolis,MD",38.9786401,-76.492786,"[ChIJAWkAqNL1t4kRlm4slspOSXo, ChIJ_bk5K1z1t4kR...",43
3,15.0,21012,"Arnold,MD",39.0320784,-76.5025431,"[ChIJAWkAqNL1t4kRlm4slspOSXo, ChIJ_bk5K1z1t4kR...",45
4,16.0,21122,"Riviera Beach,MD",39.163942,-76.5065982,"[ChIJAWkAqNL1t4kRlm4slspOSXo, ChIJ_bk5K1z1t4kR...",55
...,...,...,...,...,...,...,...
1143,,11694,,40.57741976779511,-73.84673104881774,"[ChIJKw0kXxHGw4kRn0bEGob21as, ChIJKYT7QWbGw4kR...",948
1144,,11375,,40.71939841194835,-73.84789022426492,"[ChIJKw0kXxHGw4kRn0bEGob21as, ChIJKYT7QWbGw4kR...",983
1145,,11435,,40.699611878308914,-73.80841501243012,"[ChIJKw0kXxHGw4kRn0bEGob21as, ChIJKYT7QWbGw4kR...",977
1146,,11422,,40.663506792795644,-73.7353358514247,"[ChIJKw0kXxHGw4kRn0bEGob21as, ChIJKYT7QWbGw4kR...",971


## Combine with iNaturalist info

In [245]:
# Per-park table with iNaturalist data. extract_nearby_zip merges it into the
# park table on id/name/lat/lng/address and reads its `species_count` column.
info_with_inaturalist = pd.read_parquet('inaturalist/park_info_with_inaturalist.parquet')

In [360]:
def maxmin_norm(data):
    """Rescale values linearly so the minimum maps to 0 and the maximum to 1.

    Parameters
    ----------
    data : pandas.Series or numpy.ndarray
        Numeric values to rescale.

    Returns
    -------
    Same kind of object as ``data``
        ``(data - min) / (max - min)``. When all values are equal the range
        is zero; the result is then all zeros instead of the NaNs a 0/0
        division would give, so a constant feature contributes nothing to a
        summed score rather than wiping it out.
    """
    low = data.min()
    span = data.max() - low
    # Scalar zero range: divide by 1 so every value becomes 0.0. A NaN range
    # (empty / all-NaN input) is left alone and propagates as before.
    if np.ndim(span) == 0 and span == 0:
        span = 1
    return (data - low) / span

def extract_nearby_zip(zipcode='19103'):
    """Build the scored table of parks near a zip code.

    Parameters
    ----------
    zipcode : str
        Zip code to look up in ``zipcode_geo``. If it appears in several
        rows, the first one is used.

    Returns
    -------
    pandas.DataFrame
        The rows of ``info_df_wt_fsq_type`` listed in that zip code's
        ``nearby_parks``, left-merged with ``info_with_inaturalist`` and
        restricted to parks that have a ``species_count``. Added columns:
        ``species_count_norm``, ``rating_norm``, ``traffic_norm`` (min-max
        normalised; traffic uses the negated ``pred_pop_residual``, so a
        lower residual scores higher) and ``total_score`` (their sum).

    Raises
    ------
    IndexError
        If ``zipcode`` is not present in ``zipcode_geo``.
    """
    zip_rows = zipcode_geo[zipcode_geo['zipcode'] == zipcode]
    if zip_rows.empty:
        # Same exception type as the bare `.iloc[0]` would raise, but with a
        # message that names the actual problem.
        raise IndexError(f"zipcode {zipcode!r} not found in zipcode_geo")
    park_ids = zip_rows['nearby_parks'].iloc[0]

    df_tmp = info_df_wt_fsq_type[info_df_wt_fsq_type['id'].isin(park_ids)]
    df_tmp = df_tmp.merge(info_with_inaturalist, how='left', on=['id', 'name', 'lat', 'lng', 'address'])
    df_tmp = df_tmp.dropna(subset=['species_count']).reset_index(drop=True)

    # clean up data: cap species counts at 1500 (by value, so it does not
    # depend on row positions matching index labels)
    df_tmp['species_count'] = df_tmp['species_count'].clip(upper=1500)

    # normalize
    df_tmp['species_count_norm'] = maxmin_norm(df_tmp['species_count'])
    df_tmp['rating_norm'] = maxmin_norm(df_tmp['rating'])
    df_tmp['traffic_norm'] = maxmin_norm(-df_tmp['pred_pop_residual'])

    df_tmp['total_score'] = df_tmp['species_count_norm'] + df_tmp['traffic_norm'] + df_tmp['rating_norm']

    return df_tmp

In [366]:
# Example: score the parks near one zip code and show the result.
zipcode = '11581'
df_tmp = extract_nearby_zip(zipcode=zipcode)
df_tmp

Unnamed: 0,id,name,rating,lat,lng,address,address_city,fsq_type_name_simplified,pred_pop_residual,inat_nb_place_id,...,Actinopterygii,Animalia,Amphibia,Mollusca,Protozoa,Chromista,species_count_norm,rating_norm,traffic_norm,total_score
0,ChIJKw0kXxHGw4kRn0bEGob21as,Rutgers Gardens,4.6,40.473419,-74.422737,"112 Log Cabin Rd, North Brunswick Township, NJ...","North Brunswick Township,NJ",Garden,3.612705,132720.0,...,,,47.0,,,,0.588667,0.818182,0.734591,2.141440
1,ChIJKYT7QWbGw4kRhuv9Yv5_oVk,Recreation Park,4.3,40.478957,-74.442670,"New Brunswick, NJ 08901, USA","New Brunswick,NJ",Playground,6.867851,132722.0,...,,,,,,15.0,0.647333,0.545455,0.718256,1.911044
2,ChIJe0JPXDfGw4kR_gaExaQxbPA,Donaldson Park,4.6,40.490365,-74.423984,"526 S 2nd Ave, Highland Park, NJ 08904, USA","Highland Park,NJ",,-16.165788,166154.0,...,,,,4.0,,,0.292000,0.818182,0.833842,1.944024
3,ChIJkdgC5EXGw4kR-KisvEMWhPw,Boyd Park,4.4,40.489994,-74.436514,"New Brunswick, NJ 08904, USA","New Brunswick,NJ",Park,2.653287,132722.0,...,,,,,,15.0,0.647333,0.636364,0.739406,2.023103
4,ChIJK1EV_DDGw4kRiVLbWRb960Y,Donaldson Dog Park,4.5,40.491552,-74.420775,"5 Parkview Terrace, Highland Park, NJ 08904, USA","Highland Park,NJ",Dog Run,-7.288751,166154.0,...,,,,4.0,,,0.292000,0.727273,0.789296,1.808569
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
259,ChIJC7t6zhT2wokR-E-yjtK8cuw,Playground 123,4.6,40.809890,-73.955742,"Park, 402-416 W 123rd St, New York, NY 10027, USA","New York,NY",Playground,-11.216099,162791.0,...,,,10.0,5.0,,,0.322667,0.818182,0.809004,1.949853
260,ChIJdZDsyj33wokRHIqaLkDPbK0,Jackie Robinson Park,4.4,40.827137,-73.940732,"Bradhurst Avenue &, W 151st St, New York, NY 1...","New York,NY",Playground,0.241322,,...,,,,,,,0.014667,0.636364,0.751509,1.402540
261,ChIJ73A_U6z1wokRm2a7OPx29mk,Bill Rainey Park,4.0,40.818866,-73.897439,"Beck Street &, Dawson St, The Bronx, NY 10459,...","The Bronx,NY",Park,18.639203,162752.0,...,,,,,,,0.001333,0.272727,0.659186,0.933247
262,ChIJ6Qg0N_n0wokRctu0TLzFcHw,Concrete Plant Park,4.3,40.826482,-73.885025,"Westchester Ave, The Bronx, NY 10472, USA","The Bronx,NY",Park,-21.127006,125177.0,...,3.0,6.0,,,,,0.089333,0.545455,0.858738,1.493526


In [372]:
# Rank the parks from highest to lowest total score and show the ten best.
df_tmp = df_tmp.sort_values('total_score', ascending=False)
df_tmp.head(10)

Unnamed: 0,id,name,rating,lat,lng,address,address_city,fsq_type_name_simplified,pred_pop_residual,inat_nb_place_id,...,Actinopterygii,Animalia,Amphibia,Mollusca,Protozoa,Chromista,species_count_norm,rating_norm,traffic_norm,total_score
96,ChIJQwRohxBbwokRmHrfAMb3ixc,Prospect Park,4.7,40.660204,-73.968956,"Brooklyn, NY, USA","Brooklyn,NY",Park,-10.486436,,...,,,32.0,,,,1.0,0.909091,0.805343,2.714433
74,ChIJH5q1BzpawokR4ffTuJzssbQ,Brooklyn Bridge Park - Pier 1,4.8,40.702961,-73.994926,"2 Furman St, Brooklyn, NY 11201, USA","Brooklyn,NY",Travel & Transport,-0.300978,144399.0,...,11.0,32.0,,,,,0.951333,1.0,0.754231,2.705564
119,ChIJ32XY7h_2wokRXb54DHKYs00,North Meadow,4.7,40.792136,-73.958285,"102nd St Crossing, New York, NY 10029, USA","New York,NY",Field,-2.413114,49955.0,...,,,,,,,1.0,0.909091,0.76483,2.67392
217,ChIJCS0YquRhwokRO78O2n8USsc,Oakland Lake,4.6,40.758091,-73.760181,"56th Ave, Oakland Gardens, NY 11364, USA","Oakland Gardens,NY",Lake,-22.65812,125376.0,...,12.0,,123.0,,,,0.975333,0.818182,0.866422,2.659937
100,ChIJYfjRMQlbwokRUkgY4QX62_4,Vale of Cashmere,4.7,40.668765,-73.968237,"Prospect Park, Brooklyn, NY 11238, USA","Brooklyn,NY",Garden,4.055384,55174.0,...,,,32.0,,,,1.0,0.909091,0.73237,2.641461
258,ChIJd7iQrJBYwokROrpPbxeQsUc,The Great Lawn,4.7,40.781692,-73.966419,"79th Street & 85th Street, New York, NY 10024,...","New York,NY",Field,7.220463,125404.0,...,,,,,,,1.0,0.909091,0.716487,2.625578
71,ChIJkwX1AjdawokRx9reOccHFr4,Brooklyn Heights Promenade,4.8,40.696194,-73.997504,"Montague St &, Pierrepont Pl, Brooklyn, NY 112...","Brooklyn,NY",Scenic Lookout,-15.7129,64626.0,...,22.0,22.0,,,,,0.784,1.0,0.83157,2.61557
124,ChIJKZJlmqBYwokR8eWdVsifED8,East Meadow,4.6,40.790096,-73.955739,"5th Ave, New York, NY 10029, USA","New York,NY",Field,-5.689636,49955.0,...,,,,,,,1.0,0.818182,0.781272,2.599453
224,ChIJzyx69P9hwokRMP73UTW2hTA,Alley Pond Park,4.6,40.742395,-73.738434,"Union Tpke, Oakland Gardens, NY 11364, USA","Oakland Gardens,NY",Park,-3.74514,,...,,,127.0,,,,1.0,0.818182,0.771514,2.589696
160,ChIJyRUHuHzzwokRPxVBNKhcP9c,New York Botanical Garden,4.7,40.861705,-73.88069,"2900 Southern Blvd, The Bronx, NY 10458, USA","The Bronx,NY",Garden,14.805693,,...,,,43.0,,,,1.0,0.909091,0.678423,2.587514
