In [None]:
# Starting-point code:
# Coursera // IBM Data Science Professional Certificate // Course 9 // Week 3 // "Neighborhoods in New York City"

import pandas as pd
import urllib.request, json

# Boroughs to keep; features belonging to any other borough are skipped.
borough_analyze = ("Manhattan", "Brooklyn")

# Download the neighborhood GeoJSON; every entry of 'features' describes one neighborhood.
with urllib.request.urlopen("https://geo.nyu.edu/download/file/nyu-2451-34572-geojson.json") as url:
    newyork_data = json.load(url)
    neighborhoods_data = newyork_data['features']

# define the dataframe columns
column_names = ['Borough', 'Neighborhood', 'Latitude', 'Longitude']

# Collect one record per neighborhood and build the dataframe in a single step.
# (Appending row by row copies the whole frame on every iteration, and
# DataFrame.append no longer exists in pandas 2.x.)
neighborhood_rows = []
for data in neighborhoods_data:
    borough = data['properties']['borough']
    if borough not in borough_analyze:
        continue

    # The coordinate pair is stored as [longitude, latitude].
    neighborhood_latlon = data['geometry']['coordinates']
    neighborhood_rows.append({'Borough': borough,
                              'Neighborhood': data['properties']['name'],
                              'Latitude': neighborhood_latlon[1],
                              'Longitude': neighborhood_latlon[0]})

# Passing `columns` keeps the expected schema even when no feature matched.
neighborhoods = pd.DataFrame(neighborhood_rows, columns=column_names)

neighborhood_count_manhattan = int((neighborhoods['Borough'] == "Manhattan").sum())
print(f"Manhattan has {neighborhood_count_manhattan} neighborhoods")

neighborhood_count_brooklyn = int((neighborhoods['Borough'] == "Brooklyn").sum())
print(f"Brooklyn has {neighborhood_count_brooklyn} neighborhoods")


In [None]:
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

# Free-text addresses handed to the geocoder, one per analysed borough.
address_Manhattan = 'Manhattan, NY'
address_Brooklyn = 'Brooklyn, NY'

geolocator = Nominatim(user_agent="ny_explorer")
location_Manattan = geolocator.geocode(address_Manhattan)
location_Brooklyn = geolocator.geocode(address_Brooklyn)

# geocode() yields None when the service finds no match; stop here with a
# readable message instead of an AttributeError on `.latitude` below.
for address, location in ((address_Manhattan, location_Manattan),
                          (address_Brooklyn, location_Brooklyn)):
    if location is None:
        raise ValueError(f"Could not geocode address {address!r}")

# Use the midpoint between the two boroughs as the map centre.
latitude = (location_Manattan.latitude + location_Brooklyn.latitude)/2
longitude = (location_Manattan.longitude + location_Brooklyn.longitude)/2
print('The geograpical coordinate of Manhattan/Brooklyn are {}, {}.'.format(latitude, longitude))



In [None]:
import folium

# Base map centred on the latitude/longitude computed for the two boroughs.
map_newyork = folium.Map(location=[latitude, longitude], zoom_start=10.5)

# Draw one blue circle per neighborhood; its popup reads "<neighborhood>, <borough>".
for place in neighborhoods.itertuples(index=False):
    popup = folium.Popup(f'{place.Neighborhood}, {place.Borough}', parse_html=True)
    marker = folium.CircleMarker(
        [place.Latitude, place.Longitude],
        radius=5,
        popup=popup,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False)
    marker.add_to(map_newyork)

# Last expression of the cell, so the notebook renders the map.
map_newyork

In [None]:
import json, requests

# CBJ1TCAYGC2K43HEI5CBX5WF5IYQU41YCQFXYUJSDRV34JTW
# XQOH02ZW2L2KW04WAEXXU2U5FLCM4QDH5NSQIHKA1QZBFQEX

# B0LPCPNQLDCNHSV3KLKMLY0XMKD0NGH40ZJZM2DSM3FNHBTC
# 2SJFKJTXBG4D21XFM5S2JP5IZRMV0LVOQ3UZRP0YLDWVHJW3
def getNearbyVenues(neighborhoods, category_id, radius=1000, limit=200,
                    client_id=None, client_secret=None):
    """Collect steakhouse and vegetarian/vegan venues around each neighborhood.

    For every row of ``neighborhoods`` one request is sent to the Foursquare
    ``venues/explore`` endpoint, and the venues whose first category is
    "Steakhouse" or "Vegetarian / Vegan Restaurant" are kept.

    Parameters
    ----------
    neighborhoods : pandas.DataFrame
        Must provide the columns ``Borough``, ``Neighborhood``, ``Latitude``
        and ``Longitude``.
    category_id : str
        Value sent as the ``categoryId`` query parameter.
    radius : int, default 1000
        Value sent as the ``radius`` query parameter.
    limit : int, default 200
        Value sent as the ``limit`` query parameter.
    client_id, client_secret : str, optional
        API credentials. When omitted they are read from the environment
        variables ``FOURSQUARE_CLIENT_ID`` / ``FOURSQUARE_CLIENT_SECRET``.

    Returns
    -------
    pandas.DataFrame
        One row per kept venue with the columns ``Borough``, ``Neighborhood``,
        ``Neighborhood Latitude``, ``Neighborhood Longitude``, ``Venue``,
        ``Venue Latitude``, ``Venue Longitude`` and ``Venue Category``.
        The frame has these columns even when no venue was found.

    Raises
    ------
    requests.HTTPError
        If the API answers with an error status.
    """
    import os  # local import: the cell-level import line only brings in json and requests

    # NOTE(security): the literal fallbacks are the credentials this notebook
    # used to embed directly in the request URL. They are kept only so existing
    # runs keep working; rotate them and provide fresh ones through the
    # arguments or the environment variables instead.
    client_id = client_id or os.environ.get(
        'FOURSQUARE_CLIENT_ID', '0IYK3MFUCQLTP4W4Q3E1ESKOQJ4SVVADK1XTAUCBLPUXXSTG')
    client_secret = client_secret or os.environ.get(
        'FOURSQUARE_CLIENT_SECRET', '0XGXDKOSWKS1HLRB5PIFTGYRFJLFRN2OLD4NV2FSSCCOM21I')

    explore_url = 'https://api.foursquare.com/v2/venues/explore'
    wanted_categories = ("Vegetarian / Vegan Restaurant", "Steakhouse")
    result_columns = [
        'Borough',
        'Neighborhood',
        'Neighborhood Latitude',
        'Neighborhood Longitude',
        'Venue',
        'Venue Latitude',
        'Venue Longitude',
        'Venue Category']

    venues_list = []
    for row in neighborhoods.itertuples():
        # Let requests build and escape the query string.
        query = {
            'client_id': client_id,
            'client_secret': client_secret,
            'v': '20180323',
            'll': '{},{}'.format(row.Latitude, row.Longitude),
            'radius': radius,
            'limit': limit,
            'categoryId': category_id,
        }

        # make the GET request; fail loudly on HTTP errors instead of with a
        # KeyError while digging through an error payload
        reply = requests.get(explore_url, params=query, timeout=30)
        reply.raise_for_status()
        groups = reply.json()["response"].get('groups') or []
        results = groups[0]['items'] if groups else []

        if not results:
            print(row.Borough, "//", row.Neighborhood, "has 0 result")
            continue

        print(row.Borough, "//", row.Neighborhood, "has", len(results), "result")
        for v in results:
            # return only relevant information for each nearby venue
            try:
                venue = v['venue']
                category = venue['categories'][0]['name']
                if category not in wanted_categories:
                    continue
                venues_list.append((
                    row.Borough,
                    row.Neighborhood,
                    row.Latitude,
                    row.Longitude,
                    venue['name'],
                    venue['location']['lat'],
                    venue['location']['lng'],
                    category))
            except (KeyError, IndexError, TypeError):
                # Venue entry without the expected fields (e.g. no category): skip it.
                continue

    # Naming the columns at construction time also works for an empty list,
    # whereas assigning `.columns` afterwards raises on a 0-column frame.
    return pd.DataFrame(venues_list, columns=result_columns)

# Category ids passed to the venue search (steakhouse and vegetarian/vegan, going by the names).
category_steak_house = "4bf58dd8d48988d1cc941735"
category_veg = "4bf58dd8d48988d1d3941735"

# Both ids travel in one comma-separated string.
venues = getNearbyVenues(
    neighborhoods,
    ",".join((category_steak_house, category_veg)),
)



In [None]:
# Preview the first rows of the venues table returned by getNearbyVenues.
display(venues.head())

In [None]:
import numpy as np

# A neighborhood is kept when it has at least this many venues of one tracked category.
min_count = 5
tracked_categories = ["Steakhouse", "Vegetarian / Vegan Restaurant"]

# Number of venues per neighborhood (rows) and venue category (columns).
# crosstab counts the rows directly, so no value column or aggregation
# function is needed and missing combinations come out as 0.
venues_grouped = pd.crosstab(venues["Neighborhood"], venues["Venue Category"])

# Make sure both tracked categories exist as columns even if the search
# returned no venue of one kind; otherwise the filter below raises KeyError.
for category in tracked_categories:
    if category not in venues_grouped.columns:
        venues_grouped[category] = 0

enough_venues = (
    (venues_grouped["Steakhouse"] >= min_count)
    | (venues_grouped["Vegetarian / Vegan Restaurant"] >= min_count)
)
venues_grouped_filtered = venues_grouped[enough_venues]

print("Before applying min_count =", min_count, " there are", len(venues_grouped.index), "neighborhoods")
print("Before applying min_count =", min_count, " there are", len(venues.index), "venues")
display(venues_grouped.head().reset_index())

# Restrict the venue list to the neighborhoods that passed the filter.
neighborhoods_kept = venues_grouped_filtered.index
venues_filtered = venues[venues["Neighborhood"].isin(neighborhoods_kept)]

print("After applying min_count =", min_count, " there are", len(venues_grouped_filtered.index), "neighborhoods")
print("After applying min_count =", min_count, " there are", len(venues_filtered.index), "venues")

# One indicator column per venue category, one row per venue.
onehot = pd.get_dummies(venues_filtered[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
onehot['Neighborhood'] = venues_filtered['Neighborhood']

# move neighborhood column to the first column
fixed_columns = [onehot.columns[-1]] + list(onehot.columns[:-1])
onehot = onehot[fixed_columns]

# Averaging the indicators gives each category's share of a neighborhood's venues.
venues_filtered_grouped = onehot.groupby('Neighborhood').mean().reset_index()

display(venues_filtered_grouped.head())

In [None]:
import folium
import matplotlib.cm as cm
import matplotlib.colors as colors

map_clusters = folium.Map(location=[latitude, longitude], zoom_start=10.5)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(venues_filtered_merged['Venue Latitude'], venues_filtered_merged['Venue Longitude'], venues_filtered_merged['Neighborhood'], venues_filtered_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)

map_clusters

In [None]:
from sklearn.cluster import KMeans

kclusters = 3

venues_filtered_grouped_clustering = venues_filtered_grouped.drop('Neighborhood', 1)
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(venues_filtered_grouped_clustering)   # run k-means clustering
venues_filtered_grouped.insert(0, 'Cluster Labels', kmeans.labels_)   # add clustering labels

venues_filtered_merged = venues_filtered

# merge cluster label with list of venues 
venues_filtered_merged = venues_filtered_merged.join(venues_filtered_grouped.set_index('Neighborhood'), on='Neighborhood')
venues_filtered_merged.sort_values('Cluster Labels', inplace=True)
venues_filtered_merged.reset_index(inplace=True, drop=True)

# translate meaning of cluster labels
venues_filtered_grouped = venues_filtered_grouped.sort_values("Cluster Labels").reset_index(drop=True)
cluster_groups = venues_filtered_grouped.groupby('Cluster Labels').mean().reset_index()
display(cluster_groups)
display(venues_filtered_grouped)