In [25]:
# import libraries
%matplotlib inline
import pandas as pd
import numpy as np
import io
import json
import matplotlib.cm as cm
import matplotlib.colors as colors
from sklearn.cluster import KMeans
import folium
from geopy.geocoders import Nominatim
import requests
from pandas.io.json import json_normalize

In [26]:
# download the table
df = pd.read_csv('seattle_neighborhoods.csv')
df.head()

Unnamed: 0,Neighborhood,Latitude,Longitude
0,Belltown,47.6147,-122.3448
1,Broadmoor,47.6255,-122.2904
2,Capitol Hill,47.6253,-122.3222
3,Central District,47.6088,-122.2964
4,Denny-Blaine,47.6215,-122.2865


In [27]:
# get latitude and longitude of Seattle
address = 'Seattle, WA'

geolocator = Nominatim(user_agent="sea_explorer")
location = geolocator.geocode(address)
lat = location.latitude
lng = location.longitude
print('The geograpical coordinates of Seattle are {}, {}.'.format(lat, lng))

The geograpical coordinates of Seattle are 47.6038321, -122.3300624.


In [28]:
map_seattle = folium.Map(location=[lat, lng], zoom_start=10)

# add markers to map
for lat, lng, Neighborhood in zip(df['Latitude'], df['Longitude'], df['Neighborhood']):
    label = '{}'.format(Neighborhood)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_seattle)  
    
map_seattle

In [29]:
# implement foursquare credentials
CLIENT_ID = 'T3TZEZZLU2MUQ12S42VOCX1QAGMJBJJDGRTSY4JYY3JSMNNU' # your Foursquare ID
CLIENT_SECRET = 'LAIPM0VYIGXLD2G4N5VRDG54PQPQSGWJMMUDKT4ICOWNWRBT' # your Foursquare Secret
VERSION = '20191005' # Foursquare API version

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)
LIMIT = 100
radius = 500

Your credentails:
CLIENT_ID: T3TZEZZLU2MUQ12S42VOCX1QAGMJBJJDGRTSY4JYY3JSMNNU
CLIENT_SECRET:LAIPM0VYIGXLD2G4N5VRDG54PQPQSGWJMMUDKT4ICOWNWRBT


In [30]:
# create function for exploring Seattle neighborhoods
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [31]:
# get Seattle venues
seattle_venues = getNearbyVenues(names=df['Neighborhood'],
                                   latitudes=df['Latitude'],
                                   longitudes=df['Longitude']
                                  )

Belltown
Broadmoor
Capitol Hill
Central District
Denny-Blaine
Downtown
Eastlake
First Hill
International District
Leschi
Madison Park
Madison Valley
Madrona
Montlake
Pioneer Square
South Lake Union
Bryant
Hawthorne Hills
Lake City
Laurelhurst
Maple Leaf
Matthews Beach
Northgate
Ravenna
Roosevelt
U-District
View Ridge
Wedgwood
Windermere
Ballard
Blue Ridge/North Beach
Broadview
Crown Hill
Fremont
Green Lake
Greenwood
Haller Lake
Loyal Heights
Magnolia
Phinney Ridge
Queen Anne
Wallingford
Westlake
Whittier Heights
Beacon Hill
Columbia City
Georgetown
Mount Baker
Rainier Beach
South Park
Admiral
Alki
Fauntleroy
Junction


In [32]:
# get seattle venues info
print(seattle_venues.shape)
seattle_venues.head()
seattle_venues.groupby('Neighborhood').count()
print('There are {} unique categories.'.format(len(seattle_venues['Venue Category'].unique())))

(1595, 7)
There are 259 unique categories.


In [33]:
# one hot encoding
seattle_onehot = pd.get_dummies(seattle_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
seattle_onehot['Neighborhood'] = seattle_venues['Neighborhood'] 

# move neighborhood column to the first column
fixed_columns = [seattle_onehot.columns[-1]] + list(seattle_onehot.columns[:-1])
seattle_onehot = seattle_onehot[fixed_columns]

seattle_onehot.head()
seattle_onehot.shape
seattle_grouped = seattle_onehot.groupby('Neighborhood').mean().reset_index()
seattle_grouped
seattle_grouped.shape

(54, 259)

In [10]:
num_top_venues = 5

for hood in seattle_grouped['Neighborhood']:
    print("----"+hood+"----")
    temp = seattle_grouped[seattle_grouped['Neighborhood'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

----Admiral----
            venue  freq
0     Coffee Shop  0.12
1         Theater  0.05
2             Pub  0.05
3  Ice Cream Shop  0.02
4             Spa  0.02


----Alki----
                venue  freq
0         Coffee Shop  0.10
1               Beach  0.06
2                Park  0.06
3  Italian Restaurant  0.06
4         Art Gallery  0.06


----Ballard----
               venue  freq
0               Park  0.14
1  Food & Drink Shop  0.14
2  French Restaurant  0.14
3           Bus Stop  0.14
4      Jewelry Store  0.14


----Beacon Hill----
                venue  freq
0  Light Rail Station  0.33
1  Miscellaneous Shop  0.33
2         Bus Station  0.33
3         Yoga Studio  0.00
4              Office  0.00


----Belltown----
              venue  freq
0               Bar  0.07
1       Coffee Shop  0.05
2  Sushi Restaurant  0.05
3      Cocktail Bar  0.04
4             Hotel  0.04


----Blue Ridge/North Beach----
           venue  freq
0           Café  0.25
1    Bus Station  0.25
2         

             venue  freq
0   Ice Cream Shop  0.07
1              Bar  0.07
2      Coffee Shop  0.07
3  Thai Restaurant  0.07
4     Noodle House  0.03


----Wedgwood----
         venue  freq
0  Coffee Shop  0.17
1   Steakhouse  0.08
2  Video Store  0.08
3          Pub  0.08
4     Pharmacy  0.08


----Westlake----
             venue  freq
0      Coffee Shop  0.11
1   Sandwich Place  0.11
2         Bus Stop  0.11
3  Harbor / Marina  0.11
4     Cocktail Bar  0.06


----Whittier Heights----
           venue  freq
0           Park  0.50
1  Deli / Bodega  0.25
2           Food  0.25
3    Yoga Studio  0.00
4         Museum  0.00


----Windermere----
           venue  freq
0    Pizza Place   1.0
1    Yoga Studio   0.0
2           Park   0.0
3  Movie Theater   0.0
4         Museum   0.0




In [34]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

In [35]:
num_top_venues = 5

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = seattle_grouped['Neighborhood']

for ind in np.arange(seattle_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(seattle_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted.head()

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
0,Admiral,Coffee Shop,Grocery Store,Pub,Gym / Fitness Center,Pizza Place
1,Alki,Park,Beach,Art Gallery,Italian Restaurant,Coffee Shop
2,Ballard,Coffee Shop,Hobby Shop,French Restaurant,Gift Shop,Jewelry Store
3,Beacon Hill,Light Rail Station,Bus Station,Women's Store,Fish & Chips Shop,Fair
4,Belltown,Bar,Coffee Shop,Sushi Restaurant,New American Restaurant,Bakery


In [36]:
# set number of clusters
kclusters = 5

seattle_grouped_clustering = seattle_grouped.drop('Neighborhood', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(seattle_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10] 

# add clustering labels
neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

In [37]:
seattle_merged = df

# merge seattle_grouped with seattle_data to add latitude/longitude for each neighborhood
seattle_merged = seattle_merged.join(neighborhoods_venues_sorted.set_index('Neighborhood'), on='Neighborhood')

seattle_merged # check the last columns!

Unnamed: 0,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
0,Belltown,47.6147,-122.3448,0,Bar,Coffee Shop,Sushi Restaurant,New American Restaurant,Bakery
1,Broadmoor,47.6255,-122.2904,0,French Restaurant,Italian Restaurant,Sushi Restaurant,Women's Store,Hardware Store
2,Capitol Hill,47.6253,-122.3222,0,Cocktail Bar,Coffee Shop,Bar,Restaurant,Italian Restaurant
3,Central District,47.6088,-122.2964,0,Ethiopian Restaurant,Bakery,Asian Restaurant,Café,Grocery Store
4,Denny-Blaine,47.6215,-122.2865,3,Park,Beach,Monument / Landmark,Surf Spot,Fair
5,Downtown,47.605,-122.3344,0,Hotel,Coffee Shop,Seafood Restaurant,New American Restaurant,Cocktail Bar
6,Eastlake,47.6418,-122.3265,0,Sandwich Place,Italian Restaurant,Coffee Shop,Playground,Park
7,First Hill,47.6094,-122.325,0,Sandwich Place,Coffee Shop,Hotel,Bakery,Pizza Place
8,International District,47.5987,-122.324,0,Chinese Restaurant,Vietnamese Restaurant,Café,Japanese Restaurant,Bakery
9,Leschi,47.6003,-122.2928,0,Playground,Coffee Shop,Gym,Pet Store,Grocery Store


In [38]:
# create map
map_clusters = folium.Map(location=[lat, lng], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(seattle_merged['Latitude'], seattle_merged['Longitude'], seattle_merged['Neighborhood'], seattle_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters