In [1]:
import pandas as pd
import numpy as np

In [2]:
MO_zip_codes= "https://public.opendatasoft.com/explore/dataset/us-zip-code-latitude-and-longitude/download/?format=csv&refine.state=MO&timezone=America/Chicago&lang=en&use_labels_for_header=true&csv_separator=%3B"
df_MO=pd.read_csv(MO_zip_codes, sep=';')


In [3]:
df_MO.head()

Unnamed: 0,Zip,City,State,Latitude,Longitude,Timezone,Daylight savings time flag,geopoint
0,64761,Leeton,MO,38.585719,-93.68518,-6,1,"38.585719,-93.68518"
1,65024,Chamois,MO,38.633533,-91.78052,-6,1,"38.633533,-91.78052"
2,63560,Pollock,MO,40.367584,-93.11823,-6,1,"40.367584,-93.11823"
3,63447,La Belle,MO,40.109412,-91.91078,-6,1,"40.109412,-91.91078"
4,63178,Saint Louis,MO,38.6531,-90.243462,-6,1,"38.6531,-90.243462"


## Filter the data with City=Saint Louis

In [4]:
df_SL=df_MO[(df_MO.City=="Saint Louis")]

In [5]:
df_SL.head()

Unnamed: 0,Zip,City,State,Latitude,Longitude,Timezone,Daylight savings time flag,geopoint
4,63178,Saint Louis,MO,38.6531,-90.243462,-6,1,"38.6531,-90.243462"
7,63113,Saint Louis,MO,38.656701,-90.24397,-6,1,"38.656701,-90.24397"
8,63134,Saint Louis,MO,38.738217,-90.33904,-6,1,"38.738217,-90.33904"
32,63144,Saint Louis,MO,38.619152,-90.34964,-6,1,"38.619152,-90.34964"
46,63197,Saint Louis,MO,38.6531,-90.243462,-6,1,"38.6531,-90.243462"


## Drop unwanted columns

In [6]:
cols = ['Timezone','Daylight savings time flag', 'geopoint']
df_SL = df_SL.drop(cols, axis=1)
df_SL.head()

Unnamed: 0,Zip,City,State,Latitude,Longitude
4,63178,Saint Louis,MO,38.6531,-90.243462
7,63113,Saint Louis,MO,38.656701,-90.24397
8,63134,Saint Louis,MO,38.738217,-90.33904
32,63144,Saint Louis,MO,38.619152,-90.34964
46,63197,Saint Louis,MO,38.6531,-90.243462


In [7]:
df_SL.shape
## Our final dataframe has 71 samples and 5 columns or fields

(71, 5)

### Import libraries to visualise the Saint Louis City locations on a map

In [8]:
import json # library to handle JSON files

!conda install -c conda-forge geopy --yes 
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

import requests # library to handle requests
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans

!conda install -c conda-forge folium=0.5.0 --yes 
import folium # map rendering library

Collecting package metadata (current_repodata.json): done
Solving environment: done


  current version: 4.8.3
  latest version: 4.8.4

Please update conda by running

    $ conda update -n base -c defaults conda



## Package Plan ##

  environment location: /home/jupyterlab/conda/envs/python

  added / updated specs:
    - geopy


The following packages will be downloaded:

    package                    |            build
    ---------------------------|-----------------
    geographiclib-1.50         |             py_0          34 KB  conda-forge
    geopy-2.0.0                |     pyh9f0ad1d_0          63 KB  conda-forge
    openssl-1.1.1g             |       h516909a_1         2.1 MB  conda-forge
    ------------------------------------------------------------
                                           Total:         2.2 MB

The following NEW packages will be INSTALLED:

  geographiclib      conda-forge/noarch::geographiclib-1.50-py_0
  geopy              conda-forge/noarch::geo

### Find the geographical coordinates of our target city of Saint Louis

In [10]:
address = 'Saint Louis, MO'

geolocator = Nominatim(user_agent="ny_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Saint Louis City are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Saint Louis City are 38.6268039, -90.1994097.


In [11]:
# Finding the datatypes of our features
df_SL.dtypes

Zip            int64
City          object
State         object
Latitude     float64
Longitude    float64
dtype: object

In [12]:
#df_SL[['Zip', 'Latitude', 'Longitude']] = df_SL[['Zip', 'Latitude', 'Longitude']].apply(pd.to_numeric) 
#df_SL.dtypes

In [13]:
df_SL.shape

(71, 5)

### Map of Saint Louis City

In [14]:
# create map of Saint Louis using latitude and longitude values
map_SL = folium.Map(location=[latitude, longitude], zoom_start=11)

# add markers to map
for lat, lng, label in zip(df_SL['Latitude'], df_SL['Longitude'], df_SL['Zip']):
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_SL)  
    
map_SL

### FourSquare credentials

In [18]:
CLIENT_ID = 'LIQHFZZVREGLWEHRS45FYKUXTTYXAXBI1GBEAWJN3AQERBDU' # your Foursquare ID
CLIENT_SECRET = '50B11GFYZ45GOHPN04GF0TUND0L352T5BI3NSW0R33GA11CZ'
VERSION = '20180605' # Foursquare API version

print('Kalpana Joshi')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Kalpana Joshi
CLIENT_ID: LIQHFZZVREGLWEHRS45FYKUXTTYXAXBI1GBEAWJN3AQERBDU
CLIENT_SECRET:50B11GFYZ45GOHPN04GF0TUND0L352T5BI3NSW0R33GA11CZ


### Let's Analyse the first zip code in our dataframe

In [22]:
neighborhood_latitude = df_SL.iloc[0,3] # neighborhood latitude value
neighborhood_longitude = df_SL.iloc[0,4] # neighborhood longitude value

neighborhood_name = df_SL.iloc[0,0] # neighborhood name

print('Latitude and longitude values of {} are {}, {}.'.format(neighborhood_name, 
                                                               neighborhood_latitude, 
                                                               neighborhood_longitude))

Latitude and longitude values of 63178 are 38.6531, -90.243462.


### Create a url for the API call to Foursquare to extract upto a 100 venues in a radius of 500km of the first Zip

In [23]:
radius=500
LIMIT=100

url= 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
    CLIENT_ID, 
    CLIENT_SECRET, 
    VERSION, 
    neighborhood_latitude, 
    neighborhood_longitude, 
    radius, 
    LIMIT)
url

'https://api.foursquare.com/v2/venues/explore?&client_id=LIQHFZZVREGLWEHRS45FYKUXTTYXAXBI1GBEAWJN3AQERBDU&client_secret=50B11GFYZ45GOHPN04GF0TUND0L352T5BI3NSW0R33GA11CZ&v=20180605&ll=38.6531,-90.243462&radius=500&limit=100'

### Make the API call to get all the possible venues in the surroundings of the first Zip

In [24]:
results = requests.get(url).json()
results

{'meta': {'code': 200, 'requestId': '5f495d07eaf9a13289100c22'},
 'response': {'headerLocation': 'St Louis',
  'headerFullLocation': 'St Louis',
  'headerLocationGranularity': 'city',
  'totalResults': 4,
  'suggestedBounds': {'ne': {'lat': 38.657600004500004,
    'lng': -90.23771048093934},
   'sw': {'lat': 38.6485999955, 'lng': -90.24921351906065}},
  'groups': [{'type': 'Recommended Places',
    'name': 'recommended',
    'items': [{'reasons': {'count': 0,
       'items': [{'summary': 'This spot is popular',
         'type': 'general',
         'reasonName': 'globalInteractionReason'}]},
      'venue': {'id': '4cd49c1289eb6dcb3352231e',
       'name': 'Harlem Tap Room',
       'location': {'lat': 38.65380422385766,
        'lng': -90.23861125389722,
        'labeledLatLngs': [{'label': 'display',
          'lat': 38.65380422385766,
          'lng': -90.23861125389722}],
        'distance': 428,
        'postalCode': '63113',
        'cc': 'US',
        'city': 'St Louis',
        's

In [25]:
#def get_category_type1(row):
 # categories_list = row['venue.categories']
  #return categories_list[0]['name']

### Clean the json and structure it into a pandas dataframe.

In [26]:
venues = results['response']['groups'][0]['items']
    
nearby_venues = json_normalize(venues) # flatten JSON

# filter columns
filtered_columns = ['venue.name', 'venue.categories', 'venue.location.lat', 'venue.location.lng']
nearby_venues =nearby_venues.loc[:, filtered_columns]

nearby_venues.head()

  This is separate from the ipykernel package so we can avoid doing imports until


Unnamed: 0,venue.name,venue.categories,venue.location.lat,venue.location.lng
0,Harlem Tap Room,"[{'id': '4bf58dd8d48988d116941735', 'name': 'B...",38.653804,-90.238611
1,Family Dollar,"[{'id': '52dea92d3cf9994f4e043dbb', 'name': 'D...",38.655942,-90.244438
2,Martin's Market,"[{'id': '4bf58dd8d48988d118951735', 'name': 'G...",38.655365,-90.243515
3,Brothers Diner,"[{'id': '4bf58dd8d48988d147941735', 'name': 'D...",38.653927,-90.238559


In [27]:
# We can observe the details of the Fist venue in the above dataframe
nearby_venues1=nearby_venues.iloc[0,1]
nearby_venues1

[{'id': '4bf58dd8d48988d116941735',
  'name': 'Bar',
  'pluralName': 'Bars',
  'shortName': 'Bar',
  'icon': {'prefix': 'https://ss3.4sqi.net/img/categories_v2/nightlife/pub_',
   'suffix': '.png'},
  'primary': True}]

In [28]:
# clean columns
nearby_venues.columns = [col.split(".")[-1] for col in nearby_venues.columns]

nearby_venues.head()

Unnamed: 0,name,categories,lat,lng
0,Harlem Tap Room,"[{'id': '4bf58dd8d48988d116941735', 'name': 'B...",38.653804,-90.238611
1,Family Dollar,"[{'id': '52dea92d3cf9994f4e043dbb', 'name': 'D...",38.655942,-90.244438
2,Martin's Market,"[{'id': '4bf58dd8d48988d118951735', 'name': 'G...",38.655365,-90.243515
3,Brothers Diner,"[{'id': '4bf58dd8d48988d147941735', 'name': 'D...",38.653927,-90.238559


### Let us create a function to find nearby venues of all the Zip codes across Saint Louis

In [29]:
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

### Now, let's create a dataframe with the nearby venues for all the zip codes in Saint Louis by calling the above function

In [30]:
SaintLouis_venues= getNearbyVenues(names=df_SL['Zip'],
                                   latitudes=df_SL['Latitude'],
                                   longitudes=df_SL['Longitude']
                                  )

63178
63113
63134
63144
63197
63123
63179
63111
63151
63182
63195
63147
63118
63198
63120
63150
63106
63109
63132
63126
63116
63115
63196
63177
63188
63101
63102
63164
63157
63137
63119
63127
63145
63107
63169
63146
63125
63129
63122
63171
63160
63135
63114
63140
63104
63166
63167
63117
63141
63155
63112
63139
63108
63103
63124
63133
63180
63131
63121
63136
63156
63143
63130
63153
63138
63105
63110
63158
63163
63128
63199


### We can observe the first few rows of our dataframe with Saint Louis Venues across different Zip codes

In [31]:
SaintLouis_venues.head()

Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,63178,38.6531,-90.243462,Harlem Tap Room,38.653804,-90.238611,Bar
1,63178,38.6531,-90.243462,Family Dollar,38.655942,-90.244438,Discount Store
2,63178,38.6531,-90.243462,Martin's Market,38.655365,-90.243515,Grocery Store
3,63178,38.6531,-90.243462,Brothers Diner,38.653927,-90.238559,Diner
4,63113,38.656701,-90.24397,Family Dollar,38.655942,-90.244438,Discount Store


In [32]:
SaintLouis_venues.shape

(566, 7)

#### We can see that there are a total of 566 venues across different Zip codes. Now, we can group our dataframe by the Zip codes to see how many venues were extracted across each Zip

In [33]:
SaintLouis_venues.groupby('Neighborhood').count()

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
63101,85,85,85,85,85,85
63102,25,25,25,25,25,25
63103,26,26,26,26,26,26
63104,19,19,19,19,19,19
63105,14,14,14,14,14,14
...,...,...,...,...,...,...
63195,4,4,4,4,4,4
63196,4,4,4,4,4,4
63197,4,4,4,4,4,4
63198,3,3,3,3,3,3


### Unique categories of venues across all the zip codes are as follows:

In [34]:
print('There are {} uniques categories.'.format(len(SaintLouis_venues['Venue Category'].unique())))

There are 164 uniques categories.


### Analyse each neighborhood by using one hot coding
#### One hot coding is essential in performing the Clustering exercise which will help us identify the different types of venues across different Zips

In [39]:
# one hot encoding
SL_onehot = pd.get_dummies(SaintLouis_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
SL_onehot['Neighborhood'] = SaintLouis_venues['Neighborhood'] 

# move neighborhood column to the first column
fixed_columns = [SL_onehot.columns[-1]] + list(SL_onehot.columns[:-1])
SL_onehot = SL_onehot[fixed_columns]

SL_onehot.head()

Unnamed: 0,Neighborhood,ATM,Accessories Store,Advertising Agency,American Restaurant,Antique Shop,Arcade,Art Gallery,Art Museum,Arts & Crafts Store,...,Thrift / Vintage Store,Train Station,Vegetarian / Vegan Restaurant,Video Store,Volleyball Court,Whisky Bar,Wine Bar,Wine Shop,Yoga Studio,Zoo
0,63178,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,63178,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,63178,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,63178,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,63113,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [40]:
# New Dataframe size
SL_onehot.shape

(566, 165)

### grouping rows by mean of frequency of each category

In [41]:
SL_grouped = SL_onehot.groupby('Neighborhood').mean().reset_index()
SL_grouped

Unnamed: 0,Neighborhood,ATM,Accessories Store,Advertising Agency,American Restaurant,Antique Shop,Arcade,Art Gallery,Art Museum,Arts & Crafts Store,...,Thrift / Vintage Store,Train Station,Vegetarian / Vegan Restaurant,Video Store,Volleyball Court,Whisky Bar,Wine Bar,Wine Shop,Yoga Studio,Zoo
0,63101,0.011765,0.0,0.011765,0.011765,0.0,0.000000,0.000000,0.011765,0.0,...,0.0,0.0,0.000000,0.011765,0.011765,0.0,0.011765,0.000000,0.0,0.0
1,63102,0.000000,0.0,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.0,...,0.0,0.0,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.0,0.0
2,63103,0.000000,0.0,0.000000,0.076923,0.0,0.000000,0.038462,0.000000,0.0,...,0.0,0.0,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.0,0.0
3,63104,0.000000,0.0,0.000000,0.000000,0.0,0.000000,0.052632,0.000000,0.0,...,0.0,0.0,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.0,0.0
4,63105,0.000000,0.0,0.000000,0.071429,0.0,0.071429,0.000000,0.000000,0.0,...,0.0,0.0,0.071429,0.000000,0.000000,0.0,0.000000,0.071429,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
64,63195,0.000000,0.0,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.0,...,0.0,0.0,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.0,0.0
65,63196,0.000000,0.0,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.0,...,0.0,0.0,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.0,0.0
66,63197,0.000000,0.0,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.0,...,0.0,0.0,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.0,0.0
67,63198,0.000000,0.0,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.0,...,0.0,0.0,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.0,0.0


### Let's analyse each Neighborhood/Zip with top 5 venues

In [42]:
num_top_venues = 5

for hood in SL_grouped['Neighborhood']:
    print("----",hood, "----")
    temp = SL_grouped[SL_grouped['Neighborhood'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

---- 63101 ----
                venue  freq
0      Sandwich Place  0.07
1                 Bar  0.06
2               Hotel  0.06
3         Coffee Shop  0.05
4  Italian Restaurant  0.04


---- 63102 ----
                venue  freq
0               Hotel  0.20
1          Steakhouse  0.12
2                 Bar  0.08
3              Casino  0.08
4  Italian Restaurant  0.08


---- 63103 ----
                 venue  freq
0           Food Truck  0.23
1         Intersection  0.12
2  American Restaurant  0.08
3                Hotel  0.08
4              Brewery  0.04


---- 63104 ----
                venue  freq
0        Intersection  0.21
1  Chinese Restaurant  0.11
2      Ice Cream Shop  0.05
3         Supermarket  0.05
4         Gas Station  0.05


---- 63105 ----
                           venue  freq
0                  Deli / Bodega  0.07
1  Vegetarian / Vegan Restaurant  0.07
2                      Hotel Bar  0.07
3                          Hotel  0.07
4                            Gym  0.07


### Let's create a dataframe of the top 10 venues across each of the Neighborhoods or Zips

In [43]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

In [85]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = SL_grouped['Neighborhood']

for ind in np.arange(SL_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(SL_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted.head()

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,63101,Sandwich Place,Hotel,Bar,Coffee Shop,Café,Italian Restaurant,Mobile Phone Shop,Mediterranean Restaurant,Salon / Barbershop,Pizza Place
1,63102,Hotel,Steakhouse,Casino,Bar,Restaurant,Italian Restaurant,Athletics & Sports,Spa,Football Stadium,Sports Club
2,63103,Food Truck,Intersection,Hotel,American Restaurant,Pub,Automotive Shop,Brewery,Soccer Stadium,Sandwich Place,Fast Food Restaurant
3,63104,Intersection,Chinese Restaurant,Pub,Gas Station,Print Shop,Pharmacy,Pet Service,Fast Food Restaurant,Brewery,Steakhouse
4,63105,Hotel Bar,Arcade,Seafood Restaurant,Steakhouse,Bar,Bakery,Automotive Shop,Italian Restaurant,Deli / Bodega,Hotel


### Cluster Neighborhoods using K Means

In [86]:
# import k-means from clustering stage
from sklearn.cluster import KMeans
# set number of clusters
kclusters = 5

SL_grouped_clustering = SL_grouped.drop('Neighborhood', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(SL_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_

array([1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 3, 1, 1, 1, 1, 1, 4, 1, 4, 1, 1, 1, 1, 0, 1, 1, 1, 2, 1, 1,
       0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 2, 0], dtype=int32)

In [87]:
# add clustering labels
neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

SL_merged = df_SL

In [88]:
neighborhoods_venues_sorted.head()

Unnamed: 0,Cluster Labels,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,1,63101,Sandwich Place,Hotel,Bar,Coffee Shop,Café,Italian Restaurant,Mobile Phone Shop,Mediterranean Restaurant,Salon / Barbershop,Pizza Place
1,1,63102,Hotel,Steakhouse,Casino,Bar,Restaurant,Italian Restaurant,Athletics & Sports,Spa,Football Stadium,Sports Club
2,1,63103,Food Truck,Intersection,Hotel,American Restaurant,Pub,Automotive Shop,Brewery,Soccer Stadium,Sandwich Place,Fast Food Restaurant
3,1,63104,Intersection,Chinese Restaurant,Pub,Gas Station,Print Shop,Pharmacy,Pet Service,Fast Food Restaurant,Brewery,Steakhouse
4,1,63105,Hotel Bar,Arcade,Seafood Restaurant,Steakhouse,Bar,Bakery,Automotive Shop,Italian Restaurant,Deli / Bodega,Hotel


In [89]:
neighborhoods_venues_sorted.dtypes

Cluster Labels             int32
Neighborhood               int64
1st Most Common Venue     object
2nd Most Common Venue     object
3rd Most Common Venue     object
4th Most Common Venue     object
5th Most Common Venue     object
6th Most Common Venue     object
7th Most Common Venue     object
8th Most Common Venue     object
9th Most Common Venue     object
10th Most Common Venue    object
dtype: object

In [90]:
neighborhoods_venues_sorted.rename(columns = {'Neighborhood':'Zip'}, inplace = True) 

In [91]:
neighborhoods_venues_sorted.head()

Unnamed: 0,Cluster Labels,Zip,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,1,63101,Sandwich Place,Hotel,Bar,Coffee Shop,Café,Italian Restaurant,Mobile Phone Shop,Mediterranean Restaurant,Salon / Barbershop,Pizza Place
1,1,63102,Hotel,Steakhouse,Casino,Bar,Restaurant,Italian Restaurant,Athletics & Sports,Spa,Football Stadium,Sports Club
2,1,63103,Food Truck,Intersection,Hotel,American Restaurant,Pub,Automotive Shop,Brewery,Soccer Stadium,Sandwich Place,Fast Food Restaurant
3,1,63104,Intersection,Chinese Restaurant,Pub,Gas Station,Print Shop,Pharmacy,Pet Service,Fast Food Restaurant,Brewery,Steakhouse
4,1,63105,Hotel Bar,Arcade,Seafood Restaurant,Steakhouse,Bar,Bakery,Automotive Shop,Italian Restaurant,Deli / Bodega,Hotel


In [92]:
# merge SL_grouped with SL_data to add latitude/longitude for each neighborhood
SL_merged1= pd.merge(SL_merged, neighborhoods_venues_sorted, on='Zip', how='right')
SL_merged1.head() # check the last columns!

Unnamed: 0,Zip,City,State,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,63178,Saint Louis,MO,38.6531,-90.243462,0,Diner,Discount Store,Grocery Store,Bar,Zoo,Farm,Food,Flower Shop,Flea Market,Fish & Chips Shop
1,63113,Saint Louis,MO,38.656701,-90.24397,0,Discount Store,Grocery Store,Zoo,Factory,Food,Flower Shop,Flea Market,Fish & Chips Shop,Fast Food Restaurant,Farmers Market
2,63144,Saint Louis,MO,38.619152,-90.34964,1,Rental Car Location,Coffee Shop,Italian Restaurant,Pharmacy,Lawyer,Chinese Restaurant,Donut Shop,Salon / Barbershop,Bank,Zoo
3,63197,Saint Louis,MO,38.6531,-90.243462,0,Diner,Discount Store,Grocery Store,Bar,Zoo,Farm,Food,Flower Shop,Flea Market,Fish & Chips Shop
4,63123,Saint Louis,MO,38.549452,-90.32525,1,Lounge,Ice Cream Shop,Clothing Store,River,Scenic Lookout,Zoo,Flea Market,Fish & Chips Shop,Fast Food Restaurant,Farmers Market


In [93]:
SL_merged1.shape


(69, 16)

In [94]:
SL_merged1.dtypes

Zip                         int64
City                       object
State                      object
Latitude                  float64
Longitude                 float64
Cluster Labels              int32
1st Most Common Venue      object
2nd Most Common Venue      object
3rd Most Common Venue      object
4th Most Common Venue      object
5th Most Common Venue      object
6th Most Common Venue      object
7th Most Common Venue      object
8th Most Common Venue      object
9th Most Common Venue      object
10th Most Common Venue     object
dtype: object

### Visualising clusters

In [95]:
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]



In [96]:
# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(SL_merged1['Latitude'], SL_merged1['Longitude'], SL_merged1['Zip'], SL_merged1['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
map_clusters

### Cluster 1

In [97]:
SL_merged1.loc[SL_merged1['Cluster Labels'] == 0, SL_merged1.columns[[0] + list(range(5, SL_merged1.shape[1]))]]

Unnamed: 0,Zip,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,63178,0,Diner,Discount Store,Grocery Store,Bar,Zoo,Farm,Food,Flower Shop,Flea Market,Fish & Chips Shop
1,63113,0,Discount Store,Grocery Store,Zoo,Factory,Food,Flower Shop,Flea Market,Fish & Chips Shop,Fast Food Restaurant,Farmers Market
3,63197,0,Diner,Discount Store,Grocery Store,Bar,Zoo,Farm,Food,Flower Shop,Flea Market,Fish & Chips Shop
5,63179,0,Diner,Discount Store,Grocery Store,Bar,Zoo,Farm,Food,Flower Shop,Flea Market,Fish & Chips Shop
6,63111,0,Bar,Café,Museum,Dive Bar,Zoo,Food,Flower Shop,Flea Market,Fish & Chips Shop,Fast Food Restaurant
8,63182,0,Diner,Discount Store,Grocery Store,Bar,Zoo,Farm,Food,Flower Shop,Flea Market,Fish & Chips Shop
9,63195,0,Diner,Discount Store,Grocery Store,Bar,Zoo,Farm,Food,Flower Shop,Flea Market,Fish & Chips Shop
14,63150,0,Diner,Discount Store,Grocery Store,Bar,Zoo,Farm,Food,Flower Shop,Flea Market,Fish & Chips Shop
15,63106,0,Bar,Zoo,Farm,Food & Drink Shop,Food,Flower Shop,Flea Market,Fish & Chips Shop,Fast Food Restaurant,Farmers Market
21,63196,0,Diner,Discount Store,Grocery Store,Bar,Zoo,Farm,Food,Flower Shop,Flea Market,Fish & Chips Shop


### Cluster 2

In [98]:
SL_merged1.loc[SL_merged1['Cluster Labels'] == 1, SL_merged1.columns[[0] + list(range(5, SL_merged1.shape[1]))]]

Unnamed: 0,Zip,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
2,63144,1,Rental Car Location,Coffee Shop,Italian Restaurant,Pharmacy,Lawyer,Chinese Restaurant,Donut Shop,Salon / Barbershop,Bank,Zoo
4,63123,1,Lounge,Ice Cream Shop,Clothing Store,River,Scenic Lookout,Zoo,Flea Market,Fish & Chips Shop,Fast Food Restaurant,Farmers Market
10,63147,1,Wine Bar,Zoo,Eastern European Restaurant,Food,Flower Shop,Flea Market,Fish & Chips Shop,Fast Food Restaurant,Farmers Market,Farm
11,63118,1,Mexican Restaurant,Fried Chicken Joint,Grocery Store,Bar,Bakery,Beer Store,Tea Room,Food Truck,Coffee Shop,Brewery
13,63120,1,Food Court,Café,Food Truck,Performing Arts Venue,Flower Shop,Flea Market,Fish & Chips Shop,Fast Food Restaurant,Farmers Market,Farm
16,63109,1,Pizza Place,Mexican Restaurant,Public Art,Mobile Phone Shop,Liquor Store,Automotive Shop,Flower Shop,Record Shop,Ice Cream Shop,Farm
17,63132,1,Dance Studio,Flea Market,Fast Food Restaurant,Pharmacy,Sushi Restaurant,Business Service,Bakery,Gas Station,Farm,Flower Shop
18,63126,1,Salon / Barbershop,Hobby Shop,Food Truck,Moving Target,Cocktail Bar,Eastern European Restaurant,Flea Market,Fish & Chips Shop,Fast Food Restaurant,Farmers Market
19,63116,1,Mexican Restaurant,Italian Restaurant,Zoo,Seafood Restaurant,Food,Museum,Cosmetics Shop,Sports Bar,Rugby Pitch,Lounge
20,63115,1,Construction & Landscaping,Moving Target,Factory,Food,Flower Shop,Flea Market,Fish & Chips Shop,Fast Food Restaurant,Farmers Market,Farm


In [99]:
SL_merged1.loc[SL_merged1['Cluster Labels'] == 2, SL_merged1.columns[[0] + list(range(5, SL_merged1.shape[1]))]]

Unnamed: 0,Zip,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
7,63151,2,Pub,Resort,Fast Food Restaurant,Donut Shop,Flower Shop,Flea Market,Fish & Chips Shop,Farmers Market,Farm,Factory
12,63198,2,Pub,Resort,Fast Food Restaurant,Donut Shop,Flower Shop,Flea Market,Fish & Chips Shop,Farmers Market,Farm,Factory
31,63145,2,Pub,Resort,Fast Food Restaurant,Donut Shop,Flower Shop,Flea Market,Fish & Chips Shop,Farmers Market,Farm,Factory
45,63167,2,Pub,Resort,Fast Food Restaurant,Donut Shop,Flower Shop,Flea Market,Fish & Chips Shop,Farmers Market,Farm,Factory


In [100]:
SL_merged1.loc[SL_merged1['Cluster Labels'] == 3, SL_merged1.columns[[0] + list(range(5, SL_merged1.shape[1]))]]

Unnamed: 0,Zip,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
35,63125,3,Theater,Zoo,Food,Flower Shop,Flea Market,Fish & Chips Shop,Fast Food Restaurant,Farmers Market,Farm,Factory


In [101]:
SL_merged1.loc[SL_merged1['Cluster Labels'] == 4, SL_merged1.columns[[0] + list(range(5, SL_merged1.shape[1]))]]

Unnamed: 0,Zip,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
54,63133,4,Garden Center,Zoo,Factory,Food,Flower Shop,Flea Market,Fish & Chips Shop,Fast Food Restaurant,Farmers Market,Farm
56,63131,4,Lounge,Garden Center,Zoo,Flower Shop,Flea Market,Fish & Chips Shop,Fast Food Restaurant,Farmers Market,Farm,Factory
