## Capstone project, location for a new restaurent in Chicago, IL, USA

#### 1, Import Libraries

In [1]:
# import libraries section 
import numpy as np                        # library for vectorized computation
import pandas as pd                       # library to process data as dataframes

import requests                           # library to handle requests
from bs4 import BeautifulSoup             # library to parse HTML and XML documents

import json                               # library to handle JSON files
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe
from geopy.geocoders import Nominatim     # convert an address into latitude and longitude values

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans

# install folium
!pip install folium

# import folium
import folium                              # map rendering library

print('Libraries imported!.')

Collecting folium
  Downloading folium-0.12.1-py2.py3-none-any.whl (94 kB)
[K     |████████████████████████████████| 94 kB 6.8 MB/s  eta 0:00:01
Collecting branca>=0.3.0
  Downloading branca-0.4.2-py3-none-any.whl (24 kB)
Installing collected packages: branca, folium
Successfully installed branca-0.4.2 folium-0.12.1
Libraries imported!.


#### 2, Import the Chicago neighborhood file chicagoneighborhood_2021.csv from google storage bucket.

In [2]:
# read csv file from google storage bucket.
##del df  # delete dataframe df before rerunning

df = pd.read_csv(r"https://storage.googleapis.com/natemano-courcera-chicago2021a/chicagoneighborhood_2021.csv", encoding = "ISO-8859-1")
df.head(6)

Unnamed: 0,Neighborhood,Community Area
0,Albany Park,Albany Park
1,Mayfair,Albany Park
2,North Mayfair,Albany Park
3,Ravenswood Manor,Albany Park
4,Archer Heights,Archer Heights
5,Armour Square,Armour Square


#### 3, Get geolocator codes for Chicago neighborhoods and build new dataframe

In [3]:
# define the dataframe columns
column_names = ['Community_Area', 'Neighborhood', 'Latitude', 'Longitude'] 

# instantiate the new dataframe
neighborhoods_chi = pd.DataFrame(columns=column_names)
neighborhoods_chi

Unnamed: 0,Community_Area,Neighborhood,Latitude,Longitude


In [4]:
# get Chicago neighborhood Latitude	Longitude
print('start read')

for idx, row in df.iterrows():

    neighborhood_name = row['Neighborhood']
    community_area = row['Community Area']
    
    address = neighborhood_name +', IL'
    #print('address ', address) # for debugging data
    
    geolocator = Nominatim(user_agent="chi_explorer")
    location   = geolocator.geocode(address)
    latitude   = location.latitude
    longitude  = location.longitude
    #print('out',location.latitude, location.longitude)  # for debugging data
     
    neighborhoods_chi = neighborhoods_chi.append({'Community_Area': community_area,
                                          'Neighborhood': neighborhood_name+(', IL'),
                                          'Latitude': latitude,
                                          'Longitude': longitude}, ignore_index=True)

#print('end read!')
neighborhoods_chi.head(6)

start read


Unnamed: 0,Community_Area,Neighborhood,Latitude,Longitude
0,Albany Park,"Albany Park, IL",41.971937,-87.716174
1,Albany Park,"Mayfair, IL",51.511087,-0.147058
2,Albany Park,"North Mayfair, IL",33.462919,-111.75561
3,Albany Park,"Ravenswood Manor, IL",41.964622,-87.70138
4,Archer Heights,"Archer Heights, IL",41.811422,-87.726165
5,Armour Square,"Armour Square, IL",41.840231,-87.632986


#### 4, create a map of chicago using latitude and longitude values. Use geopy library to get the latitude and longitude values of Chicago, IL

In [6]:
# Use geopy library to get the latitude and longitude values of Chicago, IL
print('start')
address = 'Chicago, IL'
geolocator    = Nominatim(user_agent="nm-application") # specify appliaction name
location      = geolocator.geocode(address)
latitude_chi  = location.latitude
longitude_chi = location.longitude
print('The geograpical coordinate of Chicago, IL are {}, {}.'.format(latitude_chi, longitude_chi))
print('done')

start
The geograpical coordinate of Chicago, IL are 41.8755616, -87.6244212.
done


In [7]:
#create a map of chicago using latitude and longitude values
print('start')
map_chicago = folium.Map(location=[latitude_chi, longitude_chi], zoom_start=10)

#add markers to map
#for borough, neighborhood, lat, lng in (neighborhoods_chi['Community Area'], neighborhoods_chi['Neighborhood'], neighborhoods_chi['Latitude'], neighborhoods_chi['Longitude']):
for borough, neighborhood, lat, lng in zip(neighborhoods_chi['Community_Area'], neighborhoods_chi['Neighborhood'], neighborhoods_chi['Latitude'], neighborhoods_chi['Longitude']):

    label = '{}, {}'.format(neighborhood, neighborhood)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7).add_to(map_chicago)  
    
map_chicago

start


#### 5, Foursquare Api for getting Nearby Venues

In [8]:
## Foursquare api set client account
CLIENT_ID = 'JXSHDM3INM2TUNT1Z3MVT2C3JUJC3LWVNOA1UTQ5A04BZEXZ' # your Foursquare ID
CLIENT_SECRET = 'T2YAIOY2B0YULJ1YJZXXTE4CCCAFZEVJ23XUQICT2AB4ZUA3' # your Foursquare Secret
VERSION = '20180604'
LIMIT = 50
print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: JXSHDM3INM2TUNT1Z3MVT2C3JUJC3LWVNOA1UTQ5A04BZEXZ
CLIENT_SECRET:T2YAIOY2B0YULJ1YJZXXTE4CCCAFZEVJ23XUQICT2AB4ZUA3


In [9]:
# Define function getNearbyVenues
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

### 6, Execute the above function on each neighborhood and create a new dataframe called chicago_venues.

In [10]:
# data from neighborhoods_chi

chicago_venues = getNearbyVenues(names=neighborhoods_chi['Neighborhood'],
                                   latitudes=neighborhoods_chi['Latitude'],
                                   longitudes=neighborhoods_chi['Longitude']
                                  )

Albany Park, IL
Mayfair, IL
North Mayfair, IL
Ravenswood Manor, IL
Archer Heights, IL
Armour Square, IL
Chinatown, IL
Wentworth Gardens, IL
Ashburn, IL
Ashburn Estates, IL
Beverly View, IL
Crestline, IL
Parkview, IL
Scottsdale, IL
Wrightwood, IL
Auburn Gresham, IL
Gresham, IL
Galewood, IL
North Austin, IL
South Austin, IL
The Island, IL
West Humboldt Park, IL
Avalon Park, IL
Marynook, IL
Stony Island Park, IL
Avondale, IL
Jackowo, IL
Polish Village, IL
Belmont Central, IL
Brickyard, IL
Cragin, IL
Hanson Park, IL
Beverly, IL
East Beverly, IL
West Beverly, IL
Bridgeport, IL
Brighton Park, IL
Burnside, IL
Calumet Heights, IL
Pill Hill, IL
Chatham, IL
East Chatham, IL
West Chatham, IL
West Chesterfield, IL
Chicago Lawn, IL
Lithuanian Plaza, IL
Marquette Park, IL
Chrysler Village, IL
Clearing East, IL
Clearing West, IL
Bronzeville, IL
Dearborn Homes, IL
Groveland Park, IL
Lake Meadows, IL
Prairie Shores, IL
South Commons, IL
Stateway Gardens, IL
The Gap, IL
Belmont Heights, IL
Belmont Terra

In [11]:
## check the size and head() of the resulting dataframe

print(chicago_venues.shape)
chicago_venues.head(11)

(3707, 7)


Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,"Albany Park, IL",41.971937,-87.716174,Chicago Produce,41.970553,-87.716327,Grocery Store
1,"Albany Park, IL",41.971937,-87.716174,Cairo Nights Hookah Lounge,41.975776,-87.715547,Hookah Bar
2,"Albany Park, IL",41.971937,-87.716174,Nighthawk,41.967974,-87.713415,Cocktail Bar
3,"Albany Park, IL",41.971937,-87.716174,Peking Mandarin Resturant,41.968292,-87.715783,Chinese Restaurant
4,"Albany Park, IL",41.971937,-87.716174,Popeyes Louisiana Kitchen,41.968756,-87.713019,Fried Chicken Joint
5,"Albany Park, IL",41.971937,-87.716174,Hiromi's Oriental Restaurant,41.968144,-87.718719,Karaoke Bar
6,"Albany Park, IL",41.971937,-87.716174,Markellos Baking Company,41.968602,-87.716607,Bakery
7,"Albany Park, IL",41.971937,-87.716174,Banpojung,41.975707,-87.715609,Korean Restaurant
8,"Albany Park, IL",41.971937,-87.716174,Subway,41.968748,-87.712861,Sandwich Place
9,"Albany Park, IL",41.971937,-87.716174,T-Mobile,41.968751,-87.713158,Mobile Phone Shop


In [12]:
## Check number of venues returned for each neighborhood
chicago_venues.groupby('Neighborhood').count()


Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
"Albany Park, IL",12,12,12,12,12,12
"Altgeld Gardens, IL",3,3,3,3,3,3
"Andersonville, IL",4,4,4,4,4,4
"Archer Heights, IL",16,16,16,16,16,16
"Armour Square, IL",13,13,13,13,13,13
...,...,...,...,...,...,...
"West Woodlawn, IL",1,1,1,1,1,1
"Wicker Park, IL",50,50,50,50,50,50
"Wildwood, IL",20,20,20,20,20,20
"Wrightwood, IL",16,16,16,16,16,16


In [13]:
## check new dataframe size.
chicago_venues.shape

(3707, 7)

### 7, Check the number of unique categories that can be curated from all the returned venues.

In [14]:
## Check number of unique categories that can be curated from all the returned venues
print('There are {} uniques categories.'.format(len(chicago_venues['Venue Category'].unique())))

There are 376 uniques categories.


### 8, Let's Analyze each neighborhood

In [15]:
## Let's Analyze Each Neighborhood
# one hot encoding
chicago_onehot = pd.get_dummies(chicago_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
chicago_onehot['Neighborhood'] = chicago_venues['Neighborhood'] 

# move neighborhood column to the first column
fixed_columns = [chicago_onehot.columns[-1]] + list(chicago_onehot.columns[:-1])
chicago_onehot = chicago_onehot[fixed_columns]

chicago_onehot.head(20)

Unnamed: 0,Zoo,ATM,Accessories Store,African Restaurant,Airport,Airport Food Court,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,...,Vietnamese Restaurant,Vineyard,Warehouse Store,Waterfront,Whisky Bar,Wine Bar,Wine Shop,Wings Joint,Women's Store,Yoga Studio
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
5,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
6,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
7,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
8,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
9,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [16]:
## let's analyze the new dataframe size.
chicago_onehot.shape

(3707, 376)

### 9, Next, let's group rows by neighborhood and by taking the mean of the frequency of occurrence of each category

In [17]:
##Next, let's group rows by neighborhood and by taking the mean of the frequency of occurrence of each category
chicago_grouped = chicago_onehot.groupby('Neighborhood').mean().reset_index()
chicago_grouped

Unnamed: 0,Neighborhood,Zoo,ATM,Accessories Store,African Restaurant,Airport,Airport Food Court,Airport Lounge,Airport Service,Airport Terminal,...,Vietnamese Restaurant,Vineyard,Warehouse Store,Waterfront,Whisky Bar,Wine Bar,Wine Shop,Wings Joint,Women's Store,Yoga Studio
0,"Albany Park, IL",0.0,0.0,0.00,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.00,0.0,0.0,0.0000,0.0,0.0
1,"Altgeld Gardens, IL",0.0,0.0,0.00,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.00,0.0,0.0,0.0000,0.0,0.0
2,"Andersonville, IL",0.0,0.0,0.00,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.00,0.0,0.0,0.0000,0.0,0.0
3,"Archer Heights, IL",0.0,0.0,0.00,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.00,0.0,0.0,0.0625,0.0,0.0
4,"Armour Square, IL",0.0,0.0,0.00,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.00,0.0,0.0,0.0000,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
211,"West Woodlawn, IL",0.0,0.0,0.00,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.00,0.0,0.0,0.0000,0.0,0.0
212,"Wicker Park, IL",0.0,0.0,0.04,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.00,0.0,0.0,0.0000,0.0,0.0
213,"Wildwood, IL",0.0,0.0,0.00,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.00,0.0,0.0,0.0000,0.0,0.0
214,"Wrightwood, IL",0.0,0.0,0.00,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.00,0.0,0.0,0.0000,0.0,0.0


In [18]:
## Lets confirm the new size

chicago_grouped.shape

(216, 376)

### 10, Print each neighborhood along with the top 5 most common venues

In [19]:
# set top venues to 5 & Print each neighborhood along with 5 common venues
num_top_venues = 5

for hood in chicago_grouped['Neighborhood']:
    print("----"+hood+"----")
    temp = chicago_grouped[chicago_grouped['Neighborhood'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

----Albany Park, IL----
                       venue  freq
0             Sandwich Place  0.08
1          Mobile Phone Shop  0.08
2  Latin American Restaurant  0.08
3                Karaoke Bar  0.08
4                 Donut Shop  0.08


----Altgeld Gardens, IL----
                  venue  freq
0                  Food  0.33
1         Grocery Store  0.33
2                  Park  0.33
3       Other Nightlife  0.00
4  Other Great Outdoors  0.00


----Andersonville, IL----
                 venue  freq
0       History Museum  0.50
1   Travel & Transport  0.25
2  American Restaurant  0.25
3            Nightclub  0.00
4      Other Nightlife  0.00


----Archer Heights, IL----
                venue  freq
0  Mexican Restaurant  0.19
1       Grocery Store  0.12
2   Mobile Phone Shop  0.12
3       Big Box Store  0.06
4                Bank  0.06


----Armour Square, IL----
                venue  freq
0  Chinese Restaurant  0.31
1                Café  0.08
2  Italian Restaurant  0.08
3       Grocery S

### 11, Create the new dataframe and display the top 10 venues for each neighborhood.
#### define function return_most_common_venues and create dataframe neighborhoods_venues_sorted.

In [20]:
## define function return_most_common_venues
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

In [21]:
## create the new dataframe and display the top 10 venues for each neighborhood.
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = chicago_grouped['Neighborhood']

for ind in np.arange(chicago_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(chicago_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted.head(6)


Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,"Albany Park, IL",Sandwich Place,Grocery Store,Bakery,Mobile Phone Shop,Hookah Bar,Donut Shop,Chinese Restaurant,Latin American Restaurant,Cocktail Bar,Korean Restaurant
1,"Altgeld Gardens, IL",Food,Grocery Store,Park,Dry Cleaner,Eastern European Restaurant,Electronics Store,Empanada Restaurant,English Restaurant,Escape Room,Ethiopian Restaurant
2,"Andersonville, IL",History Museum,American Restaurant,Travel & Transport,Yoga Studio,Fabric Shop,Eastern European Restaurant,Electronics Store,Empanada Restaurant,English Restaurant,Escape Room
3,"Archer Heights, IL",Mexican Restaurant,Grocery Store,Mobile Phone Shop,Bank,Candy Store,Coffee Shop,Big Box Store,Gym / Fitness Center,Gas Station,Sandwich Place
4,"Armour Square, IL",Chinese Restaurant,Cosmetics Shop,Italian Restaurant,Grocery Store,Gas Station,Asian Restaurant,Café,Sandwich Place,Hot Dog Joint,Ice Cream Shop
5,"Ashburn, IL",Lake,Yoga Studio,Dry Cleaner,Eastern European Restaurant,Electronics Store,Empanada Restaurant,English Restaurant,Escape Room,Ethiopian Restaurant,Event Service


### 12, Cluster Neighborhoods
### Run k-means to cluster the neighborhood into 5 clusters.

In [22]:
## review df chicago_grouped
chicago_grouped.head()

Unnamed: 0,Neighborhood,Zoo,ATM,Accessories Store,African Restaurant,Airport,Airport Food Court,Airport Lounge,Airport Service,Airport Terminal,...,Vietnamese Restaurant,Vineyard,Warehouse Store,Waterfront,Whisky Bar,Wine Bar,Wine Shop,Wings Joint,Women's Store,Yoga Studio
0,"Albany Park, IL",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,"Altgeld Gardens, IL",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,"Andersonville, IL",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,"Archer Heights, IL",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0625,0.0,0.0
4,"Armour Square, IL",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [23]:
# set number of clusters to 5, fit(chicago_grouped_clustering)
kclusters = 5

chicago_grouped_clustering = chicago_grouped.drop('Neighborhood', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(chicago_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10] 

array([1, 4, 1, 1, 1, 1, 4, 1, 4, 1], dtype=int32)

### 13, Add cluster lables
#### create a new dataframe that includes the cluster as well as the top 10 venues for each neighborhood

In [24]:
### Let's create a new dataframe that includes the cluster as well as the top 10 venues for each neighborhood

# add clustering labels
neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

#chicago_merged = chicago_data
chicago_merged = neighborhoods_chi

# merge chicago_grouped with neighborhoods_chi to add latitude/longitude for each neighborhood
chicago_merged = chicago_merged.join(neighborhoods_venues_sorted.set_index('Neighborhood'), on='Neighborhood')

chicago_merged.head() # check the last columns!

Unnamed: 0,Community_Area,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Albany Park,"Albany Park, IL",41.971937,-87.716174,1.0,Sandwich Place,Grocery Store,Bakery,Mobile Phone Shop,Hookah Bar,Donut Shop,Chinese Restaurant,Latin American Restaurant,Cocktail Bar,Korean Restaurant
1,Albany Park,"Mayfair, IL",51.511087,-0.147058,1.0,Hotel,Boutique,French Restaurant,Art Gallery,Clothing Store,Seafood Restaurant,Steakhouse,Lounge,Café,Park
2,Albany Park,"North Mayfair, IL",33.462919,-111.75561,1.0,Pool,Yoga Studio,Fabric Shop,Eastern European Restaurant,Electronics Store,Empanada Restaurant,English Restaurant,Escape Room,Ethiopian Restaurant,Event Service
3,Albany Park,"Ravenswood Manor, IL",41.964622,-87.70138,1.0,Train Station,Playground,Video Game Store,Mexican Restaurant,Convenience Store,Brewery,Museum,Garden,Park,Indoor Play Area
4,Archer Heights,"Archer Heights, IL",41.811422,-87.726165,1.0,Mexican Restaurant,Grocery Store,Mobile Phone Shop,Bank,Candy Store,Coffee Shop,Big Box Store,Gym / Fitness Center,Gas Station,Sandwich Place


### 14, Final - visualize the resulting clusters

In [30]:
## Final - visualize the resulting clusters using folium
## Note: Chicago_merged['Cluster Labels'].fillna(3).astype(np.int64) -- change NA to 3 and typecast lable to int64

# create map Chicago
map_clusters  = folium.Map(location=[latitude_chi, longitude_chi], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(chicago_merged['Latitude'], chicago_merged['Longitude'], chicago_merged['Neighborhood'], (chicago_merged['Cluster Labels'].fillna(3)).astype(np.int64)):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    
   # print((chicago_merged['Cluster Labels'].fillna(0).unique()))
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

### 15, Examine Clusters
#### Now, you can examine each cluster and determine the discriminating venue categories that distinguish each cluster. 

In [31]:
# Cluster 1
chicago_merged.loc[chicago_merged['Cluster Labels'] == 0, chicago_merged.columns[[1] + list(range(5, chicago_merged.shape[1]))]]

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
52,"Groveland Park, IL",Convenience Store,Music Venue,Falafel Restaurant,Electronics Store,Empanada Restaurant,English Restaurant,Escape Room,Ethiopian Restaurant,Event Service,Event Space
68,"Edgewater Beach, IL",Food,Bar,Dumpling Restaurant,Eastern European Restaurant,Electronics Store,Empanada Restaurant,English Restaurant,Escape Room,Ethiopian Restaurant,Event Service
170,"North Lawndale, IL",Concert Hall,Athletics & Sports,Food,Convenience Store,Hostel,Dry Cleaner,Eastern European Restaurant,Electronics Store,Empanada Restaurant,English Restaurant
176,"Norwood Park West, IL",Convenience Store,Cosmetics Shop,Falafel Restaurant,Electronics Store,Empanada Restaurant,English Restaurant,Escape Room,Ethiopian Restaurant,Event Service,Event Space
177,"Old Norwood, IL",Food,Spa,Convention Center,Eastern European Restaurant,Empanada Restaurant,English Restaurant,Escape Room,Ethiopian Restaurant,Event Service,Event Space


In [32]:
## Cluster 2
chicago_merged.loc[chicago_merged['Cluster Labels'] == 1, chicago_merged.columns[[1] + list(range(5, chicago_merged.shape[1]))]]

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,"Albany Park, IL",Sandwich Place,Grocery Store,Bakery,Mobile Phone Shop,Hookah Bar,Donut Shop,Chinese Restaurant,Latin American Restaurant,Cocktail Bar,Korean Restaurant
1,"Mayfair, IL",Hotel,Boutique,French Restaurant,Art Gallery,Clothing Store,Seafood Restaurant,Steakhouse,Lounge,Café,Park
2,"North Mayfair, IL",Pool,Yoga Studio,Fabric Shop,Eastern European Restaurant,Electronics Store,Empanada Restaurant,English Restaurant,Escape Room,Ethiopian Restaurant,Event Service
3,"Ravenswood Manor, IL",Train Station,Playground,Video Game Store,Mexican Restaurant,Convenience Store,Brewery,Museum,Garden,Park,Indoor Play Area
4,"Archer Heights, IL",Mexican Restaurant,Grocery Store,Mobile Phone Shop,Bank,Candy Store,Coffee Shop,Big Box Store,Gym / Fitness Center,Gas Station,Sandwich Place
...,...,...,...,...,...,...,...,...,...,...,...
235,"Ukrainian Village, IL",Coffee Shop,Pub,Dive Bar,Ukrainian Restaurant,Eastern European Restaurant,Bakery,Grocery Store,Bar,Art Museum,Deli / Bodega
236,"West Town, IL",Dive Bar,Grocery Store,Mexican Restaurant,Bar,Pet Store,Toy / Game Store,Record Shop,Chinese Restaurant,Bank,Taco Place
237,"Wicker Park, IL",Pizza Place,Bookstore,Coffee Shop,Accessories Store,Breakfast Spot,Bar,Boutique,French Restaurant,Thrift / Vintage Store,Korean Restaurant
238,"Polish Downtown, IL",Hotel,American Restaurant,Italian Restaurant,Nightclub,Burger Joint,Exhibit,Coffee Shop,Sandwich Place,Gym,Theater


In [33]:
## Cluster 3
chicago_merged.loc[chicago_merged['Cluster Labels'] == 2, chicago_merged.columns[[1] + list(range(5, chicago_merged.shape[1]))]]

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
37,"Burnside, IL",Hotel,Yoga Studio,Fabric Shop,Eastern European Restaurant,Electronics Store,Empanada Restaurant,English Restaurant,Escape Room,Ethiopian Restaurant,Event Service
70,"Lakewood / Balmoral, IL",ATM,Hotel,Yoga Studio,Fabric Shop,Eastern European Restaurant,Electronics Store,Empanada Restaurant,English Restaurant,Escape Room,Ethiopian Restaurant


In [34]:
## Cluster 4
chicago_merged.loc[chicago_merged['Cluster Labels'] == 3, chicago_merged.columns[[1] + list(range(5, chicago_merged.shape[1]))]]

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
136,"Beverly Woods, IL",Coffee Shop,Yoga Studio,Fabric Shop,Eastern European Restaurant,Electronics Store,Empanada Restaurant,English Restaurant,Escape Room,Ethiopian Restaurant,Event Service
171,"Hollywood Park, IL",Coffee Shop,Yoga Studio,Fabric Shop,Eastern European Restaurant,Electronics Store,Empanada Restaurant,English Restaurant,Escape Room,Ethiopian Restaurant,Event Service


In [35]:
## Cluster 5
chicago_merged.loc[chicago_merged['Cluster Labels'] == 4, chicago_merged.columns[[1] + list(range(5, chicago_merged.shape[1]))]]

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
15,"Auburn Gresham, IL",Park,Food,Discount Store,Basketball Court,Yoga Studio,Exhibit,Empanada Restaurant,English Restaurant,Escape Room,Ethiopian Restaurant
21,"West Humboldt Park, IL",Park,Lake,Baseball Field,Café,Plaza,Food Truck,History Museum,Museum,Beach,Yoga Studio
38,"Calumet Heights, IL",Bus Station,Gym / Fitness Center,Park,Deli / Bodega,Yoga Studio,Electronics Store,Empanada Restaurant,English Restaurant,Escape Room,Ethiopian Restaurant
46,"Marquette Park, IL",Park,Soccer Field,Liquor Store,Yoga Studio,Exhibit,Electronics Store,Empanada Restaurant,English Restaurant,Escape Room,Ethiopian Restaurant
49,"Clearing West, IL",Airport Terminal,Airport,Park,Yoga Studio,Fabric Shop,Electronics Store,Empanada Restaurant,English Restaurant,Escape Room,Ethiopian Restaurant
54,"Prairie Shores, IL",Park,Bus Station,Train Station,Gym / Fitness Center,Shopping Mall,Event Space,Eastern European Restaurant,Electronics Store,Empanada Restaurant,English Restaurant
55,"South Commons, IL",Deli / Bodega,Bus Station,Park,Gym / Fitness Center,Rental Car Location,Eastern European Restaurant,Electronics Store,Empanada Restaurant,English Restaurant,Escape Room
60,"Dunning, IL",Deli / Bodega,Park,Intersection,American Restaurant,Falafel Restaurant,Electronics Store,Empanada Restaurant,English Restaurant,Escape Room,Ethiopian Restaurant
73,"Hamilton Park, IL",Park,Clothing Store,Sandwich Place,Financial or Legal Service,Filipino Restaurant,Dry Cleaner,Dumpling Restaurant,Eastern European Restaurant,Electronics Store,Fish & Chips Shop
77,"Sauganash, IL",Park,Indian Restaurant,Fast Food Restaurant,Yoga Studio,Exhibit,Eastern European Restaurant,Electronics Store,Empanada Restaurant,English Restaurant,Escape Room
