In [4]:
import numpy as np
import pandas as pd
import requests
import lxml.html as lh
import json # library to handle JSON files

#!conda install -c conda-forge geopy --yes # uncomment this line if you haven't completed the Foursquare API lab
#pip install geopy
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

import requests # library to handle requests
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans

!conda install -c conda-forge folium=0.5.0 --yes # uncomment this line if you haven't completed the Foursquare API lab
import folium # map rendering library

print('Libraries imported.')

Solving environment: done

# All requested packages already installed.

Libraries imported.


<h1>1. Import Data</h1>

In [5]:
url = 'https://en.wikipedia.org/wiki/List_of_neighborhoods_in_Chicago#List_of_neighborhoods_by_community_area'
#Create a handle, page, to handle the contents of the website
page = requests.get(url)
#Store the contents of the website under doc
doc = lh.fromstring(page.content)
#Parse data that are stored between <tr>..</tr> of HTML
tr_elements = doc.xpath('//tr')
#Check the length of the first 12 rows
[len(T) for T in tr_elements[:12]]

[2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2]

In [6]:
tr_elements = doc.xpath('//tr')
#Create empty list
col=[]
i=0
#For each row, store each first element (header) and an empty list
for t in tr_elements[0]:
    i+=1
    name=t.text_content()
    print(i,name)
    if i ==2:
        name=name[:-1]
    col.append((name,[]))

1 Neighborhood
2 Community area



In [7]:
#Since out first row is the header, data is stored on the second row onwards
for j in range(1,len(tr_elements)):
    #T is our j'th row
    T=tr_elements[j]
    
    #If row is not of size 2, the //tr data is not from our table 
    if len(T)!=2:
        break
    
    #i is the index of our column
    i=0
    
    #Iterate through each element of the row
    for t in T.iterchildren():
        data=t.text_content() 
        #Check if row is empty
        if i>0:
        #Convert any numerical value to integers
            try:
                data=int(data)
            except:
                pass
        #Append the data to the empty list of the i'th column
        data=data[:-1]
        col[i][1].append(data)
        #Increment i for the next column
        i+=1

In [8]:
[len(C) for (title,C) in col]

[247, 247]

In [9]:
Dict={title:column for (title,column) in col}
chi_df=pd.DataFrame(Dict)
chi_df.head(10)

Unnamed: 0,Neighborhood,Community area
0,Albany Park,Albany Park
1,Altgeld Gardens,Riverdale
2,Andersonville,Edgewater
3,Archer Heights,Archer Heights
4,Armour Square,Armour Square
5,Ashburn,Ashburn
6,Ashburn Estates,Ashburn
7,Auburn Gresham,Auburn Gresham
8,Avalon Park,Avalon Park
9,Avondale,Avondale


In [10]:
chi_df.columns

Index(['Neighborhood', 'Community area'], dtype='object')

In [11]:
chi_super = chi_df[['Community area']].copy(deep=True)

In [12]:
chi_super.drop_duplicates(subset=None, keep='first', inplace=True)
chi_super.reset_index(drop=True,inplace=True)

In [13]:
print(chi_super.shape)
chi_super.head(27)

(83, 1)


Unnamed: 0,Community area
0,Albany Park
1,Riverdale
2,Edgewater
3,Archer Heights
4,Armour Square
5,Ashburn
6,Auburn Gresham
7,Avalon Park
8,Avondale
9,Irving Park


In [14]:
chi_sub = chi_df[['Neighborhood']].copy(deep=True)

In [15]:
chi_sub.head()

Unnamed: 0,Neighborhood
0,Albany Park
1,Altgeld Gardens
2,Andersonville
3,Archer Heights
4,Armour Square


In [16]:
chi_sub.shape[0]

247

In [24]:
chi_super.loc[0, 'Community area']

'Albany Park'

In [39]:
chi_super.loc[46:82,'Community area']
chi_super.drop([68, 69, 75, 79, 82], axis=0, inplace=True)
chi_super.tail(10)
#remove 68, 69, 75, 79, 82

Unnamed: 0,Community area
70,Portage Park
71,Lakeview
72,South Chicago
73,South Deering
74,Washington Park
76,West Elsdon
77,West Englewood
78,West Garfield Park
80,West Pullman
81,Woodlawn


In [41]:
chi_super.reset_index(drop=True, inplace=True)
chi_super.tail(20)

Unnamed: 0,Community area
58,South Lawndale
59,The Loop
60,Rogers Park
61,McKinley Park
62,Montclare
63,Mount Greenwood
64,North Center
65,West Ridge
66,Oakland
67,O'Hare


In [18]:
url = 'https://raw.githubusercontent.com/raoulkumar/IBM_Capstone/master/chicago_neighborhoods.csv'
chi_hood = pd.read_csv(url)
chi_hood.drop([76], inplace=True)
chi_hood.tail()

Unnamed: 0,Community area,Latitude,Longitude
71,Washington Park,41.7945,-87.616
72,West Elsdon,41.7929,-87.7222
73,West Englewood,41.7781,-87.6667
74,West Pullman,41.6716,-87.6333
75,Woodlawn,41.7806,-87.5915


<h1> 2. Visualize neighborhoods </h1>

In [19]:
latitude = 41.869
longitude = -87.6298

map_chi = folium.Map(location=[latitude, longitude], zoom_start=10.5)

In [20]:
# add markers to map
for lat, lng, label in zip(chi_hood['Latitude'], chi_hood['Longitude'], chi_hood['Community area']):
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [float(lat), float(lng)],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_chi)  
    
map_chi

<h1> 3. Retrieve FourSquare data on neighborhoods </h1>

In [51]:
CLIENT_ID = '5BEOIZX4NVVNZMPLFOG5COVRJSPOUXV15TTJK1GOOGL3ZDBM' # your Foursquare ID
CLIENT_SECRET = '1KAIQW41MA2Z3JZBPIJUY1JEY4IT0R2CLPFNE4D0VBF4RCOM' # your Foursquare Secret
VERSION = '20180605' # Foursquare API version
#limit of results and radius of 800 meters half a mile
LIMIT = 200
radius = 750

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: 5BEOIZX4NVVNZMPLFOG5COVRJSPOUXV15TTJK1GOOGL3ZDBM
CLIENT_SECRET:1KAIQW41MA2Z3JZBPIJUY1JEY4IT0R2CLPFNE4D0VBF4RCOM


In [52]:
# function that extracts the category of the venue
def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return None
    else:
        return categories_list[0]['name']
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [53]:
chi_venues = getNearbyVenues(names=chi_hood['Community area'],
                                   latitudes=chi_hood['Latitude'],
                                   longitudes=chi_hood['Longitude']
                                  )

Albany Park
Riverdale
Edgewater
Archer Heights
Armour Square
Ashburn
Auburn Gresham
Avalon Park
Avondale
Irving Park
New City
Belmont Craign
Hermosa
Dunning
Beverly
Morgan Park
Norwood Park
Lake View
Lincoln Square
Washington Heights
Bridgeport
Brighton Park
Douglas
Logan Square
Uptown
Burnside
Near North Side
Calumet Heights
Near South Side
Chatham
Chicago Lawn
Clearing
Pullman
North Lawndale
East Garfield Park
Hyde Park
Lower West Side
East Side
West Town
Forest Glen
Edison Park
Englewood
Roseland
West Lawn
Fuller Park
Near West Side 
Gage Park
Austin
Garfield Ridge
Jefferson Park
Grand Boulevard
Greater Grand Crossing
Hegewisch
North Park
Humboldt Park
South Shore
Kenwood
Lincoln Park
South Lawndale
The Loop
Rogers Park
McKinley Park
Montclare
Mount Greenwood
North Center
West Ridge
Oakland
O'Hare
Portage Park
South Chicago
South Deering
Washington Park
West Elsdon
West Englewood
West Pullman
Woodlawn


In [54]:
print(chi_venues.shape)
chi_venues.tail()

(1609, 7)


Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
1604,Woodlawn,41.7806,-87.5915,CTA - 63rd & Dorchester,41.780468,-87.591721,Bus Station
1605,Woodlawn,41.7806,-87.5915,Leon's Barbecue,41.78075,-87.58786,BBQ Joint
1606,Woodlawn,41.7806,-87.5915,Tres Original Pancakes,41.780553,-87.587787,Food
1607,Woodlawn,41.7806,-87.5915,Haggerty Field,41.778195,-87.588481,Baseball Field
1608,Woodlawn,41.7806,-87.5915,South Side YMCA,41.779749,-87.587017,Athletics & Sports


In [55]:
print('There are {} uniques categories.'.format(len(chi_venues['Venue Category'].unique())))

There are 232 uniques categories.


<h1> 4. Normalize data for each neighborhood so we can cluster </h1>

In [56]:
# one hot encoding
chi_onehot = pd.get_dummies(chi_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
chi_onehot['Neighborhood'] = chi_venues['Neighborhood'] 

# move neighborhood column to the first column
fixed_columns = [chi_onehot.columns[-1]] + list(chi_onehot.columns[:-1])
chi_onehot = chi_onehot[fixed_columns]

chi_onehot.columns

Index(['Neighborhood', 'ATM', 'Adult Boutique', 'African Restaurant',
       'American Restaurant', 'Antique Shop', 'Arcade', 'Arepa Restaurant',
       'Art Gallery', 'Art Museum',
       ...
       'Vegetarian / Vegan Restaurant', 'Video Game Store', 'Video Store',
       'Vietnamese Restaurant', 'Weight Loss Center', 'Whisky Bar',
       'Wine Shop', 'Wings Joint', 'Women's Store', 'Yoga Studio'],
      dtype='object', length=233)

In [57]:
print(chi_onehot.shape)
chi_grouped = chi_onehot.groupby('Neighborhood').mean().reset_index()
print(chi_grouped.shape)
chi_grouped

(1609, 233)
(73, 233)


Unnamed: 0,Neighborhood,ATM,Adult Boutique,African Restaurant,American Restaurant,Antique Shop,Arcade,Arepa Restaurant,Art Gallery,Art Museum,...,Vegetarian / Vegan Restaurant,Video Game Store,Video Store,Vietnamese Restaurant,Weight Loss Center,Whisky Bar,Wine Shop,Wings Joint,Women's Store,Yoga Studio
0,Albany Park,0.000000,0.0,0.00,0.000000,0.000000,0.0,0.00,0.000000,0.00,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.00,0.000000,0.000000,0.000000,0.000000
1,Archer Heights,0.000000,0.0,0.00,0.000000,0.000000,0.0,0.00,0.000000,0.00,...,0.000000,0.037037,0.037037,0.000000,0.000000,0.00,0.000000,0.000000,0.000000,0.000000
2,Armour Square,0.000000,0.0,0.00,0.000000,0.000000,0.0,0.00,0.000000,0.00,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.00,0.000000,0.000000,0.000000,0.000000
3,Ashburn,0.000000,0.0,0.00,0.000000,0.000000,0.0,0.00,0.000000,0.00,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.00,0.000000,0.000000,0.000000,0.000000
4,Auburn Gresham,0.000000,0.0,0.00,0.000000,0.000000,0.0,0.00,0.000000,0.00,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.00,0.000000,0.000000,0.000000,0.000000
5,Austin,0.000000,0.0,0.00,0.000000,0.000000,0.0,0.00,0.000000,0.00,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.00,0.000000,0.000000,0.000000,0.000000
6,Avalon Park,0.083333,0.0,0.00,0.000000,0.000000,0.0,0.00,0.000000,0.00,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.00,0.000000,0.000000,0.000000,0.000000
7,Avondale,0.000000,0.0,0.00,0.000000,0.000000,0.0,0.00,0.000000,0.00,...,0.000000,0.028571,0.000000,0.000000,0.000000,0.00,0.000000,0.000000,0.000000,0.000000
8,Beverly,0.000000,0.0,0.00,0.000000,0.000000,0.0,0.00,0.000000,0.00,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.00,0.000000,0.000000,0.058824,0.000000
9,Bridgeport,0.000000,0.0,0.00,0.043478,0.000000,0.0,0.00,0.043478,0.00,...,0.000000,0.021739,0.021739,0.000000,0.000000,0.00,0.000000,0.043478,0.000000,0.000000


In [58]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

In [59]:
num_top_venues = 7

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = chi_grouped['Neighborhood']

for ind in np.arange(chi_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(chi_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted.head()

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue
0,Albany Park,Pizza Place,Korean Restaurant,Discount Store,Dive Bar,Coffee Shop,Mexican Restaurant,Hookah Bar
1,Archer Heights,Mexican Restaurant,Gas Station,Bakery,Convenience Store,Pizza Place,Bar,Seafood Restaurant
2,Armour Square,Chinese Restaurant,Hot Dog Joint,Italian Restaurant,Intersection,Light Rail Station,Sports Bar,Asian Restaurant
3,Ashburn,Intersection,Liquor Store,Pizza Place,Italian Restaurant,Cosmetics Shop,Fast Food Restaurant,Bus Station
4,Auburn Gresham,Park,Currency Exchange,Cosmetics Shop,Discount Store,Basketball Court,Eye Doctor,Food & Drink Shop


<h1> 5. Cluster Neighborhoods using K-means clustering algorithm K=5 clusters </h1>

5 clusters was chosen after an iterative process of k=3 to k=7 determining the best clusters to provide the best information for small businesses

In [60]:
# set number of clusters
chi_grouped_clustering = chi_grouped.drop('Neighborhood', 1)
chi_grouped_clustering.head()

Unnamed: 0,ATM,Adult Boutique,African Restaurant,American Restaurant,Antique Shop,Arcade,Arepa Restaurant,Art Gallery,Art Museum,Arts & Crafts Store,...,Vegetarian / Vegan Restaurant,Video Game Store,Video Store,Vietnamese Restaurant,Weight Loss Center,Whisky Bar,Wine Shop,Wings Joint,Women's Store,Yoga Studio
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.037037,0.037037,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [61]:
# run k-means clustering
kclusters = 5
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(chi_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10] 

array([1, 2, 1, 1, 0, 1, 1, 1, 1, 1], dtype=int32)

In [62]:
# add clustering labels
neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

chi_merged = chi_hood

# merge toronto_grouped with toronto_data to add latitude/longitude for each neighborhood
chi_merged = chi_merged.join(neighborhoods_venues_sorted.set_index('Neighborhood'), on='Community area')
#tor_merged['Cluster Labels'] = int(tor_merged['Cluster Labels'])
chi_merged.head() # check the last columns!

Unnamed: 0,Community area,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue
0,Albany Park,41.9683,-87.728,1.0,Pizza Place,Korean Restaurant,Discount Store,Dive Bar,Coffee Shop,Mexican Restaurant,Hookah Bar
1,Riverdale,41.6611,-87.6038,4.0,Grocery Store,Yoga Studio,Ethiopian Restaurant,Food & Drink Shop,Food,Flower Shop,Flea Market
2,Edgewater,41.9837,-87.6601,1.0,Asian Restaurant,Bank,Optical Shop,Bus Station,Sandwich Place,Pizza Place,Sushi Restaurant
3,Archer Heights,41.8079,-87.7236,2.0,Mexican Restaurant,Gas Station,Bakery,Convenience Store,Pizza Place,Bar,Seafood Restaurant
4,Armour Square,41.8408,-87.634,1.0,Chinese Restaurant,Hot Dog Joint,Italian Restaurant,Intersection,Light Rail Station,Sports Bar,Asian Restaurant


In [63]:
chi_merged_archive = chi_merged
chi_merged.dropna(axis=0, inplace=True)
chi_merged.reset_index(drop=True,inplace=True)
chi_merged['Cluster Labels']

0     1.0
1     4.0
2     1.0
3     2.0
4     1.0
5     1.0
6     0.0
7     1.0
8     1.0
9     1.0
10    2.0
11    2.0
12    1.0
13    1.0
14    0.0
15    0.0
16    1.0
17    1.0
18    1.0
19    1.0
20    2.0
21    1.0
22    1.0
23    1.0
24    0.0
25    1.0
26    1.0
27    1.0
28    0.0
29    1.0
     ... 
43    1.0
44    2.0
45    1.0
46    1.0
47    1.0
48    1.0
49    1.0
50    1.0
51    3.0
52    1.0
53    1.0
54    1.0
55    1.0
56    2.0
57    1.0
58    1.0
59    1.0
60    2.0
61    1.0
62    1.0
63    1.0
64    0.0
65    1.0
66    1.0
67    1.0
68    1.0
69    2.0
70    1.0
71    0.0
72    1.0
Name: Cluster Labels, Length: 73, dtype: float64

<h1> 6. Visualize Clusters and determine demographic and function of each cluster</h1>

In [64]:
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=10.5)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(chi_merged['Latitude'], chi_merged['Longitude'], chi_merged['Community area'], chi_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [float(lat), float(lon)],
        radius=5,
        popup=label,
        color=rainbow[int(cluster)-1],
        fill=True,
        fill_color=rainbow[int(cluster)-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

In [65]:
neighborhoods_venues_sorted['Cluster Labels'].value_counts()

1    53
2     9
0     9
4     1
3     1
Name: Cluster Labels, dtype: int64

<h2> Cluster #0 has areas of town lacking restaurants in the south side of the city  </h2>

In [49]:
neighborhoods_venues_sorted[neighborhoods_venues_sorted['Cluster Labels'] == 0.0].head(53)

Unnamed: 0,Cluster Labels,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue
4,0,Auburn Gresham,Park,Currency Exchange,Cosmetics Shop,Discount Store,Basketball Court,Eye Doctor,Food & Drink Shop
11,0,Burnside,Motel,Park,Intersection,Yoga Studio,Exhibit,Food,Flower Shop
13,0,Chatham,Park,Fried Chicken Joint,Discount Store,Boutique,Donut Shop,Moving Target,Fast Food Restaurant
22,0,Englewood,Fast Food Restaurant,Discount Store,Train Station,Park,Nightclub,Yoga Studio,Exhibit
24,0,Fuller Park,Park,Train Station,Fast Food Restaurant,Adult Boutique,Department Store,Baseball Field,Bus Station
43,0,Morgan Park,Currency Exchange,Train Station,Park,Ethiopian Restaurant,Food & Drink Shop,Food,Flower Shop
52,0,Norwood Park,Park,Speakeasy,Bowling Alley,Clothing Store,Exhibit,Food & Drink Shop,Food
54,0,Oakland,Beach,Park,Yoga Studio,Exhibit,Food & Drink Shop,Food,Flower Shop
69,0,West Pullman,Pool,Restaurant,Park,Caribbean Restaurant,Deli / Bodega,Ethiopian Restaurant,Flower Shop


<h2> Cluster #1 has the most neighborhoods, these are highly populated areas with many restaurants  </h2>

In [50]:
neighborhoods_venues_sorted[neighborhoods_venues_sorted['Cluster Labels'] == 1.0].head(53)

Unnamed: 0,Cluster Labels,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue
0,1,Albany Park,Pizza Place,Korean Restaurant,Discount Store,Dive Bar,Coffee Shop,Mexican Restaurant,Hookah Bar
2,1,Armour Square,Chinese Restaurant,Hot Dog Joint,Italian Restaurant,Intersection,Light Rail Station,Sports Bar,Asian Restaurant
3,1,Ashburn,Intersection,Liquor Store,Pizza Place,Italian Restaurant,Cosmetics Shop,Fast Food Restaurant,Bus Station
5,1,Austin,Breakfast Spot,Discount Store,Food,Fried Chicken Joint,Café,Thrift / Vintage Store,Athletics & Sports
6,1,Avalon Park,Burger Joint,Boutique,ATM,Sandwich Place,Fast Food Restaurant,Grocery Store,Cajun / Creole Restaurant
7,1,Avondale,Mexican Restaurant,Food Truck,Coffee Shop,South American Restaurant,Beer Store,Big Box Store,Korean Restaurant
8,1,Beverly,Pizza Place,BBQ Joint,Discount Store,Cosmetics Shop,Convenience Store,Caribbean Restaurant,Shopping Mall
9,1,Bridgeport,Chinese Restaurant,Pizza Place,Bar,Korean Restaurant,Wings Joint,Grocery Store,American Restaurant
12,1,Calumet Heights,Construction & Landscaping,Gym / Fitness Center,Yoga Studio,Exhibit,Food & Drink Shop,Food,Flower Shop
14,1,Chicago Lawn,American Restaurant,Fish & Chips Shop,Intersection,Fast Food Restaurant,Yoga Studio,Exhibit,Food Court


<h2> In Cluster #2 the most common venue across all neighborhoods is mexican restaurants, with not many other restaurants. These neighborhoods are predominantly latino and hispanic </h2>

In [45]:
neighborhoods_venues_sorted[neighborhoods_venues_sorted['Cluster Labels'] == 2.0].head(15)

Unnamed: 0,Cluster Labels,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue
1,2,Archer Heights,Mexican Restaurant,Gas Station,Bakery,Convenience Store,Pizza Place,Bar,Seafood Restaurant
10,2,Brighton Park,Mexican Restaurant,Pizza Place,Grocery Store,Check Cashing Service,Burger Joint,Bus Station,Café
25,2,Gage Park,Mexican Restaurant,Bakery,Sporting Goods Shop,Convenience Store,Dessert Shop,Sandwich Place,Chinese Restaurant
30,2,Hermosa,Mexican Restaurant,Park,Latin American Restaurant,Fast Food Restaurant,BBQ Joint,Greek Restaurant,Asian Restaurant
40,2,Lower West Side,Mexican Restaurant,Food,Brewery,Flower Shop,Supermarket,Music Venue,Bar
42,2,Montclare,Mexican Restaurant,Pizza Place,Flea Market,Financial or Legal Service,Fast Food Restaurant,Exhibit,Food Court
48,2,New City,Mexican Restaurant,Shoe Store,Bank,Liquor Store,Check Cashing Service,Discount Store,Laundromat
60,2,South Lawndale,Mexican Restaurant,Nightclub,Pharmacy,Restaurant,Fast Food Restaurant,Taco Place,Gas Station
66,2,West Elsdon,Mexican Restaurant,Gym / Fitness Center,Bakery,Fast Food Restaurant,Donut Shop,Supermarket,Ice Cream Shop


<h2> Cluster #3 is an outlier that is not similar to any of the other neighborhoods, this area is lacking restaurants </h2>

In [46]:
neighborhoods_venues_sorted[neighborhoods_venues_sorted['Cluster Labels'] == 3.0].head(15)

Unnamed: 0,Cluster Labels,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue
51,3,North Park,Department Store,Gymnastics Gym,Yoga Studio,Exhibit,Food & Drink Shop,Food,Flower Shop


<h2> Cluster #4 is another outlier that is not similar to the other neighborhoods, still appears to have restaurants, specifically ethiopian restaurants also appears to be a lot of grocery stores which means it must still be a populated area  </h2>

In [47]:
neighborhoods_venues_sorted[neighborhoods_venues_sorted['Cluster Labels'] == 4.0].head(15)

Unnamed: 0,Cluster Labels,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue
56,4,Riverdale,Grocery Store,Yoga Studio,Ethiopian Restaurant,Food & Drink Shop,Food,Flower Shop,Flea Market
