# Capstone Project
## Investigating the best neighborhood in which to open a new restaurant in Chicago

### Introduction: A new client plans to open a new Italian restaurant in Chicago and is currently considering options for where it should be located. He has hired us to find the best location, based on the restaurant options that are already available to residents in neighborhoods throughout the city.

### Data: We will utilize Foursquare to find information about the most common venues in each area. We will investigate which communities in Chicago have the highest need for restaurants and, in particular, which are most lacking in options.

In [2]:
import pandas as pd
import numpy as np
!pip install geopy



In [3]:
#Get list of neighborhoods & community areas in Chicago
html_chi = pd.read_html('https://en.wikipedia.org/wiki/List_of_neighborhoods_in_Chicago')
# The first table parsed from the page is used as the neighborhood list.
# rename() returns a new frame, so the raw table in html_chi[0] stays untouched
# and this cell can be re-run without depending on earlier in-place edits.
chicago = html_chi[0].rename(columns = {'Community area': 'Community Area'})

In [4]:
#Create dataframe of latitudes and longitudes for each neighborhood
#Ignore neighborhoods not recognized by Nominatim
from geopy.geocoders import Nominatim
from geopy.extra.rate_limiter import RateLimiter

locator = Nominatim(user_agent = 'my_Geocoder')
# Space the lookups out (one per second) and let the limiter retry transient
# failures; a lookup that still fails comes back as None and is skipped below,
# the same way an unrecognized neighborhood is.
geocode = RateLimiter(locator.geocode, min_delay_seconds=1)

# Collect one record per geocoded neighborhood and build the frame in a single
# step: DataFrame.append no longer exists in pandas 2.0, and growing a frame
# row by row copies it on every iteration.
geocoded_rows = []
for hood in chicago['Neighborhood']:
    location = geocode(hood + ", Chicago")
    if location is not None:
        geocoded_rows.append({'Neighborhood': hood,
                              'Latitude': location.latitude,
                              'Longitude': location.longitude})

# Explicit columns keep the expected schema even when nothing could be geocoded
latlong = pd.DataFrame(geocoded_rows, columns=['Neighborhood', 'Latitude', 'Longitude'])
latlong.head()

Unnamed: 0,Neighborhood,Latitude,Longitude
0,Albany Park,41.971937,-87.716174
1,Altgeld Gardens,41.654864,-87.600446
2,Andersonville,41.977139,-87.669273
3,Archer Heights,41.811422,-87.726165
4,Armour Square,41.840033,-87.633107


In [19]:
#Remove unrecognized neighborhoods from original dataframe
#Remove incorrectly located neighborhoods (predetermined) from dataframe
chi_hood = chicago['Neighborhood'].tolist()
latlong_hood = latlong['Neighborhood'].tolist()
# Deliberately not called `list`: that name shadows the built-in for every
# later cell in the kernel.
mislocated_hoods = ['Big Oaks','New Eastside', 'West Chesterfield', 'Belmont Heights', 'Parkview', 'Old Town Triangle', "River's Edge", 'River North',
                    'University Village', 'Central Station']

# Keep a row only if its neighborhood was geocoded and is not on the exclusion
# list; two boolean masks replace re-filtering the frame once per name.
was_geocoded = chicago['Neighborhood'].isin(latlong_hood)
is_mislocated = chicago['Neighborhood'].isin(mislocated_hoods)
chicago = chicago[was_geocoded & ~is_mislocated]

In [20]:
#Join tables
# Inner join on the columns the two frames share, attaching the coordinates
# from latlong to each remaining neighborhood row.
chicago = chicago.merge(latlong)

In [None]:
#Download required dependencies
import json 
import requests 
from pandas.io.json import json_normalize
import matplotlib.cm as cm
import matplotlib.colors as colors
from sklearn.cluster import KMeans

!pip install folium
import folium

In [26]:
# Preview the joined table: neighborhood, community area and coordinates
chicago.head()

Unnamed: 0,Neighborhood,Community Area,Latitude,Longitude
0,Albany Park,Albany Park,41.971937,-87.716174
1,Altgeld Gardens,Riverdale,41.654864,-87.600446
2,Andersonville,Edgewater,41.977139,-87.669273
3,Archer Heights,Archer Heights,41.811422,-87.726165
4,Armour Square,Armour Square,41.840033,-87.633107


In [62]:
#create map
# Centre the map on the coordinates Nominatim reports for the city itself
geolocator = Nominatim(user_agent="my_Geocoder")
location = geolocator.geocode('Chicago, USA')
latitude, longitude = location.latitude, location.longitude

map_chicago = folium.Map(location=[latitude, longitude], zoom_start=10)

# add markers to map
# One circle per neighborhood; the popup shows the neighborhood name
marker_rows = zip(chicago['Latitude'], chicago['Longitude'], chicago['Neighborhood'])
for lat, lng, neighborhood in marker_rows:
    label = folium.Popup('{}'.format(neighborhood), parse_html=True)
    marker = folium.CircleMarker(
        [lat, lng],
        radius= 5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False)
    marker.add_to(map_chicago)

map_chicago

In [63]:
#Define Foursquare credentials and version
# Credentials are read from the environment so they are never saved with the
# notebook. NOTE(review): the ID/secret that were previously hardcoded in this
# cell have been exposed and should be rotated in the Foursquare console.
import os
import warnings

CLIENT_ID = os.environ.get('FOURSQUARE_CLIENT_ID', '') # your Foursquare ID
CLIENT_SECRET = os.environ.get('FOURSQUARE_CLIENT_SECRET', '') # your Foursquare Secret
VERSION = '20180605' # Foursquare API version

# Surface a missing credential here rather than as an opaque API error later
if not CLIENT_ID or not CLIENT_SECRET:
    warnings.warn('FOURSQUARE_CLIENT_ID and/or FOURSQUARE_CLIENT_SECRET is not set; '
                  'Foursquare requests will be rejected until both are exported.')

In [102]:
#Get 50 nearby venues for each neighborhood

def getNearbyVenues(names, latitudes, longitudes, radius=500, LIMIT = 50):
    """Query the Foursquare explore endpoint around each neighborhood.

    Parameters
    ----------
    names, latitudes, longitudes : iterables
        Parallel sequences giving each neighborhood's name and coordinates;
        they are consumed together with zip().
    radius : number, default 500
        Forwarded as the `radius` query parameter (presumably metres --
        confirm against the Foursquare API reference).
    LIMIT : int, default 50
        Forwarded as the `limit` query parameter.

    Returns
    -------
    pandas.DataFrame
        One row per returned venue with the columns 'Neighborhood',
        'Neighborhood Latitude', 'Neighborhood Longitude', 'Venue',
        'Venue Latitude', 'Venue Longitude' and 'Venue Category'. The frame
        has these columns even when no venue was returned.

    Raises
    ------
    requests.HTTPError
        If Foursquare answers with a 4xx/5xx status (e.g. bad credentials).
    """
    columns = ['Neighborhood',
               'Neighborhood Latitude',
               'Neighborhood Longitude',
               'Venue',
               'Venue Latitude',
               'Venue Longitude',
               'Venue Category']
    url = 'https://api.foursquare.com/v2/venues/explore'

    venue_rows = []
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)

        # Let requests build and encode the query string instead of
        # formatting the credentials into the URL by hand.
        params = {
            'client_id': CLIENT_ID,
            'client_secret': CLIENT_SECRET,
            'v': VERSION,
            'll': '{},{}'.format(lat, lng),
            'radius': radius,
            'limit': LIMIT,
        }
        # A timeout stops one stalled request from hanging the whole loop
        response = requests.get(url, params=params, timeout=30)
        # Fail with the HTTP error rather than a KeyError on the JSON body
        response.raise_for_status()

        groups = response.json().get('response', {}).get('groups') or []
        items = groups[0].get('items', []) if groups else []

        for item in items:
            venue = item['venue']
            # A venue may come back without any category; record it as
            # missing instead of raising IndexError on categories[0].
            categories = venue.get('categories') or []
            category = categories[0]['name'] if categories else None
            venue_rows.append((
                name,
                lat,
                lng,
                venue['name'],
                venue['location']['lat'],
                venue['location']['lng'],
                category))

    # Passing columns to the constructor keeps the schema for an empty result;
    # assigning .columns afterwards raises when there are no rows.
    return pd.DataFrame(venue_rows, columns=columns)

In [103]:
# Fetch the venues around every neighborhood centre listed in the chicago frame
chicago_venues = getNearbyVenues(
    chicago['Neighborhood'],
    chicago['Latitude'],
    chicago['Longitude'],
)

Albany Park
Altgeld Gardens
Andersonville
Archer Heights
Armour Square
Ashburn
Auburn Gresham
Avalon Park
Avondale
Avondale Gardens
Back of the Yards
Belmont Central
Belmont Gardens
Belmont Terrace
Beverly
Beverly View
Beverly Woods
Boystown
Bowmanville
Brainerd
Brickyard
Bridgeport
Brighton Park
Bronzeville
Bucktown
Budlong Woods
Buena Park
Burnside
Cabrini–Green
Calumet Heights
Canaryville
Chatham
Chicago Lawn
Chinatown
Chrysler Village
Clarendon Park
Clearing East
Clearing West
Cragin
Crestline
Dearborn Homes
Dearborn Park
Douglas Park
Dunning
East Beverly
East Chatham
East Garfield Park
East Hyde Park
East Pilsen
East Side
East Village
Eden Green
Edgebrook
Edgewater
Edgewater Beach
Edgewater Glen
Edison Park
Englewood
Fernwood
Fifth City
Ford City
Forest Glen
Fuller Park
Gage Park
Galewood
The Gap
Garfield Ridge
Gladstone Park
Gold Coast
Golden Gate
Goose Island
Graceland West
Grand Boulevard
Grand Crossing
Greater Grand Crossing
Greektown
Gresham
Groveland Park
Hamilton Park
Hanso

In [104]:
#Explore venues
# One indicator column per venue category. The dtype is pinned so the table
# holds 0/1 integers on every pandas version (pandas 2.0 switched the default
# dummy dtype to bool).
chicago_onehot = pd.get_dummies(chicago_venues[['Venue Category']], prefix="", prefix_sep="", dtype='uint8')

# If a venue category is itself called "Neighborhood", assigning the label
# column under that name would silently overwrite the indicator, so move the
# indicator out of the way first.
if 'Neighborhood' in chicago_onehot.columns:
    chicago_onehot = chicago_onehot.rename(columns={'Neighborhood': 'Neighborhood (Venue Category)'})

# Put the neighborhood label first explicitly rather than assuming a newly
# assigned column always lands in the last position.
chicago_onehot.insert(0, 'Neighborhood', chicago_venues['Neighborhood'])

chicago_onehot.head()

Unnamed: 0,Neighborhood,ATM,Accessories Store,Adult Boutique,African Restaurant,Airport Lounge,Airport Service,American Restaurant,Antique Shop,Arcade,...,Vietnamese Restaurant,Waste Facility,Water Park,Weight Loss Center,Whisky Bar,Wine Bar,Wine Shop,Wings Joint,Women's Store,Yoga Studio
0,Albany Park,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,Albany Park,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,Albany Park,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,Albany Park,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,Albany Park,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [None]:
#Add community area to onehot table for additional grouping option
#chicago_onehot = pd.merge(chicago[['Neighborhood', 'Community Area']], chicago_onehot)

In [118]:
# Average the indicator columns within each neighborhood, i.e. the share of
# that neighborhood's venues falling in each category; the grouping key is
# kept as an ordinary first column.
chicago_grouped = chicago_onehot.groupby('Neighborhood', as_index=False).mean()
chicago_grouped.head()

Unnamed: 0,Neighborhood,ATM,Accessories Store,Adult Boutique,African Restaurant,Airport Lounge,Airport Service,American Restaurant,Antique Shop,Arcade,...,Vietnamese Restaurant,Waste Facility,Water Park,Weight Loss Center,Whisky Bar,Wine Bar,Wine Shop,Wings Joint,Women's Store,Yoga Studio
0,Albany Park,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,Altgeld Gardens,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,Andersonville,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,...,0.0,0.0,0.0,0.0,0.02,0.0,0.02,0.0,0.0,0.0
3,Archer Heights,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.066667,0.0,0.0
4,Armour Square,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [123]:
#Determine most common venues
def return_most_common_venues(row, num_top_venues):
    """Return the labels of the `num_top_venues` largest values in `row`.

    The first element of `row` is skipped before ranking (callers pass rows
    whose first field is the neighborhood name). The remaining values are
    sorted in descending order and the index labels of the leading entries
    are returned as an array; fewer labels come back when the row is shorter
    than `num_top_venues`.
    """
    ranked = row.iloc[1:].sort_values(ascending=False)
    return ranked.index.values[:num_top_venues]

In [137]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

def _ordinal(n):
    """Return n with its English ordinal suffix (1st, 2nd, 3rd, 4th, 11th, 21st, ...)."""
    # 11, 12 and 13 take "th" even though they end in 1, 2 and 3
    if 11 <= n % 100 <= 13:
        return '{}th'.format(n)
    last_digit = n % 10
    suffix = indicators[last_digit - 1] if 1 <= last_digit <= 3 else 'th'
    return '{}{}'.format(n, suffix)

# Header names are derived directly from the rank; no exception handling is
# needed to pick the suffix.
columns = ['Neighborhood'] + ['{} Most Common Venue'.format(_ordinal(rank))
                              for rank in range(1, num_top_venues + 1)]

# Rank the categories of every neighborhood row, then build the table once
top_venues = [return_most_common_venues(chicago_grouped.iloc[ind, :], num_top_venues)
              for ind in range(chicago_grouped.shape[0])]

neighborhoods_venues_sorted = pd.DataFrame(top_venues, columns=columns[1:], index=chicago_grouped.index)
neighborhoods_venues_sorted.insert(0, 'Neighborhood', chicago_grouped['Neighborhood'])

neighborhoods_venues_sorted.head()

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Albany Park,Bakery,Fried Chicken Joint,Sandwich Place,Donut Shop,Grocery Store,Chinese Restaurant,Korean Restaurant,Karaoke Bar,Cocktail Bar,Diner
1,Altgeld Gardens,Park,Yoga Studio,Eastern European Restaurant,Empanada Restaurant,Ethiopian Restaurant,Event Space,Falafel Restaurant,Farm,Farmers Market,Fast Food Restaurant
2,Andersonville,Coffee Shop,Bakery,Italian Restaurant,Breakfast Spot,Beer Bar,Bar,Sushi Restaurant,Pizza Place,Mexican Restaurant,Frozen Yogurt Shop
3,Archer Heights,Mexican Restaurant,Grocery Store,Mobile Phone Shop,Ice Cream Shop,Coffee Shop,Sandwich Place,Gas Station,Bank,Big Box Store,Wings Joint
4,Armour Square,Chinese Restaurant,Cosmetics Shop,Italian Restaurant,Gas Station,Sandwich Place,Asian Restaurant,Grocery Store,Hot Dog Joint,Dance Studio,Field


In [138]:
#Cluster the results & combine with location data
kclusters = 5

# Keyword form: DataFrame.drop no longer accepts the axis as a positional
# argument in pandas 2.0.
chicago_grouped_clustering = chicago_grouped.drop(columns='Neighborhood')

# n_init is pinned to the long-standing default of 10 so the clustering does
# not change with the installed scikit-learn version.
kmeans = KMeans(n_clusters=kclusters, random_state=0, n_init=10).fit(chicago_grouped_clustering)

# insert() raises if the column already exists, so drop the labels left by a
# previous run to keep this cell re-runnable.
if 'Cluster Labels' in neighborhoods_venues_sorted.columns:
    neighborhoods_venues_sorted = neighborhoods_venues_sorted.drop(columns='Cluster Labels')
neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

# Attach coordinates and community area to each neighborhood's cluster label
# and top venues, joining on the neighborhood name.
chicago_merged = pd.merge(chicago, neighborhoods_venues_sorted, on='Neighborhood')

chicago_merged.head()

Unnamed: 0,Neighborhood,Community Area,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Albany Park,Albany Park,41.971937,-87.716174,2,Bakery,Fried Chicken Joint,Sandwich Place,Donut Shop,Grocery Store,Chinese Restaurant,Korean Restaurant,Karaoke Bar,Cocktail Bar,Diner
1,Altgeld Gardens,Riverdale,41.654864,-87.600446,3,Park,Yoga Studio,Eastern European Restaurant,Empanada Restaurant,Ethiopian Restaurant,Event Space,Falafel Restaurant,Farm,Farmers Market,Fast Food Restaurant
2,Andersonville,Edgewater,41.977139,-87.669273,2,Coffee Shop,Bakery,Italian Restaurant,Breakfast Spot,Beer Bar,Bar,Sushi Restaurant,Pizza Place,Mexican Restaurant,Frozen Yogurt Shop
3,Archer Heights,Archer Heights,41.811422,-87.726165,2,Mexican Restaurant,Grocery Store,Mobile Phone Shop,Ice Cream Shop,Coffee Shop,Sandwich Place,Gas Station,Bank,Big Box Store,Wings Joint
4,Armour Square,Armour Square,41.840033,-87.633107,4,Chinese Restaurant,Cosmetics Shop,Italian Restaurant,Gas Station,Sandwich Place,Asian Restaurant,Grocery Store,Hot Dog Joint,Dance Studio,Field


In [143]:
# Display the merged table ordered by cluster label so the members of each
# cluster appear together (the result is shown only, not stored)
chicago_merged.sort_values(by = 'Cluster Labels')

Unnamed: 0,Neighborhood,Community Area,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
151,Peterson Park,West Ridge,41.989165,-87.720431,0,Nature Preserve,Soccer Field,Dance Studio,Italian Restaurant,Park,Yoga Studio,Ethiopian Restaurant,Event Space,Falafel Restaurant,Farm
141,Oakland,Oakland,41.823653,-87.608242,0,Park,Boutique,Lake,Track,Discount Store,Public Art,Empanada Restaurant,Ethiopian Restaurant,Event Space,Falafel Restaurant
137,North Park,North Park,41.984955,-87.722933,0,Playground,Scenic Lookout,Gymnastics Gym,Park,Nature Preserve,Farm,Electronics Store,Empanada Restaurant,Ethiopian Restaurant,Event Space
85,Humboldt Park,Humboldt Park,41.905767,-87.704174,0,Food Truck,Park,Soccer Field,Museum,Café,Lake,Garden,Fishing Store,Fish Market,Fish & Chips Shop
66,Garfield Ridge,Garfield Ridge,41.803617,-87.745489,0,Discount Store,Park,Construction & Landscaping,Mexican Restaurant,Gas Station,Sandwich Place,Paper / Office Supplies Store,Yoga Studio,Empanada Restaurant,Ethiopian Restaurant
29,Calumet Heights,Calumet Heights,41.730035,-87.579213,0,Gym / Fitness Center,Bus Station,Pharmacy,Deli / Bodega,Park,Yoga Studio,Fast Food Restaurant,Ethiopian Restaurant,Event Space,Falafel Restaurant
193,Vittum Park,Garfield Ridge,41.803356,-87.748531,0,Mexican Restaurant,Discount Store,Furniture / Home Store,Park,Yoga Studio,Field,Ethiopian Restaurant,Event Space,Falafel Restaurant,Farm
170,Sauganash,Forest Glen,41.990036,-87.742289,0,Park,Indian Restaurant,Fast Food Restaurant,Pharmacy,Asian Restaurant,Empanada Restaurant,Ethiopian Restaurant,Event Space,Falafel Restaurant,Farm
42,Douglas Park,North Lawndale,41.860066,-87.699355,0,Soccer Field,Sandwich Place,Café,Liquor Store,Music Venue,Park,Farmers Market,Ethiopian Restaurant,Event Space,Falafel Restaurant
124,Mount Greenwood,Mount Greenwood,41.698089,-87.708662,0,Cosmetics Shop,Mexican Restaurant,Home Service,Park,Yoga Studio,Field,Ethiopian Restaurant,Event Space,Falafel Restaurant,Farm


In [140]:
#New map with clusters
import math

map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters: one evenly spaced rainbow color per cluster
colors_array = cm.rainbow(np.linspace(0, 1, kclusters))
rainbow = [colors.rgb2hex(rgba) for rgba in colors_array]

# add markers to the map
for lat, lon, poi, cluster in zip(chicago_merged['Latitude'], chicago_merged['Longitude'],
                                  chicago_merged['Neighborhood'], chicago_merged['Cluster Labels']):
    # Rows without a cluster label get no marker
    if math.isnan(cluster):
        continue
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    cluster = int(cluster)
    # Indexing with cluster - 1 is kept so the rendered colors do not change;
    # label 0 picks the last color through Python's negative indexing.
    marker_color = rainbow[cluster-1]
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=marker_color,
        fill=True,
        fill_color=marker_color,
        fill_opacity=0.7).add_to(map_clusters)

map_clusters