# Coursera Capstone Project
## Choosing the ideal location to open a Chinese Restaurant in Chicago

In [1]:
import random # library for random number generation
import numpy as np # library for vectorized computation
import pandas as pd # library to process data as dataframes

import matplotlib.pyplot as plt # plotting library
# backend for rendering plots within the browser
%matplotlib inline 

from sklearn.cluster import KMeans 
from sklearn.datasets.samples_generator import make_blobs

print('Libraries imported.')

Libraries imported.


### 1.Scrape the Wikipedia page for neighborhood information

In [2]:
# import the library we use to open URLs
import urllib.request

# specify which URL/web page we are going to be scraping
url = "https://en.wikipedia.org/wiki/Community_areas_in_Chicago"
# open the url using urllib.request and put the HTML into the page variable
page = urllib.request.urlopen(url)

### Using BeautifulSoup package to scrape the table

In [30]:
# import the BeautifulSoup library so we can parse HTML and XML documents
from bs4 import BeautifulSoup
import requests
# send the GET request
data = requests.get("https://en.wikipedia.org/wiki/Category:Community_areas_of_Chicago").text
soup = BeautifulSoup(data, 'html.parser')

# create a list to store neighborhood data
neigh = []

### Get the dataframe of 78 community areas in Chicago

In [31]:
right_table=soup.find_all('div', class_='mw-category')[0].findAll('li')

for row in right_table:
    neigh.append(row.text)

neigh = neigh[4:]
    
df = pd.DataFrame({'Neighborhood': neigh})
df['Neighborhood'] = df.Neighborhood.str.replace(', Chicago','', regex=True)
df.reset_index(drop=True, inplace=True)
df.head()

Unnamed: 0,Neighborhood
0,Albany Park
1,Archer Heights
2,Armour Square
3,Ashburn
4,Auburn Gresham


In [14]:
# print the number of rows of the dataframe
df.head()

Unnamed: 0,Neighborhood
0,Albany Park
1,Archer Heights
2,Armour Square
3,Ashburn
4,Auburn Gresham


### 2.Get the geographical coordinates of the community areas

Import relevant libraries

In [7]:
import requests # library to handle requests
import pandas as pd # library for data analsysis
import numpy as np # library to handle data in a vectorized manner
import random # library for random number generation

from geopy.geocoders import Nominatim # module to convert an address into latitude and longitude values

# libraries for displaying images
from IPython.display import Image 
from IPython.core.display import HTML 
    
# tranforming json file into a pandas dataframe library
from pandas.io.json import json_normalize
import folium # plotting library

print('Folium installed')
print('Libraries imported.')

Folium installed
Libraries imported.


In [45]:
import geocoder
# define a function to get coordinates
def get_coor(neighborhood):
    # initialize your variable to None
    lat_long_coords = None
    # loop until you get the coordinates
    while(lat_long_coords is None):
        g = geocoder.arcgis('{},IL,USA'.format(neighborhood))
        lat_long_coords = g.latlng
    return lat_long_coords

In [49]:
coords = [ get_coor(nei) for nei in df['Neighborhood'].tolist() ]

In [95]:
df_coor = pd.DataFrame(coords, columns=['Latitude', 'Longitude'])
df['Latitude'] = df_coor['Latitude']
df['Longitude'] = df_coor['Longitude']
df[15:]

Unnamed: 0,Neighborhood,Latitude,Longitude
15,Chicago Lawn,41.77543,-87.69634
16,Clearing,41.77809,-87.75978
17,Douglas,38.42458,-89.98259
18,Dunning,41.95274,-87.79651
19,East Garfield Park,41.87863,-87.70514
...,...,...,...
73,West Lawn,41.77315,-87.72445
74,West Pullman,41.67951,-87.64189
75,West Ridge,41.99975,-87.69284
76,West Town,41.89329,-87.65743


In [55]:
address = 'Albany Park Chicago'

lat = df.loc[0,'Latitude']
long = df.loc[0,'Longitude']

In [60]:
# create map of Chicago neighborhoods using latitude and longitude values
map_chicago = folium.Map(location=[lat,long], zoom_start=10)

# add markers to map
for lat, lng, neighborhood in zip(df['Latitude'], df['Longitude'], 
                                           df['Neighborhood']):
    label = '{}'.format(neighborhood)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_chicago)  
    
map_chicago

In [59]:
# save the map as HTML file
map_chicago.save('map_chicago.html')

### 3. Use the Foursquare API to explore the neighborhoods

In [77]:
CLIENT_ID = 'C41VKH2DGZUFOXKPMNF51ZIWFVDWXXNXPEBCP3DLJBLBRFCY' # your Foursquare ID
CLIENT_SECRET = 'WMFRFL3TVPRTKOU3ZL1ZFXYBJKGNLGE13BQUG2YJZHDSU5M0' # your Foursquare Secret
VERSION = '20180604'
LIMIT = 30
print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: C41VKH2DGZUFOXKPMNF51ZIWFVDWXXNXPEBCP3DLJBLBRFCY
CLIENT_SECRET:WMFRFL3TVPRTKOU3ZL1ZFXYBJKGNLGE13BQUG2YJZHDSU5M0


### Search for Italian restaurants around Chicago Loop

In [96]:
address = 'Chicago Loop, Chicago, IL, USA'

latitude = 41.8837
longitude = -87.628858
print(latitude, longitude)

41.8837 -87.628858


In [106]:
search_query = 'Chinese Restaurant'
radius = 1000
print(search_query + ' .... OK!')

Chinese Restaurant .... OK!


In [107]:
url = 'https://api.foursquare.com/v2/venues/search?client_id={}&client_secret={}&ll={},{}&v={}&query={}&radius={}&limit={}'.format(CLIENT_ID, CLIENT_SECRET, latitude, longitude, VERSION, search_query, radius, LIMIT)
url

'https://api.foursquare.com/v2/venues/search?client_id=C41VKH2DGZUFOXKPMNF51ZIWFVDWXXNXPEBCP3DLJBLBRFCY&client_secret=WMFRFL3TVPRTKOU3ZL1ZFXYBJKGNLGE13BQUG2YJZHDSU5M0&ll=41.8837,-87.628858&v=20180604&query=Chinese Restaurant&radius=1000&limit=30'

In [108]:
results = requests.get(url).json()
results

{'meta': {'code': 200, 'requestId': '5ed073b247e0d6001b3acc70'},
 'response': {'venues': [{'id': '4f32527d19836c91c7cc560f',
    'name': 'Sixty-Five Chinese Restaurant',
    'location': {'address': '201 W Madison St',
     'lat': 41.881935,
     'lng': -87.633851,
     'labeledLatLngs': [{'label': 'display',
       'lat': 41.881935,
       'lng': -87.633851}],
     'distance': 458,
     'postalCode': '60606',
     'cc': 'US',
     'city': 'Chicago',
     'state': 'IL',
     'country': 'United States',
     'formattedAddress': ['201 W Madison St',
      'Chicago, IL 60606',
      'United States']},
    'categories': [{'id': '4bf58dd8d48988d145941735',
      'name': 'Chinese Restaurant',
      'pluralName': 'Chinese Restaurants',
      'shortName': 'Chinese',
      'icon': {'prefix': 'https://ss3.4sqi.net/img/categories_v2/food/asian_',
       'suffix': '.png'},
      'primary': True}],
    'referralId': 'v-1590719501',
    'hasPerk': False},
   {'id': '4b295853f964a520489d24e3',
    'na

In [109]:
# assign relevant part of JSON to venues
venues = results['response']['venues']

# tranform venues into a dataframe
dataframe = json_normalize(venues)
dataframe.head()

  """


Unnamed: 0,id,name,categories,referralId,hasPerk,location.address,location.lat,location.lng,location.labeledLatLngs,location.distance,...,location.formattedAddress,location.crossStreet,delivery.id,delivery.url,delivery.provider.name,delivery.provider.icon.prefix,delivery.provider.icon.sizes,delivery.provider.icon.name,venuePage.id,location.neighborhood
0,4f32527d19836c91c7cc560f,Sixty-Five Chinese Restaurant,"[{'id': '4bf58dd8d48988d145941735', 'name': 'C...",v-1590719501,False,201 W Madison St,41.881935,-87.633851,"[{'label': 'display', 'lat': 41.881935, 'lng':...",458,...,"[201 W Madison St, Chicago, IL 60606, United S...",,,,,,,,,
1,4b295853f964a520489d24e3,65 Chinese Restaurant,"[{'id': '4bf58dd8d48988d145941735', 'name': 'C...",v-1590719501,False,201 W Madison St,41.881909,-87.634165,"[{'label': 'display', 'lat': 41.88190920166713...",482,...,"[201 W Madison St (btwn Wells & Franklin St), ...",btwn Wells & Franklin St,,,,,,,,
2,4b65c1ecf964a520c7fd2ae3,Triple 1 Chinese Restaurant,"[{'id': '4bf58dd8d48988d145941735', 'name': 'C...",v-1590719501,False,177 N State St,41.885425,-87.627963,"[{'label': 'display', 'lat': 41.88542492566269...",205,...,"[177 N State St (at W. Lake St.), Chicago, IL ...",at W. Lake St.,,,,,,,,
3,4d0a576d4abf721e2e52c85e,65 Chinese Restaurant,"[{'id': '4bf58dd8d48988d145941735', 'name': 'C...",v-1590719501,False,315 S LaSalle St,41.878009,-87.632672,"[{'label': 'display', 'lat': 41.87800937208755...",707,...,"[315 S LaSalle St, Chicago, IL 60604, United S...",,,,,,,,,
4,531af2b7498ef3f61eba1e31,Yu Shan Chinese Restaurant,"[{'id': '4bf58dd8d48988d1c4941735', 'name': 'R...",v-1590719501,False,17 E Ohio St,41.89225,-87.627305,"[{'label': 'display', 'lat': 41.89225, 'lng': ...",960,...,"[17 E Ohio St, Chicago, IL 60611, United States]",,,,,,,,,


#### Define information of interest and filter dataframe

In [112]:
# keep only columns that include venue name, and anything that is associated with location
filtered_columns = ['name', 'categories'] + [col for col in dataframe.columns if col.startswith('location.')] + ['id']
dataframe_filtered = dataframe.loc[:, filtered_columns]

# function that extracts the category of the venue
def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return None
    else:
        return categories_list[0]['name']

# filter the category for each row
dataframe_filtered['categories'] = dataframe_filtered.apply(get_category_type, axis=1)

# clean column names by keeping only last term
dataframe_filtered.columns = [column.split('.')[-1] for column in dataframe_filtered.columns]

dataframe_filtered.head()

Unnamed: 0,name,categories,address,lat,lng,labeledLatLngs,distance,postalCode,cc,city,state,country,formattedAddress,crossStreet,neighborhood,id
0,Sixty-Five Chinese Restaurant,Chinese Restaurant,201 W Madison St,41.881935,-87.633851,"[{'label': 'display', 'lat': 41.881935, 'lng':...",458,60606,US,Chicago,IL,United States,"[201 W Madison St, Chicago, IL 60606, United S...",,,4f32527d19836c91c7cc560f
1,65 Chinese Restaurant,Chinese Restaurant,201 W Madison St,41.881909,-87.634165,"[{'label': 'display', 'lat': 41.88190920166713...",482,60606,US,Chicago,IL,United States,"[201 W Madison St (btwn Wells & Franklin St), ...",btwn Wells & Franklin St,,4b295853f964a520489d24e3
2,Triple 1 Chinese Restaurant,Chinese Restaurant,177 N State St,41.885425,-87.627963,"[{'label': 'display', 'lat': 41.88542492566269...",205,60601,US,Chicago,IL,United States,"[177 N State St (at W. Lake St.), Chicago, IL ...",at W. Lake St.,,4b65c1ecf964a520c7fd2ae3
3,65 Chinese Restaurant,Chinese Restaurant,315 S LaSalle St,41.878009,-87.632672,"[{'label': 'display', 'lat': 41.87800937208755...",707,60604,US,Chicago,IL,United States,"[315 S LaSalle St, Chicago, IL 60604, United S...",,,4d0a576d4abf721e2e52c85e
4,Yu Shan Chinese Restaurant,Restaurant,17 E Ohio St,41.89225,-87.627305,"[{'label': 'display', 'lat': 41.89225, 'lng': ...",960,60611,US,Chicago,IL,United States,"[17 E Ohio St, Chicago, IL 60611, United States]",,,531af2b7498ef3f61eba1e31


In [117]:
Crestaurant = dataframe_filtered.head()
Crestaurant

Unnamed: 0,name,categories,address,lat,lng,labeledLatLngs,distance,postalCode,cc,city,state,country,formattedAddress,crossStreet,neighborhood,id
0,Sixty-Five Chinese Restaurant,Chinese Restaurant,201 W Madison St,41.881935,-87.633851,"[{'label': 'display', 'lat': 41.881935, 'lng':...",458,60606,US,Chicago,IL,United States,"[201 W Madison St, Chicago, IL 60606, United S...",,,4f32527d19836c91c7cc560f
1,65 Chinese Restaurant,Chinese Restaurant,201 W Madison St,41.881909,-87.634165,"[{'label': 'display', 'lat': 41.88190920166713...",482,60606,US,Chicago,IL,United States,"[201 W Madison St (btwn Wells & Franklin St), ...",btwn Wells & Franklin St,,4b295853f964a520489d24e3
2,Triple 1 Chinese Restaurant,Chinese Restaurant,177 N State St,41.885425,-87.627963,"[{'label': 'display', 'lat': 41.88542492566269...",205,60601,US,Chicago,IL,United States,"[177 N State St (at W. Lake St.), Chicago, IL ...",at W. Lake St.,,4b65c1ecf964a520c7fd2ae3
3,65 Chinese Restaurant,Chinese Restaurant,315 S LaSalle St,41.878009,-87.632672,"[{'label': 'display', 'lat': 41.87800937208755...",707,60604,US,Chicago,IL,United States,"[315 S LaSalle St, Chicago, IL 60604, United S...",,,4d0a576d4abf721e2e52c85e
4,Yu Shan Chinese Restaurant,Restaurant,17 E Ohio St,41.89225,-87.627305,"[{'label': 'display', 'lat': 41.89225, 'lng': ...",960,60611,US,Chicago,IL,United States,"[17 E Ohio St, Chicago, IL 60611, United States]",,,531af2b7498ef3f61eba1e31


In [118]:
venues_map = folium.Map(location=[latitude, longitude], zoom_start=13) # generate map centred around the Conrad Hotel

# add a red circle marker to represent the Chicago Loop Community area
folium.features.CircleMarker(
    [latitude, longitude],
    radius=10,
    color='red',
    popup='Chicago Loop',
    fill = True,
    fill_color = 'red',
    fill_opacity = 0.6
).add_to(venues_map)

# add the Chinese restaurants as blue circle markers
for lat, lng, label in zip(Crestaurant.lat, Crestaurant.lng, Crestaurant.categories):
    folium.features.CircleMarker(
        [lat, lng],
        radius=5,
        color='blue',
        popup=label,
        fill = True,
        fill_color='blue',
        fill_opacity=0.6
    ).add_to(venues_map)

# display map
venues_map

In [119]:
venues_map.save('venues_map.html')