# IBM Applied Data Science Capstone Course by Coursera

## Week 5 Final Report

## Opening an Indian Restaurant in Mississauga, Canada

* Build a dataframe of neighborhoods in Mississauga, Canada by web scraping the data from a Wikipedia page
* Get the geographical coordinates of the neighborhoods
* Obtain the venue data for the neighborhoods from Foursquare API
* Explore and cluster the neighborhoods
* Select the best cluster to open a new Indian Restaurant

## 1. Import libraries

In [6]:
import json # library to handle JSON files
import os # read API credentials from environment variables
import time # pause between geocoding retries

import numpy as np # library to handle data in a vectorized manner

import pandas as pd # library for data analysis
pd.set_option("display.max_columns", None)
pd.set_option("display.max_rows", None)

from geopy.geocoders import Nominatim # convert an address into latitude and longitude values
import geocoder # to get coordinates

import requests # library to handle requests
from bs4 import BeautifulSoup # library to parse HTML and XML documents

from pandas.io.json import json_normalize # transform JSON file into a pandas dataframe

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans

import folium # map rendering library

## 2. Scrape data from a Wikipedia page into a DataFrame

In [7]:
# Download the Wikipedia category page that lists Mississauga neighbourhoods.
# The timeout keeps the cell from hanging indefinitely, and raise_for_status()
# stops the notebook here instead of parsing an HTTP error page further down.
response = requests.get(
    "https://en.wikipedia.org/wiki/Category:Neighbourhoods_in_Mississauga",
    timeout=30,
)
response.raise_for_status()
data = response.text

In [8]:
# build a BeautifulSoup tree from the downloaded HTML with Python's built-in parser
soup = BeautifulSoup(markup=data, features='html.parser')

In [9]:
# start from an empty container that will hold the scraped neighborhood names
neighborhoodList = list()

In [10]:
# Collect the text of every <li> inside the first "mw-category" container.
# The list is rebuilt from scratch (instead of appended to) so that re-running
# this cell does not duplicate every entry.
category_sections = soup.find_all("div", class_="mw-category")
if not category_sections:
    # Fail with a clear message rather than a bare IndexError on [0]
    raise ValueError("No 'mw-category' block found; the Wikipedia page layout may have changed.")
neighborhoodList = [row.text for row in category_sections[0].find_all("li")]

In [11]:
# one-column table holding the scraped names
df = pd.DataFrame(neighborhoodList, columns=["Neighborhood"])

df.head()

Unnamed: 0,Neighborhood
0,Template:Communities of Mississauga
1,Churchill Meadows
2,"Clarkson, Mississauga"
3,Cooksville (Mississauga)
4,"Dixie, Mississauga"


In [12]:
# Preview only: show the table without its first row.
# The result is not assigned, so df itself still contains that row.
df.iloc[1:]

Unnamed: 0,Neighborhood
1,Churchill Meadows
2,"Clarkson, Mississauga"
3,Cooksville (Mississauga)
4,"Dixie, Mississauga"
5,"Elmbank, Ontario"
6,Erin Mills
7,"Erindale, Mississauga"
8,"Lakeview, Mississauga"
9,"Lisgar, Ontario"
10,Lorne Park


In [13]:
# (rows, columns) of df; the preview in the previous cell did not modify it
df.shape

(18, 1)

## 3. Get the geographical coordinates

In [14]:
# define a function to get coordinates
def get_latlng(neighborhood, max_attempts=5, retry_delay=1.0):
    """Return ``[latitude, longitude]`` for a neighborhood of Mississauga.

    The neighborhood name is combined with ", Mississauga,Canada" and sent to
    the ArcGIS geocoder. A lookup that comes back without coordinates is
    retried, but only a bounded number of times, so a name the service cannot
    resolve (or an outage) no longer hangs the notebook forever.

    Parameters
    ----------
    neighborhood : str
        Name of the neighborhood to look up.
    max_attempts : int, optional
        Maximum number of geocoder calls before giving up (must be >= 1).
    retry_delay : float, optional
        Seconds to wait between consecutive attempts.

    Returns
    -------
    list
        The ``latlng`` value reported by the geocoder.

    Raises
    ------
    ValueError
        If ``max_attempts`` is smaller than 1.
    RuntimeError
        If no attempt produced coordinates.
    """
    if max_attempts < 1:
        raise ValueError("max_attempts must be at least 1")

    query = '{}, Mississauga,Canada'.format(neighborhood)
    for attempt in range(1, max_attempts + 1):
        lat_lng_coords = geocoder.arcgis(query).latlng
        if lat_lng_coords is not None:
            return lat_lng_coords
        # Pause before retrying, but not after the final failed attempt
        if attempt < max_attempts:
            time.sleep(retry_delay)

    raise RuntimeError(
        "Could not geocode {!r} after {} attempts".format(query, max_attempts)
    )

In [16]:
# geocode every neighborhood name, keeping the results in table order
coords = list(map(get_latlng, df["Neighborhood"]))

In [17]:
# Display the geocoder results, one entry per row of df["Neighborhood"] in the same order
coords

[[46.18341000000004, -83.02049999999997],
 [43.54834847752596, -79.74192778375495],
 [43.51397109965278, -79.61999463287037],
 [43.580040000000054, -79.61614999999995],
 [43.63368000000003, -79.61827999999997],
 [43.69100916009244, -79.60804559810617],
 [43.54101000000003, -79.68153999999998],
 [43.546550000000025, -79.65218999999996],
 [43.579360000000065, -79.55615999999998],
 [43.589600000000075, -79.78110999999996],
 [43.53146000000004, -79.62200999999999],
 [43.598928826074726, -79.57848603624613],
 [43.62776000000008, -79.72695999999996],
 [43.62776000000008, -79.72695999999996],
 [43.58502000000004, -79.76058999999998],
 [43.551420000000064, -79.58588999999995],
 [43.631054851885814, -79.60346869290403],
 [43.571708872585106, -79.70021637659542]]

In [18]:
# temporary two-column table built from the list of coordinate pairs
coordinate_columns = ['Latitude', 'Longitude']
df_coords = pd.DataFrame(data=coords, columns=coordinate_columns)

In [19]:
# attach both coordinate columns to the neighborhood table (aligned on the row index)
df = df.assign(
    Latitude=df_coords['Latitude'],
    Longitude=df_coords['Longitude'],
)

In [20]:
# Print (rows, columns) and then display the whole table including the new coordinate columns
print(df.shape)
df

(18, 3)


Unnamed: 0,Neighborhood,Latitude,Longitude
0,Template:Communities of Mississauga,46.18341,-83.0205
1,Churchill Meadows,43.548348,-79.741928
2,"Clarkson, Mississauga",43.513971,-79.619995
3,Cooksville (Mississauga),43.58004,-79.61615
4,"Dixie, Mississauga",43.63368,-79.61828
5,"Elmbank, Ontario",43.691009,-79.608046
6,Erin Mills,43.54101,-79.68154
7,"Erindale, Mississauga",43.54655,-79.65219
8,"Lakeview, Mississauga",43.57936,-79.55616
9,"Lisgar, Ontario",43.5896,-79.78111


In [21]:
# Dropping irrelevant data: Wikipedia's "Template:..." link is page navigation,
# not a neighborhood. Filtering by name instead of by position keeps this cell
# idempotent -- re-running it cannot remove a real neighborhood.
df = df[~df["Neighborhood"].str.startswith("Template:")].copy()
df

Unnamed: 0,Neighborhood,Latitude,Longitude
1,Churchill Meadows,43.548348,-79.741928
2,"Clarkson, Mississauga",43.513971,-79.619995
3,Cooksville (Mississauga),43.58004,-79.61615
4,"Dixie, Mississauga",43.63368,-79.61828
5,"Elmbank, Ontario",43.691009,-79.608046
6,Erin Mills,43.54101,-79.68154
7,"Erindale, Mississauga",43.54655,-79.65219
8,"Lakeview, Mississauga",43.57936,-79.55616
9,"Lisgar, Ontario",43.5896,-79.78111
10,Lorne Park,43.53146,-79.62201


In [22]:
# (rows, columns) of df after the non-neighborhood row was removed
df.shape

(17, 3)

In [25]:
# write the neighborhood table to df.csv, leaving out the row index
df.to_csv(path_or_buf="df.csv", index=False)

## 4. Create a map of Mississauga with neighborhoods superimposed on top

In [24]:
# get the coordinates of Mississauga
address = 'Mississauga, Canada'

geolocator = Nominatim(user_agent="my-application")
# Explicit timeout: the public Nominatim service can be slow to respond
location = geolocator.geocode(address, timeout=10)
if location is None:
    # geocode() returns None when nothing matches; fail here with a clear
    # message instead of an AttributeError on the next line
    raise ValueError("Nominatim returned no result for {!r}".format(address))
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Missisauga {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Missisauga 43.590338, -79.645729.


In [55]:
# base map centred on the city coordinates
map_miss = folium.Map(location=[latitude, longitude], zoom_start=11)

# one blue circle per neighborhood; its popup shows the neighborhood name
marker_style = dict(radius=5, color='blue', fill=True, fill_color='#3186cc', fill_opacity=0.7)
for row in df[['Latitude', 'Longitude', 'Neighborhood']].itertuples(index=False):
    folium.CircleMarker(
        location=[row.Latitude, row.Longitude],
        popup=folium.Popup(str(row.Neighborhood), parse_html=True),
        **marker_style
    ).add_to(map_miss)

map_miss

In [29]:
# save the neighborhood map as an HTML file (the previous name, 'map_kl.miss',
# had no .html extension, so browsers would not open it as a web page)
map_miss.save('map_miss.html')

## 5. Use the Foursquare API to explore the neighborhoods

In [30]:
# define Foursquare Credentials and Version
# The ID and secret are read from the environment so they never appear in the
# notebook source or its saved output. Set FOURSQUARE_CLIENT_ID and
# FOURSQUARE_CLIENT_SECRET before starting the kernel.
# NOTE(review): any key pair that was previously committed in this notebook
# should be revoked and regenerated.
try:
    CLIENT_ID = os.environ['FOURSQUARE_CLIENT_ID'] # your Foursquare ID
    CLIENT_SECRET = os.environ['FOURSQUARE_CLIENT_SECRET'] # your Foursquare Secret
except KeyError as missing:
    raise RuntimeError(
        'Missing environment variable {}; export your Foursquare credentials '
        'before running this notebook.'.format(missing)
    ) from missing
VERSION = '20180605' # Foursquare API version

# Deliberately do not echo the credentials: cell output is saved with the notebook
print('Foursquare credentials loaded from environment.')

Your credentails:
CLIENT_ID: <redacted>
CLIENT_SECRET:<redacted>


**Now, let's get the top 100 venues that are within a radius of 2000 meters.**

In [31]:
radius = 2000
LIMIT = 100

venues = []

for lat, long, neighborhood in zip(df['Latitude'], df['Longitude'], df['Neighborhood']):

    # Let requests build and URL-encode the query string instead of formatting
    # the credentials into the URL by hand.
    response = requests.get(
        "https://api.foursquare.com/v2/venues/explore",
        params={
            "client_id": CLIENT_ID,
            "client_secret": CLIENT_SECRET,
            "v": VERSION,
            "ll": "{},{}".format(lat, long),
            "radius": radius,
            "limit": LIMIT,
        },
        timeout=30,
    )
    # Report failures without the request URL: it contains the client secret,
    # which would otherwise end up in the saved cell output.
    if not response.ok:
        raise RuntimeError(
            "Foursquare request for {!r} failed with HTTP status {}".format(
                neighborhood, response.status_code
            )
        )

    # A reply without any "groups" is treated as "no venues" rather than a crash
    groups = response.json().get("response", {}).get("groups", [])
    results = groups[0].get("items", []) if groups else []

    # return only relevant information for each nearby venue
    for item in results:
        venue = item['venue']
        categories = venue.get('categories') or []
        if not categories:
            # Venues without a category cannot be used by the category-based
            # analysis below, so they are skipped instead of raising IndexError
            continue
        venues.append((
            neighborhood,
            lat,
            long,
            venue['name'],
            venue['location']['lat'],
            venue['location']['lng'],
            categories[0]['name']))

In [32]:
# names for the seven fields stored in each venue tuple
venue_columns = ['Neighborhood', 'Latitude', 'Longitude', 'VenueName', 'VenueLatitude', 'VenueLongitude', 'VenueCategory']

# turn the list of venue tuples into a labelled DataFrame
venues_df = pd.DataFrame(venues).set_axis(venue_columns, axis=1)

print(venues_df.shape)
venues_df.head()

(1004, 7)


Unnamed: 0,Neighborhood,Latitude,Longitude,VenueName,VenueLatitude,VenueLongitude,VenueCategory
0,Churchill Meadows,43.548348,-79.741928,Britannia Italian Bakery,43.561891,-79.752455,Italian Restaurant
1,Churchill Meadows,43.548348,-79.741928,Starbucks,43.559663,-79.733694,Coffee Shop
2,Churchill Meadows,43.548348,-79.741928,Tim Hortons,43.553634,-79.72279,Coffee Shop
3,Churchill Meadows,43.548348,-79.741928,Pioneer Energy,43.561014,-79.734373,Gas Station
4,Churchill Meadows,43.548348,-79.741928,Nations Fresh Foods,43.550826,-79.718164,Grocery Store


**Let's check how many venues were returned for each neighborhood**

In [33]:
# number of non-null values in every column, per neighborhood
venues_df.groupby("Neighborhood").count()

Unnamed: 0_level_0,Latitude,Longitude,VenueName,VenueLatitude,VenueLongitude,VenueCategory
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Churchill Meadows,31,31,31,31,31,31
"Clarkson, Mississauga",41,41,41,41,41,41
Cooksville (Mississauga),79,79,79,79,79,79
"Dixie, Mississauga",100,100,100,100,100,100
"Elmbank, Ontario",100,100,100,100,100,100
Erin Mills,61,61,61,61,61,61
"Erindale, Mississauga",32,32,32,32,32,32
"Lakeview, Mississauga",49,49,49,49,49,49
"Lisgar, Ontario",67,67,67,67,67,67
Lorne Park,30,30,30,30,30,30


**Let's find out how many unique categories can be curated from all the returned venues**

In [34]:
# count distinct category values (missing values, if any, count as one value)
category_count = venues_df['VenueCategory'].nunique(dropna=False)
print('There are {} uniques categories.'.format(category_count))

There are 152 uniques categories.


In [35]:
# Preview the first 50 distinct venue categories, in order of first appearance
venues_df['VenueCategory'].unique()[:50]

array(['Italian Restaurant', 'Coffee Shop', 'Gas Station',
       'Grocery Store', 'Ice Cream Shop', 'Sandwich Place', 'Café',
       'Convenience Store', 'Bank', 'Pharmacy', 'Pizza Place',
       'Wings Joint', 'Gym / Fitness Center', 'Park', 'Indian Restaurant',
       'Nail Salon', 'Mediterranean Restaurant', 'Hardware Store',
       'Pakistani Restaurant', 'Food & Drink Shop', 'Tennis Court',
       'Breakfast Spot', 'Furniture / Home Store', 'Liquor Store',
       'Trail', 'Dog Run', 'Bar', 'Flower Shop', 'Hobby Shop',
       'Irish Pub', 'Mexican Restaurant', 'Sushi Restaurant',
       'Japanese Restaurant', 'Restaurant', 'Greek Restaurant',
       'Supermarket', 'Shopping Mall', 'Cosmetics Shop', 'Burrito Place',
       'Korean Restaurant', 'Caribbean Restaurant',
       'Middle Eastern Restaurant', 'Farmers Market',
       'Portuguese Restaurant', 'Bakery', 'Discount Store', 'BBQ Joint',
       'Vietnamese Restaurant', 'Gym', 'Fried Chicken Joint'],
      dtype=object)

## 6. Analyze Each Neighborhood

In [38]:
# one hot encoding: one indicator column per venue category, named after the category
miss_onehot = pd.get_dummies(venues_df[['VenueCategory']], prefix="", prefix_sep="")

# place the neighborhood name in front of the indicator columns
miss_onehot.insert(0, 'Neighborhoods', venues_df['Neighborhood'])

print(miss_onehot.shape)
miss_onehot.head()

(1004, 153)


Unnamed: 0,Neighborhoods,Accessories Store,Airport,Airport Lounge,Airport Service,American Restaurant,Arcade,Art Gallery,Arts & Crafts Store,Asian Restaurant,Automotive Shop,BBQ Joint,Bagel Shop,Bakery,Bank,Bar,Baseball Field,Beer Bar,Beer Store,Big Box Store,Bistro,Bookstore,Breakfast Spot,Brewery,Bridal Shop,Bubble Tea Shop,Burger Joint,Burrito Place,Bus Station,Café,Candy Store,Caribbean Restaurant,Chinese Restaurant,Clothing Store,Cocktail Bar,Coffee Shop,Comedy Club,Convenience Store,Cosmetics Shop,Cupcake Shop,Deli / Bodega,Department Store,Dessert Shop,Dim Sum Restaurant,Diner,Discount Store,Dog Run,Duty-free Shop,Eastern European Restaurant,Electronics Store,Event Space,Falafel Restaurant,Farmers Market,Fast Food Restaurant,Fish & Chips Shop,Flea Market,Flower Shop,Food & Drink Shop,Food Court,Food Truck,Fried Chicken Joint,Furniture / Home Store,Garden,Gas Station,Gastropub,Golf Course,Greek Restaurant,Grocery Store,Gym,Gym / Fitness Center,Gym Pool,Gymnastics Gym,Harbor / Marina,Hardware Store,Hobby Shop,Hookah Bar,Hotel,IT Services,Ice Cream Shop,Indian Restaurant,Iraqi Restaurant,Irish Pub,Italian Restaurant,Japanese Restaurant,Jewelry Store,Juice Bar,Kebab Restaurant,Korean Restaurant,Lake,Laser Tag,Light Rail Station,Liquor Store,Market,Martial Arts Dojo,Mediterranean Restaurant,Mexican Restaurant,Middle Eastern Restaurant,Mobile Phone Shop,Motorcycle Shop,Music Store,Nail Salon,Nightclub,Pakistani Restaurant,Paper / Office Supplies Store,Park,Pet Store,Pharmacy,Pizza Place,Playground,Plaza,Pool,Pool Hall,Portuguese Restaurant,Poutine Place,Pub,Rental Car Location,Restaurant,Salon / Barbershop,Sandwich Place,Seafood Restaurant,Shoe Repair,Shoe Store,Shopping Mall,Skating Rink,Smoke Shop,South American Restaurant,Sporting Goods Shop,Sports Bar,Stables,Steakhouse,Student Center,Supermarket,Supplement Shop,Sushi Restaurant,Taco Place,Taiwanese Restaurant,Tapas Restaurant,Tea Room,Temple,Tennis Court,Thai Restaurant,Theater,Thrift / Vintage Store,Toy / 
Game Store,Trail,Train Station,Video Game Store,Video Store,Vietnamese Restaurant,Warehouse Store,Wings Joint,Women's Store,Yemeni Restaurant
0,Churchill Meadows,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,Churchill Meadows,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,Churchill Meadows,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,Churchill Meadows,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,Churchill Meadows,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


**Next, let's group rows by neighborhood and take the mean of the frequency of occurrence of each category**

In [39]:
# per-neighborhood mean of each indicator column = share of venues in that category
miss_grouped = miss_onehot.groupby("Neighborhoods", as_index=False).mean()

print(miss_grouped.shape)
miss_grouped

(17, 153)


Unnamed: 0,Neighborhoods,Accessories Store,Airport,Airport Lounge,Airport Service,American Restaurant,Arcade,Art Gallery,Arts & Crafts Store,Asian Restaurant,Automotive Shop,BBQ Joint,Bagel Shop,Bakery,Bank,Bar,Baseball Field,Beer Bar,Beer Store,Big Box Store,Bistro,Bookstore,Breakfast Spot,Brewery,Bridal Shop,Bubble Tea Shop,Burger Joint,Burrito Place,Bus Station,Café,Candy Store,Caribbean Restaurant,Chinese Restaurant,Clothing Store,Cocktail Bar,Coffee Shop,Comedy Club,Convenience Store,Cosmetics Shop,Cupcake Shop,Deli / Bodega,Department Store,Dessert Shop,Dim Sum Restaurant,Diner,Discount Store,Dog Run,Duty-free Shop,Eastern European Restaurant,Electronics Store,Event Space,Falafel Restaurant,Farmers Market,Fast Food Restaurant,Fish & Chips Shop,Flea Market,Flower Shop,Food & Drink Shop,Food Court,Food Truck,Fried Chicken Joint,Furniture / Home Store,Garden,Gas Station,Gastropub,Golf Course,Greek Restaurant,Grocery Store,Gym,Gym / Fitness Center,Gym Pool,Gymnastics Gym,Harbor / Marina,Hardware Store,Hobby Shop,Hookah Bar,Hotel,IT Services,Ice Cream Shop,Indian Restaurant,Iraqi Restaurant,Irish Pub,Italian Restaurant,Japanese Restaurant,Jewelry Store,Juice Bar,Kebab Restaurant,Korean Restaurant,Lake,Laser Tag,Light Rail Station,Liquor Store,Market,Martial Arts Dojo,Mediterranean Restaurant,Mexican Restaurant,Middle Eastern Restaurant,Mobile Phone Shop,Motorcycle Shop,Music Store,Nail Salon,Nightclub,Pakistani Restaurant,Paper / Office Supplies Store,Park,Pet Store,Pharmacy,Pizza Place,Playground,Plaza,Pool,Pool Hall,Portuguese Restaurant,Poutine Place,Pub,Rental Car Location,Restaurant,Salon / Barbershop,Sandwich Place,Seafood Restaurant,Shoe Repair,Shoe Store,Shopping Mall,Skating Rink,Smoke Shop,South American Restaurant,Sporting Goods Shop,Sports Bar,Stables,Steakhouse,Student Center,Supermarket,Supplement Shop,Sushi Restaurant,Taco Place,Taiwanese Restaurant,Tapas Restaurant,Tea Room,Temple,Tennis Court,Thai Restaurant,Theater,Thrift / Vintage Store,Toy / 
Game Store,Trail,Train Station,Video Game Store,Video Store,Vietnamese Restaurant,Warehouse Store,Wings Joint,Women's Store,Yemeni Restaurant
0,Churchill Meadows,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.032258,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.032258,0.0,0.0,0.0,0.0,0.0,0.096774,0.0,0.032258,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.032258,0.0,0.0,0.0,0.0,0.0,0.096774,0.0,0.0,0.0,0.064516,0.0,0.032258,0.0,0.0,0.0,0.032258,0.0,0.0,0.0,0.0,0.032258,0.064516,0.0,0.0,0.032258,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.032258,0.0,0.0,0.0,0.0,0.0,0.032258,0.0,0.032258,0.0,0.032258,0.0,0.064516,0.129032,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.032258,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.064516,0.0,0.0
1,"Clarkson, Mississauga",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02439,0.02439,0.0,0.0,0.0,0.0,0.0,0.0,0.073171,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.121951,0.0,0.0,0.02439,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02439,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02439,0.0,0.0,0.0,0.0,0.02439,0.0,0.02439,0.0,0.0,0.02439,0.02439,0.0,0.0,0.0,0.0,0.0,0.0,0.02439,0.0,0.0,0.0,0.02439,0.02439,0.0,0.02439,0.097561,0.02439,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02439,0.0,0.0,0.0,0.02439,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02439,0.0,0.02439,0.073171,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02439,0.0,0.02439,0.0,0.0,0.0,0.02439,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.04878,0.0,0.02439,0.0,0.0,0.0,0.0,0.0,0.02439,0.0,0.0,0.0,0.0,0.02439,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,Cooksville (Mississauga),0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.012658,0.0,0.025316,0.037975,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.012658,0.0,0.012658,0.0,0.037975,0.0,0.012658,0.037975,0.0,0.0,0.075949,0.0,0.012658,0.0,0.0,0.012658,0.0,0.012658,0.0,0.012658,0.012658,0.0,0.0,0.0,0.0,0.0,0.0,0.012658,0.025316,0.0,0.0,0.0,0.0,0.0,0.0,0.025316,0.0,0.0,0.0,0.0,0.0,0.0,0.063291,0.025316,0.012658,0.0,0.0,0.0,0.012658,0.0,0.0,0.0,0.0,0.012658,0.025316,0.0,0.0,0.012658,0.0,0.0,0.0,0.0,0.063291,0.0,0.0,0.0,0.012658,0.0,0.0,0.012658,0.012658,0.025316,0.0,0.0,0.0,0.0,0.0,0.012658,0.012658,0.025316,0.0,0.025316,0.037975,0.0,0.0,0.0,0.0,0.012658,0.0,0.0,0.0,0.0,0.0,0.088608,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.012658,0.0,0.0,0.0,0.012658,0.0,0.025316,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.012658,0.0,0.0,0.0,0.025316,0.0,0.012658,0.0,0.0
3,"Dixie, Mississauga",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.02,0.01,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.03,0.0,0.0,0.0,0.02,0.03,0.01,0.01,0.0,0.03,0.0,0.0,0.0,0.13,0.01,0.0,0.0,0.0,0.01,0.01,0.02,0.0,0.0,0.0,0.0,0.0,0.01,0.02,0.01,0.01,0.0,0.03,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.01,0.0,0.0,0.01,0.03,0.0,0.01,0.0,0.0,0.0,0.01,0.01,0.01,0.06,0.01,0.0,0.02,0.0,0.0,0.0,0.03,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.02,0.03,0.01,0.0,0.0,0.0,0.01,0.01,0.01,0.01,0.01,0.02,0.02,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.03,0.0,0.05,0.0,0.0,0.0,0.01,0.02,0.01,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,"Elmbank, Ontario",0.0,0.01,0.06,0.02,0.04,0.0,0.0,0.0,0.01,0.01,0.0,0.01,0.0,0.0,0.02,0.0,0.02,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.03,0.0,0.0,0.01,0.01,0.0,0.0,0.0,0.0,0.17,0.0,0.02,0.0,0.0,0.01,0.0,0.0,0.0,0.01,0.0,0.0,0.03,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.02,0.0,0.12,0.0,0.0,0.01,0.0,0.0,0.03,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.05,0.03,0.0,0.04,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.04,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,Erin Mills,0.0,0.0,0.0,0.0,0.0,0.0,0.016393,0.0,0.0,0.0,0.0,0.0,0.016393,0.016393,0.0,0.0,0.0,0.016393,0.016393,0.0,0.0,0.0,0.0,0.0,0.0,0.016393,0.016393,0.0,0.016393,0.0,0.0,0.0,0.0,0.0,0.081967,0.0,0.0,0.0,0.0,0.0,0.016393,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.032787,0.0,0.0,0.016393,0.0,0.0,0.0,0.016393,0.0,0.0,0.065574,0.0,0.0,0.0,0.032787,0.032787,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.016393,0.0,0.016393,0.0,0.016393,0.0,0.032787,0.016393,0.0,0.016393,0.0,0.016393,0.0,0.0,0.0,0.016393,0.016393,0.0,0.0,0.0,0.032787,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.04918,0.0,0.032787,0.032787,0.016393,0.0,0.0,0.0,0.0,0.0,0.016393,0.0,0.032787,0.0,0.065574,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.016393,0.016393,0.0,0.032787,0.0,0.0,0.0,0.016393,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.016393,0.0,0.016393,0.0,0.0,0.0,0.016393
6,"Erindale, Mississauga",0.0,0.0,0.0,0.0,0.0,0.0,0.03125,0.0,0.0,0.0,0.0,0.0,0.03125,0.03125,0.0,0.03125,0.0,0.03125,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.03125,0.0,0.0,0.0,0.0,0.0,0.0625,0.0,0.0,0.0,0.0,0.0,0.03125,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0625,0.0,0.03125,0.0,0.03125,0.03125,0.03125,0.0,0.0,0.0,0.0,0.0,0.0,0.03125,0.0,0.0,0.0,0.0,0.0,0.0625,0.0,0.0,0.03125,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.03125,0.0,0.0,0.09375,0.0,0.0,0.0,0.0,0.0,0.0,0.03125,0.0,0.09375,0.0,0.03125,0.0,0.0,0.0,0.03125,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.03125,0.03125,0.0,0.0,0.0,0.0,0.0,0.03125,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7,"Lakeview, Mississauga",0.040816,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.020408,0.0,0.0,0.0,0.0,0.020408,0.0,0.0,0.0,0.0,0.020408,0.020408,0.0,0.040816,0.0,0.0,0.0,0.040816,0.0,0.040816,0.0,0.020408,0.0,0.0,0.0,0.020408,0.0,0.0,0.0,0.0,0.020408,0.0,0.0,0.0,0.0,0.020408,0.020408,0.061224,0.0,0.0,0.0,0.0,0.020408,0.020408,0.0,0.0,0.0,0.020408,0.0,0.0,0.0,0.020408,0.0,0.020408,0.0,0.0,0.040816,0.0,0.0,0.0,0.020408,0.0,0.020408,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.020408,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.061224,0.020408,0.020408,0.061224,0.020408,0.0,0.020408,0.0,0.0,0.0,0.0,0.0,0.020408,0.0,0.020408,0.020408,0.0,0.020408,0.0,0.0,0.0,0.020408,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.020408,0.0,0.020408,0.0,0.020408,0.0,0.020408,0.0,0.0
8,"Lisgar, Ontario",0.0,0.0,0.0,0.0,0.014925,0.014925,0.0,0.014925,0.0,0.0,0.0,0.0,0.0,0.029851,0.0,0.0,0.0,0.014925,0.014925,0.0,0.0,0.029851,0.0,0.014925,0.014925,0.014925,0.0,0.0,0.0,0.0,0.0,0.014925,0.0,0.0,0.119403,0.0,0.014925,0.014925,0.0,0.0,0.044776,0.0,0.0,0.014925,0.0,0.0,0.0,0.0,0.014925,0.0,0.0,0.0,0.059701,0.0,0.0,0.0,0.0,0.0,0.0,0.014925,0.014925,0.0,0.0,0.0,0.0,0.0,0.044776,0.014925,0.0,0.0,0.014925,0.0,0.014925,0.014925,0.0,0.0,0.0,0.029851,0.014925,0.0,0.0,0.0,0.0,0.0,0.014925,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.014925,0.0,0.0,0.014925,0.0,0.0,0.0,0.0,0.014925,0.0,0.014925,0.0,0.029851,0.0,0.0,0.0,0.0,0.0,0.0,0.014925,0.0,0.044776,0.0,0.044776,0.0,0.0,0.0,0.014925,0.0,0.0,0.0,0.014925,0.0,0.014925,0.0,0.0,0.029851,0.0,0.0,0.014925,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.014925,0.0,0.014925,0.0,0.0,0.0,0.0,0.014925,0.0,0.0
9,Lorne Park,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.033333,0.0,0.0,0.0,0.0,0.0,0.0,0.066667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.033333,0.0,0.033333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.033333,0.033333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.033333,0.0,0.066667,0.0,0.0,0.033333,0.033333,0.0,0.0,0.0,0.0,0.0,0.0,0.033333,0.0,0.0,0.0,0.033333,0.033333,0.0,0.0,0.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.033333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.1,0.0,0.033333,0.066667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.033333,0.0,0.0,0.0,0.033333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.033333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.033333,0.0,0.0,0.0,0.0,0.0,0.033333,0.0,0.0


In [40]:
# number of neighborhoods where at least one returned venue is an Indian restaurant
int((miss_grouped["Indian Restaurant"] > 0).sum())

12

**Create a new DataFrame for Indian Restaurant data only**

In [41]:
# keep only the neighborhood name and its Indian-restaurant share
ir_columns = ["Neighborhoods", "Indian Restaurant"]
miss_ir = miss_grouped.loc[:, ir_columns]
miss_ir.head()

Unnamed: 0,Neighborhoods,Indian Restaurant
0,Churchill Meadows,0.064516
1,"Clarkson, Mississauga",0.02439
2,Cooksville (Mississauga),0.025316
3,"Dixie, Mississauga",0.02
4,"Elmbank, Ontario",0.01


## 7. Cluster Neighborhoods

In [42]:
# set number of clusters
kclusters = 3

# Cluster on the Indian-restaurant share only. The `columns=` keyword replaces
# the positional axis argument (`drop([...], 1)`), which pandas 2.x rejects.
miss_clustering = miss_ir.drop(columns=["Neighborhoods"])

# run k-means clustering
# n_init is set explicitly so the fit does not depend on the scikit-learn
# version's default number of restarts.
kmeans = KMeans(n_clusters=kclusters, n_init=10, random_state=0).fit(miss_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10]

array([2, 1, 1, 1, 0, 0, 0, 0, 0, 1], dtype=int32)

In [44]:
# new dataframe: a copy of the Indian-restaurant table with each row's k-means
# cluster label attached
miss_merged = miss_ir.assign(**{"Cluster Labels": kmeans.labels_})

In [45]:
# use the same key column name as df so the two tables can be joined
miss_merged = miss_merged.rename(columns={"Neighborhoods": "Neighborhood"})
miss_merged.head()

Unnamed: 0,Neighborhood,Indian Restaurant,Cluster Labels
0,Churchill Meadows,0.064516,2
1,"Clarkson, Mississauga",0.02439,1
2,Cooksville (Mississauga),0.025316,1
3,"Dixie, Mississauga",0.02,1
4,"Elmbank, Ontario",0.01,0


In [46]:
# bring in latitude/longitude for every clustered neighborhood (left join on the name)
miss_merged = miss_merged.merge(df, on="Neighborhood", how="left")

print(miss_merged.shape)
miss_merged.head() # check the last columns!

(17, 5)


Unnamed: 0,Neighborhood,Indian Restaurant,Cluster Labels,Latitude,Longitude
0,Churchill Meadows,0.064516,2,43.548348,-79.741928
1,"Clarkson, Mississauga",0.02439,1,43.513971,-79.619995
2,Cooksville (Mississauga),0.025316,1,43.58004,-79.61615
3,"Dixie, Mississauga",0.02,1,43.63368,-79.61828
4,"Elmbank, Ontario",0.01,0,43.691009,-79.608046


In [47]:
# order the rows by cluster so that members of a cluster are listed together
print(miss_merged.shape)
miss_merged = miss_merged.sort_values("Cluster Labels")
miss_merged

(17, 5)


Unnamed: 0,Neighborhood,Indian Restaurant,Cluster Labels,Latitude,Longitude
8,"Lisgar, Ontario",0.014925,0,43.5896,-79.78111
14,Port Credit,0.014085,0,43.55142,-79.58589
13,"Meadowvale, Ontario",0.0,0,43.58502,-79.76059
10,"Malton, Mississauga",0.0,0,43.598929,-79.578486
7,"Lakeview, Mississauga",0.0,0,43.57936,-79.55616
6,"Erindale, Mississauga",0.0,0,43.54655,-79.65219
16,"Streetsville, Mississauga",0.013333,0,43.571709,-79.700216
4,"Elmbank, Ontario",0.01,0,43.691009,-79.608046
5,Erin Mills,0.0,0,43.54101,-79.68154
15,"Rockwood Village, Mississauga",0.02381,1,43.631055,-79.603469


**Finally, let's visualize the resulting clusters**

In [57]:
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters: one evenly spaced rainbow color per label
rainbow = [colors.rgb2hex(rgba) for rgba in cm.rainbow(np.linspace(0, 1, kclusters))]

# add markers to the map
for lat, lon, poi, cluster in zip(miss_merged['Latitude'], miss_merged['Longitude'], miss_merged['Neighborhood'], miss_merged['Cluster Labels']):
    # Labels run from 0 to kclusters-1, so they index the palette directly
    # (the earlier `cluster-1` only worked for label 0 via negative wrap-around)
    cluster_color = rainbow[int(cluster)]
    label = folium.Popup(str(poi) + ' - Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=cluster_color,
        fill=True,
        fill_color=cluster_color,
        fill_opacity=0.7).add_to(map_clusters)

map_clusters

In [50]:
# save the cluster map as a standalone HTML file named map_clusters.html
map_clusters.save('map_clusters.html')

## 8. Examine Clusters

**Cluster 0**

In [51]:
# rows assigned to cluster label 0
miss_merged[miss_merged['Cluster Labels'].eq(0)]

Unnamed: 0,Neighborhood,Indian Restaurant,Cluster Labels,Latitude,Longitude
8,"Lisgar, Ontario",0.014925,0,43.5896,-79.78111
14,Port Credit,0.014085,0,43.55142,-79.58589
13,"Meadowvale, Ontario",0.0,0,43.58502,-79.76059
10,"Malton, Mississauga",0.0,0,43.598929,-79.578486
7,"Lakeview, Mississauga",0.0,0,43.57936,-79.55616
6,"Erindale, Mississauga",0.0,0,43.54655,-79.65219
16,"Streetsville, Mississauga",0.013333,0,43.571709,-79.700216
4,"Elmbank, Ontario",0.01,0,43.691009,-79.608046
5,Erin Mills,0.0,0,43.54101,-79.68154


**Cluster 1**

In [53]:
# rows assigned to cluster label 1
miss_merged[miss_merged['Cluster Labels'].eq(1)]

Unnamed: 0,Neighborhood,Indian Restaurant,Cluster Labels,Latitude,Longitude
15,"Rockwood Village, Mississauga",0.02381,1,43.631055,-79.603469
9,Lorne Park,0.033333,1,43.53146,-79.62201
3,"Dixie, Mississauga",0.02,1,43.63368,-79.61828
11,Meadowvale (village),0.038462,1,43.62776,-79.72696
12,"Meadowvale Village, Ontario",0.038462,1,43.62776,-79.72696
2,Cooksville (Mississauga),0.025316,1,43.58004,-79.61615
1,"Clarkson, Mississauga",0.02439,1,43.513971,-79.619995


**Cluster 2**

In [54]:
# rows assigned to cluster label 2
miss_merged[miss_merged['Cluster Labels'].eq(2)]

Unnamed: 0,Neighborhood,Indian Restaurant,Cluster Labels,Latitude,Longitude
0,Churchill Meadows,0.064516,2,43.548348,-79.741928


## Observations:
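The `Indian Restaurant` column is the share of each neighborhood's returned venues that are Indian restaurants, so a higher value means more existing competition. **Churchill Meadows** (Cluster 2) has the highest share (about 6.5%), not the lowest, which makes it the most saturated neighborhood. The Cluster 0 neighborhoods have the lowest share (0–1.5%), and several of them (Erin Mills, Erindale, Lakeview, Malton and Meadowvale) have no Indian restaurants among the returned venues, making **Cluster 0** the better choice for starting a new restaurant without immense competition.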