In [1]:
#importing necessary libraries
import os

import requests
import pandas as pd

In [2]:
# Download the Wikipedia list of Canadian postal codes beginning with "M"
# and report whether the request succeeded.
url  = "https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M"
page = requests.get(url)
if page.status_code != 200:
    print('Page download error. Error code: {}'.format(page.status_code))
else:
    print('Page download successful')

Page download successful


In [3]:
# Parse the first HTML table found at `url`; cells reading 'Not assigned' are loaded as NaN.
# NOTE(review): the URL itself is passed to read_html, so the page is requested
# again here instead of reusing the `page` response downloaded earlier.
df_html = pd.read_html(url, header=0, na_values = ['Not assigned'])[0]
df_html.head()

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M1A,,
1,M2A,,
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront


In [4]:
# Drop the rows on which the Borough is empty
# (NaN, i.e. 'Not assigned' in the source table, see na_values in read_html)
df_html.dropna(subset=['Borough'], inplace=True)

In [5]:
# Count the rows that still have a Borough but no Neighbourhood value
missing_neighbourhood = df_html['Neighbourhood'].isna()
n_empty_neighborhood = int(missing_neighbourhood.sum())
print('Number of rows on which Neighborhood column is empty: {}'.format(n_empty_neighborhood))

Number of rows on which Neighborhood column is empty: 1


In [6]:
# Show the rows whose Neighbourhood is empty although the Borough exists
df_html[df_html['Neighbourhood'].isna()]

Unnamed: 0,Postcode,Borough,Neighbourhood
8,M7A,Queen's Park,


In [7]:
df_html.shape

(211, 3)

In [8]:
# Replace empty Neighbourhood values with the Borough name and check again.
# The filled column is assigned back to the frame instead of calling
# fillna(inplace=True) on the selected column: the in-place form is a chained
# assignment, which newer pandas warns about and which leaves df_html
# unchanged when copy-on-write is enabled.
df_html['Neighbourhood'] = df_html['Neighbourhood'].fillna(df_html['Borough'])
n_empty_neighborhood = df_html[df_html['Neighbourhood'].isna()].shape[0]
print('Number of rows on which Neighborhood column is empty: {}'.format(n_empty_neighborhood))

Number of rows on which Neighborhood column is empty: 0


In [9]:
#Confirm that Queen's Park Neighborhood is not empty now:
df_html[df_html['Borough']=="Queen's Park"]

Unnamed: 0,Postcode,Borough,Neighbourhood
8,M7A,Queen's Park,Queen's Park


In [10]:
# Collapse to one row per Postcode/Borough pair, joining the neighbourhood
# names that share a postcode into a single ", "-separated string.
df_postcodes = (
    df_html
    .groupby(['Postcode', 'Borough'])['Neighbourhood']
    .agg(', '.join)
    .reset_index()
)
df_postcodes.head(5)

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M1B,Scarborough,"Rouge, Malvern"
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union"
2,M1E,Scarborough,"Guildwood, Morningside, West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae


In [11]:
# Drop the rows on which the Borough is empty
# NOTE(review): this repeats the dropna already applied to df_html earlier in
# the notebook; df_postcodes was built from df_html before this cell, so this
# step does not change it.
df_html.dropna(subset=['Borough'], inplace=True)

In [12]:
#Check Neighborhood is empty but Borough exists
n_empty_neighborhood = df_html[df_html['Neighbourhood'].isna()].shape[0]
print('Number of rows on which Neighborhood column is empty: {}'.format(n_empty_neighborhood))

Number of rows on which Neighborhood column is empty: 0


In [13]:
print('The shape of the dataset is:',df_postcodes.shape)

The shape of the dataset is: (103, 3)


In [14]:
type(df_postcodes)

pandas.core.frame.DataFrame

In [15]:
#to make it easier, we will store this in csv format.
#Export to .CSV
df_postcodes.to_csv('Toronto_Postcodes.csv')

In [16]:
import numpy as np

In [17]:
# Since GeoEncoder library is not working for me, I will use the csv file downloaded
#Read CSV file from link and load into dataframe
url_csv = 'http://cocl.us/Geospatial_data'
df_coordinates = pd.read_csv(url_csv)
df_coordinates.head()

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


In [18]:
#use the previously cleaned data
df_neighborhoods = pd.read_csv('Toronto_Postcodes.csv',index_col=[0])
df_neighborhoods.head()

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M1B,Scarborough,"Rouge, Malvern"
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union"
2,M1E,Scarborough,"Guildwood, Morningside, West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae


In [19]:
# Make sure both dataframes use the same name ('PostalCode') for the postal
# code column, so that they can be merged on it
df_coordinates.rename(columns={'Postal Code': 'PostalCode'}, inplace=True)
df_neighborhoods.rename(columns={'Postcode': 'PostalCode'}, inplace=True)

In [20]:
# Merge both datasets
df_neighborhoods_coordinates = pd.merge(df_neighborhoods, df_coordinates, on='PostalCode')
df_neighborhoods_coordinates.head()

Unnamed: 0,PostalCode,Borough,Neighbourhood,Latitude,Longitude
0,M1B,Scarborough,"Rouge, Malvern",43.806686,-79.194353
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476


In [21]:
# Spot-check the merged coordinates for two postal codes
sample_codes = ['M5G', 'M2H']
df_neighborhoods_coordinates[df_neighborhoods_coordinates['PostalCode'].isin(sample_codes)]

Unnamed: 0,PostalCode,Borough,Neighbourhood,Latitude,Longitude
17,M2H,North York,Hillcrest Village,43.803762,-79.363452
57,M5G,Downtown Toronto,Central Bay Street,43.657952,-79.387383


In [31]:
#Export to .CSV
df_neighborhoods_coordinates.to_csv('Toronto_Postcodes_2.csv')

In [32]:
import folium
from sklearn.cluster import KMeans

In [24]:
# Read .csv file from above
df = pd.read_csv('Toronto_Postcodes_2.csv', index_col=0)
df.head()

Unnamed: 0,PostalCode,Borough,Neighbourhood,Latitude,Longitude
0,M1B,Scarborough,"Rouge, Malvern",43.806686,-79.194353
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476


In [25]:
# Summarise the dataset size.
# NOTE: df has one row per postal code (neighbourhoods sharing a postcode were
# joined into one string earlier), so the second number is the row count, not
# the number of individual neighbourhood names.
print('The dataframe has {} boroughs and {} neighborhoods.'.format(
        len(df['Borough'].unique()),
        df.shape[0]
    )
)

The dataframe has 11 boroughs and 103 neighborhoods.


In [26]:
df.rename(columns={'Neighbourhood': 'Neighborhood'}, inplace=True)

In [27]:
# Number of non-null Neighborhood entries in each Borough
df.groupby('Borough')['Neighborhood'].count()

Borough
Central Toronto      9
Downtown Toronto    18
East Toronto         5
East York            5
Etobicoke           12
Mississauga          1
North York          24
Queen's Park         1
Scarborough         17
West Toronto         6
York                 5
Name: Neighborhood, dtype: int64

In [28]:
# Keep only the boroughs whose name contains "Toronto".
# reset_index(drop=True) returns a new, renumbered frame in one step: there is
# no temporary 'index' column to drop, and no in-place modification of a
# filtered slice of df (which is what raised SettingWithCopyWarning).
df_toronto = df[df['Borough'].str.contains('Toronto')].reset_index(drop=True)
df_toronto.head()

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  errors=errors)


Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M4E,East Toronto,The Beaches,43.676357,-79.293031
1,M4K,East Toronto,"The Danforth West, Riverdale",43.679557,-79.352188
2,M4L,East Toronto,"The Beaches West, India Bazaar",43.668999,-79.315572
3,M4M,East Toronto,Studio District,43.659526,-79.340923
4,M4N,Central Toronto,Lawrence Park,43.72802,-79.38879


In [29]:
# Number of Neighborhood entries per Borough in the Toronto-only subset
print(df_toronto.groupby('Borough')['Neighborhood'].count())

Borough
Central Toronto      9
Downtown Toronto    18
East Toronto         5
West Toronto         6
Name: Neighborhood, dtype: int64


In [33]:
#Create list with the Boroughs (to be used later)
boroughs = df_toronto['Borough'].unique().tolist()

In [34]:
#Obtain the coordinates from the dataset itself, just averaging Latitude/Longitude of the current dataset 
lat_toronto = df_toronto['Latitude'].mean()
lon_toronto = df_toronto['Longitude'].mean()
print('The geographical coordinates of Toronto are {}, {}'.format(lat_toronto, lon_toronto))

The geographical coordinates of Toronto are 43.66726218421052, -79.38988323421053


In [35]:
# Give every borough its own random hex color, built from three random
# channel values in the range 0-255.
borough_color = {
    borough_name: '#%02X%02X%02X' % tuple(np.random.choice(range(256), size=3))
    for borough_name in boroughs
}

In [36]:
# Base map centred on the averaged coordinates of the selected boroughs
map_toronto = folium.Map(location=[lat_toronto, lon_toronto], zoom_start=12)

# One circle marker per row of df_toronto, colored by its borough;
# the popup shows "<borough> - <neighborhood>".
marker_columns = ('Latitude', 'Longitude', 'Borough', 'Neighborhood')
for lat, lng, borough, neighborhood in zip(*(df_toronto[col] for col in marker_columns)):
    marker_color = borough_color[borough]
    label = folium.Popup(borough + ' - ' + neighborhood)
    marker = folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color=marker_color,
        fill_color=marker_color,
        fill_opacity=0.7)
    marker.add_to(map_toronto)

map_toronto

In [37]:
# Foursquare API settings.
# The credentials are read from the environment so that they are never stored
# in the notebook: set FOURSQUARE_CLIENT_ID and FOURSQUARE_CLIENT_SECRET before
# starting the kernel.
# NOTE: the key pair that used to be hardcoded in this cell has been exposed
# and should be regenerated.
CLIENT_ID = os.environ.get('FOURSQUARE_CLIENT_ID', '') # your Foursquare ID
CLIENT_SECRET = os.environ.get('FOURSQUARE_CLIENT_SECRET', '') # your Foursquare Secret
if not CLIENT_ID or not CLIENT_SECRET:
    print('Foursquare credentials not found: set FOURSQUARE_CLIENT_ID and '
          'FOURSQUARE_CLIENT_SECRET in the environment before calling the API.')
VERSION = '20180604' # Foursquare API version
LIMIT = 100 # limit of number of venues returned by Foursquare API
radius = 500 # define radius 

In [38]:
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    """Query the Foursquare "explore" endpoint around each given location.

    Parameters
    ----------
    names, latitudes, longitudes : iterables
        Parallel sequences; they are consumed together with zip, one API
        request per (name, lat, lng) triple.
    radius : int, optional
        Value sent as the ``radius`` query parameter (default 500).

    Returns
    -------
    pandas.DataFrame
        One row per returned venue with the columns 'Neighborhood',
        'Neighborhood Latitude', 'Neighborhood Longitude', 'Venue',
        'Venue Latitude', 'Venue Longitude' and 'Venue Category'.
        The frame is empty (but has these columns) when no venue is found
        or when the inputs are empty.

    Raises
    ------
    requests.HTTPError
        If the API answers with an error status.

    Notes
    -----
    Reads the module-level CLIENT_ID, CLIENT_SECRET, VERSION and LIMIT.
    """
    columns = ['Neighborhood',
               'Neighborhood Latitude',
               'Neighborhood Longitude',
               'Venue',
               'Venue Latitude',
               'Venue Longitude',
               'Venue Category']

    venue_rows = []
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)

        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID,
            CLIENT_SECRET,
            VERSION,
            lat,
            lng,
            radius,
            LIMIT)

        # make the GET request; the timeout keeps a stalled connection from
        # hanging the notebook, and raise_for_status surfaces API errors
        # (bad credentials, quota) instead of a confusing KeyError below
        response = requests.get(url, timeout=30)
        response.raise_for_status()
        groups = response.json().get('response', {}).get('groups', [])
        results = groups[0].get('items', []) if groups else []

        # keep only the relevant information for each nearby venue
        for item in results:
            venue = item['venue']
            # some venues may come back without any category
            categories = venue.get('categories') or []
            category_name = categories[0]['name'] if categories else None
            venue_rows.append((
                name,
                lat,
                lng,
                venue['name'],
                venue['location']['lat'],
                venue['location']['lng'],
                category_name))

    # passing columns= here (rather than assigning .columns afterwards) also
    # works when no rows were collected
    nearby_venues = pd.DataFrame(venue_rows, columns=columns)

    return(nearby_venues)

In [39]:
#Get venues for all neighborhoods in our dataset
toronto_venues = getNearbyVenues(names=df_toronto['Neighborhood'],
                                latitudes=df_toronto['Latitude'],
                                longitudes=df_toronto['Longitude'])

The Beaches
The Danforth West, Riverdale
The Beaches West, India Bazaar
Studio District
Lawrence Park
Davisville North
North Toronto West
Davisville
Moore Park, Summerhill East
Deer Park, Forest Hill SE, Rathnelly, South Hill, Summerhill West
Rosedale
Cabbagetown, St. James Town
Church and Wellesley
Harbourfront, Regent Park
Ryerson, Garden District
St. James Town
Berczy Park
Central Bay Street
Adelaide, King, Richmond
Harbourfront East, Toronto Islands, Union Station
Design Exchange, Toronto Dominion Centre
Commerce Court, Victoria Hotel
Roselawn
Forest Hill North, Forest Hill West
The Annex, North Midtown, Yorkville
Harbord, University of Toronto
Chinatown, Grange Park, Kensington Market
CN Tower, Bathurst Quay, Island airport, Harbourfront West, King and Spadina, Railway Lands, South Niagara
Stn A PO Boxes 25 The Esplanade
First Canadian Place, Underground city
Christie
Dovercourt Village, Dufferin
Little Portugal, Trinity
Brockton, Exhibition Place, Parkdale Village
High Park, The 

In [40]:
#Check size of resulting dataframe
toronto_venues.shape

(1715, 7)

In [41]:
toronto_venues.head()

Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,The Beaches,43.676357,-79.293031,Glen Manor Ravine,43.676821,-79.293942,Trail
1,The Beaches,43.676357,-79.293031,The Big Carrot Natural Food Market,43.678879,-79.297734,Health Food Store
2,The Beaches,43.676357,-79.293031,Grover Pub and Grub,43.679181,-79.297215,Pub
3,The Beaches,43.676357,-79.293031,Upper Beaches,43.680563,-79.292869,Neighborhood
4,"The Danforth West, Riverdale",43.679557,-79.352188,Pantheon,43.677621,-79.351434,Greek Restaurant


In [42]:
#Number of venues per neighborhood
toronto_venues.groupby('Neighborhood').count()

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
"Adelaide, King, Richmond",100,100,100,100,100,100
Berczy Park,57,57,57,57,57,57
"Brockton, Exhibition Place, Parkdale Village",23,23,23,23,23,23
Business Reply Mail Processing Centre 969 Eastern,19,19,19,19,19,19
"CN Tower, Bathurst Quay, Island airport, Harbourfront West, King and Spadina, Railway Lands, South Niagara",14,14,14,14,14,14
"Cabbagetown, St. James Town",45,45,45,45,45,45
Central Bay Street,89,89,89,89,89,89
"Chinatown, Grange Park, Kensington Market",100,100,100,100,100,100
Christie,15,15,15,15,15,15
Church and Wellesley,85,85,85,85,85,85


In [43]:
#Number of unique venue categories
print('There are {} uniques categories.'.format(len(toronto_venues['Venue Category'].unique())))

There are 238 uniques categories.


In [44]:
#print out the list of categories
toronto_venues['Venue Category'].unique()[:100]

array(['Trail', 'Health Food Store', 'Pub', 'Neighborhood',
       'Greek Restaurant', 'Cosmetics Shop', 'Ice Cream Shop',
       'Italian Restaurant', 'Brewery', 'Yoga Studio', 'Juice Bar',
       'Fruit & Vegetable Store', 'Pizza Place', 'Dessert Shop',
       'Restaurant', 'Bubble Tea Shop', 'Bookstore',
       'Furniture / Home Store', 'Grocery Store', 'Spa', 'Diner',
       'Indian Restaurant', 'Coffee Shop', 'Caribbean Restaurant',
       'Bakery', 'Café', 'American Restaurant', 'Sports Bar',
       'Liquor Store', 'Burger Joint', 'Gym', 'Fish & Chips Shop', 'Park',
       'Sushi Restaurant', 'Pet Store', 'Burrito Place', 'Steakhouse',
       'Fast Food Restaurant', 'Movie Theater', 'Sandwich Place',
       'Board Shop', 'Fish Market', 'Gay Bar', 'Seafood Restaurant',
       'Cheese Shop', 'Middle Eastern Restaurant', 'Thai Restaurant',
       'Comfort Food Restaurant', 'Chinese Restaurant',
       'Stationery Store', 'Coworking Space', 'Bar',
       'Latin American Restaurant', 

In [45]:
# check if the results contain "Indian Restaurants"
"Indian Restaurant" in toronto_venues['Venue Category'].unique()

True

In [49]:
#Number of venues per neighborhood
toronto_venues.groupby('Neighborhood').count()

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
"Adelaide, King, Richmond",100,100,100,100,100,100
Berczy Park,57,57,57,57,57,57
"Brockton, Exhibition Place, Parkdale Village",23,23,23,23,23,23
Business Reply Mail Processing Centre 969 Eastern,19,19,19,19,19,19
"CN Tower, Bathurst Quay, Island airport, Harbourfront West, King and Spadina, Railway Lands, South Niagara",14,14,14,14,14,14
"Cabbagetown, St. James Town",45,45,45,45,45,45
Central Bay Street,89,89,89,89,89,89
"Chinatown, Grange Park, Kensington Market",100,100,100,100,100,100
Christie,15,15,15,15,15,15
Church and Wellesley,85,85,85,85,85,85


In [50]:
# one hot encoding: one 0/1 column per venue category, one row per venue
to_onehot = pd.get_dummies(toronto_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
# NOTE(review): the column is named 'Neighborhoods' (plural), presumably because
# 'Neighborhood' also occurs as a venue category in the data and would
# otherwise clash with that dummy column; confirm before renaming
to_onehot['Neighborhoods'] = toronto_venues['Neighborhood'] 

# move neighborhood column to the first column
# (it was appended last, so take the last column name and put it in front)
fixed_columns = [to_onehot.columns[-1]] + list(to_onehot.columns[:-1])
to_onehot = to_onehot[fixed_columns]

print(to_onehot.shape)
to_onehot.head()

(1715, 239)


Unnamed: 0,Neighborhoods,Afghan Restaurant,Airport,Airport Food Court,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,Antique Shop,Aquarium,...,Toy / Game Store,Trail,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Wine Bar,Wings Joint,Women's Store,Yoga Studio
0,The Beaches,0,0,0,0,0,0,0,0,0,...,0,1,0,0,0,0,0,0,0,0
1,The Beaches,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,The Beaches,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,The Beaches,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,"The Danforth West, Riverdale",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [54]:
# Next, let's group rows by neighborhood and by taking the mean of the frequency of occurrence of each category
to_grouped = to_onehot.groupby(["Neighborhoods"]).mean().reset_index()

print(to_grouped.shape)


(38, 239)


In [55]:
to_onehot[['Neighborhoods','Indian Restaurant']]

Unnamed: 0,Neighborhoods,Indian Restaurant
0,The Beaches,0
1,The Beaches,0
2,The Beaches,0
3,The Beaches,0
4,"The Danforth West, Riverdale",0
5,"The Danforth West, Riverdale",0
6,"The Danforth West, Riverdale",0
7,"The Danforth West, Riverdale",0
8,"The Danforth West, Riverdale",0
9,"The Danforth West, Riverdale",0


In [67]:
# Count the neighborhoods in which each cuisine appears at least once.
# to_grouped has one row per neighborhood holding the mean frequency of every
# venue category, so "> 0" selects the neighborhoods with one or more venues
# of that category. The result is a number of neighborhoods, not a number of
# restaurants, and the printed label says so.
cuisines = ["Indian", "Italian", "Mexican", "Chinese"]
neighborhoods_with_cuisine = {}
for cuisine in cuisines:
    category = cuisine + " Restaurant"
    neighborhoods_with_cuisine[cuisine] = int((to_grouped[category] > 0).sum())
    print("{} neighborhoods with at least one {}".format(
        neighborhoods_with_cuisine[cuisine], category))

11 Indian Restaurants
25 Italian Restaurants
7 Mexican Restaurants
9 Chinese Restaurants


In [68]:
# Create a new dataframe to find Indian Restaurants only
indian_df = to_grouped[["Neighborhoods","Indian Restaurant"]]

In [69]:
indian_df.head()

Unnamed: 0,Neighborhoods,Indian Restaurant
0,"Adelaide, King, Richmond",0.01
1,Berczy Park,0.017544
2,"Brockton, Exhibition Place, Parkdale Village",0.0
3,Business Reply Mail Processing Centre 969 Eastern,0.0
4,"CN Tower, Bathurst Quay, Island airport, Harbo...",0.0


# Cluster Neighborhoods

Run k-means to cluster the neighborhoods in Toronto into 3 clusters.

In [70]:
# set number of clusters
toclusters = 3

# clustering input: only the Indian Restaurant frequency column.
# columns= is used because pandas 2.x no longer accepts the axis as a
# positional argument to drop().
to_clustering = indian_df.drop(columns=["Neighborhoods"])

# run k-means clustering (random_state is fixed so the labels are reproducible)
kmeans = KMeans(n_clusters=toclusters, random_state=0).fit(to_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10]

array([0, 0, 1, 1, 1, 0, 0, 1, 1, 0])

In [71]:
# create a new dataframe from the per-neighborhood Indian Restaurant
# frequencies (a copy, so indian_df itself is left unchanged)
to_merged = indian_df.copy()

# add clustering labels (one label per row, in the same row order used to fit kmeans)
to_merged["Cluster Labels"] = kmeans.labels_

In [72]:
to_merged.rename(columns={"Neighborhoods": "Neighborhood"}, inplace=True)
to_merged.head()

Unnamed: 0,Neighborhood,Indian Restaurant,Cluster Labels
0,"Adelaide, King, Richmond",0.01,0
1,Berczy Park,0.017544,0
2,"Brockton, Exhibition Place, Parkdale Village",0.0,1
3,Business Reply Mail Processing Centre 969 Eastern,0.0,1
4,"CN Tower, Bathurst Quay, Island airport, Harbo...",0.0,1


In [73]:
# join the clustered neighborhoods with toronto_venues to add the neighborhood
# latitude/longitude and the venue details
# NOTE(review): toronto_venues has one row per venue, so the result has one
# row per venue (not one per neighborhood), with the cluster label repeated
to_merged = to_merged.join(toronto_venues.set_index("Neighborhood"), on="Neighborhood")

print(to_merged.shape)
to_merged.head()

(1715, 9)


Unnamed: 0,Neighborhood,Indian Restaurant,Cluster Labels,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,"Adelaide, King, Richmond",0.01,0,43.650571,-79.384568,Four Seasons Centre for the Performing Arts,43.650592,-79.385806,Concert Hall
0,"Adelaide, King, Richmond",0.01,0,43.650571,-79.384568,The Keg Steakhouse & Bar,43.649937,-79.384196,Steakhouse
0,"Adelaide, King, Richmond",0.01,0,43.650571,-79.384568,Nathan Phillips Square,43.65227,-79.383516,Plaza
0,"Adelaide, King, Richmond",0.01,0,43.650571,-79.384568,Rosalinda,43.650252,-79.385156,Vegetarian / Vegan Restaurant
0,"Adelaide, King, Richmond",0.01,0,43.650571,-79.384568,Shangri-La Toronto,43.649129,-79.386557,Hotel


In [74]:
# sort the results by Cluster Labels
print(to_merged.shape)
to_merged.sort_values(["Cluster Labels"], inplace=True)
to_merged

(1715, 9)


Unnamed: 0,Neighborhood,Indian Restaurant,Cluster Labels,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,"Adelaide, King, Richmond",0.010000,0,43.650571,-79.384568,Four Seasons Centre for the Performing Arts,43.650592,-79.385806,Concert Hall
19,"Harbourfront East, Toronto Islands, Union Station",0.010000,0,43.640816,-79.381752,Delta Toronto Club Lounge,43.643085,-79.383885,Roof Deck
19,"Harbourfront East, Toronto Islands, Union Station",0.010000,0,43.640816,-79.381752,Pearl Harbourfront,43.638157,-79.380688,Chinese Restaurant
19,"Harbourfront East, Toronto Islands, Union Station",0.010000,0,43.640816,-79.381752,Ripley's Aquarium of Canada,43.642104,-79.386252,Aquarium
19,"Harbourfront East, Toronto Islands, Union Station",0.010000,0,43.640816,-79.381752,Air Canada Club,43.643224,-79.379159,Lounge
19,"Harbourfront East, Toronto Islands, Union Station",0.010000,0,43.640816,-79.381752,Joe Bird,43.638204,-79.380355,Fried Chicken Joint
19,"Harbourfront East, Toronto Islands, Union Station",0.010000,0,43.640816,-79.381752,TELUS Store,43.643312,-79.380999,Office
19,"Harbourfront East, Toronto Islands, Union Station",0.010000,0,43.640816,-79.381752,Union Pearson Express,43.644362,-79.383199,Train Station
19,"Harbourfront East, Toronto Islands, Union Station",0.010000,0,43.640816,-79.381752,Mos Mos,43.641640,-79.377552,Coffee Shop
19,"Harbourfront East, Toronto Islands, Union Station",0.010000,0,43.640816,-79.381752,PawsWay,43.638599,-79.384992,Event Space


#### Visualize cluster

In [75]:
# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

In [77]:
# create map
map_clusters = folium.Map(location=[lat_toronto, lon_toronto], zoom_start=11)

# set color scheme for the clusters: one evenly spaced rainbow color per cluster
colors_array = cm.rainbow(np.linspace(0, 1, toclusters))
rainbow = [colors.rgb2hex(rgba) for rgba in colors_array]

# to_merged holds one row per venue, so keep a single row per neighborhood;
# otherwise the same marker is drawn once per venue on the same point
neighborhood_points = to_merged.drop_duplicates(subset='Neighborhood')

# add markers to the map
for lat, lon, poi, cluster in zip(neighborhood_points['Neighborhood Latitude'],
                                  neighborhood_points['Neighborhood Longitude'],
                                  neighborhood_points['Neighborhood'],
                                  neighborhood_points['Cluster Labels']):
    label = folium.Popup(str(poi) + ' - Cluster ' + str(cluster))
    # cluster labels start at 0, so they index the color list directly
    # (no reliance on negative-index wraparound for cluster 0)
    cluster_color = rainbow[int(cluster)]
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=cluster_color,
        fill_color=cluster_color,
        fill_opacity=0.7).add_to(map_clusters)

map_clusters

In [78]:
# save the map as HTML file
map_clusters.save('map_clusters.html')

In [79]:
#Cluster 0
to_merged.loc[to_merged['Cluster Labels'] == 0]

Unnamed: 0,Neighborhood,Indian Restaurant,Cluster Labels,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,"Adelaide, King, Richmond",0.010000,0,43.650571,-79.384568,Four Seasons Centre for the Performing Arts,43.650592,-79.385806,Concert Hall
19,"Harbourfront East, Toronto Islands, Union Station",0.010000,0,43.640816,-79.381752,Delta Toronto Club Lounge,43.643085,-79.383885,Roof Deck
19,"Harbourfront East, Toronto Islands, Union Station",0.010000,0,43.640816,-79.381752,Pearl Harbourfront,43.638157,-79.380688,Chinese Restaurant
19,"Harbourfront East, Toronto Islands, Union Station",0.010000,0,43.640816,-79.381752,Ripley's Aquarium of Canada,43.642104,-79.386252,Aquarium
19,"Harbourfront East, Toronto Islands, Union Station",0.010000,0,43.640816,-79.381752,Air Canada Club,43.643224,-79.379159,Lounge
19,"Harbourfront East, Toronto Islands, Union Station",0.010000,0,43.640816,-79.381752,Joe Bird,43.638204,-79.380355,Fried Chicken Joint
19,"Harbourfront East, Toronto Islands, Union Station",0.010000,0,43.640816,-79.381752,TELUS Store,43.643312,-79.380999,Office
19,"Harbourfront East, Toronto Islands, Union Station",0.010000,0,43.640816,-79.381752,Union Pearson Express,43.644362,-79.383199,Train Station
19,"Harbourfront East, Toronto Islands, Union Station",0.010000,0,43.640816,-79.381752,Mos Mos,43.641640,-79.377552,Coffee Shop
19,"Harbourfront East, Toronto Islands, Union Station",0.010000,0,43.640816,-79.381752,PawsWay,43.638599,-79.384992,Event Space


In [80]:
#Cluster 1
to_merged.loc[to_merged['Cluster Labels'] == 1]

Unnamed: 0,Neighborhood,Indian Restaurant,Cluster Labels,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
29,"Runnymede, Swansea",0.0,1,43.651571,-79.484450,Wibke's Espresso Bar,43.649132,-79.484802,Coffee Shop
29,"Runnymede, Swansea",0.0,1,43.651571,-79.484450,Coffee Tree Roastery,43.649647,-79.483436,Café
29,"Runnymede, Swansea",0.0,1,43.651571,-79.484450,Yumi Sushi,43.649891,-79.482404,Sushi Restaurant
29,"Runnymede, Swansea",0.0,1,43.651571,-79.484450,DAVIDsTEA,43.650584,-79.478853,Tea Room
29,"Runnymede, Swansea",0.0,1,43.651571,-79.484450,Book City (Bloor West),43.650211,-79.481220,Bookstore
29,"Runnymede, Swansea",0.0,1,43.651571,-79.484450,Goodfellas Wood Oven Pizza,43.648224,-79.486356,Italian Restaurant
29,"Runnymede, Swansea",0.0,1,43.651571,-79.484450,Falafel World,43.649801,-79.482728,Falafel Restaurant
29,"Runnymede, Swansea",0.0,1,43.651571,-79.484450,Bloom Restaurant,43.650307,-79.479836,Latin American Restaurant
29,"Runnymede, Swansea",0.0,1,43.651571,-79.484450,Amber European Restaurant,43.649946,-79.482009,French Restaurant
24,"Moore Park, Summerhill East",0.0,1,43.689574,-79.383160,Ravine,43.690356,-79.386841,Trail


In [81]:
#Cluster 2
to_merged.loc[to_merged['Cluster Labels'] == 2]

Unnamed: 0,Neighborhood,Indian Restaurant,Cluster Labels,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
34,"The Annex, North Midtown, Yorkville",0.047619,2,43.67271,-79.405678,Creeds Coffee Bar,43.6741,-79.410838,Coffee Shop
34,"The Annex, North Midtown, Yorkville",0.047619,2,43.67271,-79.405678,Ezra's Pound,43.675153,-79.405858,Café
34,"The Annex, North Midtown, Yorkville",0.047619,2,43.67271,-79.405678,Roti Cuisine of India,43.674618,-79.408249,Indian Restaurant
34,"The Annex, North Midtown, Yorkville",0.047619,2,43.67271,-79.405678,Rose & Sons,43.675668,-79.403617,American Restaurant
34,"The Annex, North Midtown, Yorkville",0.047619,2,43.67271,-79.405678,Jean Sibelius Square,43.671426,-79.408831,Park
34,"The Annex, North Midtown, Yorkville",0.047619,2,43.67271,-79.405678,Live Organic Food Bar,43.675053,-79.406715,Vegetarian / Vegan Restaurant
34,"The Annex, North Midtown, Yorkville",0.047619,2,43.67271,-79.405678,Madame Boeuf And Flea,43.67524,-79.40662,Burger Joint
34,"The Annex, North Midtown, Yorkville",0.047619,2,43.67271,-79.405678,Dish Cooking Studio,43.674066,-79.410764,Café
34,"The Annex, North Midtown, Yorkville",0.047619,2,43.67271,-79.405678,Beauty Boutique by Shoppers Drug Mart,43.674959,-79.407986,Cosmetics Shop
34,"The Annex, North Midtown, Yorkville",0.047619,2,43.67271,-79.405678,Toronto Archives,43.676447,-79.407509,History Museum


## Observations

Most Indian restaurant locations are in Cluster 2, which is around the Riverdale and Central Bay Street area. Areas like Regent Park, The Junction South, and Richmond do not have any Indian restaurants. The Church and Wellesley area has very few Indian restaurants. Regent Park does not have any Indian restaurants, but it has some Mexican and Italian restaurants. So it might be a good opportunity to open an Indian restaurant there: there would be no competition, yet there are people who like to eat out. Also, Mexican and Indian food have some similar tastes, so it would be a good opportunity to attract that crowd.