Importing the required libraries

In [1]:
import os

import pandas as pd
import numpy as np
import folium
import requests
from bs4 import BeautifulSoup
import lxml
from geopy.geocoders import Nominatim
from sklearn.cluster import KMeans

The Wikipedia URL contains the list of postal codes, boroughs and the neighborhoods around them. The page is loaded in the cell below.

In [2]:
# Download the Wikipedia page that lists the postal codes starting with "M".
url  = "https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M"
# A timeout keeps this cell from hanging indefinitely if the server never answers.
page = requests.get(url, timeout=30)
if page.status_code == 200:
    print('Page is loaded')
else:
    print('Page load error. Error code: {}'.format(page.status_code))

Page is loaded


The page is read into a table, and wherever a cell says 'Not assigned' it is replaced with NaN so that the dataset is easier to monitor later on.

In [3]:
# pd.read_html is handed the URL itself (not the `page` response fetched above);
# [0] keeps the first table it returns. header=0 uses the first row as column
# names and every 'Not assigned' cell is read as NaN.
df_html = pd.read_html(url, header=0, na_values = ['Not assigned'])[0]
df_html.head()

Unnamed: 0,Postal code,Borough,Neighborhood
0,M1A,,
1,M2A,,
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Regent Park / Harbourfront


Rows with NaN in the Borough column are dropped.

In [4]:
# Remove rows without a Borough; inplace=True modifies df_html itself.
df_html.dropna(subset=['Borough'], inplace=True)

The Neighborhood column is checked for NaN values

In [5]:
# Count the rows that still have no Neighborhood value.
neighborhood_missing = df_html['Neighborhood'].isna()
empty_neighborhood = int(neighborhood_missing.sum())
print(empty_neighborhood)

0


NaN values in the Neighborhood column are replaced by the corresponding Borough value.

In [6]:
# Fall back to the Borough name where Neighborhood is NaN.
# The result is assigned back to the column: calling fillna(..., inplace=True)
# on df_html['Neighborhood'] acts on an intermediate object, which pandas warns
# about and which leaves df_html unchanged when Copy-on-Write is enabled.
df_html['Neighborhood'] = df_html['Neighborhood'].fillna(df_html['Borough'])

The dataframe generated from the web page is grouped by postal code and borough, and the Neighborhood values of each group are joined with ', '. The source table separates the neighborhoods inside a cell with '/', which is replaced with ',' so that handling becomes easier.

In [7]:
# One row per (postal code, borough); the neighborhoods of each group are
# concatenated with ', '.
df_grouped_postcode = (
    df_html
    .groupby(['Postal code', 'Borough'])['Neighborhood']
    .agg(', '.join)
    .reset_index()
)
# Cells of the source table separate neighborhoods with '/'; turn that into ','.
# regex=False makes this a literal replacement regardless of the pandas version
# (the default of `regex` differs between releases).
df_grouped_postcode["Neighborhood"] = df_grouped_postcode['Neighborhood'].str.replace('/', ',', regex=False)
df_grouped_postcode.head(5)

Unnamed: 0,Postal code,Borough,Neighborhood
0,M1B,Scarborough,"Malvern , Rouge"
1,M1C,Scarborough,"Rouge Hill , Port Union , Highland Creek"
2,M1E,Scarborough,"Guildwood , Morningside , West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae


A Borough and PostalCode may have many Neighborhoods, so it is ideal to group them and then join the Neighborhood values that share the same Borough and PostalCode. The neighborhoods inside a cell of the source table are separated by '/'; I used the replace method to change '/' to ','.

In [8]:
print('The shape of the dataset is:',df_grouped_postcode.shape)

The shape of the dataset is: (103, 3)


The dataset is now cleansed and transformed so that it can be operated on. Let's save the dataset.

In [9]:
df_grouped_postcode.to_csv('Toronto_postcodes_grouped_neighborhood.csv')

Let's now handle the geospatial data.  
The following lines of code load the CSV file and store it as a dataframe.

In [10]:
url_csv = 'http://cocl.us/Geospatial_data'
df_coordinates = pd.read_csv(url_csv)
df_coordinates.head()

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


The dataset contains each postal code and its corresponding latitude and longitude.

In [11]:
df_neighborhoods=pd.read_csv('Toronto_postcodes_grouped_neighborhood.csv',index_col=[0])
df_neighborhoods.head()

Unnamed: 0,Postal code,Borough,Neighborhood
0,M1B,Scarborough,"Malvern , Rouge"
1,M1C,Scarborough,"Rouge Hill , Port Union , Highland Creek"
2,M1E,Scarborough,"Guildwood , Morningside , West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae


Analysis showed that the postal code column is named differently in df_coordinates ('Postal Code') and df_neighborhoods ('Postal code'), so both are renamed to 'PostalCode' so that the dataframes can be merged.

In [12]:
df_coordinates.rename(columns={'Postal Code':'PostalCode'},inplace=True)
df_neighborhoods.rename(columns={'Postal code':'PostalCode'},inplace=True)

Checking the column names of both dataframes

In [13]:
df_coordinates.columns

Index(['PostalCode', 'Latitude', 'Longitude'], dtype='object')

In [14]:
df_neighborhoods.columns

Index(['PostalCode', 'Borough', 'Neighborhood'], dtype='object')

The following code merges the two dataframes using the PostalCode column as the join key.

In [15]:
# Attach the coordinates to each neighborhood row via the shared PostalCode column.
df_ne_co = df_neighborhoods.merge(df_coordinates, on='PostalCode')
df_ne_co.head()

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M1B,Scarborough,"Malvern , Rouge",43.806686,-79.194353
1,M1C,Scarborough,"Rouge Hill , Port Union , Highland Creek",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood , Morningside , West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476


Save the combined dataframe as csv file

In [16]:
df_ne_co.to_csv('neighbor_coordinates_combined.csv')

The combined csv file is loaded and the first column(index in the saved file) is set as index

In [17]:
df=pd.read_csv('neighbor_coordinates_combined.csv',index_col=[0])
df.head()

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M1B,Scarborough,"Malvern , Rouge",43.806686,-79.194353
1,M1C,Scarborough,"Rouge Hill , Port Union , Highland Creek",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood , Morningside , West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476


Describe method is called so as to analyse the dataset

In [18]:
df.describe(include="all")

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
count,103,103,103,103.0,103.0
unique,103,10,98,,
top,M5N,North York,Downsview,,
freq,1,24,4,,
mean,,,,43.704608,-79.397153
std,,,,0.052463,0.097146
min,,,,43.602414,-79.615819
25%,,,,43.660567,-79.464763
50%,,,,43.696948,-79.38879
75%,,,,43.74532,-79.340923


The data is grouped by Borough and the number of entries under each borough is counted to make sure that there are no missing entries.

In [19]:
df.groupby('Borough').count()

Unnamed: 0_level_0,PostalCode,Neighborhood,Latitude,Longitude
Borough,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Central Toronto,9,9,9,9
Downtown Toronto,19,19,19,19
East Toronto,5,5,5,5
East York,5,5,5,5
Etobicoke,12,12,12,12
Mississauga,1,1,1,1
North York,24,24,24,24
Scarborough,17,17,17,17
West Toronto,6,6,6,6
York,5,5,5,5


The following code runs through all the boroughs in the dataset and selects those which have Toronto in their name. As we are dealing with places that are close to each other, this helps in selecting only the required data instead of running our algorithm on unnecessary data.

In [20]:
# Keep only the boroughs whose name mentions "Toronto" (case-insensitive match).
borough_names = df.Borough.unique().tolist()

borough_with_toronto = [name for name in borough_names if "toronto" in name.lower()]

borough_with_toronto

['East Toronto', 'Central Toronto', 'Downtown Toronto', 'West Toronto']

In [21]:
toronto_df = df[df['Borough'].isin(borough_with_toronto)].reset_index(drop=True)
print(toronto_df.shape)
toronto_df.head()

(39, 5)


Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M4E,East Toronto,The Beaches,43.676357,-79.293031
1,M4K,East Toronto,"The Danforth West , Riverdale",43.679557,-79.352188
2,M4L,East Toronto,"India Bazaar , The Beaches West",43.668999,-79.315572
3,M4M,East Toronto,Studio District,43.659526,-79.340923
4,M4N,Central Toronto,Lawrence Park,43.72802,-79.38879


In [22]:
toronto_df.groupby('Borough').count()

Unnamed: 0_level_0,PostalCode,Neighborhood,Latitude,Longitude
Borough,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Central Toronto,9,9,9,9
Downtown Toronto,19,19,19,19
East Toronto,5,5,5,5
West Toronto,6,6,6,6


The following code checks whether we filtered the Toronto region correctly. Only boroughs with Toronto in their name must be displayed below.

In [23]:
borough_list_toronto=toronto_df.groupby('Borough').count().index.tolist()
borough_list_toronto

['Central Toronto', 'Downtown Toronto', 'East Toronto', 'West Toronto']

Fetching the latitude and longitude of Toronto so that the generated map can be centred on the region and zoomed properly to show our markers

In [25]:
# Look up the coordinates used to centre the maps below.
address="Toronto"

geolocator=Nominatim(user_agent='ny_explorer')
location=geolocator.geocode(address)
# geocode() yields None when nothing matches; fail with a clear message instead
# of an AttributeError on the attribute access below.
if location is None:
    raise ValueError('Could not geocode address: {}'.format(address))
toronto_latitude=location.latitude
toronto_longitude=location.longitude
print('cordinates of {} is {},{}'.format(address,toronto_latitude,toronto_longitude))

cordinates of Toronto is 43.6534817,-79.3839347


Drawing the map of Toronto using the Folium library

In [26]:
map_toronto=folium.Map(location=[toronto_latitude,toronto_longitude],zoom_start=10)
map_toronto

The following code draws a CircleMarker at each latitude and longitude location. The latitude and longitude values are taken from the dataframe of boroughs that have Toronto in their name.

In [27]:
# Add one circle marker per row of toronto_df; the popup names the borough and
# neighborhood of that row.
marker_rows = toronto_df[['Latitude', 'Longitude', 'Borough', 'Neighborhood']]
for lat, lng, borough, neighborhood in marker_rows.itertuples(index=False, name=None):
    label_text = 'borough: ' + borough + '\n' + 'neighborhood: ' + neighborhood
    marker = folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=folium.Popup(label_text),
        color="yellow",
        fill_color="blue",
        fill_opacity=0.7,
    )
    marker.add_to(map_toronto)
map_toronto

The following is the login credentials for using foursquare API

In [28]:
# Foursquare API credentials are read from the environment so that secrets are
# never stored in the notebook or echoed into its saved output.
# Set FOURSQUARE_CLIENT_ID and FOURSQUARE_CLIENT_SECRET before starting the kernel.
# NOTE(review): the keys that used to be hardcoded in this cell were exposed and
# should be revoked/rotated.
CLIENT_ID = os.environ.get('FOURSQUARE_CLIENT_ID', '') # your Foursquare ID
CLIENT_SECRET = os.environ.get('FOURSQUARE_CLIENT_SECRET', '') # your Foursquare Secret
if not CLIENT_ID or not CLIENT_SECRET:
    raise RuntimeError('Set the FOURSQUARE_CLIENT_ID and FOURSQUARE_CLIENT_SECRET environment variables.')
VERSION = '20180605' # Foursquare API version
radius=500 # search radius sent with every request (presumably metres - confirm against the Foursquare docs)
LIMIT=100 # maximum number of venues requested per location
print('Foursquare credentials loaded from the environment.')

My credentails:
CLIENT_ID: <redacted>
CLIENT_SECRET:<redacted>


The following function takes the latitude and longitude coordinates defined in the dataset and pulls out the nearby venues, their categories and their exact geographical coordinates. This helps in exploring the region further and in generating the dataset.

In [29]:
def GetNearByVenues(names,latitudes,longitudes):
    """Fetch the venues around each location from the Foursquare "explore" endpoint.

    The module-level CLIENT_ID, CLIENT_SECRET, VERSION, radius and LIMIT
    settings are sent with every request.

    Parameters
    ----------
    names : iterable
        Label of each location; stored in the 'Neighborhood' column.
    latitudes, longitudes : iterable
        Coordinates of each location, aligned element-wise with ``names``.

    Returns
    -------
    pandas.DataFrame
        One row per returned venue with the columns 'Neighborhood',
        'Neighborhood Latitude', 'Neighborhood Longitude', 'Venue',
        'Venue Latitude', 'Venue Longitude' and 'Venue Category'. The frame
        has these columns even when no venue was returned at all.

    Raises
    ------
    requests.RequestException
        If a request times out, fails, or is answered with an HTTP error status.
    """
    columns = ['Neighborhood',
               'Neighborhood Latitude',
               'Neighborhood Longitude',
               'Venue',
               'Venue Latitude',
               'Venue Longitude',
               'Venue Category']
    endpoint = 'https://api.foursquare.com/v2/venues/explore'

    venue_rows = []
    for name, lat, lng in zip(names, latitudes, longitudes):
        # Let requests encode the query string instead of formatting the
        # credentials into the URL by hand.
        params = {
            'client_id': CLIENT_ID,
            'client_secret': CLIENT_SECRET,
            'v': VERSION,
            'll': '{},{}'.format(lat, lng),
            'radius': radius,
            'limit': LIMIT,
        }
        response = requests.get(endpoint, params=params, timeout=30)
        # Surface HTTP errors (bad credentials, quota, ...) right here rather
        # than as a KeyError while digging through the JSON payload.
        response.raise_for_status()

        groups = response.json().get('response', {}).get('groups', [])
        items = groups[0]['items'] if groups else []

        # Keep only the relevant information for each nearby venue.
        for item in items:
            venue = item['venue']
            categories = venue.get('categories') or []
            venue_rows.append((
                name,
                lat,
                lng,
                venue['name'],
                venue['location']['lat'],
                venue['location']['lng'],
                # Some venues carry no category; record None instead of failing.
                categories[0]['name'] if categories else None))

    # Passing `columns` up front keeps the schema intact for an empty result;
    # assigning seven names to a column-less frame would raise.
    return pd.DataFrame(venue_rows, columns=columns)

In [30]:
toronto_venues_df=GetNearByVenues(toronto_df['Neighborhood'],toronto_df['Latitude'],toronto_df['Longitude'])

In [31]:
toronto_venues_df.head()

Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,The Beaches,43.676357,-79.293031,Glen Manor Ravine,43.676821,-79.293942,Trail
1,The Beaches,43.676357,-79.293031,The Big Carrot Natural Food Market,43.678879,-79.297734,Health Food Store
2,The Beaches,43.676357,-79.293031,Grover Pub and Grub,43.679181,-79.297215,Pub
3,The Beaches,43.676357,-79.293031,Upper Beaches,43.680563,-79.292869,Neighborhood
4,"The Danforth West , Riverdale",43.679557,-79.352188,Pantheon,43.677621,-79.351434,Greek Restaurant


In [32]:
toronto_venues_df.shape

(1609, 7)

Checking Number of categories so as to explore the data

In [33]:
categories=toronto_venues_df['Venue Category'].unique().tolist()

In [34]:
len(categories)

231

Checking whether "Indian Restaurant" is one of the categories so that we can move forward with our exploration

In [35]:
"Indian Restaurant" in categories

True

Our check returns True, meaning that Indian Restaurant is one of the categories. In order to sort the Indian restaurants into different groups, it is ideal to generate a new dataframe whose columns are the categories, using a one-hot encoding produced by the get_dummies method of pandas. Since the neighborhood is one of the major columns, we prefer it to be at the beginning, so we modify the dataframe a bit.

In [36]:
# One indicator column per venue category. dtype=int keeps the indicators as
# 0/1 integers on every pandas version (newer releases default to booleans).
toronto_catg_one_hot = pd.get_dummies(toronto_venues_df[['Venue Category']], prefix="", prefix_sep="", dtype=int)
# Put the neighborhood label in front. The column is called 'Neighborhoods'
# (plural) because 'Neighborhood' is itself one of the venue categories.
toronto_catg_one_hot.insert(0, 'Neighborhoods', toronto_venues_df['Neighborhood'])
toronto_catg_one_hot.head()

Unnamed: 0,Neighborhoods,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,American Restaurant,Antique Shop,Aquarium,Art Gallery,...,Theme Restaurant,Toy / Game Store,Trail,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Wine Bar,Women's Store,Yoga Studio
0,The Beaches,0,0,0,0,0,0,0,0,0,...,0,0,1,0,0,0,0,0,0,0
1,The Beaches,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,The Beaches,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,The Beaches,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,"The Danforth West , Riverdale",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


Group the one hot vectors on the basis of Neighborhoods and take mean of grouped columns.

In [37]:
toronto_onehot_grouped=toronto_catg_one_hot.groupby(['Neighborhoods']).mean().reset_index()
toronto_onehot_grouped.head()

Unnamed: 0,Neighborhoods,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,American Restaurant,Antique Shop,Aquarium,Art Gallery,...,Theme Restaurant,Toy / Game Store,Trail,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Wine Bar,Women's Store,Yoga Studio
0,Berczy Park,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.018182,...,0.0,0.0,0.0,0.0,0.018182,0.0,0.0,0.0,0.0,0.0
1,"Brockton , Parkdale Village , Exhibition Place",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,Business reply mail Processing CentrE,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.058824
3,"CN Tower , King and Spadina , Railway Lands , ...",0.066667,0.066667,0.066667,0.133333,0.2,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,Central Bay Street,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.016129,0.0,0.0,0.0,0.0,0.016129


Check how many neighborhoods in the Toronto region have at least one Indian restaurant

In [38]:
len(toronto_onehot_grouped[toronto_onehot_grouped["Indian Restaurant"]>0])

7

Form a new DataFrame containing only the Neighborhoods and The Indian Restaurants column

In [53]:
toronto_indian_restaurants=toronto_onehot_grouped[["Neighborhoods","Indian Restaurant"]]
toronto_indian_restaurants.head()

Unnamed: 0,Neighborhoods,Indian Restaurant
0,Berczy Park,0.0
1,"Brockton , Parkdale Village , Exhibition Place",0.0
2,Business reply mail Processing CentrE,0.0
3,"CN Tower , King and Spadina , Railway Lands , ...",0.0
4,Central Bay Street,0.016129


Apply the K-means algorithm to the grouped one-hot dataset, using only the Indian Restaurant column

In [40]:
No_of_clusters=3

# Cluster the neighborhoods on the single 'Indian Restaurant' frequency column.
cluster_df=toronto_indian_restaurants.drop(["Neighborhoods"],axis=1)
# n_init is pinned explicitly: its default changed between scikit-learn
# releases, so leaving it implicit can alter the clustering (and emits a
# FutureWarning on some versions).
kmeans=KMeans(n_clusters=No_of_clusters,random_state=1,n_init=10).fit(cluster_df)

kmeans.labels_[0:20]

array([1, 1, 1, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1],
      dtype=int32)

Create a new dataframe with the neighborhood, the mean Indian Restaurant value of that neighborhood and the cluster label from the algorithm

In [41]:
# New frame: the per-neighborhood Indian-restaurant frequency plus the K-means
# label, with the key column renamed to 'Neighborhood'.
toronto_indian_restaurants_with_label = (
    toronto_indian_restaurants
    .assign(ClusterLabel=kmeans.labels_)
    .rename(columns={"Neighborhoods": "Neighborhood"})
)
toronto_indian_restaurants_with_label.head()

Unnamed: 0,Neighborhood,Indian Restaurant,ClusterLabel
0,Berczy Park,0.0,1
1,"Brockton , Parkdale Village , Exhibition Place",0.0,1
2,Business reply mail Processing CentrE,0.0,1
3,"CN Tower , King and Spadina , Railway Lands , ...",0.0,1
4,Central Bay Street,0.016129,0


Combine the above dataframe with the venues dataframe, which holds the venue details, using Neighborhood as the joining column

In [42]:
toronto_indian_restaurants_with_label=toronto_indian_restaurants_with_label.join(toronto_venues_df.set_index('Neighborhood'),on='Neighborhood')

In [43]:
toronto_indian_restaurants_with_label.sort_values(['ClusterLabel'],inplace=True)
toronto_indian_restaurants_with_label.head()

Unnamed: 0,Neighborhood,Indian Restaurant,ClusterLabel,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
36,"The Danforth West , Riverdale",0.023256,0,43.679557,-79.352188,Re: Reading,43.678507,-79.347678,Bookstore
14,"Harbourfront East , Union Station , Toronto Is...",0.01,0,43.640816,-79.381752,Starbucks,43.64309,-79.383071,Coffee Shop
14,"Harbourfront East , Union Station , Toronto Is...",0.01,0,43.640816,-79.381752,Kupfert & Kim,43.641179,-79.378144,Vegetarian / Vegan Restaurant
14,"Harbourfront East , Union Station , Toronto Is...",0.01,0,43.640816,-79.381752,Subway,43.639708,-79.383441,Sandwich Place
14,"Harbourfront East , Union Station , Toronto Is...",0.01,0,43.640816,-79.381752,Pie Bar,43.638174,-79.380546,Pizza Place


We have applied the algorithm and sorted the neighborhoods into different clusters. Let's visualise them using folium.

In [45]:
cluster_map=folium.Map(location=[toronto_latitude,toronto_longitude],zoom_start=10)

# Marker colour for each cluster label.
markers_colors={
    0: 'red',
    1: 'blue',
    2: 'green',
    3: 'yellow',
    4: 'cyan',
    5: 'black',
}

# After the join with the venues, toronto_indian_restaurants_with_label holds
# one row per venue. Plot each neighborhood location once instead of stacking
# an identical marker for every venue on the same point.
neighborhood_clusters=toronto_indian_restaurants_with_label.drop_duplicates(
    subset=['Neighborhood','Neighborhood Latitude','Neighborhood Longitude'])

for lat,long,cluster in zip(neighborhood_clusters['Neighborhood Latitude'],
                            neighborhood_clusters['Neighborhood Longitude'],
                            neighborhood_clusters['ClusterLabel']):
    label_text='cluster : '+str(cluster)
    label=folium.Popup(label_text)
    folium.CircleMarker([lat,long],
                             radius=5,
                             popup=label,
                             color =markers_colors[cluster],
                             fill_color=markers_colors[cluster],
                             fill_opacity=0.8).add_to(cluster_map)

cluster_map

The following is the list of Neighborhoods that have Indian Restaurant and belong to Cluster 0

In [46]:
#cluster 0
toronto_indian_restaurants_with_label.loc[(toronto_indian_restaurants_with_label['ClusterLabel']==0) & (toronto_indian_restaurants_with_label['Venue Category']=='Indian Restaurant')]

Unnamed: 0,Neighborhood,Indian Restaurant,ClusterLabel,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
36,"The Danforth West , Riverdale",0.023256,0,43.679557,-79.352188,Sher-E-Punjab,43.677308,-79.353066,Indian Restaurant
6,Church and Wellesley,0.014085,0,43.66586,-79.38316,Kothur Indian Cuisine,43.667872,-79.385659,Indian Restaurant
8,Davisville,0.027778,0,43.704324,-79.38879,Marigold Indian Bistro,43.702881,-79.388008,Indian Restaurant
14,"Harbourfront East , Union Station , Toronto Is...",0.01,0,43.640816,-79.381752,Indian Roti House,43.63906,-79.385422,Indian Restaurant
30,"St. James Town , Cabbagetown",0.021739,0,43.667967,-79.367675,Butter Chicken Factory,43.667072,-79.369184,Indian Restaurant
4,Central Bay Street,0.016129,0,43.657952,-79.387383,Colaba Junction,43.66094,-79.385635,Indian Restaurant


The following is the list of Neighborhoods that have Indian Restaurant and belong to Cluster 1

In [47]:
#cluster 1
toronto_indian_restaurants_with_label.loc[(toronto_indian_restaurants_with_label['ClusterLabel']==1) & (toronto_indian_restaurants_with_label['Venue Category']=='Indian Restaurant')]

Unnamed: 0,Neighborhood,Indian Restaurant,ClusterLabel,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category


The following is the list of Neighborhoods that have Indian Restaurant and belong to Cluster 2

In [48]:
#cluster 2
toronto_indian_restaurants_with_label.loc[(toronto_indian_restaurants_with_label['ClusterLabel']==2) & (toronto_indian_restaurants_with_label['Venue Category']=='Indian Restaurant')]

Unnamed: 0,Neighborhood,Indian Restaurant,ClusterLabel,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
34,"The Annex , North Midtown , Yorkville",0.043478,2,43.67271,-79.405678,Roti Cuisine of India,43.674618,-79.408249,Indian Restaurant


# conclusion

From the above 3 cells I can infer that  


--->Cluster 0, which includes neighborhoods like Central Bay Street, St. James Town / Cabbagetown, The Danforth West / Riverdale, Church and Wellesley and Davisville, has 6 Indian restaurants

--->Cluster 1 has no Indian Restaurants

--->Cluster 2, which includes the neighborhoods The Annex, North Midtown and Yorkville, has 1 Indian restaurant

#### Looking at the above details, the project recommends starting the business in the regions of cluster 1 or cluster 2. Since cluster 0 is over-crowded, it is not recommended to start a business there. Cluster 2 is a safe option, while cluster 1 is more of a gamble; it could be chosen if there are more Indian inhabitants in the neighborhood.


Thank You