In [1]:
#importing all the necessary libraries

import requests
import pandas as pd

## reading the data

In [2]:
# Download the Wikipedia page listing the Canadian postal codes that start with "M"
url = "https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M"
page = requests.get(url)
# Report whether the request came back with HTTP 200.
download_message = (
    'Page download successful'
    if page.status_code == 200
    else 'Page download error. Error code: {}'.format(page.status_code)
)
print(download_message)

Page download successful


In [3]:
# Parse every HTML table on the page; 'Not assigned' cells are read as NaN.
wiki_tables = pd.read_html(url, header=0, na_values=['Not assigned'])
# The postal code table is the first one on the page.
df_html = wiki_tables[0]
df_html.head()

Unnamed: 0,Postal Code,Borough,Neighborhood
0,M1A,,
1,M2A,,
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Regent Park, Harbourfront"


## cleaning the data

In [4]:
# Drop the postal codes whose Borough is NaN (the 'Not assigned' cells were read as NaN).
# The frame is modified in place, so re-running this cell is a no-op.

df_html.dropna(subset=['Borough'], inplace=True)


In [5]:
# Count the rows that still have no neighbourhood name.
missing_neighborhood = df_html['Neighborhood'].isna()
n_empty_neighborhood = int(missing_neighborhood.sum())
print('Number of rows on which Neighborhood column is empty: {}'.format(n_empty_neighborhood))

Number of rows on which Neighborhood column is empty: 0


In [7]:
# Show the rows (if any) whose neighbourhood is missing.
df_html.loc[df_html['Neighborhood'].isna()]

Unnamed: 0,Postal Code,Borough,Neighborhood


In [8]:
# Fall back to the borough name wherever the neighbourhood is missing.
# The result is assigned back to the column: calling fillna(inplace=True) on a
# column selection is chained in-place mutation, which newer pandas versions warn
# about and which does not modify df_html under copy-on-write.
df_html['Neighborhood'] = df_html['Neighborhood'].fillna(df_html['Borough'])
n_empty_neighborhood = int(df_html['Neighborhood'].isna().sum())
print('Number of rows on which Neighborhood column is empty: {}'.format(n_empty_neighborhood))

Number of rows on which Neighborhood column is empty: 0


In [None]:
# Inspect the rows whose borough is "Queen's Park".
df_html.loc[df_html['Borough'].eq("Queen's Park")]

In [16]:
# One row per (postal code, borough); neighbourhoods sharing a postal code are
# concatenated into a single comma-separated string.
df_postcodes = (
    df_html
    .groupby(['Postal Code', 'Borough'])['Neighborhood']
    .agg(', '.join)
    .reset_index()
)
df_postcodes.head(5)

Unnamed: 0,Postal Code,Borough,Neighborhood
0,M1B,Scarborough,"Malvern, Rouge"
1,M1C,Scarborough,"Rouge Hill, Port Union, Highland Creek"
2,M1E,Scarborough,"Guildwood, Morningside, West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae


In [17]:
# NOTE(review): repeats the earlier dropna on 'Borough'. df_postcodes was already
# built from df_html in the previous cell, so this cannot change it.
df_html.dropna(subset=['Borough'], inplace=True)

In [18]:
# Re-count the rows of df_html with a missing neighbourhood (same check as in the cleaning section).
n_empty_neighborhood = df_html[df_html['Neighborhood'].isna()].shape[0]
print('Number of rows on which Neighborhood column is empty: {}'.format(n_empty_neighborhood))


Number of rows on which Neighborhood column is empty: 0


In [19]:
# (rows, columns) of the grouped table: one row per postal code / borough pair.
print('The shape of the dataset is:',df_postcodes.shape)

The shape of the dataset is: (103, 3)


In [20]:
# Persist the grouped table; the row index is written as the first CSV column
# and is read back as the index when the file is reloaded below.
df_postcodes.to_csv('Toronto_Postcodes.csv')

## getting the latitude and longitude of each postal code

In [21]:
import numpy as np

In [22]:
# Load the latitude and longitude of each postal code from the geospatial CSV.

url_csv = 'http://cocl.us/Geospatial_data'
df_coordinates = pd.read_csv(url_csv)
df_coordinates.head()

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


In [33]:
# Reload the postcode table saved earlier; the first CSV column holds the saved row index.
df_neighborhoods = pd.read_csv('Toronto_Postcodes.csv',index_col=[0])
df_neighborhoods.head()

Unnamed: 0,Postal Code,Borough,Neighborhood
0,M1B,Scarborough,"Malvern, Rouge"
1,M1C,Scarborough,"Rouge Hill, Port Union, Highland Creek"
2,M1E,Scarborough,"Guildwood, Morningside, West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae


In [34]:
# Give both tables the same key column name ('PostalCode') ahead of the merge.

postal_code_rename = {'Postal Code': 'PostalCode'}
df_coordinates = df_coordinates.rename(columns=postal_code_rename)
df_neighborhoods = df_neighborhoods.rename(columns=postal_code_rename)

In [35]:
# Attach coordinates to every postal code (default inner join on 'PostalCode').
df_neighborhoods_coordinates = df_neighborhoods.merge(df_coordinates, on='PostalCode')
df_neighborhoods_coordinates.head()

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M1B,Scarborough,"Malvern, Rouge",43.806686,-79.194353
1,M1C,Scarborough,"Rouge Hill, Port Union, Highland Creek",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476


In [37]:
# Spot-check two postal codes in the merged table.
df_neighborhoods_coordinates[
    df_neighborhoods_coordinates['PostalCode'].isin(['M2N', 'M6N'])
]

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
22,M2N,North York,"Willowdale, Willowdale East",43.77012,-79.408493
81,M6N,York,"Runnymede, The Junction North",43.673185,-79.487262


## dividing neighborhoods into 3 clusters

In [38]:
# Save the merged postcode + coordinates table (the row index is written as the first column).
df_neighborhoods_coordinates.to_csv('Toronto_Postcodes_2.csv')

In [39]:
#importing other libraries for clustering
import folium
from sklearn.cluster import KMeans


In [40]:
# Reload the merged table written above, using the first CSV column as the index.
df = pd.read_csv('Toronto_Postcodes_2.csv', index_col=0)
df.head()


Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M1B,Scarborough,"Malvern, Rouge",43.806686,-79.194353
1,M1C,Scarborough,"Rouge Hill, Port Union, Highland Creek",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476


In [41]:
# Summarise the table: distinct boroughs and number of rows
# (each row is one postal code, possibly covering several neighbourhoods).
n_boroughs = len(df['Borough'].unique())
n_rows = df.shape[0]
summary_template = 'The dataframe has {} boroughs and {} neighborhoods.'
print(summary_template.format(n_boroughs, n_rows))

The dataframe has 10 boroughs and 103 neighborhoods.


In [42]:
# Number of rows with a non-null neighbourhood in each borough.
df.groupby('Borough')['Neighborhood'].count()

Borough
Central Toronto      9
Downtown Toronto    19
East Toronto         5
East York            5
Etobicoke           12
Mississauga          1
North York          24
Scarborough         17
West Toronto         6
York                 5
Name: Neighborhood, dtype: int64

In [43]:

# Keep only the boroughs whose name contains 'Toronto' and renumber the rows from 0.
# reset_index(drop=True) returns a fresh frame, so nothing is modified in place on
# the filtered slice; that in-place edit is what triggered pandas' SettingWithCopyWarning.
df_toronto = df[df['Borough'].str.contains('Toronto')].reset_index(drop=True)
df_toronto.head()

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  errors=errors,


Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M4E,East Toronto,The Beaches,43.676357,-79.293031
1,M4K,East Toronto,"The Danforth West, Riverdale",43.679557,-79.352188
2,M4L,East Toronto,"India Bazaar, The Beaches West",43.668999,-79.315572
3,M4M,East Toronto,Studio District,43.659526,-79.340923
4,M4N,Central Toronto,Lawrence Park,43.72802,-79.38879


In [44]:
# Rows per borough after restricting the table to the Toronto boroughs.
toronto_borough_counts = df_toronto.groupby('Borough')['Neighborhood'].count()
print(toronto_borough_counts)

Borough
Central Toronto      9
Downtown Toronto    19
East Toronto         5
West Toronto         6
Name: Neighborhood, dtype: int64


In [45]:
# Distinct Toronto boroughs, in order of first appearance.
boroughs = df_toronto['Borough'].drop_duplicates().tolist()

In [46]:
# Average the row coordinates of df_toronto; the result is used as the centre of the maps below.
lat_toronto = df_toronto['Latitude'].mean()
lon_toronto = df_toronto['Longitude'].mean()
print('The geographical coordinates of Toronto are {}, {}'.format(lat_toronto, lon_toronto))

The geographical coordinates of Toronto are 43.66713498717948, -79.38987324871795


## mapping the neighborhoods

In [47]:
# Assign each borough a random hex colour ('#RRGGBB').
# No seed is set, so the colours differ between runs.
borough_color = {
    borough: '#%02X%02X%02X' % tuple(np.random.choice(range(256), size=3))
    for borough in boroughs
}

In [48]:
# Draw the Toronto rows on a folium map, coloured by borough.

map_toronto = folium.Map(location=[lat_toronto, lon_toronto], zoom_start=12)

# One circle marker per row of df_toronto; the popup shows "<borough> - <neighbourhood>".
marker_fields = df_toronto[['Latitude', 'Longitude', 'Borough', 'Neighborhood']]
for lat, lng, borough, neighborhood in marker_fields.itertuples(index=False, name=None):
    marker_color = borough_color[borough]
    marker = folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=folium.Popup(borough + ' - ' + neighborhood),
        color=marker_color,
        fill_color=marker_color,
        fill_opacity=0.7)
    marker.add_to(map_toronto)

map_toronto

In [49]:

# Foursquare API settings.
# The credentials are read from the environment so that no secret is stored in the
# notebook. Set FOURSQUARE_CLIENT_ID and FOURSQUARE_CLIENT_SECRET before starting
# the kernel. NOTE(review): the key pair previously committed here should be rotated.
import os
import warnings

CLIENT_ID = os.environ.get('FOURSQUARE_CLIENT_ID', '')
CLIENT_SECRET = os.environ.get('FOURSQUARE_CLIENT_SECRET', '')
if not CLIENT_ID or not CLIENT_SECRET:
    warnings.warn(
        'FOURSQUARE_CLIENT_ID / FOURSQUARE_CLIENT_SECRET are not set; '
        'Foursquare requests will be rejected.'
    )
VERSION = '20180604'  # value sent as the API "v" parameter
LIMIT = 100  # value sent as the API "limit" parameter
radius = 500  # not passed to getNearbyVenues below, which uses its own default of 500

## Here is where foursquare comes in

In [50]:
#getting venue list and converting it into a data frame and the printing it


def getNearbyVenues(names, latitudes, longitudes, radius=500):
    """Query the Foursquare "explore" endpoint around each location.

    Parameters
    ----------
    names, latitudes, longitudes : iterables of equal length
        Label and coordinates of each location; they are iterated together with zip.
    radius : int, default 500
        Value sent as the ``radius`` request parameter.

    Returns
    -------
    pandas.DataFrame
        One row per returned venue with the columns 'Neighborhood',
        'Neighborhood Latitude', 'Neighborhood Longitude', 'Venue',
        'Venue Latitude', 'Venue Longitude' and 'Venue Category'.
        The frame is empty (but keeps these columns) when no venue is returned.

    Raises
    ------
    requests.HTTPError
        If Foursquare answers with an error status.

    Notes
    -----
    Reads the module-level CLIENT_ID, CLIENT_SECRET, VERSION and LIMIT.
    """
    columns = ['Neighborhood',
               'Neighborhood Latitude',
               'Neighborhood Longitude',
               'Venue',
               'Venue Latitude',
               'Venue Longitude',
               'Venue Category']

    rows = []
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)

        # Let requests build and encode the query string.
        params = {
            'client_id': CLIENT_ID,
            'client_secret': CLIENT_SECRET,
            'v': VERSION,
            'll': '{},{}'.format(lat, lng),
            'radius': radius,
            'limit': LIMIT,
        }
        response = requests.get('https://api.foursquare.com/v2/venues/explore',
                                params=params,
                                timeout=30)
        # Fail with a clear HTTP error instead of a KeyError on the JSON below.
        response.raise_for_status()

        groups = response.json()['response'].get('groups') or []
        items = groups[0]['items'] if groups else []

        # Keep only the relevant information for each nearby venue.
        for item in items:
            venue = item['venue']
            categories = venue.get('categories') or []
            rows.append((
                name,
                lat,
                lng,
                venue['name'],
                venue['location']['lat'],
                venue['location']['lng'],
                # A venue without categories gets None instead of raising IndexError.
                categories[0]['name'] if categories else None))

    # Passing the columns explicitly keeps the schema even when there are no rows.
    nearby_venues = pd.DataFrame(rows, columns=columns)

    return(nearby_venues)

In [51]:
# Fetch the venues around every row of df_toronto.
# getNearbyVenues prints each neighbourhood name as it is queried (output below).
toronto_venues = getNearbyVenues(names=df_toronto['Neighborhood'],
                                latitudes=df_toronto['Latitude'],
                                longitudes=df_toronto['Longitude'])

The Beaches
The Danforth West, Riverdale
India Bazaar, The Beaches West
Studio District
Lawrence Park
Davisville North
North Toronto West, Lawrence Park
Davisville
Moore Park, Summerhill East
Summerhill West, Rathnelly, South Hill, Forest Hill SE, Deer Park
Rosedale
St. James Town, Cabbagetown
Church and Wellesley
Regent Park, Harbourfront
Garden District, Ryerson
St. James Town
Berczy Park
Central Bay Street
Richmond, Adelaide, King
Harbourfront East, Union Station, Toronto Islands
Toronto Dominion Centre, Design Exchange
Commerce Court, Victoria Hotel
Roselawn
Forest Hill North & West, Forest Hill Road Park
The Annex, North Midtown, Yorkville
University of Toronto, Harbord
Kensington Market, Chinatown, Grange Park
CN Tower, King and Spadina, Railway Lands, Harbourfront West, Bathurst Quay, South Niagara, Island airport
Stn A PO Boxes
First Canadian Place, Underground city
Christie
Dufferin, Dovercourt Village
Little Portugal, Trinity
Brockton, Parkdale Village, Exhibition Place
High 

In [52]:
# (rows, columns): one row per venue returned by Foursquare.
toronto_venues.shape

(1636, 7)

In [53]:
# Preview the first venues.
toronto_venues.head()

Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,The Beaches,43.676357,-79.293031,Glen Manor Ravine,43.676821,-79.293942,Trail
1,The Beaches,43.676357,-79.293031,The Big Carrot Natural Food Market,43.678879,-79.297734,Health Food Store
2,The Beaches,43.676357,-79.293031,Grover Pub and Grub,43.679181,-79.297215,Pub
3,The Beaches,43.676357,-79.293031,Upper Beaches,43.680563,-79.292869,Neighborhood
4,"The Danforth West, Riverdale",43.679557,-79.352188,MenEssentials,43.67782,-79.351265,Cosmetics Shop


In [54]:
# Count the non-null values of every column per neighbourhood, i.e. the number of venues returned for each.
toronto_venues.groupby('Neighborhood').count()

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Berczy Park,58,58,58,58,58,58
"Brockton, Parkdale Village, Exhibition Place",26,26,26,26,26,26
"Business reply mail Processing Centre, South Central Letter Processing Plant Toronto",16,16,16,16,16,16
"CN Tower, King and Spadina, Railway Lands, Harbourfront West, Bathurst Quay, South Niagara, Island airport",15,15,15,15,15,15
Central Bay Street,67,67,67,67,67,67
Christie,16,16,16,16,16,16
Church and Wellesley,77,77,77,77,77,77
"Commerce Court, Victoria Hotel",100,100,100,100,100,100
Davisville,37,37,37,37,37,37
Davisville North,9,9,9,9,9,9


In [55]:
# Count the distinct venue categories across all returned venues.

print('There are {} uniques categories.'.format(len(toronto_venues['Venue Category'].unique())))

There are 232 uniques categories.


## listing the distinct venue categories found in the neighborhoods

In [56]:
# Show the first 100 distinct venue categories, in order of first appearance.
toronto_venues['Venue Category'].unique()[:100]

array(['Trail', 'Health Food Store', 'Pub', 'Neighborhood',
       'Cosmetics Shop', 'Greek Restaurant', 'Ice Cream Shop',
       'Italian Restaurant', 'Brewery', 'Fruit & Vegetable Store',
       'Yoga Studio', 'Juice Bar', 'Restaurant', 'Pizza Place',
       'Bookstore', 'Bubble Tea Shop', 'Dessert Shop',
       'Furniture / Home Store', 'Spa', 'Grocery Store', 'Coffee Shop',
       'Bakery', 'Caribbean Restaurant', 'Indian Restaurant', 'Café',
       'Lounge', 'Frozen Yogurt Shop', 'Liquor Store',
       'American Restaurant', 'Sushi Restaurant', 'Gym',
       'Fish & Chips Shop', 'Fast Food Restaurant', 'Park', 'Pet Store',
       'Steakhouse', 'Burrito Place', 'Movie Theater', 'Sandwich Place',
       'Board Shop', 'Food & Drink Shop', 'Fish Market', 'Gay Bar',
       'Thai Restaurant', 'Seafood Restaurant', 'Cheese Shop',
       'Comfort Food Restaurant', 'Middle Eastern Restaurant',
       'Stationery Store', 'Coworking Space', 'Wine Bar',
       'Latin American Restaurant', 'Ga

## checking that "Gym" is one of the venue categories

In [57]:
"Gym" in toronto_venues['Venue Category'].unique()

True

In [58]:
# One-hot encode the venue categories: one row per venue, one column per category.
x_y = pd.get_dummies(toronto_venues[['Venue Category']], prefix="", prefix_sep="")
# Attach the neighbourhood of each venue, then move that column to the front.
x_y['Neighborhoods'] = toronto_venues['Neighborhood']
category_columns = [column for column in x_y.columns if column != 'Neighborhoods']
x_y = x_y[['Neighborhoods'] + category_columns]

print(x_y.shape)
x_y.head()

(1636, 233)


Unnamed: 0,Neighborhoods,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,Antique Shop,Aquarium,...,Theme Restaurant,Toy / Game Store,Trail,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Wine Bar,Women's Store,Yoga Studio
0,The Beaches,0,0,0,0,0,0,0,0,0,...,0,0,1,0,0,0,0,0,0,0
1,The Beaches,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,The Beaches,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,The Beaches,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,"The Danforth West, Riverdale",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [60]:
# Average the one-hot columns per neighbourhood: each value becomes the share
# of that neighbourhood's venues falling in the category.

y_z = x_y.groupby("Neighborhoods", as_index=False).mean()

print(y_z.shape)
y_z

(39, 233)


Unnamed: 0,Neighborhoods,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,Antique Shop,Aquarium,...,Theme Restaurant,Toy / Game Store,Trail,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Wine Bar,Women's Store,Yoga Studio
0,Berczy Park,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.017241,0.0,0.0,0.0,0.0,0.0
1,"Brockton, Parkdale Village, Exhibition Place",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,"Business reply mail Processing Centre, South C...",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0625
3,"CN Tower, King and Spadina, Railway Lands, Har...",0.066667,0.066667,0.066667,0.133333,0.133333,0.133333,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,Central Bay Street,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.014925,0.0,0.0,0.014925,0.0,0.014925
5,Christie,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,Church and Wellesley,0.0,0.0,0.0,0.0,0.0,0.0,0.012987,0.0,0.0,...,0.012987,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.025974
7,"Commerce Court, Victoria Hotel",0.0,0.0,0.0,0.0,0.0,0.0,0.04,0.0,0.0,...,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.01,0.0,0.0
8,Davisville,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.027027,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,Davisville North,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [62]:
# Number of neighbourhoods with at least one venue in the "Gym" category.
has_gym = y_z["Gym"] > 0
int(has_gym.sum())

15

## grouping gym spots and making a different dataset

In [63]:
# Keep only the neighbourhood name and its "Gym" frequency (share of its venues that are gyms).
gym_x = y_z[["Neighborhoods","Gym"]]

In [64]:
# Preview the gym-frequency table.
gym_x.head()

Unnamed: 0,Neighborhoods,Gym
0,Berczy Park,0.0
1,"Brockton, Parkdale Village, Exhibition Place",0.038462
2,"Business reply mail Processing Centre, South C...",0.0
3,"CN Tower, King and Spadina, Railway Lands, Har...",0.0
4,Central Bay Street,0.014925


In [65]:
# Cluster the neighbourhoods into 3 groups using the gym frequency as the only feature.
clusters = 3
# Use the columns= keyword: pandas 2 no longer accepts the axis as a positional argument of drop().
to_cluster = gym_x.drop(columns=["Neighborhoods"])
# n_init is pinned to 10 (the historical scikit-learn default) so the result does
# not depend on the installed version's default; random_state fixes the seed.
kmeans = KMeans(n_clusters=clusters, n_init=10, random_state=0).fit(to_cluster)
kmeans.labels_[0:10]


array([0, 1, 0, 0, 0, 0, 0, 1, 1, 2])

In [67]:
# Copy the gym-frequency table and attach the k-means label of each neighbourhood.
gym_y = gym_x.assign(**{"Cluster Labels": kmeans.labels_})


In [68]:
# Use the same key name as toronto_venues ('Neighborhood') ahead of the join.
gym_y = gym_y.rename(columns={"Neighborhoods": "Neighborhood"})
gym_y.head()

Unnamed: 0,Neighborhood,Gym,Cluster Labels
0,Berczy Park,0.0,0
1,"Brockton, Parkdale Village, Exhibition Place",0.038462,1
2,"Business reply mail Processing Centre, South C...",0.0,0
3,"CN Tower, King and Spadina, Railway Lands, Har...",0.0,0
4,Central Bay Street,0.014925,0


In [69]:
# Join the venue table (neighbourhood coordinates plus venue details) onto the clustered gym table.
# toronto_venues has one row per venue, so the result has one row per venue as well:
# the Gym and Cluster Labels values and the left-hand index repeat for every venue of a neighbourhood.

gym_y = gym_y.join(toronto_venues.set_index("Neighborhood"), on="Neighborhood")

print(gym_y.shape)
gym_y.head()

(1636, 9)


Unnamed: 0,Neighborhood,Gym,Cluster Labels,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Berczy Park,0.0,0,43.644771,-79.373306,LCBO,43.642944,-79.37244,Liquor Store
0,Berczy Park,0.0,0,43.644771,-79.373306,The Keg Steakhouse + Bar - Esplanade,43.646712,-79.374768,Restaurant
0,Berczy Park,0.0,0,43.644771,-79.373306,Meridian Hall,43.646292,-79.376022,Concert Hall
0,Berczy Park,0.0,0,43.644771,-79.373306,Fresh On Front,43.647815,-79.374453,Vegetarian / Vegan Restaurant
0,Berczy Park,0.0,0,43.644771,-79.373306,Hockey Hall Of Fame (Hockey Hall of Fame),43.646974,-79.377323,Museum


## sorting the joined cluster and gym dataset by cluster label

In [70]:
# Order the joined rows by cluster label.

print(gym_y.shape)
gym_y = gym_y.sort_values("Cluster Labels")
gym_y

(1636, 9)


Unnamed: 0,Neighborhood,Gym,Cluster Labels,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Berczy Park,0.000000,0,43.644771,-79.373306,LCBO,43.642944,-79.372440,Liquor Store
19,"Little Portugal, Trinity",0.000000,0,43.647927,-79.419750,Pho Rua Vang (Golden Turtle),43.646893,-79.419778,Vietnamese Restaurant
19,"Little Portugal, Trinity",0.000000,0,43.647927,-79.419750,Lower Ossington Theatre,43.646389,-79.419781,Theater
19,"Little Portugal, Trinity",0.000000,0,43.647927,-79.419750,Bellwoods Brewery Bottle Shop,43.647120,-79.420044,Beer Store
19,"Little Portugal, Trinity",0.000000,0,43.647927,-79.419750,Reposado,43.647321,-79.420032,Bar
...,...,...,...,...,...,...,...,...,...
9,Davisville North,0.111111,2,43.712751,-79.390197,Sherwood Park,43.716551,-79.387776,Park
9,Davisville North,0.111111,2,43.712751,-79.390197,Summerhill Market North,43.715499,-79.392881,Food & Drink Shop
9,Davisville North,0.111111,2,43.712751,-79.390197,Subway,43.708474,-79.390674,Sandwich Place
9,Davisville North,0.111111,2,43.712751,-79.390197,Best Western Roehampton Hotel & Suites,43.708878,-79.390880,Hotel


## mapping the dataset 

In [71]:
#now we view the clusters on the map

import matplotlib.cm as cm
import matplotlib.colors as colors

In [72]:
# Draw the clustered neighbourhoods on a folium map, one colour per cluster.

map_clusters = folium.Map(location=[lat_toronto, lon_toronto], zoom_start=11)

# One rainbow colour per cluster label.
colors_array = cm.rainbow(np.linspace(0, 1, clusters))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# gym_y holds one row per venue; keep one row per neighbourhood location so that
# each location gets a single marker instead of a stack of identical ones.
neighborhood_points = gym_y.drop_duplicates(
    subset=['Neighborhood', 'Neighborhood Latitude', 'Neighborhood Longitude'])

for lat, lon, poi, cluster in zip(neighborhood_points['Neighborhood Latitude'],
                                  neighborhood_points['Neighborhood Longitude'],
                                  neighborhood_points['Neighborhood'],
                                  neighborhood_points['Cluster Labels']):
    label = folium.Popup(str(poi) + ' - Cluster ' + str(cluster))
    # cluster-1 sends label 0 to the last colour through negative indexing;
    # every label still gets its own colour.
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)

map_clusters

In [75]:
# Write the cluster map to finalmap.html.
map_clusters.save('finalmap.html')

# STUDYING THE THREE CLUSTERS

# FIRST CLUSTER

In [77]:
# Rows (one per venue) of the neighbourhoods assigned to cluster 0.

gym_y[gym_y['Cluster Labels'].eq(0)]

Unnamed: 0,Neighborhood,Gym,Cluster Labels,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Berczy Park,0.00,0,43.644771,-79.373306,LCBO,43.642944,-79.372440,Liquor Store
19,"Little Portugal, Trinity",0.00,0,43.647927,-79.419750,Pho Rua Vang (Golden Turtle),43.646893,-79.419778,Vietnamese Restaurant
19,"Little Portugal, Trinity",0.00,0,43.647927,-79.419750,Lower Ossington Theatre,43.646389,-79.419781,Theater
19,"Little Portugal, Trinity",0.00,0,43.647927,-79.419750,Bellwoods Brewery Bottle Shop,43.647120,-79.420044,Beer Store
19,"Little Portugal, Trinity",0.00,0,43.647927,-79.419750,Reposado,43.647321,-79.420032,Bar
...,...,...,...,...,...,...,...,...,...
13,"Garden District, Ryerson",0.01,0,43.657162,-79.378937,GB Hand-Pulled Noodles,43.656434,-79.383783,Chinese Restaurant
13,"Garden District, Ryerson",0.01,0,43.657162,-79.378937,Tangerine Café,43.653937,-79.379722,Bank
13,"Garden District, Ryerson",0.01,0,43.657162,-79.378937,JOEY Eaton Centre,43.656094,-79.381878,New American Restaurant
13,"Garden District, Ryerson",0.01,0,43.657162,-79.378937,Chatime 日出茶太,43.655542,-79.384684,Bubble Tea Shop


# SECOND CLUSTER

In [78]:
# Rows (one per venue) of the neighbourhoods assigned to cluster 1.
gym_y[gym_y['Cluster Labels'].eq(1)]

Unnamed: 0,Neighborhood,Gym,Cluster Labels,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
31,Stn A PO Boxes,0.020619,1,43.646435,-79.374846,Brookfield Place,43.646791,-79.378769,Shopping Mall
31,Stn A PO Boxes,0.020619,1,43.646435,-79.374846,Indigospirit,43.648350,-79.380347,Bookstore
31,Stn A PO Boxes,0.020619,1,43.646435,-79.374846,Buster's Sea Cove,43.647774,-79.379181,Seafood Restaurant
31,Stn A PO Boxes,0.020619,1,43.646435,-79.374846,St. Lawrence Antique Market,43.649615,-79.371747,Antique Shop
31,Stn A PO Boxes,0.020619,1,43.646435,-79.374846,GoodLife Fitness Toronto Street Women's Only,43.650400,-79.376700,Gym
...,...,...,...,...,...,...,...,...,...
28,"Runnymede, Swansea",0.028571,1,43.651571,-79.484450,RBC Royal Bank,43.650142,-79.480274,Bank
28,"Runnymede, Swansea",0.028571,1,43.651571,-79.484450,Awai,43.650412,-79.478477,Vegetarian / Vegan Restaurant
28,"Runnymede, Swansea",0.028571,1,43.651571,-79.484450,Goodfellas Wood Oven Pizza,43.648224,-79.486356,Italian Restaurant
8,Davisville,0.054054,1,43.704324,-79.388790,Subway,43.708474,-79.390674,Sandwich Place


# THIRD CLUSTER

In [79]:
# Rows (one per venue) of the neighbourhoods assigned to cluster 2.
gym_y[gym_y['Cluster Labels'].eq(2)]

Unnamed: 0,Neighborhood,Gym,Cluster Labels,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
9,Davisville North,0.111111,2,43.712751,-79.390197,Winners,43.713236,-79.393873,Department Store
9,Davisville North,0.111111,2,43.712751,-79.390197,Provocative Pizza Series,43.708293,-79.389546,Pizza Place
9,Davisville North,0.111111,2,43.712751,-79.390197,The Ambassador,43.710418,-79.39186,Hotel
9,Davisville North,0.111111,2,43.712751,-79.390197,Gym,43.713126,-79.393537,Gym
9,Davisville North,0.111111,2,43.712751,-79.390197,Sherwood Park,43.716551,-79.387776,Park
9,Davisville North,0.111111,2,43.712751,-79.390197,Summerhill Market North,43.715499,-79.392881,Food & Drink Shop
9,Davisville North,0.111111,2,43.712751,-79.390197,Subway,43.708474,-79.390674,Sandwich Place
9,Davisville North,0.111111,2,43.712751,-79.390197,Best Western Roehampton Hotel & Suites,43.708878,-79.39088,Hotel
9,Davisville North,0.111111,2,43.712751,-79.390197,Homeway Restaurant & Brunch,43.712641,-79.391557,Breakfast Spot
