IMPORTING ALL THE REQUIRED LIBRARIES


In [1]:
import pandas as pd
import numpy as np
from sklearn.cluster import KMeans
import requests
!conda install -c conda-forge geopy --yes
from geopy.geocoders import Nominatim
!conda install -c conda-forge folium=0.5.0 --yes
import folium 
print("Folium installed")
print("Libraries installed")

!conda install -c conda-forge lxml --yes


Collecting package metadata (current_repodata.json): done
Solving environment: done

## Package Plan ##

  environment location: /home/jupyterlab/conda/envs/python

  added / updated specs:
    - geopy


The following packages will be downloaded:

    package                    |            build
    ---------------------------|-----------------
    geographiclib-1.50         |             py_0          34 KB  conda-forge
    geopy-2.0.0                |     pyh9f0ad1d_0          63 KB  conda-forge
    ------------------------------------------------------------
                                           Total:          97 KB

The following NEW packages will be INSTALLED:

  geographiclib      conda-forge/noarch::geographiclib-1.50-py_0
  geopy              conda-forge/noarch::geopy-2.0.0-pyh9f0ad1d_0



Downloading and Extracting Packages
geopy-2.0.0          | 63 KB     | ##################################### | 100% 
geographiclib-1.50   | 34 KB     | ################################

CREATING THE REQUIRED DATAFRAME

In [2]:

url="https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M"
header = {
  "User-Agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.75 Safari/537.36",
  "X-Requested-With": "XMLHttpRequest"
}

r = requests.get(url, headers=header)

tables = pd.read_html(r.text)

df=pd.DataFrame(tables[0])

# The dataframe will consist of three columns: PostalCode, Borough, and Neighborhood

df.columns=['Postcode','Borough','Neighbourhood']


df.replace('Not assigned',value=np.nan,inplace=True)
df.dropna(axis=0,subset=['Borough'],inplace=True)
df.reset_index(drop=True,inplace=True)
df.rename(columns={'Postcode':'Postal Code'},inplace=True)
df.head()

Unnamed: 0,Postal Code,Borough,Neighbourhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Regent Park, Harbourfront"
3,M6A,North York,"Lawrence Manor, Lawrence Heights"
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"


EXTRACTING THE DATA FROM CSV FILE INTO A DATAFRAME

In [3]:
df_latlng=pd.read_csv("https://cocl.us/Geospatial_data")
df_latlng.head()

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


MERGING BOTH THE DATAFRAMES TO CREATE THE REQUIRED DATAFRAME

In [4]:
df_toronto=pd.merge(df, df_latlng, on='Postal Code')
df_toronto.head()

Unnamed: 0,Postal Code,Borough,Neighbourhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636
3,M6A,North York,"Lawrence Manor, Lawrence Heights",43.718518,-79.464763
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494


In [5]:
df_toronto['Borough'].unique()

array(['North York', 'Downtown Toronto', 'Etobicoke', 'Scarborough',
       'East York', 'York', 'East Toronto', 'West Toronto',
       'Central Toronto', 'Mississauga'], dtype=object)

In [6]:
address='Toronto, Canada'

location=Nominatim(user_agent='tr_explorer').geocode(address)
latitude=location.latitude
longitude=location.longitude
print('coordinates of Toronto are {} and {}'.format(latitude,longitude))

coordinates of Toronto are 43.6534817 and -79.3839347


Create a map of New York with neighborhoods superimposed on top

In [7]:
# create map of New York using latitude and longitude values
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=10)

# add markers to map
for lat, lng, borough, neighborhood in zip(df_toronto['Latitude'], df_toronto['Longitude'], df_toronto['Borough'], df_toronto['Neighbourhood']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_toronto)  
    
map_toronto

In [8]:
toronto_data=df_toronto[df_toronto['Borough']=='Downtown Toronto'].reset_index(drop=True)
toronto_data.head()

Unnamed: 0,Postal Code,Borough,Neighbourhood,Latitude,Longitude
0,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636
1,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494
2,M5B,Downtown Toronto,"Garden District, Ryerson",43.657162,-79.378937
3,M5C,Downtown Toronto,St. James Town,43.651494,-79.375418
4,M5E,Downtown Toronto,Berczy Park,43.644771,-79.373306


Geographical coordinates of Downtown Toronto.

In [9]:
address='Downtown Toronto,Canada'
location=Nominatim(user_agent='tr_explorer').geocode(address)
latitude=location.latitude
longitude=location.longitude
print('coordinates of Downtown Toronto are {}and{}'.format(latitude,longitude))

coordinates of Downtown Toronto are 43.6563221and-79.3809161


Visualizing Downtown Toronto, the neighborhoods in it.

In [10]:
map_dt = folium.Map(location=[latitude, longitude], zoom_start=10)

# add markers to map
for lat, lng, label in zip(toronto_data['Latitude'],toronto_data['Longitude'],toronto_data['Neighbourhood']):

    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_toronto)  
    
map_toronto

Utilizing the Foursquare API to explore the neighborhoods and segment them.

Define Foursquare Credentials and Version

In [11]:
CLIENT_ID = 'M5DGU3OSZVATCMG3YC3ZWSZ2LW3REYGMZCRLXSEJARTVGVKF' # your Foursquare ID
CLIENT_SECRET = '2YPQOTBDP5RQBNRRBDTTCPHXYRNQEU4GPIZGPVJKTFIB5CDY' # your Foursquare Secret
VERSION = '20180605' # Foursquare API version

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: M5DGU3OSZVATCMG3YC3ZWSZ2LW3REYGMZCRLXSEJARTVGVKF
CLIENT_SECRET:2YPQOTBDP5RQBNRRBDTTCPHXYRNQEU4GPIZGPVJKTFIB5CDY


Explore the first neighborhood in our dataframe.

In [12]:
neighborhood_latitude = toronto_data.loc[0, 'Latitude'] # neighborhood latitude value
neighborhood_longitude = toronto_data.loc[0, 'Longitude'] # neighborhood longitude value

neighborhood_name = toronto_data.loc[0, 'Neighbourhood'] # neighborhood name

print('Latitude and longitude values of {} are {}, {}.'.format(neighborhood_name, 
                                                               neighborhood_latitude, 
                                                               neighborhood_longitude))

Latitude and longitude values of Regent Park, Harbourfront are 43.6542599, -79.3606359.


100 venues that are in Harbourfront within a radius of 500 meters.

create the GET request URL. Name your URL url.

In [13]:
radius=500
LIMIT=100
url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
    CLIENT_ID, 
    CLIENT_SECRET, 
    VERSION, 
    neighborhood_latitude, 
    neighborhood_longitude, 
    radius, 
    LIMIT)
url # display URL


'https://api.foursquare.com/v2/venues/explore?&client_id=M5DGU3OSZVATCMG3YC3ZWSZ2LW3REYGMZCRLXSEJARTVGVKF&client_secret=2YPQOTBDP5RQBNRRBDTTCPHXYRNQEU4GPIZGPVJKTFIB5CDY&v=20180605&ll=43.6542599,-79.3606359&radius=500&limit=100'

Send the GET request and examine the resutls

In [55]:
results = requests.get(url).json()

Function that extracts the category of the venue

In [15]:
def category_type(row):
    try:
        categories_list=row['categories']
    except:
        categories_list=row['venue.categories']
    if len(categories_list)==0:
        return None
    else:
        return categories_list[0]['name']

Clean the json and structure it into a pandas dataframe.

In [16]:
from pandas.io.json import json_normalize 

In [17]:
venues=results['response']['groups'][0]['items']
nearby_venues=json_normalize(venues)
#filter columns
filtered_columns = ['venue.name', 'venue.categories', 'venue.location.lat', 'venue.location.lng']
nearby_venues =nearby_venues.loc[:, filtered_columns]

# filter the category for each row
nearby_venues['venue.categories'] = nearby_venues.apply(category_type, axis=1)

# clean columns
nearby_venues.columns = [col.split(".")[-1] for col in nearby_venues.columns]

nearby_venues.head()

  


Unnamed: 0,name,categories,lat,lng
0,Roselle Desserts,Bakery,43.653447,-79.362017
1,Tandem Coffee,Coffee Shop,43.653559,-79.361809
2,Cooper Koo Family YMCA,Distribution Center,43.653249,-79.358008
3,Body Blitz Spa East,Spa,43.654735,-79.359874
4,Impact Kitchen,Restaurant,43.656369,-79.35698


In [18]:
print('{} venues were returned by Foursquare.'.format(nearby_venues.shape[0]))

45 venues were returned by Foursquare.


Create a function to repeat the same process to all the neighborhoods in Toronto

In [19]:
def getNearbyVenues(names,latitudes,longitudes,radius=500):
    venues_list=[]
    for name,lat,lng in zip(names,latitudes,longitudes):
        print(name)
        
    #create the url
    url='https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
    # make the GET request
    results = requests.get(url).json()["response"]['groups'][0]['items']
    
    # return only relevant information for each nearby venue
    venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])
    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighbourhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    return(nearby_venues)


Now write the code to run the above function on each neighborhood and create a new dataframe called toronto_venues.

In [20]:
toronto_venues = getNearbyVenues(names=toronto_data['Neighbourhood'],
                                   latitudes=toronto_data['Latitude'],
                                   longitudes=toronto_data['Longitude']
                                  )

Regent Park, Harbourfront
Queen's Park, Ontario Provincial Government
Garden District, Ryerson
St. James Town
Berczy Park
Central Bay Street
Christie
Richmond, Adelaide, King
Harbourfront East, Union Station, Toronto Islands
Toronto Dominion Centre, Design Exchange
Commerce Court, Victoria Hotel
University of Toronto, Harbord
Kensington Market, Chinatown, Grange Park
CN Tower, King and Spadina, Railway Lands, Harbourfront West, Bathurst Quay, South Niagara, Island airport
Rosedale
Stn A PO Boxes
St. James Town, Cabbagetown
First Canadian Place, Underground city
Church and Wellesley


check the size of the resulting dataframe

In [21]:
print(toronto_venues.shape)
toronto_venues

(75, 7)


Unnamed: 0,Neighbourhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Church and Wellesley,43.66586,-79.38316,Storm Crow Manor,43.666840,-79.381593,Theme Restaurant
1,Church and Wellesley,43.66586,-79.38316,DanceLifeX Centre,43.666956,-79.385297,Dance Studio
2,Church and Wellesley,43.66586,-79.38316,Smith,43.666927,-79.381421,Breakfast Spot
3,Church and Wellesley,43.66586,-79.38316,The Alley,43.665922,-79.385567,Bubble Tea Shop
4,Church and Wellesley,43.66586,-79.38316,Barbara Hall Park,43.666879,-79.381068,Park
...,...,...,...,...,...,...,...
70,Church and Wellesley,43.66586,-79.38316,Alexander Wood Statue,43.663702,-79.380301,Sculpture Garden
71,Church and Wellesley,43.66586,-79.38316,Croissant Tree,43.669575,-79.382331,Coffee Shop
72,Church and Wellesley,43.66586,-79.38316,Asahi Sushi,43.669874,-79.382943,Sushi Restaurant
73,Church and Wellesley,43.66586,-79.38316,A&W,43.666415,-79.378235,Fast Food Restaurant


In [22]:
toronto_venues.groupby('Neighbourhood').count()

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighbourhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Church and Wellesley,75,75,75,75,75,75


In [23]:
print('There are {} uniques categories.'.format(len(toronto_venues['Venue Category'].unique())))

There are 51 uniques categories.


Analyze Each Neighborhood

In [24]:
# one hot encoding
toronto_onehot = pd.get_dummies(toronto_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
toronto_onehot['Neighborhood'] = toronto_data['Neighbourhood'] 

# move neighborhood column to the first column
fixed_columns = [toronto_onehot.columns[-1]] + list(toronto_onehot.columns[:-1])
toronto_onehot = toronto_onehot[fixed_columns]

toronto_onehot.head()

Unnamed: 0,Neighborhood,Afghan Restaurant,American Restaurant,Arts & Crafts Store,Beer Bar,Bookstore,Breakfast Spot,Bubble Tea Shop,Burger Joint,Burrito Place,...,Salon / Barbershop,Sculpture Garden,Smoke Shop,Steakhouse,Strip Club,Sushi Restaurant,Thai Restaurant,Theater,Theme Restaurant,Yoga Studio
0,"Regent Park, Harbourfront",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,1,0
1,"Queen's Park, Ontario Provincial Government",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,"Garden District, Ryerson",0,0,0,0,0,1,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,St. James Town,0,0,0,0,0,0,1,0,0,...,0,0,0,0,0,0,0,0,0,0
4,Berczy Park,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [25]:
toronto_onehot.shape

(75, 52)

In [57]:
toronto_grouped = toronto_onehot.groupby('Neighborhood').mean().reset_index()
toronto_grouped.head()

Unnamed: 0,Neighborhood,Afghan Restaurant,American Restaurant,Arts & Crafts Store,Beer Bar,Bookstore,Breakfast Spot,Bubble Tea Shop,Burger Joint,Burrito Place,Café,Caribbean Restaurant,Clothing Store,Coffee Shop,Creperie,Dance Studio,Diner,Distribution Center,Dog Run,Ethiopian Restaurant,Fast Food Restaurant,Food & Drink Shop,Gastropub,Gay Bar,Gym,Health & Beauty Service,Hobby Shop,Hotel,Ice Cream Shop,Indian Restaurant,Japanese Restaurant,Juice Bar,Martial Arts Dojo,Mediterranean Restaurant,Men's Store,Mexican Restaurant,Park,Pizza Place,Pub,Ramen Restaurant,Restaurant,Sake Bar,Salon / Barbershop,Sculpture Garden,Smoke Shop,Steakhouse,Strip Club,Sushi Restaurant,Thai Restaurant,Theater,Theme Restaurant,Yoga Studio
0,Berczy Park,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,"CN Tower, King and Spadina, Railway Lands, Har...",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,Central Bay Street,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0
3,Christie,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,Church and Wellesley,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [27]:
toronto_grouped.shape

(19, 52)

Print each neighborhood along with the top 5 most common venues.

In [28]:
num_top_venues = 5

for hood in toronto_grouped['Neighborhood']:
    print("----"+hood+"----")
    temp = toronto_grouped[toronto_grouped['Neighborhood'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

----Berczy Park----
                 venue  freq
0                 Park   1.0
1    Afghan Restaurant   0.0
2     Ramen Restaurant   0.0
3    Indian Restaurant   0.0
4  Japanese Restaurant   0.0


----CN Tower, King and Spadina, Railway Lands, Harbourfront West, Bathurst Quay, South Niagara, Island airport----
                 venue  freq
0       Ice Cream Shop   1.0
1     Ramen Restaurant   0.0
2    Indian Restaurant   0.0
3  Japanese Restaurant   0.0
4            Juice Bar   0.0


----Central Bay Street----
                 venue  freq
0           Restaurant   1.0
1     Ramen Restaurant   0.0
2    Indian Restaurant   0.0
3  Japanese Restaurant   0.0
4            Juice Bar   0.0


----Christie----
                 venue  freq
0            Bookstore   1.0
1    Afghan Restaurant   0.0
2     Ramen Restaurant   0.0
3    Indian Restaurant   0.0
4  Japanese Restaurant   0.0


----Church and Wellesley----
                 venue  freq
0             Creperie   1.0
1                Hotel   0.0
2

Put that into a pandas dataframe

In [29]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

In [30]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = toronto_grouped['Neighborhood']

for ind in np.arange(toronto_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(toronto_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted.head()

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Berczy Park,Park,Yoga Studio,Health & Beauty Service,Gay Bar,Gastropub,Food & Drink Shop,Fast Food Restaurant,Ethiopian Restaurant,Dog Run,Distribution Center
1,"CN Tower, King and Spadina, Railway Lands, Har...",Ice Cream Shop,Yoga Studio,Health & Beauty Service,Gay Bar,Gastropub,Food & Drink Shop,Fast Food Restaurant,Ethiopian Restaurant,Dog Run,Distribution Center
2,Central Bay Street,Restaurant,Yoga Studio,Coffee Shop,Gay Bar,Gastropub,Food & Drink Shop,Fast Food Restaurant,Ethiopian Restaurant,Dog Run,Distribution Center
3,Christie,Bookstore,Yoga Studio,Coffee Shop,Gay Bar,Gastropub,Food & Drink Shop,Fast Food Restaurant,Ethiopian Restaurant,Dog Run,Distribution Center
4,Church and Wellesley,Creperie,Yoga Studio,Coffee Shop,Gay Bar,Gastropub,Food & Drink Shop,Fast Food Restaurant,Ethiopian Restaurant,Dog Run,Distribution Center


Cluster Neighborhoods

In [31]:
# set number of clusters
kclusters = 5

toronto_grouped_clustering = toronto_grouped.drop('Neighborhood', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(toronto_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10] 

array([0, 0, 0, 0, 3, 0, 0, 0, 4, 0], dtype=int32)

In [56]:
# add clustering labels
neighborhoods_venues_sorted.insert(loc=0, column='Cluster Labels',value= kmeans.labels_,allow_duplicates=True)
toronto_merged = toronto_data

# merge toronto_grouped with toronto_data to add latitude/longitude for each neighborhood
toronto_merged = toronto_merged.join(neighborhoods_venues_sorted.set_index('Neighborhood'), on='Neighbourhood')

toronto_merged.head() # check the last columns!

Unnamed: 0,Postal Code,Borough,Neighbourhood,Latitude,Longitude,Cluster Labels,Cluster Labels.1,Cluster Labels.2,Cluster Labels.3,Cluster Labels.4,Cluster Labels.5,Cluster Labels.6,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636,0,0,0,0,0,0,0,Theme Restaurant,Health & Beauty Service,Gym,Gay Bar,Gastropub,Food & Drink Shop,Fast Food Restaurant,Ethiopian Restaurant,Dog Run,Distribution Center
1,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494,1,1,1,1,1,1,1,Dance Studio,Yoga Studio,Coffee Shop,Gay Bar,Gastropub,Food & Drink Shop,Fast Food Restaurant,Ethiopian Restaurant,Dog Run,Distribution Center
2,M5B,Downtown Toronto,"Garden District, Ryerson",43.657162,-79.378937,0,0,0,0,0,0,0,Breakfast Spot,Yoga Studio,Coffee Shop,Gay Bar,Gastropub,Food & Drink Shop,Fast Food Restaurant,Ethiopian Restaurant,Dog Run,Distribution Center
3,M5C,Downtown Toronto,St. James Town,43.651494,-79.375418,0,0,0,0,0,0,0,Bubble Tea Shop,Yoga Studio,Coffee Shop,Gay Bar,Gastropub,Food & Drink Shop,Fast Food Restaurant,Ethiopian Restaurant,Dog Run,Distribution Center
4,M5E,Downtown Toronto,Berczy Park,43.644771,-79.373306,0,0,0,0,0,0,0,Park,Yoga Studio,Health & Beauty Service,Gay Bar,Gastropub,Food & Drink Shop,Fast Food Restaurant,Ethiopian Restaurant,Dog Run,Distribution Center


In [53]:
from sklearn.cluster import KMeans
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)
import matplotlib.cm as cm
import matplotlib.colors as colors

print('libraries imported')
# create map

map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(toronto_merged['Latitude'], toronto_merged['Longitude'], toronto_merged['Neighbourhood'], toronto_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker([lat, lon],radius=5,popup=label,fill=True,fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

libraries imported
