# Peer-graded Assignment: Segmenting and Clustering Neighborhoods in Toronto

## Part 1:
#### 1.1 Scrap data from Wikipedia page into a DataFrame

In [137]:
import pandas as pd
import numpy as np
from bs4 import BeautifulSoup
import requests

In [138]:
# getting data from internet
wikipedia_link='https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'
raw_wikipedia_page= requests.get(wikipedia_link).text
# using beautiful soup to parse the HTML/XML codes.
soup = BeautifulSoup(raw_wikipedia_page,'xml')
# print(soup.prettify())

In [139]:
#extracting raw table (from webpage)
table = soup.find('table')
Postcode      = []
Borough       = []
Neighbourhood = []
# print(table)

In [140]:
# extracting a clean form of the table
for tr_cell in table.find_all('tr'):
    counter = 1
    Postcode_var      = -1
    Borough_var       = -1
    Neighbourhood_var = -1
    for td_cell in tr_cell.find_all('td'):
            if counter == 1: 
                Postcode_var = td_cell.text
            if counter == 2: 
                Borough_var = td_cell.text
                tag_a_Borough = td_cell.find('a')
            if counter == 3: 
                Neighbourhood_var = str(td_cell.text).strip()
                tag_a_Neighbourhood = td_cell.find('a')
            counter +=1
            if (Postcode_var == 'Not assigned' or Borough_var == 'Not assigned' or Neighbourhood_var == 'Not assigned'):
                continue
            try:
                if ((tag_a_Borough is None) or (tag_a_Neighbourhood is None)):
                    continue
            except:
                pass
            if(Postcode_var == -1 or Borough_var == -1 or Neighbourhood_var == -1):
                continue
            Postcode.append(Postcode_var)
            Borough.append(Borough_var)
            Neighbourhood.append(Neighbourhood_var)
#             print(Postcode_var,Borough_var,Neighbourhood_var)

#### 1.2 integrating Postal codes with more than 1 neighbour

In [141]:
unique_p = set(Postcode)
print('num of unique Postal codes:', len(unique_p))
Postcode_u      = Postcode
Borough_u       = Borough
Neighbourhood_u = Neighbourhood
for postcode_unique_element in unique_p:
    p_var = ''
    b_var = '' 
    n_var = ''
    for postcode_idx, postcode_element in enumerate(Postcode):
        if postcode_unique_element == postcode_element:
            p_var = postcode_element
            b_var = Borough[postcode_idx]
            if n_var == '':
                n_var = Neighbourhood[postcode_idx]
            else:
                n_var = n_var + ', ' + Neighbourhood[postcode_idx]
    Postcode_u.append(p_var)
    Borough_u.append(b_var)
    Neighbourhood_u.append(n_var)
# print(Postcode_u,Borough_u,Neighbourhood_u)

num of unique Postal codes: 77


#### 1.3 Post-processing: creating an appropriate Pandas Dataframe

In [142]:
toronto_dict = {'Postcode':Postcode_u, 'Borough':Borough_u, 'Neighbourhood':Neighbourhood_u}
df = pd.DataFrame.from_dict(toronto_dict)
df.to_csv('toronto_part1.csv')
df.head(5)

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,Harbourfront
3,M6A,North York,Lawrence Heights
4,M6A,North York,Lawrence Manor


#### 1.4 Group neighborhoods in the same borough

In [143]:
df = df.groupby(["Postcode", "Borough"], as_index=False).agg(lambda x: ", ".join(x))
df.head()

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M1B,Scarborough,"Rouge, Malvern, Rouge, Malvern"
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union, Highla..."
2,M1E,Scarborough,"Morningside, West Hill, Morningside, West Hill"
3,M1G,Scarborough,"Woburn, Woburn"
4,M1J,Scarborough,"Scarborough Village, Scarborough Village"


In [144]:
df.shape

(77, 3)

## Part 2:

Now that you have built a dataframe of the postal code of each neighborhood along with the borough name and neighborhood name, in order to utilize the Foursquare location data, we need to get the latitude and the longitude coordinates of each neighborhood.

In an older version of this course, we were leveraging the Google Maps Geocoding API to get the latitude and the longitude coordinates of each neighborhood. However, recently Google started charging for their API: http://geoawesomeness.com/developers-up-in-arms-over-google-maps-api-insane-price-hike/, so we will use the Geocoder Python package instead: https://geocoder.readthedocs.io/index.html.

The problem with this Package is you have to be persistent sometimes in order to get the geographical coordinates of a given postal code. So you can make a call to get the latitude and longitude coordinates of a given postal code and the result would be None, and then make the call again and you would get the coordinates. So, in order to make sure that you get the coordinates for all of our neighborhoods, you can run a while loop for each postal code. Taking postal code M5G as an example, your code would look something like this:

In [145]:
# !conda install -c conda-forge geocoder --yes
# print('success!')

In [146]:
# import geocoder
# lat_lng_coords = None
# df['Latitude'] = pd.Series("", index=df.index)
# df['Longitude'] = pd.Series("", index=df.index)
# df.columns

#### 2.1 Load the coordinates from the csv file on Coursera

In [147]:
coordinates = pd.read_csv("https://cocl.us/Geospatial_data")
coordinates.rename(columns={"Postal Code": "Postcode"}, inplace=True)
coordinates.head()

Unnamed: 0,Postcode,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


#### 2.2 Merge two tables to get the coordinates

In [148]:
df_new = df.merge(coordinates, on="Postcode", how="left")
df_new.head()

Unnamed: 0,Postcode,Borough,Neighbourhood,Latitude,Longitude
0,M1B,Scarborough,"Rouge, Malvern, Rouge, Malvern",43.806686,-79.194353
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union, Highla...",43.784535,-79.160497
2,M1E,Scarborough,"Morningside, West Hill, Morningside, West Hill",43.763573,-79.188711
3,M1G,Scarborough,"Woburn, Woburn",43.770992,-79.216917
4,M1J,Scarborough,"Scarborough Village, Scarborough Village",43.744734,-79.239476


## Part 3:
Explore and cluster the neighborhoods in Toronto. You can decide to work with only boroughs that contain the word Toronto and then replicate the same analysis we did to the New York City data. It is up to you.

Just make sure:

to add enough Markdown cells to explain what you decided to do and to report any observations you make.
to generate maps to visualize your neighborhoods and how they cluster together.
Once you are happy with your analysis, submit a link to the new Notebook on your Github repository. (3 marks)

In [149]:
# !conda install -c conda-forge geopy --yes 
from geopy.geocoders import Nominatim 
print("success")

success


In [150]:
#Use geopy library to get the latitude and longitude values of Toronto
address = 'Toronto,ON'

geolocator = Nominatim()
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of ',address,' are {}, {}.'.format(latitude, longitude))



The geograpical coordinate of  Toronto,ON  are 43.653963, -79.387207.


In [151]:
!conda install -c conda-forge folium=0.5.0 --yes 
import folium # map rendering library
print("success")

Solving environment: done

# All requested packages already installed.

success


In [173]:
# create map of Toronto using latitude and longitude values
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=10)

# add markers to map
for lat, lng, borough, neighbourhood in zip(df_new['Latitude'], df_new['Longitude'], df['Borough'], df['Neighbourhood']):
    label = '{}, {}'.format(neighbourhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_toronto)  
            
map_toronto

#### Use the Foursquare API to explore the neighborhoods

In [153]:
#Use the Foursquare API to explore the neighborhoods¶
# define Foursquare Credentials and Version
# CLIENT_ID = '1250913069@qq.com' # your Foursquare ID
# CLIENT_SECRET = 'zrcc2019' # your Foursquare Secret
# VERSION = '20180605' # Foursquare API version

# print('Your credentails:')
# print('CLIENT_ID: ' + CLIENT_ID)
# print('CLIENT_SECRET:' + CLIENT_SECRET)
CLIENT_ID = 'IXTZWEXH0I0JH3IAR2YVX53QK4OGYW3JEKMRMC2BNS1AN1LC' # your Foursquare ID
CLIENT_SECRET = 'EPSLWPUDOHGGPEZBEABK5E2AP2ODGR0YS4LDB11MVIR421B3' # your Foursquare Secret
VERSION = '20180604' # Foursquare API version
LIMIT = 100 # limit of number of venues returned by Foursquare API
radius = 500 # define radius

In [154]:
def getNearbyVenues(names, latitudes, longitudes, radius=500):   
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [155]:
# Get venues for all neighborhoods in our dataset
toronto_venues = getNearbyVenues(names=df_new['Neighbourhood'],
                                latitudes=df_new['Latitude'],
                                longitudes=df_new['Longitude'])
toronto_venues.head()
toronto_venues.shape

Rouge, Malvern, Rouge, Malvern
Highland Creek, Rouge Hill, Port Union, Highland Creek, Rouge Hill, Port Union
Morningside, West Hill, Morningside, West Hill
Woburn, Woburn
Scarborough Village, Scarborough Village
Ionview, Kennedy Park, Ionview, Kennedy Park
Clairlea, Golden Mile, Oakridge, Clairlea, Golden Mile, Oakridge
Cliffcrest, Cliffside, Cliffcrest, Cliffside
Birch Cliff, Birch Cliff
Dorset Park, Scarborough Town Centre, Wexford Heights, Dorset Park, Scarborough Town Centre, Wexford Heights
Maryvale, Wexford, Maryvale, Wexford
Agincourt, Agincourt
Tam O'Shanter, Tam O'Shanter
Agincourt North, Milliken, Agincourt North, Milliken
Upper Rouge, Upper Rouge
Hillcrest Village, Hillcrest Village
Henry Farm, Henry Farm
Bayview Village, Bayview Village
York Mills, York Mills
Newtonbrook, Willowdale, Newtonbrook, Willowdale
Willowdale West, Willowdale West
Parkwoods, Parkwoods
Flemingdon Park, Flemingdon Park
Bathurst Manor, Wilson Heights, Bathurst Manor, Wilson Heights
Northwood Park, Yo

(1593, 7)

In [156]:
toronto_venues.head()

Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,"Rouge, Malvern, Rouge, Malvern",43.806686,-79.194353,Wendy's,43.807448,-79.199056,Fast Food Restaurant
1,"Rouge, Malvern, Rouge, Malvern",43.806686,-79.194353,Interprovincial Group,43.80563,-79.200378,Print Shop
2,"Highland Creek, Rouge Hill, Port Union, Highla...",43.784535,-79.160497,Royal Canadian Legion,43.782533,-79.163085,Bar
3,"Morningside, West Hill, Morningside, West Hill",43.763573,-79.188711,Swiss Chalet Rotisserie & Grill,43.767697,-79.189914,Pizza Place
4,"Morningside, West Hill, Morningside, West Hill",43.763573,-79.188711,G & G Electronics,43.765309,-79.191537,Electronics Store


In [157]:
#Number of venues per neighborhood
toronto_venues.groupby('Neighborhood').count()


Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
"Agincourt North, Milliken, Agincourt North, Milliken",2,2,2,2,2,2
"Agincourt, Agincourt",5,5,5,5,5,5
"Alderwood, Long Branch, Alderwood, Long Branch",9,9,9,9,9,9
"Bathurst Manor, Wilson Heights, Bathurst Manor, Wilson Heights",19,19,19,19,19,19
"Bayview Village, Bayview Village",4,4,4,4,4,4
"Beaumond Heights, Jamestown, Mount Olive, Silverstone, South Steeles, Thistletown, Beaumond Heights, Jamestown, Mount Olive, Silverstone, South Steeles, Thistletown",12,12,12,12,12,12
"Bedford Park, Bedford Park",26,26,26,26,26,26
"Berczy Park, Berczy Park",56,56,56,56,56,56
"Birch Cliff, Birch Cliff",4,4,4,4,4,4
"CFB Toronto, CFB Toronto",4,4,4,4,4,4


In [158]:
#Number of unique venue categories
print('There are {} uniques categories.'.format(len(toronto_venues['Venue Category'].unique())))

There are 245 uniques categories.


In [159]:
#print out the list of categories
toronto_venues['Venue Category'].unique()[:100]

array(['Fast Food Restaurant', 'Print Shop', 'Bar', 'Pizza Place',
       'Electronics Store', 'Spa', 'Mexican Restaurant',
       'Rental Car Location', 'Medical Center', 'Intersection',
       'Breakfast Spot', 'Coffee Shop', 'Korean Restaurant',
       'Convenience Store', 'Playground', 'Department Store',
       'Chinese Restaurant', 'Hobby Shop', 'Bus Station',
       'Ice Cream Shop', 'Bus Line', 'Metro Station', 'Bakery', 'Park',
       'Soccer Field', 'Motel', 'American Restaurant', 'Café',
       'General Entertainment', 'Skating Rink', 'College Stadium',
       'Indian Restaurant', 'Pet Store', 'Vietnamese Restaurant',
       'Sandwich Place', 'Middle Eastern Restaurant', 'Auto Garage',
       'Lounge', 'Latin American Restaurant', 'Clothing Store',
       'Italian Restaurant', 'Noodle House', 'Thai Restaurant', 'Bank',
       'Gas Station', 'Fried Chicken Joint', 'Pharmacy', 'Golf Course',
       'Pool', 'Mediterranean Restaurant', 'Dog Run',
       'Athletics & Sports', 'To

In [160]:
# one hot encoding
to_onehot = pd.get_dummies(toronto_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
to_onehot['Neighborhoods'] = toronto_venues['Neighborhood'] 

# move neighborhood column to the first column
fixed_columns = [to_onehot.columns[-1]] + list(to_onehot.columns[:-1])
to_onehot = to_onehot[fixed_columns]

print(to_onehot.shape)
to_onehot.head()

(1593, 246)


Unnamed: 0,Neighborhoods,Accessories Store,Afghan Restaurant,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,...,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Video Store,Vietnamese Restaurant,Warehouse Store,Wine Bar,Wings Joint,Women's Store,Yoga Studio
0,"Rouge, Malvern, Rouge, Malvern",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,"Rouge, Malvern, Rouge, Malvern",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,"Highland Creek, Rouge Hill, Port Union, Highla...",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,"Morningside, West Hill, Morningside, West Hill",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,"Morningside, West Hill, Morningside, West Hill",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [161]:
#Next, let's group rows by neighborhood and by taking the mean of the frequency of occurrence of each category
to_grouped = to_onehot.groupby(["Neighborhoods"]).mean().reset_index()
to_grouped

Unnamed: 0,Neighborhoods,Accessories Store,Afghan Restaurant,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,...,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Video Store,Vietnamese Restaurant,Warehouse Store,Wine Bar,Wings Joint,Women's Store,Yoga Studio
0,"Agincourt North, Milliken, Agincourt North, Mi...",0.0,0.000000,0.0000,0.0000,0.0000,0.000,0.0000,0.000,0.000000,...,0.00,0.000000,0.000000,0.000000,0.000000,0.000000,0.0000,0.000000,0.0,0.000000
1,"Agincourt, Agincourt",0.0,0.000000,0.0000,0.0000,0.0000,0.000,0.0000,0.000,0.000000,...,0.00,0.000000,0.000000,0.000000,0.000000,0.000000,0.0000,0.000000,0.0,0.000000
2,"Alderwood, Long Branch, Alderwood, Long Branch",0.0,0.000000,0.0000,0.0000,0.0000,0.000,0.0000,0.000,0.000000,...,0.00,0.000000,0.000000,0.000000,0.000000,0.000000,0.0000,0.000000,0.0,0.000000
3,"Bathurst Manor, Wilson Heights, Bathurst Manor...",0.0,0.000000,0.0000,0.0000,0.0000,0.000,0.0000,0.000,0.000000,...,0.00,0.000000,0.000000,0.052632,0.000000,0.000000,0.0000,0.000000,0.0,0.000000
4,"Bayview Village, Bayview Village",0.0,0.000000,0.0000,0.0000,0.0000,0.000,0.0000,0.000,0.000000,...,0.00,0.000000,0.000000,0.000000,0.000000,0.000000,0.0000,0.000000,0.0,0.000000
5,"Beaumond Heights, Jamestown, Mount Olive, Silv...",0.0,0.000000,0.0000,0.0000,0.0000,0.000,0.0000,0.000,0.000000,...,0.00,0.000000,0.000000,0.000000,0.000000,0.000000,0.0000,0.000000,0.0,0.000000
6,"Bedford Park, Bedford Park",0.0,0.000000,0.0000,0.0000,0.0000,0.000,0.0000,0.000,0.038462,...,0.00,0.000000,0.000000,0.000000,0.000000,0.000000,0.0000,0.000000,0.0,0.000000
7,"Berczy Park, Berczy Park",0.0,0.000000,0.0000,0.0000,0.0000,0.000,0.0000,0.000,0.000000,...,0.00,0.017857,0.000000,0.000000,0.000000,0.000000,0.0000,0.000000,0.0,0.000000
8,"Birch Cliff, Birch Cliff",0.0,0.000000,0.0000,0.0000,0.0000,0.000,0.0000,0.000,0.000000,...,0.00,0.000000,0.000000,0.000000,0.000000,0.000000,0.0000,0.000000,0.0,0.000000
9,"CFB Toronto, CFB Toronto",0.0,0.000000,0.2500,0.0000,0.0000,0.000,0.0000,0.000,0.000000,...,0.00,0.000000,0.000000,0.000000,0.000000,0.000000,0.0000,0.000000,0.0,0.000000


In [162]:
#find Asian Restaurants only
to_asian = to_grouped[["Neighborhoods","Thai Restaurant"]]
to_asian.head()

Unnamed: 0,Neighborhoods,Thai Restaurant
0,"Agincourt North, Milliken, Agincourt North, Mi...",0.0
1,"Agincourt, Agincourt",0.0
2,"Alderwood, Long Branch, Alderwood, Long Branch",0.0
3,"Bathurst Manor, Wilson Heights, Bathurst Manor...",0.0
4,"Bayview Village, Bayview Village",0.0


## cluster neighborhood

In [163]:
from sklearn.cluster import KMeans
# set number of clusters
toclusters = 3

to_clustering = to_asian.drop(["Neighborhoods"], 1)

# run k-means clustering
kmeans = KMeans(n_clusters=toclusters, random_state=0).fit(to_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10]
# create a new dataframe that includes the cluster as well as the top 10 venues for each neighborhood.
to_merged = to_asian.copy()

# add clustering labels
to_merged["Cluster Labels"] = kmeans.labels_
to_merged.rename(columns={"Neighborhoods": "Neighborhood"}, inplace=True)
to_merged.shape
to_merged.head()

Unnamed: 0,Neighborhood,Thai Restaurant,Cluster Labels
0,"Agincourt North, Milliken, Agincourt North, Mi...",0.0,0
1,"Agincourt, Agincourt",0.0,0
2,"Alderwood, Long Branch, Alderwood, Long Branch",0.0,0
3,"Bathurst Manor, Wilson Heights, Bathurst Manor...",0.0,0
4,"Bayview Village, Bayview Village",0.0,0


In [164]:
# merge toronto_grouped with toronto_data to add latitude/longitude for each neighborhood
to_merged = to_merged.join(toronto_venues.set_index("Neighborhood"), on="Neighborhood")

print(to_merged.shape)
to_merged.head()

(1593, 9)


Unnamed: 0,Neighborhood,Thai Restaurant,Cluster Labels,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,"Agincourt North, Milliken, Agincourt North, Mi...",0.0,0,43.815252,-79.284577,Port Royal Park,43.815477,-79.289773,Park
0,"Agincourt North, Milliken, Agincourt North, Mi...",0.0,0,43.815252,-79.284577,Milliken Public School Playground,43.815383,-79.289867,Playground
1,"Agincourt, Agincourt",0.0,0,43.7942,-79.262029,Panagio's Breakfast & Lunch,43.79237,-79.260203,Breakfast Spot
1,"Agincourt, Agincourt",0.0,0,43.7942,-79.262029,Twilight,43.791999,-79.258584,Lounge
1,"Agincourt, Agincourt",0.0,0,43.7942,-79.262029,El Pulgarcito,43.792648,-79.259208,Latin American Restaurant


In [165]:
# sort the results by Cluster Labels
to_merged.sort_values(["Cluster Labels"], inplace=True)
to_merged.head()

Unnamed: 0,Neighborhood,Thai Restaurant,Cluster Labels,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,"Agincourt North, Milliken, Agincourt North, Mi...",0.0,0,43.815252,-79.284577,Port Royal Park,43.815477,-79.289773,Park
44,"Little Portugal, Trinity, Little Portugal, Tri...",0.0,0,43.647927,-79.41975,Le Dolci,43.650377,-79.415959,Cupcake Shop
44,"Little Portugal, Trinity, Little Portugal, Tri...",0.0,0,43.647927,-79.41975,The Lucky Penny,43.64702,-79.417003,Deli / Bodega
44,"Little Portugal, Trinity, Little Portugal, Tri...",0.0,0,43.647927,-79.41975,Trinity Bellwoods Park,43.647072,-79.413756,Park
44,"Little Portugal, Trinity, Little Portugal, Tri...",0.0,0,43.647927,-79.41975,The Tampered Press,43.650062,-79.41728,Coffee Shop


In [166]:

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

In [167]:
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=10)

# set color scheme for the clusters
x = np.arange(toclusters)
ys = [i+x+(i*x)**2 for i in range(toclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]
map_clusters
print("map success")

map success


In [168]:
# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(to_merged['Venue Latitude'], to_merged['Venue Longitude'], to_merged['Neighborhood'], to_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' - Cluster ' + str(cluster))
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters
print("markers success")

markers success


In [175]:
# save the map as HTML file
map_clusters.save('map_clusters.html')