# This notebook contains the 3 questions from the Segmenting and Clustering Neighborhoods in Toronto task. So please, scroll down to visualize the other questions

# Question 1

### Let's first impor the libraries to get the data from the url

In [2]:
import requests
canada_url = requests.get('https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M').text
print('Data downloaded!')

Data downloaded!


### Using the BeautifulSoup we can select the exact table through the html code

In [3]:
from bs4 import BeautifulSoup
canada_soup = BeautifulSoup(canada_url, 'lxml')

#### Here we'll get only the data from the table that belongs to the 'wikitable sortable'

In [4]:
table_canada = canada_soup.find('table', {'class': 'wikitable sortable'})
print(table_canada.prettify())

<table class="wikitable sortable">
 <tbody>
  <tr>
   <th>
    Postcode
   </th>
   <th>
    Borough
   </th>
   <th>
    Neighbourhood
   </th>
  </tr>
  <tr>
   <td>
    M1A
   </td>
   <td>
    Not assigned
   </td>
   <td>
    Not assigned
   </td>
  </tr>
  <tr>
   <td>
    M2A
   </td>
   <td>
    Not assigned
   </td>
   <td>
    Not assigned
   </td>
  </tr>
  <tr>
   <td>
    M3A
   </td>
   <td>
    <a href="/wiki/North_York" title="North York">
     North York
    </a>
   </td>
   <td>
    <a href="/wiki/Parkwoods" title="Parkwoods">
     Parkwoods
    </a>
   </td>
  </tr>
  <tr>
   <td>
    M4A
   </td>
   <td>
    <a href="/wiki/North_York" title="North York">
     North York
    </a>
   </td>
   <td>
    <a href="/wiki/Victoria_Village" title="Victoria Village">
     Victoria Village
    </a>
   </td>
  </tr>
  <tr>
   <td>
    M5A
   </td>
   <td>
    <a href="/wiki/Downtown_Toronto" title="Downtown Toronto">
     Downtown Toronto
    </a>
   </td>
   <td>
    <a href="

#### Selecting only the table headers

In [5]:
headers = canada_soup.tr.text
print(headers)


Postcode
Borough
Neighbourhood



### After getting the data in html, we'll transform and separate the rows by comma

In [6]:
canada_tb=""
for tr in canada_soup.find_all('table',{'class':'wikitable sortable'}):
    row1=""
    for tds in tr.find_all('td'):
        row1=row1+","+tds.text
    canada_tb=canada_tb+row1[1:]
print(canada_tb)

M1A,Not assigned,Not assigned
,M2A,Not assigned,Not assigned
,M3A,North York,Parkwoods
,M4A,North York,Victoria Village
,M5A,Downtown Toronto,Harbourfront
,M5A,Downtown Toronto,Regent Park
,M6A,North York,Lawrence Heights
,M6A,North York,Lawrence Manor
,M7A,Queen's Park,Not assigned
,M8A,Not assigned,Not assigned
,M9A,Etobicoke,Islington Avenue
,M1B,Scarborough,Rouge
,M1B,Scarborough,Malvern
,M2B,Not assigned,Not assigned
,M3B,North York,Don Mills North
,M4B,East York,Woodbine Gardens
,M4B,East York,Parkview Hill
,M5B,Downtown Toronto,Ryerson
,M5B,Downtown Toronto,Garden District
,M6B,North York,Glencairn
,M7B,Not assigned,Not assigned
,M8B,Not assigned,Not assigned
,M9B,Etobicoke,Cloverdale
,M9B,Etobicoke,Islington
,M9B,Etobicoke,Martin Grove
,M9B,Etobicoke,Princess Gardens
,M9B,Etobicoke,West Deane Park
,M1C,Scarborough,Highland Creek
,M1C,Scarborough,Rouge Hill
,M1C,Scarborough,Port Union
,M2C,Not assigned,Not assigned
,M3C,North York,Flemingdon Park
,M3C,North York,Don Mills South


#### Import Numpy and Pandas to deal with the data and create the dataframe

In [7]:
import numpy as np
import pandas as pd

#### First we create a file to store the data

In [8]:
file=open("canada.csv","wb")
#file.write(bytes(headers,encoding="ascii",errors="ignore"))
file.write(bytes(canada_tb,encoding="ascii",errors="ignore"))

9025

#### Now we can read and visualize it

In [9]:
df = pd.read_csv('canada.csv')
df.head()

Unnamed: 0,M1A,Not assigned,Not assigned.1
,M2A,Not assigned,Not assigned
,M3A,North York,Parkwoods
,M4A,North York,Victoria Village
,M5A,Downtown Toronto,Harbourfront
,M5A,Downtown Toronto,Regent Park


#### Defining the name of the columns

In [10]:
df.columns=['Postalcode','Borough','Neighbourhood']
df.head()

Unnamed: 0,Postalcode,Borough,Neighbourhood
,M2A,Not assigned,Not assigned
,M3A,North York,Parkwoods
,M4A,North York,Victoria Village
,M5A,Downtown Toronto,Harbourfront
,M5A,Downtown Toronto,Regent Park


#### Adjusting the table, removing 'Not assigned' rows from the Borough column and assigning the same Borough values to the 'Not assigned' Neighbourhoods

In [11]:
df_canada = df.reset_index(drop = True)
df_canada = df_canada[df_canada.Borough != 'Not assigned']
df_canada.head()

Unnamed: 0,Postalcode,Borough,Neighbourhood
1,M3A,North York,Parkwoods
2,M4A,North York,Victoria Village
3,M5A,Downtown Toronto,Harbourfront
4,M5A,Downtown Toronto,Regent Park
5,M6A,North York,Lawrence Heights


In [12]:
df_canada.loc[df_canada['Neighbourhood']=='Not assigned','Neighbourhood']=df_canada['Borough']
df_canada.head(10)

Unnamed: 0,Postalcode,Borough,Neighbourhood
1,M3A,North York,Parkwoods
2,M4A,North York,Victoria Village
3,M5A,Downtown Toronto,Harbourfront
4,M5A,Downtown Toronto,Regent Park
5,M6A,North York,Lawrence Heights
6,M6A,North York,Lawrence Manor
7,M7A,Queen's Park,Queen's Park
9,M9A,Etobicoke,Islington Avenue
10,M1B,Scarborough,Rouge
11,M1B,Scarborough,Malvern


#### Finally, we'll group by Postalcode and Borough

In [13]:
df_grouped = df_canada.groupby(['Postalcode', 'Borough'], sort=False).agg(', '.join).reset_index()
df_grouped.head()

Unnamed: 0,Postalcode,Borough,Neighbourhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Harbourfront, Regent Park"
3,M6A,North York,"Lawrence Heights, Lawrence Manor"
4,M7A,Queen's Park,Queen's Park


#### Using the shape method to print the number of columns and rows

In [14]:
df_grouped1 = df_grouped
df_grouped1.shape

(103, 3)

# Question 2

In [16]:
!pip install geocoder

Collecting geocoder
[?25l  Downloading https://files.pythonhosted.org/packages/4f/6b/13166c909ad2f2d76b929a4227c952630ebaf0d729f6317eb09cbceccbab/geocoder-1.38.1-py2.py3-none-any.whl (98kB)
[K     |████████████████████████████████| 102kB 15.5MB/s ta 0:00:01
Collecting ratelim (from geocoder)
  Downloading https://files.pythonhosted.org/packages/f2/98/7e6d147fd16a10a5f821db6e25f192265d6ecca3d82957a4fdd592cad49c/ratelim-0.1.6-py2.py3-none-any.whl
Installing collected packages: ratelim, geocoder
Successfully installed geocoder-1.38.1 ratelim-0.1.6


In [18]:
import geocoder

In [19]:
def get_latlng(postal_code):
    # initialize your variable to None
    lat_lng_coords = None
    # loop until you get the coordinates
    while(lat_lng_coords is None):
        g = geocoder.arcgis('{}, Toronto, Ontario'.format(postal_code))
        lat_lng_coords = g.latlng
    return lat_lng_coords    
get_latlng('M4G')

[43.70949500000006, -79.36398897099997]

In [20]:
df_postalcodes = df_grouped['Postalcode']
coordinates = [get_latlng(postal_code) for postal_code in df_postalcodes.tolist()]
df_coords = pd.DataFrame(coordinates, columns=['Latitude', 'Longitude'])
df_grouped['Latitude'] = df_coords['Latitude']
df_grouped['Longitude'] = df_coords['Longitude']

In [21]:
df_grouped.head()

Unnamed: 0,Postalcode,Borough,Neighbourhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.75242,-79.329242
1,M4A,North York,Victoria Village,43.7306,-79.313265
2,M5A,Downtown Toronto,"Harbourfront, Regent Park",43.650295,-79.359166
3,M6A,North York,"Lawrence Heights, Lawrence Manor",43.72327,-79.451286
4,M7A,Queen's Park,Queen's Park,43.66115,-79.391715


In [22]:
df_grouped[df_grouped.Postalcode == 'M5G']

Unnamed: 0,Postalcode,Borough,Neighbourhood,Latitude,Longitude
24,M5G,Downtown Toronto,Central Bay Street,43.656091,-79.38493


# Question 3

#### Let's import and install what we'll need!

In [23]:
# matplotlib
import matplotlib.cm as cm
import matplotlib.colors as colors
# k-means for clustering
from sklearn.cluster import KMeans

!conda install -c conda-forge folium=0.5.0 --yes
# map rendering library
import folium 

print('Libraries imported.')

Solving environment: done

## Package Plan ##

  environment location: /opt/conda/envs/Python36

  added / updated specs: 
    - folium=0.5.0


The following packages will be downloaded:

    package                    |            build
    ---------------------------|-----------------
    vincent-0.4.4              |             py_1          28 KB  conda-forge
    altair-3.2.0               |           py36_0         770 KB  conda-forge
    ca-certificates-2019.9.11  |       hecc5488_0         144 KB  conda-forge
    folium-0.5.0               |             py_0          45 KB  conda-forge
    openssl-1.1.1d             |       h516909a_0         2.1 MB  conda-forge
    certifi-2019.9.11          |           py36_0         147 KB  conda-forge
    branca-0.3.1               |             py_0          25 KB  conda-forge
    ------------------------------------------------------------
                                           Total:         3.2 MB

The following NEW packages will be 

In [24]:
from geopy.geocoders import Nominatim

In [25]:
address = 'Toronto, ON'

geolocator = Nominatim(user_agent="Toronto")
location = geolocator.geocode(address)
latitude_toronto = location.latitude
longitude_toronto = location.longitude
print('Toronto´s coordinate are {}, {}.'.format(latitude_toronto, longitude_toronto))

Toronto´s coordinate are 43.653963, -79.387207.


#### Creating a map of Toronto to help visualize

In [26]:
map_toronto = folium.Map(location=[latitude_toronto, longitude_toronto], zoom_start=10)

for lat, lng, borough, Neighbourhood in zip(df_grouped['Latitude'], df_grouped['Longitude'], df_grouped['Borough'], df_grouped['Neighbourhood']):
    label = '{}, {}'.format(Neighbourhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_toronto)  
    
map_toronto

#### Inserting the credentials, version and defining the radius, limit to get the nearby venues

In [58]:
# The code was removed by Watson Studio for sharing.

In [28]:
radius=500
LIMIT=100

In [29]:
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighbourhood', 
                  'Neighbourhood Latitude', 
                  'Neighbourhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

#### Now writing the code to run the function on each neighborhood to create a dataframe for the venues

In [31]:
df_venues = getNearbyVenues(names=df_grouped['Neighbourhood'],
                                   latitudes=df_grouped['Latitude'],
                                   longitudes=df_grouped['Longitude']
                                  )

Parkwoods
Victoria Village
Harbourfront, Regent Park
Lawrence Heights, Lawrence Manor
Queen's Park
Islington Avenue
Rouge, Malvern
Don Mills North
Woodbine Gardens, Parkview Hill
Ryerson, Garden District
Glencairn
Cloverdale, Islington, Martin Grove, Princess Gardens, West Deane Park
Highland Creek, Rouge Hill, Port Union
Flemingdon Park, Don Mills South
Woodbine Heights
St. James Town
Humewood-Cedarvale
Bloordale Gardens, Eringate, Markland Wood, Old Burnhamthorpe
Guildwood, Morningside, West Hill
The Beaches
Berczy Park
Caledonia-Fairbanks
Woburn
Leaside
Central Bay Street
Christie
Cedarbrae
Hillcrest Village
Bathurst Manor, Downsview North, Wilson Heights
Thorncliffe Park
Adelaide, King, Richmond
Dovercourt Village, Dufferin
Scarborough Village
Fairview, Henry Farm, Oriole
Northwood Park, York University
East Toronto
Harbourfront East, Toronto Islands, Union Station
Little Portugal, Trinity
East Birchmount Park, Ionview, Kennedy Park
Bayview Village
CFB Toronto, Downsview East
The D

#### After that, we can visualize these results in a dataframe

In [32]:
print('df_venues.shape :', df_venues.shape)
df_venues.head(20)

df_venues.shape : (2466, 7)


Unnamed: 0,Neighbourhood,Neighbourhood Latitude,Neighbourhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Parkwoods,43.75242,-79.329242,Brookbanks Park,43.751976,-79.33214,Park
1,Parkwoods,43.75242,-79.329242,Brookbanks Pool,43.751389,-79.332184,Pool
2,Parkwoods,43.75242,-79.329242,Variety Store,43.751974,-79.333114,Food & Drink Shop
3,Parkwoods,43.75242,-79.329242,TTC stop - 44 Valley Woods,43.755402,-79.333741,Bus Stop
4,Victoria Village,43.7306,-79.313265,Wigmore Park,43.731023,-79.310771,Park
5,Victoria Village,43.7306,-79.313265,Memories of Africa,43.726602,-79.312427,Grocery Store
6,Victoria Village,43.7306,-79.313265,Vinnia Meats,43.730465,-79.30752,German Restaurant
7,"Harbourfront, Regent Park",43.650295,-79.359166,The Distillery Historic District,43.650244,-79.359323,Historic Site
8,"Harbourfront, Regent Park",43.650295,-79.359166,Arvo,43.649963,-79.361442,Coffee Shop
9,"Harbourfront, Regent Park",43.650295,-79.359166,Distillery Sunday Market,43.650075,-79.361832,Farmers Market


#### Let's group by Neighbourhood

In [33]:
df_venues.groupby('Neighbourhood').count()

Unnamed: 0_level_0,Neighbourhood Latitude,Neighbourhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighbourhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
"Adelaide, King, Richmond",100,100,100,100,100,100
Agincourt,14,14,14,14,14,14
"Agincourt North, L'Amoreaux East, Milliken, Steeles East",2,2,2,2,2,2
"Albion Gardens, Beaumond Heights, Humbergate, Jamestown, Mount Olive, Silverstone, South Steeles, Thistletown",13,13,13,13,13,13
"Alderwood, Long Branch",5,5,5,5,5,5
"Bathurst Manor, Downsview North, Wilson Heights",1,1,1,1,1,1
Bayview Village,4,4,4,4,4,4
"Bedford Park, Lawrence Manor East",21,21,21,21,21,21
Berczy Park,63,63,63,63,63,63
"Birch Cliff, Cliffside West",6,6,6,6,6,6


### Let's prepare the data to analyse each Neighbourhood

In [34]:
# one hot encoding
df_toronto_onehot = pd.get_dummies(df_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
df_toronto_onehot['Neighbourhood'] = df_venues['Neighbourhood'] 

# move neighborhood column to the first column
fixed_columns = [df_toronto_onehot.columns[-1]] + list(df_toronto_onehot.columns[:-1])
df_toronto_onehot = df_toronto_onehot[fixed_columns]

df_toronto_onehot.head()

Unnamed: 0,Neighbourhood,Afghan Restaurant,Airport,American Restaurant,Antique Shop,Art Gallery,Arts & Crafts Store,Asian Restaurant,Athletics & Sports,Auto Dealership,...,Vegetarian / Vegan Restaurant,Video Game Store,Video Store,Vietnamese Restaurant,Wine Bar,Wine Shop,Wings Joint,Women's Store,Yoga Studio,Zoo Exhibit
0,Parkwoods,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,Parkwoods,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,Parkwoods,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,Parkwoods,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,Victoria Village,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


#### Let's check the shape of the dataframe before grouping by

In [35]:
df_toronto_onehot.shape

(2466, 259)

In [36]:
toronto_grouped = df_toronto_onehot.groupby('Neighbourhood').mean().reset_index()
toronto_grouped

Unnamed: 0,Neighbourhood,Afghan Restaurant,Airport,American Restaurant,Antique Shop,Art Gallery,Arts & Crafts Store,Asian Restaurant,Athletics & Sports,Auto Dealership,...,Vegetarian / Vegan Restaurant,Video Game Store,Video Store,Vietnamese Restaurant,Wine Bar,Wine Shop,Wings Joint,Women's Store,Yoga Studio,Zoo Exhibit
0,"Adelaide, King, Richmond",0.000000,0.00,0.030000,0.000000,0.010000,0.000000,0.030000,0.000000,0.0,...,0.010000,0.000000,0.000000,0.000000,0.010000,0.0,0.000000,0.0,0.000000,0.0
1,Agincourt,0.000000,0.00,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,...,0.000000,0.000000,0.000000,0.071429,0.000000,0.0,0.000000,0.0,0.000000,0.0
2,"Agincourt North, L'Amoreaux East, Milliken, St...",0.000000,0.00,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.0,0.000000,0.0
3,"Albion Gardens, Beaumond Heights, Humbergate, ...",0.000000,0.00,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,...,0.000000,0.000000,0.076923,0.000000,0.000000,0.0,0.000000,0.0,0.000000,0.0
4,"Alderwood, Long Branch",0.000000,0.00,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.0,0.000000,0.0
5,"Bathurst Manor, Downsview North, Wilson Heights",0.000000,0.00,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.0,0.000000,0.0
6,Bayview Village,0.000000,0.00,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.0,0.000000,0.0
7,"Bedford Park, Lawrence Manor East",0.000000,0.00,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.0,0.000000,0.0
8,Berczy Park,0.000000,0.00,0.000000,0.000000,0.015873,0.000000,0.000000,0.000000,0.0,...,0.015873,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.0,0.000000,0.0
9,"Birch Cliff, Cliffside West",0.000000,0.00,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.0,0.000000,0.0


#### And here, after grouping

In [37]:
toronto_grouped.shape

(99, 259)

#### Let's print each neighborhood along with the top 5 most common venues

In [38]:
num_top_venues = 5

for hood in toronto_grouped['Neighbourhood']:
    print("----"+hood+"----")
    temp = toronto_grouped[toronto_grouped['Neighbourhood'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

----Adelaide, King, Richmond----
            venue  freq
0     Coffee Shop  0.07
1            Café  0.06
2           Hotel  0.05
3      Steakhouse  0.04
4  Breakfast Spot  0.03


----Agincourt----
                venue  freq
0       Shopping Mall  0.14
1  Chinese Restaurant  0.14
2      Discount Store  0.07
3                Park  0.07
4              Bakery  0.07


----Agincourt North, L'Amoreaux East, Milliken, Steeles East----
              venue  freq
0          Pharmacy   0.5
1  Sushi Restaurant   0.5
2     Movie Theater   0.0
3      Noodle House   0.0
4         Nightclub   0.0


----Albion Gardens, Beaumond Heights, Humbergate, Jamestown, Mount Olive, Silverstone, South Steeles, Thistletown----
                 venue  freq
0        Grocery Store  0.15
1         Liquor Store  0.08
2  Fried Chicken Joint  0.08
3           Beer Store  0.08
4          Pizza Place  0.08


----Alderwood, Long Branch----
               venue  freq
0     Sandwich Place   0.2
1  Convenience Store   0.2
2   

#### Let's put into a pandas dataframe

In [39]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

In [56]:
num_top_venues = 10
indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighbourhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighbourhoods_venues_sorted = pd.DataFrame(columns=columns)
neighbourhoods_venues_sorted['Neighbourhood'] = toronto_grouped['Neighbourhood']

for ind in np.arange(toronto_grouped.shape[0]):
    neighbourhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(toronto_grouped.iloc[ind, :], num_top_venues)

neighbourhoods_venues_sorted.head()

Unnamed: 0,Neighbourhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,"Adelaide, King, Richmond",Coffee Shop,Café,Hotel,Steakhouse,Japanese Restaurant,American Restaurant,Bar,Breakfast Spot,Burger Joint,Asian Restaurant
1,Agincourt,Chinese Restaurant,Shopping Mall,Department Store,Sushi Restaurant,Bakery,Discount Store,Badminton Court,Supermarket,Hong Kong Restaurant,Park
2,"Agincourt North, L'Amoreaux East, Milliken, St...",Pharmacy,Sushi Restaurant,Zoo Exhibit,Fast Food Restaurant,Ethiopian Restaurant,Event Space,Falafel Restaurant,Farm,Farmers Market,Fish & Chips Shop
3,"Albion Gardens, Beaumond Heights, Humbergate, ...",Grocery Store,Pizza Place,Park,Beer Store,Coffee Shop,Fried Chicken Joint,Liquor Store,Sandwich Place,Video Store,Fast Food Restaurant
4,"Alderwood, Long Branch",Sandwich Place,Pub,Gym,Recording Studio,Convenience Store,Cuban Restaurant,Eastern European Restaurant,Flea Market,Fish Market,Fish & Chips Shop


### Here we have an important step: Clustering Neighbourhoods

In [54]:
# set number of clusters
kclusters = 5

toronto_grouped_clustering = toronto_grouped.drop('Neighbourhood', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(toronto_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_ 

array([0, 0, 0, 0, 0, 1, 4, 0, 0, 4, 0, 0, 0, 0, 0, 0, 4, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 0, 0, 0, 0, 0,
       0, 4, 0, 4, 0, 4, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 4, 0, 0, 0, 4, 0, 4, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 3,
       4, 0, 4, 0, 0, 0, 0, 0, 0, 0, 4], dtype=int32)

In [57]:
# add clustering labels
neighbourhoods_venues_sorted.insert(0, 'Cluster_Labels', kmeans.labels_)
toronto_merged = df_grouped

# merge toronto_grouped with df_grouped to add latitude/longitude for each neighborhood
toronto_merged = toronto_merged.join(neighbourhoods_venues_sorted.set_index('Neighbourhood'), on='Neighbourhood')

In [43]:
toronto_merged.head(20)

Unnamed: 0,Postalcode,Borough,Neighbourhood,Latitude,Longitude,Cluster_Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M3A,North York,Parkwoods,43.75242,-79.329242,4.0,Food & Drink Shop,Pool,Bus Stop,Park,Farmers Market,Ethiopian Restaurant,Event Space,Falafel Restaurant,Farm,Field
1,M4A,North York,Victoria Village,43.7306,-79.313265,4.0,German Restaurant,Grocery Store,Park,Farm,Elementary School,Ethiopian Restaurant,Event Space,Falafel Restaurant,Zoo Exhibit,Electronics Store
2,M5A,Downtown Toronto,"Harbourfront, Regent Park",43.650295,-79.359166,0.0,Coffee Shop,Bakery,Theater,Boat or Ferry,Brewery,Farmers Market,Bank,Cosmetics Shop,Shoe Store,Café
3,M6A,North York,"Lawrence Heights, Lawrence Manor",43.72327,-79.451286,0.0,Clothing Store,Electronics Store,Cosmetics Shop,Furniture / Home Store,Men's Store,Pharmacy,Toy / Game Store,Food Court,American Restaurant,Kitchen Supply Store
4,M7A,Queen's Park,Queen's Park,43.66115,-79.391715,0.0,Coffee Shop,Café,Sandwich Place,Gym,Bookstore,College Cafeteria,Italian Restaurant,Pharmacy,Gastropub,Smoothie Shop
5,M9A,Etobicoke,Islington Avenue,43.662299,-79.528195,0.0,Pharmacy,Bank,Shopping Mall,Café,Park,Skating Rink,Grocery Store,Zoo Exhibit,Falafel Restaurant,Ethiopian Restaurant
6,M1B,Scarborough,"Rouge, Malvern",43.811525,-79.195517,2.0,Zoo Exhibit,Cosmetics Shop,Food & Drink Shop,Food,Flower Shop,Flea Market,Fish Market,Fish & Chips Shop,Field,Fast Food Restaurant
7,M3B,North York,Don Mills North,43.749055,-79.362227,0.0,Park,Burger Joint,Bus Line,Coffee Shop,Zoo Exhibit,Event Space,Falafel Restaurant,Farm,Farmers Market,Field
8,M4B,East York,"Woodbine Gardens, Parkview Hill",43.707535,-79.311773,0.0,Fast Food Restaurant,Pizza Place,Bus Line,Gym / Fitness Center,Rock Climbing Spot,Bank,Intersection,Breakfast Spot,Gastropub,Athletics & Sports
9,M5B,Downtown Toronto,"Ryerson, Garden District",43.657363,-79.37818,0.0,Coffee Shop,Clothing Store,Cosmetics Shop,Café,Fast Food Restaurant,Middle Eastern Restaurant,Japanese Restaurant,Italian Restaurant,Ramen Restaurant,Restaurant


#### Let's drop rows that contains NaN values

In [44]:
toronto_merged=toronto_merged.dropna()

In [45]:
toronto_merged.head(20)

Unnamed: 0,Postalcode,Borough,Neighbourhood,Latitude,Longitude,Cluster_Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M3A,North York,Parkwoods,43.75242,-79.329242,4.0,Food & Drink Shop,Pool,Bus Stop,Park,Farmers Market,Ethiopian Restaurant,Event Space,Falafel Restaurant,Farm,Field
1,M4A,North York,Victoria Village,43.7306,-79.313265,4.0,German Restaurant,Grocery Store,Park,Farm,Elementary School,Ethiopian Restaurant,Event Space,Falafel Restaurant,Zoo Exhibit,Electronics Store
2,M5A,Downtown Toronto,"Harbourfront, Regent Park",43.650295,-79.359166,0.0,Coffee Shop,Bakery,Theater,Boat or Ferry,Brewery,Farmers Market,Bank,Cosmetics Shop,Shoe Store,Café
3,M6A,North York,"Lawrence Heights, Lawrence Manor",43.72327,-79.451286,0.0,Clothing Store,Electronics Store,Cosmetics Shop,Furniture / Home Store,Men's Store,Pharmacy,Toy / Game Store,Food Court,American Restaurant,Kitchen Supply Store
4,M7A,Queen's Park,Queen's Park,43.66115,-79.391715,0.0,Coffee Shop,Café,Sandwich Place,Gym,Bookstore,College Cafeteria,Italian Restaurant,Pharmacy,Gastropub,Smoothie Shop
5,M9A,Etobicoke,Islington Avenue,43.662299,-79.528195,0.0,Pharmacy,Bank,Shopping Mall,Café,Park,Skating Rink,Grocery Store,Zoo Exhibit,Falafel Restaurant,Ethiopian Restaurant
6,M1B,Scarborough,"Rouge, Malvern",43.811525,-79.195517,2.0,Zoo Exhibit,Cosmetics Shop,Food & Drink Shop,Food,Flower Shop,Flea Market,Fish Market,Fish & Chips Shop,Field,Fast Food Restaurant
7,M3B,North York,Don Mills North,43.749055,-79.362227,0.0,Park,Burger Joint,Bus Line,Coffee Shop,Zoo Exhibit,Event Space,Falafel Restaurant,Farm,Farmers Market,Field
8,M4B,East York,"Woodbine Gardens, Parkview Hill",43.707535,-79.311773,0.0,Fast Food Restaurant,Pizza Place,Bus Line,Gym / Fitness Center,Rock Climbing Spot,Bank,Intersection,Breakfast Spot,Gastropub,Athletics & Sports
9,M5B,Downtown Toronto,"Ryerson, Garden District",43.657363,-79.37818,0.0,Coffee Shop,Clothing Store,Cosmetics Shop,Café,Fast Food Restaurant,Middle Eastern Restaurant,Japanese Restaurant,Italian Restaurant,Ramen Restaurant,Restaurant


In [46]:
toronto_merged['Cluster_Labels'] = toronto_merged.Cluster_Labels.astype(int)

In [47]:
# create map
map_clusters = folium.Map(location=[latitude_toronto, longitude_toronto], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(toronto_merged['Latitude'], toronto_merged['Longitude'], toronto_merged['Neighbourhood'], toronto_merged['Cluster_Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

### Examining clusters

#### Cluster 1

In [48]:
toronto_merged.loc[toronto_merged['Cluster_Labels'] == 0, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster_Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
2,Downtown Toronto,0,Coffee Shop,Bakery,Theater,Boat or Ferry,Brewery,Farmers Market,Bank,Cosmetics Shop,Shoe Store,Café
3,North York,0,Clothing Store,Electronics Store,Cosmetics Shop,Furniture / Home Store,Men's Store,Pharmacy,Toy / Game Store,Food Court,American Restaurant,Kitchen Supply Store
4,Queen's Park,0,Coffee Shop,Café,Sandwich Place,Gym,Bookstore,College Cafeteria,Italian Restaurant,Pharmacy,Gastropub,Smoothie Shop
5,Etobicoke,0,Pharmacy,Bank,Shopping Mall,Café,Park,Skating Rink,Grocery Store,Zoo Exhibit,Falafel Restaurant,Ethiopian Restaurant
7,North York,0,Park,Burger Joint,Bus Line,Coffee Shop,Zoo Exhibit,Event Space,Falafel Restaurant,Farm,Farmers Market,Field
8,East York,0,Fast Food Restaurant,Pizza Place,Bus Line,Gym / Fitness Center,Rock Climbing Spot,Bank,Intersection,Breakfast Spot,Gastropub,Athletics & Sports
9,Downtown Toronto,0,Coffee Shop,Clothing Store,Cosmetics Shop,Café,Fast Food Restaurant,Middle Eastern Restaurant,Japanese Restaurant,Italian Restaurant,Ramen Restaurant,Restaurant
10,North York,0,Pizza Place,Fast Food Restaurant,Mediterranean Restaurant,Japanese Restaurant,Latin American Restaurant,Grocery Store,Rental Car Location,Sushi Restaurant,Asian Restaurant,Fish Market
12,Scarborough,0,History Museum,Bar,Zoo Exhibit,Farmers Market,Elementary School,Ethiopian Restaurant,Event Space,Falafel Restaurant,Farm,Field
13,North York,0,Intersection,Bubble Tea Shop,Beer Store,Coffee Shop,Supermarket,Grocery Store,Gym,Zoo Exhibit,Event Space,Falafel Restaurant


#### Cluster 2

In [49]:
toronto_merged.loc[toronto_merged['Cluster_Labels'] == 1, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster_Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
28,North York,1,Men's Store,Cosmetics Shop,Food & Drink Shop,Food,Flower Shop,Flea Market,Fish Market,Fish & Chips Shop,Field,Fast Food Restaurant


#### Cluster 3

In [50]:
toronto_merged.loc[toronto_merged['Cluster_Labels'] == 2, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster_Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
6,Scarborough,2,Zoo Exhibit,Cosmetics Shop,Food & Drink Shop,Food,Flower Shop,Flea Market,Fish Market,Fish & Chips Shop,Field,Fast Food Restaurant


#### Cluster 4

In [51]:
toronto_merged.loc[toronto_merged['Cluster_Labels'] == 3, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster_Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
63,York,3,Brewery,Home Service,Furniture / Home Store,Athletics & Sports,Food,Flower Shop,Food & Drink Shop,Flea Market,Ethiopian Restaurant,Fish Market


#### Cluster 5

In [52]:
toronto_merged.loc[toronto_merged['Cluster_Labels'] == 4, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster_Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,North York,4,Food & Drink Shop,Pool,Bus Stop,Park,Farmers Market,Ethiopian Restaurant,Event Space,Falafel Restaurant,Farm,Field
1,North York,4,German Restaurant,Grocery Store,Park,Farm,Elementary School,Ethiopian Restaurant,Event Space,Falafel Restaurant,Zoo Exhibit,Electronics Store
18,Scarborough,4,Construction & Landscaping,Park,Gym / Fitness Center,Zoo Exhibit,Farm,Elementary School,Ethiopian Restaurant,Event Space,Falafel Restaurant,Farmers Market
21,York,4,Park,Market,Sporting Goods Shop,Bakery,Beer Store,Gym,Fast Food Restaurant,Mexican Restaurant,Women's Store,Fish & Chips Shop
36,Downtown Toronto,4,Harbor / Marina,Pier,Park,Zoo Exhibit,Farmers Market,Elementary School,Ethiopian Restaurant,Event Space,Falafel Restaurant,Farm
39,North York,4,Construction & Landscaping,Dog Run,Trail,Park,Zoo Exhibit,Farm,Ethiopian Restaurant,Event Space,Falafel Restaurant,Farmers Market
41,East Toronto,4,Bus Line,Grocery Store,Discount Store,Park,Zoo Exhibit,Farm,Ethiopian Restaurant,Event Space,Falafel Restaurant,Farmers Market
49,North York,4,Basketball Court,Bakery,Park,Farmers Market,Ethiopian Restaurant,Event Space,Falafel Restaurant,Farm,Zoo Exhibit,Elementary School
58,Scarborough,4,Gym Pool,General Entertainment,Skating Rink,Park,College Stadium,Gym,Zoo Exhibit,Elementary School,Ethiopian Restaurant,Event Space
66,North York,4,Convenience Store,Park,Speakeasy,Bank,Cupcake Shop,Elementary School,Flower Shop,Creperie,Flea Market,Fish Market
