In [84]:
import requests
import lxml.html as lh
import pandas as pd
import numpy as np
import json
from geopy.geocoders import Nominatim
from sklearn.cluster import KMeans
import folium

In [3]:
!conda install -c conda-forge folium=0.5.0 --yes


Solving environment: done

# All requested packages already installed.



<h2>Scraping the webpage

In [5]:
url='https://www.wikizeroo.org/index.php?q=aHR0cHM6Ly9lbi53aWtpcGVkaWEub3JnL3dpa2kvTGlzdF9vZl9wb3N0YWxfY29kZXNfb2ZfQ2FuYWRhOl9N'
page = requests.get(url)
doc= lh.fromstring(page.content)
tr_element = doc.xpath('//tr')

Checking the data downlaoded

In [6]:
[len(T) for T in tr_element[:10]]

[3, 3, 3, 3, 3, 3, 3, 3, 3, 3]

In [7]:
tr_element=doc.xpath('//tr')
col=[]
i=0
#For each row, store each first element (header) and an empty list
for t in tr_element[0]:
    i+=1
    name=t.text_content()
    print (i,name)
    col.append((name,[]))

1 Postcode
2 Borough
3 Neighbourhood



In [8]:
for j in range(1,len(tr_element)):
    T=tr_element[j]
    if len(T)!=3:
        break
    i=0  
    for t in T.iterchildren():
        data=t.text_content()
        col[i][1].append(data)
        i+=1
            

In [9]:
Dict={title:column for (title,column) in col}
df=pd.DataFrame(Dict)

In [10]:
df.head()

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M1A,Not assigned,Not assigned\n
1,M2A,Not assigned,Not assigned\n
2,M3A,North York,Parkwoods\n
3,M4A,North York,Victoria Village\n
4,M5A,Downtown Toronto,Harbourfront\n


In [11]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 287 entries, 0 to 286
Data columns (total 3 columns):
Postcode          287 non-null object
Borough           287 non-null object
Neighbourhood
    287 non-null object
dtypes: object(3)
memory usage: 6.8+ KB


<h2>Removing not assigned</h2>

In [12]:
df_toronto=df[df.Borough!='Not assigned'].reset_index(drop=True)
df_toronto.head()

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M3A,North York,Parkwoods\n
1,M4A,North York,Victoria Village\n
2,M5A,Downtown Toronto,Harbourfront\n
3,M6A,North York,Lawrence Heights\n
4,M6A,North York,Lawrence Manor\n


<h2>not assigned neighbourhood</h2>

In [13]:
for index, row in df_toronto.iterrows():
    if(row["Neighbourhood\n"]=="Not assigned"):
        row["Neighbourhood"] = row["Borough"]

In [14]:
for index, row in df_toronto.iterrows():
    row["Neighbourhood\n"]=row["Neighbourhood\n"].strip()

In [15]:
df_toronto.head()

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,Harbourfront
3,M6A,North York,Lawrence Heights
4,M6A,North York,Lawrence Manor


<h1> Group neighborhoods</h1>

In [16]:
df_toronto_grouped = df_toronto.groupby(["Postcode", "Borough"], as_index=False).agg(lambda x: ", ".join(x))
df_toronto_grouped.head()

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M1B,Scarborough,"Rouge, Malvern"
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union"
2,M1E,Scarborough,"Guildwood, Morningside, West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae


In [17]:
df_toronto_grouped.head()

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M1B,Scarborough,"Rouge, Malvern"
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union"
2,M1E,Scarborough,"Guildwood, Morningside, West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae


In [19]:
column_names = ["Postcode", "Borough"]
test_df = pd.DataFrame(columns=column_names)

test_list = ["M5G", "M2H", "M4B", "M1J", "M4G", "M4M", "M1R", "M9V", "M9L", "M5V", "M1B", "M5A"]

for postcode in test_list:
    test_df = test_df.append(df_toronto_grouped[df_toronto_grouped["Postcode"]==postcode], ignore_index=True,sort=True)
    
test_df

Unnamed: 0,Borough,Neighbourhood,Postcode
0,Downtown Toronto,Central Bay Street,M5G
1,North York,Hillcrest Village,M2H
2,East York,"Woodbine Gardens, Parkview Hill",M4B
3,Scarborough,Scarborough Village,M1J
4,East York,Leaside,M4G
5,East Toronto,Studio District,M4M
6,Scarborough,"Maryvale, Wexford",M1R
7,Etobicoke,"Albion Gardens, Beaumond Heights, Humbergate, ...",M9V
8,North York,Humber Summit,M9L
9,Downtown Toronto,"CN Tower, Bathurst Quay, Island airport, Harbo...",M5V


<h2> print the number of rows of the cleaned dataframe

In [20]:
print(df_toronto_grouped.shape)

(103, 3)


<h2>geographical coordinates of each postal code

In [21]:
# The code was removed by Watson Studio for sharing.

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


In [22]:
df_data.rename(columns={"Postal Code": "Postcode"}, inplace=True)
df_data.head()

Unnamed: 0,Postcode,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


<h2> Merging two tables

In [23]:
toronto_df_new = df_toronto_grouped.merge(df_data, on="Postcode", how="left")
toronto_df_new.head()

Unnamed: 0,Postcode,Borough,Neighbourhood,Latitude,Longitude
0,M1B,Scarborough,"Rouge, Malvern",43.806686,-79.194353
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476


In [25]:
column_names = ["Postcode", "Borough", "Latitude", "Longitude"]
test_df = pd.DataFrame(columns=column_names)

test_list = ["M5G", "M2H", "M4B", "M1J", "M4G", "M4M", "M1R", "M9V", "M9L", "M5V", "M1B", "M5A"]

for postcode in test_list:
    test_df = test_df.append(toronto_df_new[toronto_df_new["Postcode"]==postcode], ignore_index=True,sort=False)
    
test_df

Unnamed: 0,Postcode,Borough,Latitude,Longitude,Neighbourhood
0,M5G,Downtown Toronto,43.657952,-79.387383,Central Bay Street
1,M2H,North York,43.803762,-79.363452,Hillcrest Village
2,M4B,East York,43.706397,-79.309937,"Woodbine Gardens, Parkview Hill"
3,M1J,Scarborough,43.744734,-79.239476,Scarborough Village
4,M4G,East York,43.70906,-79.363452,Leaside
5,M4M,East Toronto,43.659526,-79.340923,Studio District
6,M1R,Scarborough,43.750072,-79.295849,"Maryvale, Wexford"
7,M9V,Etobicoke,43.739416,-79.588437,"Albion Gardens, Beaumond Heights, Humbergate, ..."
8,M9L,North York,43.756303,-79.565963,Humber Summit
9,M5V,Downtown Toronto,43.628947,-79.39442,"CN Tower, Bathurst Quay, Island airport, Harbo..."


In [27]:
#map of toronto
address = 'Toronto,ON'

geolocator = Nominatim(user_agent="ny_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Toronto are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Toronto are 43.653963, -79.387207.


In [37]:
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=10)

# add markers to map
for lat,lng,bor,nei in zip(test_df['Latitude'],test_df['Longitude'],test_df['Borough'],test_df['Neighbourhood\n']):
    nei=nei.split(',')
    for i in nei:
        label = '{}, {}'.format(nei, bor)
        label = folium.Popup(label, parse_html=True)
        folium.CircleMarker(
            [lat,lng],
            radius=5,
            popup=label,
            color='blue',
            fill=True,
            fill_color='#3186cc',
            fill_opacity=0.7,
            parse_html=False).add_to(map_toronto)
    
map_toronto

In [39]:
# The code was removed by Watson Studio for sharing.

In [41]:
test_df.loc[0,'Neighbourhood\n']

'Central Bay Street'

In [42]:
neighborhood_latitude = test_df.loc[0, 'Latitude'] # neighborhood latitude value
neighborhood_longitude = test_df.loc[0, 'Longitude'] # neighborhood longitude value

neighborhood_name = test_df.loc[0, 'Neighbourhood\n'] # neighborhood name

print('Latitude and longitude values of {} are {}, {}.'.format(neighborhood_name, 
                                                               neighborhood_latitude, 
                                                               neighborhood_longitude))

Latitude and longitude values of Central Bay Street are 43.6579524, -79.3873826.


In [43]:
LIMIT = 100
radius = 500 
url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
    CLIENT_ID, 
    CLIENT_SECRET, 
    VERSION, 
    neighborhood_latitude, 
    neighborhood_longitude, 
    radius, 
    LIMIT)

url

'https://api.foursquare.com/v2/venues/explore?&client_id=UDRCSI3GCACCAXW2135PJ3YV3P4WDAP2H1K4I5FAZSBJS2SU&client_secret=D5UMIBNADAJSWKP4TJVCEWNGG0RCAASNCN0KSRDDV40IPLKY&v=20180605&ll=43.6579524,-79.3873826&radius=500&limit=100'

In [44]:
results = requests.get(url).json()
results

{'meta': {'code': 200, 'requestId': '5e2c981f542890001b2c0266'},
 'response': {'suggestedFilters': {'header': 'Tap to show:',
   'filters': [{'name': 'Open now', 'key': 'openNow'}]},
  'headerLocation': 'Bay Street Corridor',
  'headerFullLocation': 'Bay Street Corridor, Toronto',
  'headerLocationGranularity': 'neighborhood',
  'totalResults': 88,
  'suggestedBounds': {'ne': {'lat': 43.6624524045, 'lng': -79.38117421839567},
   'sw': {'lat': 43.6534523955, 'lng': -79.39359098160432}},
  'groups': [{'type': 'Recommended Places',
    'name': 'recommended',
    'items': [{'reasons': {'count': 0,
       'items': [{'summary': 'This spot is popular',
         'type': 'general',
         'reasonName': 'globalInteractionReason'}]},
      'venue': {'id': '537d4d6d498ec171ba22e7fe',
       'name': "Jimmy's Coffee",
       'location': {'address': '82 Gerrard Street W',
        'crossStreet': 'Gerrard & LaPlante',
        'lat': 43.65842123574496,
        'lng': -79.38561319551111,
        'label

In [45]:
# function that extracts the category of the venue
def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return None
    else:
        return categories_list[0]['name']

In [48]:
import json
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

venues = results['response']['groups'][0]['items']
    
nearby_venues = json_normalize(venues) # flatten JSON

# filter columns
filtered_columns = ['venue.name', 'venue.categories', 'venue.location.lat', 'venue.location.lng']
nearby_venues =nearby_venues.loc[:, filtered_columns]

# filter the category for each row
nearby_venues['venue.categories'] = nearby_venues.apply(get_category_type, axis=1)

# clean columns
nearby_venues.columns = [col.split(".")[-1] for col in nearby_venues.columns]

nearby_venues.head()

Unnamed: 0,name,categories,lat,lng
0,Jimmy's Coffee,Coffee Shop,43.658421,-79.385613
1,Tim Hortons,Coffee Shop,43.65857,-79.385123
2,Hailed Coffee,Coffee Shop,43.658833,-79.383684
3,The Elm Tree Restaurant,Modern European Restaurant,43.657397,-79.383761
4,The Queen and Beaver Public House,Gastropub,43.657472,-79.383524


In [49]:
print('{} venues were returned by Foursquare.'.format(nearby_venues.shape[0]))

88 venues were returned by Foursquare.


In [50]:
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    return(nearby_venues)

In [52]:
toronto_venues = getNearbyVenues(names=test_df['Neighbourhood\n'],
                                   latitudes=test_df['Latitude'],
                                   longitudes=test_df['Longitude']
                                  )


Central Bay Street
Hillcrest Village
Woodbine Gardens, Parkview Hill
Scarborough Village
Leaside
Studio District
Maryvale, Wexford
Albion Gardens, Beaumond Heights, Humbergate, Jamestown, Mount Olive, Silverstone, South Steeles, Thistletown
Humber Summit
CN Tower, Bathurst Quay, Island airport, Harbourfront West, King and Spadina, Railway Lands, South Niagara
Rouge, Malvern
Harbourfront


In [53]:
print(toronto_venues.shape)
toronto_venues.head()

(264, 7)


Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Central Bay Street,43.657952,-79.387383,Jimmy's Coffee,43.658421,-79.385613,Coffee Shop
1,Central Bay Street,43.657952,-79.387383,Tim Hortons,43.65857,-79.385123,Coffee Shop
2,Central Bay Street,43.657952,-79.387383,Hailed Coffee,43.658833,-79.383684,Coffee Shop
3,Central Bay Street,43.657952,-79.387383,The Elm Tree Restaurant,43.657397,-79.383761,Modern European Restaurant
4,Central Bay Street,43.657952,-79.387383,The Queen and Beaver Public House,43.657472,-79.383524,Gastropub


In [54]:
toronto_venues.groupby('Neighborhood').count()

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
"Albion Gardens, Beaumond Heights, Humbergate, Jamestown, Mount Olive, Silverstone, South Steeles, Thistletown",11,11,11,11,11,11
"CN Tower, Bathurst Quay, Island airport, Harbourfront West, King and Spadina, Railway Lands, South Niagara",17,17,17,17,17,17
Central Bay Street,88,88,88,88,88,88
Harbourfront,48,48,48,48,48,48
Hillcrest Village,4,4,4,4,4,4
Humber Summit,1,1,1,1,1,1
Leaside,33,33,33,33,33,33
"Maryvale, Wexford",4,4,4,4,4,4
"Rouge, Malvern",1,1,1,1,1,1
Scarborough Village,2,2,2,2,2,2


In [55]:
print('There are {} uniques categories.'.format(len(toronto_venues['Venue Category'].unique())))

There are 114 uniques categories.


<h1>Analyzing Neighbourhood

In [56]:
# one hot encoding
toronto_onehot = pd.get_dummies(toronto_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
toronto_onehot['Neighborhood'] = toronto_venues['Neighborhood'] 

# move neighborhood column to the first column
fixed_columns = [toronto_onehot.columns[-1]] + list(toronto_onehot.columns[:-1])
toronto_onehot = toronto_onehot[fixed_columns]

toronto_onehot.head()

Unnamed: 0,Yoga Studio,Airport,Airport Food Court,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,Antique Shop,Art Gallery,Asian Restaurant,...,Steakhouse,Supermarket,Sushi Restaurant,Tea Room,Thai Restaurant,Theater,Thrift / Vintage Store,Vegetarian / Vegan Restaurant,Video Store,Wine Bar
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [58]:
toronto_onehot.shape

(264, 114)

In [59]:
toronto_grouped=toronto_onehot.groupby('Neighborhood').mean().reset_index()
toronto_grouped

Unnamed: 0,Neighborhood,Yoga Studio,Airport,Airport Food Court,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,Antique Shop,Art Gallery,...,Steakhouse,Supermarket,Sushi Restaurant,Tea Room,Thai Restaurant,Theater,Thrift / Vintage Store,Vegetarian / Vegan Restaurant,Video Store,Wine Bar
0,"Albion Gardens, Beaumond Heights, Humbergate, ...",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.090909,0.0
1,"CN Tower, Bathurst Quay, Island airport, Harbo...",0.0,0.058824,0.058824,0.117647,0.176471,0.117647,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,Central Bay Street,0.011364,0.0,0.0,0.0,0.0,0.0,0.011364,0.0,0.0,...,0.011364,0.0,0.011364,0.011364,0.011364,0.0,0.0,0.011364,0.0,0.011364
3,Harbourfront,0.020833,0.0,0.0,0.0,0.0,0.0,0.0,0.020833,0.020833,...,0.0,0.0,0.0,0.0,0.0,0.020833,0.0,0.0,0.0,0.0
4,Hillcrest Village,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,Humber Summit,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,Leaside,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.030303,0.030303,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7,"Maryvale, Wexford",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
8,"Rouge, Malvern",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,Scarborough Village,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [60]:
num_top_venues = 5

for hood in toronto_grouped['Neighborhood']:
    print("----"+hood+"----")
    temp = toronto_grouped[toronto_grouped['Neighborhood'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

----Albion Gardens, Beaumond Heights, Humbergate, Jamestown, Mount Olive, Silverstone, South Steeles, Thistletown----
                 venue  freq
0        Grocery Store  0.18
1  Fried Chicken Joint  0.09
2          Pizza Place  0.09
3             Pharmacy  0.09
4       Sandwich Place  0.09


----CN Tower, Bathurst Quay, Island airport, Harbourfront West, King and Spadina, Railway Lands, South Niagara----
              venue  freq
0   Airport Service  0.18
1    Airport Lounge  0.12
2  Airport Terminal  0.12
3       Coffee Shop  0.06
4               Bar  0.06


----Central Bay Street----
                venue  freq
0         Coffee Shop  0.15
1                Café  0.06
2      Sandwich Place  0.06
3  Italian Restaurant  0.05
4        Burger Joint  0.03


----Harbourfront----
                venue  freq
0         Coffee Shop  0.19
1                 Pub  0.06
2                Park  0.06
3              Bakery  0.06
4  Mexican Restaurant  0.04


----Hillcrest Village----
                   

In [68]:
kclusters = 5

toronto_grouped_clustering = toronto_grouped.drop('Neighborhood', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(toronto_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10] 

array([1, 1, 1, 1, 3, 0, 1, 1, 2, 4], dtype=int32)

In [62]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

In [64]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = toronto_grouped['Neighborhood']

for ind in np.arange(toronto_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(toronto_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted.head()

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,"Albion Gardens, Beaumond Heights, Humbergate, ...",Grocery Store,Fast Food Restaurant,Sandwich Place,Discount Store,Beer Store,Pizza Place,Japanese Restaurant,Pharmacy,Fried Chicken Joint,Video Store
1,"CN Tower, Bathurst Quay, Island airport, Harbo...",Airport Service,Airport Lounge,Airport Terminal,Coffee Shop,Harbor / Marina,Sculpture Garden,Boat or Ferry,Bar,Rental Car Location,Plane
2,Central Bay Street,Coffee Shop,Café,Sandwich Place,Italian Restaurant,Burger Joint,Japanese Restaurant,Chinese Restaurant,Juice Bar,Ice Cream Shop,Bubble Tea Shop
3,Harbourfront,Coffee Shop,Pub,Bakery,Park,Restaurant,Breakfast Spot,Mexican Restaurant,Café,Hotel,Ice Cream Shop
4,Hillcrest Village,Golf Course,Dog Run,Pool,Mediterranean Restaurant,Coffee Shop,Comfort Food Restaurant,Comic Shop,Convenience Store,Coworking Space,Deli / Bodega


In [74]:

neighborhoods_venues_sorted.insert(0, 'Cluster Lab', kmeans.labels_)

toronto_m = test_df

# merge toronto_grouped with toronto_data to add latitude/longitude for each neighborhood
toronto_m = toronto_m.join(neighborhoods_venues_sorted.set_index('Neighborhood'), on='Neighbourhood\n')
toronto_m.head() # check the last columns!

Unnamed: 0,Postcode,Borough,Latitude,Longitude,Neighbourhood,Cluster Lab,Cluster Labe,Cluster Label,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M5G,Downtown Toronto,43.657952,-79.387383,Central Bay Street,1,1,1,1,Coffee Shop,Café,Sandwich Place,Italian Restaurant,Burger Joint,Japanese Restaurant,Chinese Restaurant,Juice Bar,Ice Cream Shop,Bubble Tea Shop
1,M2H,North York,43.803762,-79.363452,Hillcrest Village,3,3,3,3,Golf Course,Dog Run,Pool,Mediterranean Restaurant,Coffee Shop,Comfort Food Restaurant,Comic Shop,Convenience Store,Coworking Space,Deli / Bodega
2,M4B,East York,43.706397,-79.309937,"Woodbine Gardens, Parkview Hill",1,1,1,1,Pizza Place,Fast Food Restaurant,Pharmacy,Bus Line,Pet Store,Bank,Intersection,Athletics & Sports,Café,Gym / Fitness Center
3,M1J,Scarborough,43.744734,-79.239476,Scarborough Village,4,4,4,4,Playground,Convenience Store,Wine Bar,Clothing Store,Coffee Shop,Comfort Food Restaurant,Comic Shop,Coworking Space,Deli / Bodega,Department Store
4,M4G,East York,43.70906,-79.363452,Leaside,1,1,1,1,Coffee Shop,Sporting Goods Shop,Burger Joint,Furniture / Home Store,Restaurant,Beer Store,Bike Shop,Breakfast Spot,Brewery,Pet Store


In [75]:
toronto_m.drop(['Cluster Labels','Cluster Label','Cluster Labe'],axis=1)

Unnamed: 0,Postcode,Borough,Latitude,Longitude,Neighbourhood,Cluster Lab,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M5G,Downtown Toronto,43.657952,-79.387383,Central Bay Street,1,Coffee Shop,Café,Sandwich Place,Italian Restaurant,Burger Joint,Japanese Restaurant,Chinese Restaurant,Juice Bar,Ice Cream Shop,Bubble Tea Shop
1,M2H,North York,43.803762,-79.363452,Hillcrest Village,3,Golf Course,Dog Run,Pool,Mediterranean Restaurant,Coffee Shop,Comfort Food Restaurant,Comic Shop,Convenience Store,Coworking Space,Deli / Bodega
2,M4B,East York,43.706397,-79.309937,"Woodbine Gardens, Parkview Hill",1,Pizza Place,Fast Food Restaurant,Pharmacy,Bus Line,Pet Store,Bank,Intersection,Athletics & Sports,Café,Gym / Fitness Center
3,M1J,Scarborough,43.744734,-79.239476,Scarborough Village,4,Playground,Convenience Store,Wine Bar,Clothing Store,Coffee Shop,Comfort Food Restaurant,Comic Shop,Coworking Space,Deli / Bodega,Department Store
4,M4G,East York,43.70906,-79.363452,Leaside,1,Coffee Shop,Sporting Goods Shop,Burger Joint,Furniture / Home Store,Restaurant,Beer Store,Bike Shop,Breakfast Spot,Brewery,Pet Store
5,M4M,East Toronto,43.659526,-79.340923,Studio District,1,Café,Coffee Shop,American Restaurant,Bakery,Italian Restaurant,Brewery,Gastropub,Wine Bar,Diner,Middle Eastern Restaurant
6,M1R,Scarborough,43.750072,-79.295849,"Maryvale, Wexford",1,Smoke Shop,Breakfast Spot,Middle Eastern Restaurant,Bakery,Dog Run,Coffee Shop,Comfort Food Restaurant,Comic Shop,Convenience Store,Coworking Space
7,M9V,Etobicoke,43.739416,-79.588437,"Albion Gardens, Beaumond Heights, Humbergate, ...",1,Grocery Store,Fast Food Restaurant,Sandwich Place,Discount Store,Beer Store,Pizza Place,Japanese Restaurant,Pharmacy,Fried Chicken Joint,Video Store
8,M9L,North York,43.756303,-79.565963,Humber Summit,0,Empanada Restaurant,Wine Bar,Dog Run,Clothing Store,Coffee Shop,Comfort Food Restaurant,Comic Shop,Convenience Store,Coworking Space,Deli / Bodega
9,M5V,Downtown Toronto,43.628947,-79.39442,"CN Tower, Bathurst Quay, Island airport, Harbo...",1,Airport Service,Airport Lounge,Airport Terminal,Coffee Shop,Harbor / Marina,Sculpture Garden,Boat or Ferry,Bar,Rental Car Location,Plane


In [85]:
import matplotlib.cm as cm
import matplotlib.colors as colors
from IPython.core.display import HTML

map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(toronto_m['Latitude'], toronto_m['Longitude'], toronto_m['Neighbourhood\n'], toronto_m['Cluster Lab']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
HTML(map_clusters._repr_html_())       


In [80]:
toronto_m.loc[toronto_m['Cluster Lab'] == 1, toronto_m.columns[[1] + list(range(5, toronto_m.shape[1]))]]

Unnamed: 0,Borough,Cluster Lab,Cluster Labe,Cluster Label,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Downtown Toronto,1,1,1,1,Coffee Shop,Café,Sandwich Place,Italian Restaurant,Burger Joint,Japanese Restaurant,Chinese Restaurant,Juice Bar,Ice Cream Shop,Bubble Tea Shop
2,East York,1,1,1,1,Pizza Place,Fast Food Restaurant,Pharmacy,Bus Line,Pet Store,Bank,Intersection,Athletics & Sports,Café,Gym / Fitness Center
4,East York,1,1,1,1,Coffee Shop,Sporting Goods Shop,Burger Joint,Furniture / Home Store,Restaurant,Beer Store,Bike Shop,Breakfast Spot,Brewery,Pet Store
5,East Toronto,1,1,1,1,Café,Coffee Shop,American Restaurant,Bakery,Italian Restaurant,Brewery,Gastropub,Wine Bar,Diner,Middle Eastern Restaurant
6,Scarborough,1,1,1,1,Smoke Shop,Breakfast Spot,Middle Eastern Restaurant,Bakery,Dog Run,Coffee Shop,Comfort Food Restaurant,Comic Shop,Convenience Store,Coworking Space
7,Etobicoke,1,1,1,1,Grocery Store,Fast Food Restaurant,Sandwich Place,Discount Store,Beer Store,Pizza Place,Japanese Restaurant,Pharmacy,Fried Chicken Joint,Video Store
9,Downtown Toronto,1,1,1,1,Airport Service,Airport Lounge,Airport Terminal,Coffee Shop,Harbor / Marina,Sculpture Garden,Boat or Ferry,Bar,Rental Car Location,Plane
11,Downtown Toronto,1,1,1,1,Coffee Shop,Pub,Bakery,Park,Restaurant,Breakfast Spot,Mexican Restaurant,Café,Hotel,Ice Cream Shop


In [81]:
toronto_m.loc[toronto_m['Cluster Lab'] == 2, toronto_m.columns[[1] + list(range(5, toronto_m.shape[1]))]]

Unnamed: 0,Borough,Cluster Lab,Cluster Labe,Cluster Label,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
10,Scarborough,2,2,2,2,Fast Food Restaurant,Wine Bar,Dog Run,Clothing Store,Coffee Shop,Comfort Food Restaurant,Comic Shop,Convenience Store,Coworking Space,Deli / Bodega


In [82]:
toronto_m.loc[toronto_m['Cluster Lab'] == 3, toronto_m.columns[[1] + list(range(5, toronto_m.shape[1]))]]

Unnamed: 0,Borough,Cluster Lab,Cluster Labe,Cluster Label,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
1,North York,3,3,3,3,Golf Course,Dog Run,Pool,Mediterranean Restaurant,Coffee Shop,Comfort Food Restaurant,Comic Shop,Convenience Store,Coworking Space,Deli / Bodega


In [83]:
toronto_m.loc[toronto_m['Cluster Lab'] == 4, toronto_m.columns[[1] + list(range(5, toronto_m.shape[1]))]]

Unnamed: 0,Borough,Cluster Lab,Cluster Labe,Cluster Label,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
3,Scarborough,4,4,4,4,Playground,Convenience Store,Wine Bar,Clothing Store,Coffee Shop,Comfort Food Restaurant,Comic Shop,Coworking Space,Deli / Bodega,Department Store
