In [1]:
import requests
website_url = requests.get('https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M').text

In [2]:
from bs4 import BeautifulSoup
soup = BeautifulSoup(website_url,'lxml')


In [3]:
My_table = soup.find('table',{'class':'wikitable sortable'})

In [4]:
links = My_table.findAll('tr')

In [5]:
torPostCodes = []
torBorough = []
torNeigh =[]
for link in links:
    cells = link.findAll('td')
    if len(cells) == 3:
        torPostCodes.append(cells[0].find(text=True))
        torBorough.append(cells[1].find(text=True))
        torNeigh.append(cells[2].find(text=True))

In [6]:
import pandas as pd
df = pd.DataFrame()
df['PostalCode'] = torPostCodes
df['Borough'] = torBorough
df['Neighbourhood'] = torNeigh

df.head()

Unnamed: 0,PostalCode,Borough,Neighbourhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront


In [7]:
df = df[df.Borough != 'Not assigned']
df.head()

Unnamed: 0,PostalCode,Borough,Neighbourhood
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront
5,M5A,Downtown Toronto,Regent Park
6,M6A,North York,Lawrence Heights


In [8]:
df = df.groupby(['PostalCode','Borough'])['Neighbourhood'].apply(', '.join).reset_index()
df.head()

Unnamed: 0,PostalCode,Borough,Neighbourhood
0,M1B,Scarborough,"Rouge, Malvern"
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union"
2,M1E,Scarborough,"Guildwood\n, Morningside, West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae\n


In [9]:
df.shape

(103, 3)

In [10]:
import pandas as pd
geoData = pd.read_csv('https://cocl.us/Geospatial_data')
#geoData['Postal Code'].value_counts()

In [11]:
geoData.head()

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


In [12]:
mergedData = df.join(geoData)

In [13]:
mergedData.drop(['Postal Code'],axis=1)

Unnamed: 0,PostalCode,Borough,Neighbourhood,Latitude,Longitude
0,M1B,Scarborough,"Rouge, Malvern",43.806686,-79.194353
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood\n, Morningside, West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae\n,43.773136,-79.239476
5,M1J,Scarborough,Scarborough Village,43.744734,-79.239476
6,M1K,Scarborough,"East Birchmount Park\n, Ionview, Kennedy Park",43.727929,-79.262029
7,M1L,Scarborough,"Clairlea, Golden Mile, Oakridge",43.711112,-79.284577
8,M1M,Scarborough,"Cliffcrest, Cliffside, Scarborough Village West\n",43.716316,-79.239476
9,M1N,Scarborough,"Birch Cliff, Cliffside West\n",43.692657,-79.264848


In [16]:
print('The dataframe has {} boroughs and {} neighborhoods.'.format(
        len(mergedData['Borough'].unique()),
        mergedData.shape[0]
    )
)

The dataframe has 11 boroughs and 103 neighborhoods.


In [18]:
!conda install -c conda-forge geopy --yes # uncomment this line if you haven't completed the Foursquare API lab
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

!conda install -c conda-forge folium=0.5.0 --yes # uncomment this line if you haven't completed the Foursquare API lab
import folium # map rendering library

Solving environment: done


  current version: 4.5.12
  latest version: 4.7.10

Please update conda by running

    $ conda update -n base -c defaults conda



## Package Plan ##

  environment location: /home/rdhakal2/anaconda3

  added / updated specs: 
    - geopy


The following packages will be downloaded:

    package                    |            build
    ---------------------------|-----------------
    readline-8.0               |       hf8c457e_0         441 KB  conda-forge
    sqlite-3.29.0              |       hcee41ef_0         2.0 MB  conda-forge
    zlib-1.2.11                |    h516909a_1005         105 KB  conda-forge
    ncurses-6.1                |    hf484d3e_1002         1.3 MB  conda-forge
    bzip2-1.0.8                |       h516909a_0         396 KB  conda-forge
    pip-19.2.1                 |           py37_0         1.9 MB  conda-forge
    tk-8.6.9                   |    hed695b0_1002         3.2 MB  conda-forge
    geopy-1.20.0               |        

In [20]:
address = 'Toronto, Canada'

geolocator = Nominatim(user_agent="ny_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Toronto are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Toronto are 43.653963, -79.387207.


In [22]:
# create map of New York using latitude and longitude values
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=10)

# add markers to map
for lat, lng, borough, neighborhood in zip(mergedData['Latitude'], mergedData['Longitude'], mergedData['Borough'], mergedData['Neighbourhood']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_toronto)  
    
map_toronto

In [23]:
import matplotlib.cm as cm
import matplotlib.colors as colors
from sklearn.cluster import KMeans

In [25]:
tor_data = mergedData[mergedData['Borough'].str.contains("Toronto")]
tor_data

Unnamed: 0,PostalCode,Borough,Neighbourhood,Postal Code,Latitude,Longitude
37,M4E,East Toronto,The Beaches,M4E,43.676357,-79.293031
41,M4K,East Toronto,"The Danforth West\n, Riverdale",M4K,43.679557,-79.352188
42,M4L,East Toronto,"The Beaches West\n, India Bazaar",M4L,43.668999,-79.315572
43,M4M,East Toronto,Studio District\n,M4M,43.659526,-79.340923
44,M4N,Central Toronto,Lawrence Park,M4N,43.72802,-79.38879
45,M4P,Central Toronto,Davisville North\n,M4P,43.712751,-79.390197
46,M4R,Central Toronto,North Toronto West\n,M4R,43.715383,-79.405678
47,M4S,Central Toronto,Davisville\n,M4S,43.704324,-79.38879
48,M4T,Central Toronto,"Moore Park, Summerhill East\n",M4T,43.689574,-79.38316
49,M4V,Central Toronto,"Deer Park, Forest Hill SE\n, Rathnelly, South ...",M4V,43.686412,-79.400049


In [27]:
# create map of Manhattan using latitude and longitude values
subTorMap = folium.Map(location=[latitude, longitude], zoom_start=11)

# add markers to map
for lat, lng, label in zip(tor_data['Latitude'], tor_data['Longitude'], tor_data['Neighbourhood']):
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(subTorMap)  
    
subTorMap

In [28]:
CLIENT_ID = 'USXJUSSTNA1A3ZXB45FED0YUCNF0OBI51ZE03RXYIJDTNGX4' # your Foursquare ID
CLIENT_SECRET = '2EMVY2HXUMA5RUCGZYXYWYZT5TNQLRZVDAC4QGCRBDQYEQDD' # your Foursquare Secret
VERSION = '20180605' # Foursquare API version

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: USXJUSSTNA1A3ZXB45FED0YUCNF0OBI51ZE03RXYIJDTNGX4
CLIENT_SECRET:2EMVY2HXUMA5RUCGZYXYWYZT5TNQLRZVDAC4QGCRBDQYEQDD


In [30]:
address = 'Rosedale, Toronto'

geolocator = Nominatim()
location = geolocator.geocode(address)
lat = location.latitude
lon = location.longitude
print(lat,lon)

  This is separate from the ipykernel package so we can avoid doing imports until


43.6783556 -79.3807457


In [31]:
LIMIT=100
radius = 500
url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
    CLIENT_ID, 
    CLIENT_SECRET, 
    VERSION, 
    lat, 
    lon, 
    radius, 
    LIMIT)
url

'https://api.foursquare.com/v2/venues/explore?&client_id=USXJUSSTNA1A3ZXB45FED0YUCNF0OBI51ZE03RXYIJDTNGX4&client_secret=2EMVY2HXUMA5RUCGZYXYWYZT5TNQLRZVDAC4QGCRBDQYEQDD&v=20180605&ll=43.6783556,-79.3807457&radius=500&limit=100'

In [32]:
results = requests.get(url).json()

In [33]:
# function that extracts the category of the venue
def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return None
    else:
        return categories_list[0]['name']

In [35]:
json_normalize = pd.io.json.json_normalize

In [37]:
venues = results['response']['groups'][0]['items']
    
nearby_venues = json_normalize(venues) # flatten JSON

# filter columns
filtered_columns = ['venue.name', 'venue.categories', 'venue.location.lat', 'venue.location.lng']
nearby_venues =nearby_venues.loc[:, filtered_columns]

# filter the category for each row
nearby_venues['venue.categories'] = nearby_venues.apply(get_category_type, axis=1)

# clean columns
nearby_venues.columns = [col.split(".")[-1] for col in nearby_venues.columns]

nearby_venues.head()

Unnamed: 0,name,categories,lat,lng
0,Mooredale House,Building,43.678631,-79.380091
1,Alex Murray Parkette,Park,43.6783,-79.382773
2,Betline Trail at Roxborough dr.,Bike Trail,43.68053,-79.38149
3,"Daniel H Kayfetz B.A., LL.B Criminal Lawyer",Lawyer,43.676798,-79.378073
4,Rosedale Park,Playground,43.682328,-79.378934


In [38]:
nearby_venues.shape


(5, 4)

In [39]:

def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [41]:
tor_venues = getNearbyVenues(names=tor_data['Neighbourhood'],
                                   latitudes=tor_data['Latitude'],
                                   longitudes=tor_data['Longitude']
                                  )

The Beaches
The Danforth West
, Riverdale
The Beaches West
, India Bazaar
Studio District

Lawrence Park
Davisville North

North Toronto West

Davisville

Moore Park, Summerhill East

Deer Park, Forest Hill SE
, Rathnelly, South Hill, Summerhill West

Rosedale
Cabbagetown, St. James Town
Church and Wellesley
Harbourfront, Regent Park
Ryerson
, Garden District

St. James Town
Berczy Park
Central Bay Street

Adelaide
, King
, Richmond

Harbourfront East
, Toronto Islands, Union Station
Design Exchange, Toronto Dominion Centre
Commerce Court, Victoria Hotel

Roselawn

Forest Hill North, Forest Hill West

The Annex, North Midtown
, Yorkville
Harbord
, University of Toronto
Chinatown, Grange Park, Kensington Market
CN Tower, Bathurst Quay
, Island airport
, Harbourfront West
, King and Spadina, Railway Lands, South Niagara
Stn A PO Boxes 25 The Esplanade

First Canadian Place, Underground city
Christie

Dovercourt Village, Dufferin

Little Portugal, Trinity
Brockton
, Exhibition Place, Park

In [42]:
tor_venues.head()


Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,The Beaches,43.676357,-79.293031,Glen Manor Ravine,43.676821,-79.293942,Trail
1,The Beaches,43.676357,-79.293031,The Big Carrot Natural Food Market,43.678879,-79.297734,Health Food Store
2,The Beaches,43.676357,-79.293031,Grover Pub and Grub,43.679181,-79.297215,Pub
3,The Beaches,43.676357,-79.293031,Upper Beaches,43.680563,-79.292869,Neighborhood
4,"The Danforth West\n, Riverdale",43.679557,-79.352188,Pantheon,43.677621,-79.351434,Greek Restaurant


In [43]:
tor_venues.groupby('Neighborhood').count()


Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
"Adelaide\n, King\n, Richmond\n",100,100,100,100,100,100
Berczy Park,57,57,57,57,57,57
"Brockton\n, Exhibition Place, Parkdale Village",22,22,22,22,22,22
Business Reply Mail Processing Centre 969 Eastern\n,19,19,19,19,19,19
"CN Tower, Bathurst Quay\n, Island airport\n, Harbourfront West\n, King and Spadina, Railway Lands, South Niagara",17,17,17,17,17,17
"Cabbagetown, St. James Town",48,48,48,48,48,48
Central Bay Street\n,86,86,86,86,86,86
"Chinatown, Grange Park, Kensington Market",100,100,100,100,100,100
Christie\n,16,16,16,16,16,16
Church and Wellesley,86,86,86,86,86,86


In [45]:

# one hot encoding
tor_onehot = pd.get_dummies(tor_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
tor_onehot['Neighborhood'] = tor_venues['Neighborhood'] 

# move neighborhood column to the first column
# fixed_columns = [toronto_onehot.columns[-1]] + list(toronto_onehot.columns[:-1])
# toronto_onehot = toronto_onehot[fixed_columns]

tor_onehot.Neighborhood

0                                             The Beaches
1                                             The Beaches
2                                             The Beaches
3                                             The Beaches
4                          The Danforth West\n, Riverdale
5                          The Danforth West\n, Riverdale
6                          The Danforth West\n, Riverdale
7                          The Danforth West\n, Riverdale
8                          The Danforth West\n, Riverdale
9                          The Danforth West\n, Riverdale
10                         The Danforth West\n, Riverdale
11                         The Danforth West\n, Riverdale
12                         The Danforth West\n, Riverdale
13                         The Danforth West\n, Riverdale
14                         The Danforth West\n, Riverdale
15                         The Danforth West\n, Riverdale
16                         The Danforth West\n, Riverdale
17            

In [46]:
grpTor = tor_onehot.groupby('Neighborhood').mean().reset_index()
grpTor.head()

Unnamed: 0,Neighborhood,Afghan Restaurant,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,Antique Shop,...,Thrift / Vintage Store,Toy / Game Store,Trail,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Wine Bar,Wings Joint,Yoga Studio
0,"Adelaide\n, King\n, Richmond\n",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.03,0.0,...,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.01,0.0,0.0
1,Berczy Park,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.017544,0.0,0.0,0.0,0.0,0.0
2,"Brockton\n, Exhibition Place, Parkdale Village",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,Business Reply Mail Processing Centre 969 East...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.052632
4,"CN Tower, Bathurst Quay\n, Island airport\n, H...",0.0,0.058824,0.058824,0.058824,0.117647,0.176471,0.117647,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [47]:
grpTor.shape


(38, 238)

In [49]:

num_top_venues = 5

for hood in grpTor['Neighborhood']:
    print("----"+hood+"----")
    temp = grpTor[grpTor['Neighborhood'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

----Adelaide
, King
, Richmond
----
             venue  freq
0      Coffee Shop  0.07
1             Café  0.05
2  Thai Restaurant  0.04
3       Steakhouse  0.04
4              Bar  0.04


----Berczy Park----
                venue  freq
0         Coffee Shop  0.09
1        Cocktail Bar  0.05
2  Seafood Restaurant  0.04
3            Beer Bar  0.04
4                Café  0.04


----Brockton
, Exhibition Place, Parkdale Village----
                  venue  freq
0        Breakfast Spot  0.09
1                  Café  0.09
2           Coffee Shop  0.09
3         Grocery Store  0.05
4  Caribbean Restaurant  0.05


----Business Reply Mail Processing Centre 969 Eastern
----
                venue  freq
0  Light Rail Station  0.11
1          Restaurant  0.05
2             Brewery  0.05
3         Pizza Place  0.05
4                 Spa  0.05


----CN Tower, Bathurst Quay
, Island airport
, Harbourfront West
, King and Spadina, Railway Lands, South Niagara----
              venue  freq
0   Airport S

In [50]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

In [53]:
import numpy as np
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues = pd.DataFrame(columns=columns)
neighborhoods_venues['Neighborhood'] = grpTor['Neighborhood']

for ind in np.arange(grpTor.shape[0]):
    neighborhoods_venues.iloc[ind, 1:] = return_most_common_venues(grpTor.iloc[ind, :], num_top_venues)

neighborhoods_venues.head()

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,"Adelaide\n, King\n, Richmond\n",Coffee Shop,Café,Bar,Steakhouse,Thai Restaurant,Hotel,Asian Restaurant,Breakfast Spot,Restaurant,Cosmetics Shop
1,Berczy Park,Coffee Shop,Cocktail Bar,Cheese Shop,Steakhouse,Bakery,Beer Bar,Seafood Restaurant,Café,Farmers Market,Clothing Store
2,"Brockton\n, Exhibition Place, Parkdale Village",Breakfast Spot,Coffee Shop,Café,Falafel Restaurant,Restaurant,Caribbean Restaurant,Bar,Climbing Gym,Furniture / Home Store,Italian Restaurant
3,Business Reply Mail Processing Centre 969 East...,Light Rail Station,Yoga Studio,Spa,Garden Center,Garden,Fast Food Restaurant,Farmers Market,Park,Comic Shop,Recording Studio
4,"CN Tower, Bathurst Quay\n, Island airport\n, H...",Airport Service,Airport Lounge,Airport Terminal,Boutique,Plane,Sculpture Garden,Boat or Ferry,Bar,Coffee Shop,Harbor / Marina


In [54]:
grpTor.head()


Unnamed: 0,Neighborhood,Afghan Restaurant,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,Antique Shop,...,Thrift / Vintage Store,Toy / Game Store,Trail,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Wine Bar,Wings Joint,Yoga Studio
0,"Adelaide\n, King\n, Richmond\n",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.03,0.0,...,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.01,0.0,0.0
1,Berczy Park,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.017544,0.0,0.0,0.0,0.0,0.0
2,"Brockton\n, Exhibition Place, Parkdale Village",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,Business Reply Mail Processing Centre 969 East...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.052632
4,"CN Tower, Bathurst Quay\n, Island airport\n, H...",0.0,0.058824,0.058824,0.058824,0.117647,0.176471,0.117647,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [55]:
kclusters = 5

grpTor_cluster = grpTor.drop('Neighborhood', axis=1)
grpTor_cluster.head()

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(grpTor_cluster)

# # check cluster labels generated for each row in the dataframe
kmeans.labels_

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 0, 0, 0, 0,
       1, 0, 3, 0, 0, 4, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0], dtype=int32)

In [56]:
grpTor_cluster.head()

Unnamed: 0,Afghan Restaurant,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,Antique Shop,Aquarium,...,Thrift / Vintage Store,Toy / Game Store,Trail,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Wine Bar,Wings Joint,Yoga Studio
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.03,0.0,0.0,...,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.01,0.0,0.0
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.017544,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.052632
4,0.0,0.058824,0.058824,0.058824,0.117647,0.176471,0.117647,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [57]:
neighborhoods_venues


Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,"Adelaide\n, King\n, Richmond\n",Coffee Shop,Café,Bar,Steakhouse,Thai Restaurant,Hotel,Asian Restaurant,Breakfast Spot,Restaurant,Cosmetics Shop
1,Berczy Park,Coffee Shop,Cocktail Bar,Cheese Shop,Steakhouse,Bakery,Beer Bar,Seafood Restaurant,Café,Farmers Market,Clothing Store
2,"Brockton\n, Exhibition Place, Parkdale Village",Breakfast Spot,Coffee Shop,Café,Falafel Restaurant,Restaurant,Caribbean Restaurant,Bar,Climbing Gym,Furniture / Home Store,Italian Restaurant
3,Business Reply Mail Processing Centre 969 East...,Light Rail Station,Yoga Studio,Spa,Garden Center,Garden,Fast Food Restaurant,Farmers Market,Park,Comic Shop,Recording Studio
4,"CN Tower, Bathurst Quay\n, Island airport\n, H...",Airport Service,Airport Lounge,Airport Terminal,Boutique,Plane,Sculpture Garden,Boat or Ferry,Bar,Coffee Shop,Harbor / Marina
5,"Cabbagetown, St. James Town",Coffee Shop,Park,Italian Restaurant,Café,Restaurant,Market,Pub,Bakery,Pizza Place,General Entertainment
6,Central Bay Street\n,Coffee Shop,Café,Sandwich Place,Ice Cream Shop,Italian Restaurant,Burger Joint,Spa,Middle Eastern Restaurant,Bakery,Bar
7,"Chinatown, Grange Park, Kensington Market",Café,Vegetarian / Vegan Restaurant,Vietnamese Restaurant,Chinese Restaurant,Mexican Restaurant,Dumpling Restaurant,Bar,Bakery,Coffee Shop,Arts & Crafts Store
8,Christie\n,Café,Grocery Store,Park,Convenience Store,Coffee Shop,Restaurant,Diner,Baby Store,Nightclub,Athletics & Sports
9,Church and Wellesley,Coffee Shop,Japanese Restaurant,Gay Bar,Sushi Restaurant,Restaurant,Men's Store,Gym,Hotel,Gastropub,Fast Food Restaurant


In [58]:
tor_data

Unnamed: 0,PostalCode,Borough,Neighbourhood,Postal Code,Latitude,Longitude
37,M4E,East Toronto,The Beaches,M4E,43.676357,-79.293031
41,M4K,East Toronto,"The Danforth West\n, Riverdale",M4K,43.679557,-79.352188
42,M4L,East Toronto,"The Beaches West\n, India Bazaar",M4L,43.668999,-79.315572
43,M4M,East Toronto,Studio District\n,M4M,43.659526,-79.340923
44,M4N,Central Toronto,Lawrence Park,M4N,43.72802,-79.38879
45,M4P,Central Toronto,Davisville North\n,M4P,43.712751,-79.390197
46,M4R,Central Toronto,North Toronto West\n,M4R,43.715383,-79.405678
47,M4S,Central Toronto,Davisville\n,M4S,43.704324,-79.38879
48,M4T,Central Toronto,"Moore Park, Summerhill East\n",M4T,43.689574,-79.38316
49,M4V,Central Toronto,"Deer Park, Forest Hill SE\n, Rathnelly, South ...",M4V,43.686412,-79.400049


In [59]:

# add clustering labels
neighborhoods_venues.insert(0, 'Cluster Labels', kmeans.labels_)

# torontoMerged = grpToronto

# merge toronto_grouped with toronto_data to add latitude/longitude for each neighborhood
neighborhoods_venues.head()

Unnamed: 0,Cluster Labels,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,0,"Adelaide\n, King\n, Richmond\n",Coffee Shop,Café,Bar,Steakhouse,Thai Restaurant,Hotel,Asian Restaurant,Breakfast Spot,Restaurant,Cosmetics Shop
1,0,Berczy Park,Coffee Shop,Cocktail Bar,Cheese Shop,Steakhouse,Bakery,Beer Bar,Seafood Restaurant,Café,Farmers Market,Clothing Store
2,0,"Brockton\n, Exhibition Place, Parkdale Village",Breakfast Spot,Coffee Shop,Café,Falafel Restaurant,Restaurant,Caribbean Restaurant,Bar,Climbing Gym,Furniture / Home Store,Italian Restaurant
3,0,Business Reply Mail Processing Centre 969 East...,Light Rail Station,Yoga Studio,Spa,Garden Center,Garden,Fast Food Restaurant,Farmers Market,Park,Comic Shop,Recording Studio
4,0,"CN Tower, Bathurst Quay\n, Island airport\n, H...",Airport Service,Airport Lounge,Airport Terminal,Boutique,Plane,Sculpture Garden,Boat or Ferry,Bar,Coffee Shop,Harbor / Marina


In [60]:
tor_data.rename(index=str,columns={"Neighbourhood":"Neighborhood"})


Unnamed: 0,PostalCode,Borough,Neighborhood,Postal Code,Latitude,Longitude
37,M4E,East Toronto,The Beaches,M4E,43.676357,-79.293031
41,M4K,East Toronto,"The Danforth West\n, Riverdale",M4K,43.679557,-79.352188
42,M4L,East Toronto,"The Beaches West\n, India Bazaar",M4L,43.668999,-79.315572
43,M4M,East Toronto,Studio District\n,M4M,43.659526,-79.340923
44,M4N,Central Toronto,Lawrence Park,M4N,43.72802,-79.38879
45,M4P,Central Toronto,Davisville North\n,M4P,43.712751,-79.390197
46,M4R,Central Toronto,North Toronto West\n,M4R,43.715383,-79.405678
47,M4S,Central Toronto,Davisville\n,M4S,43.704324,-79.38879
48,M4T,Central Toronto,"Moore Park, Summerhill East\n",M4T,43.689574,-79.38316
49,M4V,Central Toronto,"Deer Park, Forest Hill SE\n, Rathnelly, South ...",M4V,43.686412,-79.400049


In [61]:
torontoMerged = mergedData.join(neighborhoods_venues)

In [62]:
torontoMerged = torontoMerged.dropna()

In [63]:
torontoMerged['Cluster Labels'] = torontoMerged['Cluster Labels'].astype(int)


In [64]:
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(torontoMerged['Latitude'], torontoMerged['Longitude'], torontoMerged['Neighborhood'], torontoMerged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters