In [1]:
import numpy as np # library to handle data in a vectorized manner
import pandas as pd # library for data analsysis
from pandas.io.json import json_normalize
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values
from pprint import pprint
import requests # library to handle requests
from bs4 import BeautifulSoup

# !conda install -c conda-forge folium --yes
import folium


from functools import lru_cache

# Start of question 1

### Use requests & Beautiful soup to scrape the table

In [2]:
url ="https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M"
r = requests.get(url)
soup = BeautifulSoup(r.content)


In [3]:
trs = soup.find_all('tr')
table = []
for ix, tr in enumerate(trs):
    tds = tr.find_all('td')
    table.append([tds[x].text.strip() for x, _ in enumerate(tds)])
    
data = table[1:288]
header= ['postcode', 'borough', 'neighbourhood']
postcode_df = pd.DataFrame(data=data, columns=header)


### The prefiltered data frame with three columns. 

In [4]:
postcode_df

Unnamed: 0,postcode,borough,neighbourhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront
...,...,...,...
282,M8Z,Etobicoke,Mimico NW
283,M8Z,Etobicoke,The Queensway West
284,M8Z,Etobicoke,Royal York South West
285,M8Z,Etobicoke,South of Bloor


Filter out rows where the borough is "Not assigned", 
then if the neighbourhood is "Not assigned", replace it with the borough.

In [5]:
postcode_df = postcode_df[postcode_df['borough']!='Not assigned']


postcode_df.loc[postcode_df['neighbourhood']=='Not assigned',
                'neighbourhood'] = postcode_df.loc[postcode_df['neighbourhood']=='Not assigned',
                'borough']

# print the dataframe to verify that the change was made
postcode_df.loc[postcode_df['neighbourhood']=='Not assigned']

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_with_indexer(indexer, value)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  


Unnamed: 0,postcode,borough,neighbourhood


Then split the rows into groups by post code. 

In [6]:
groups = postcode_df[['postcode', 'neighbourhood']].groupby('postcode')

In [7]:
rejoined = pd.DataFrame([groups['postcode'].transform(lambda x: x), 
              groups['neighbourhood'].transform(lambda x: ','.join(x),)]).T.drop_duplicates()

It's not clear from the instructions whether we're supposed to keep duplicated boroughs. If we do keep them, our new neighbourhood list field is going to be repeated. 

In [8]:
rejoined.columns=['postcode',"neighbourhood_list"]
rejoined.head(20)

Unnamed: 0,postcode,neighbourhood_list
2,M3A,Parkwoods
3,M4A,Victoria Village
4,M5A,Harbourfront
5,M6A,"Lawrence Heights,Lawrence Manor"
7,M7A,Queen's Park
9,M9A,Queen's Park
10,M1B,"Rouge,Malvern"
13,M3B,Don Mills North
14,M4B,"Woodbine Gardens,Parkview Hill"
16,M5B,"Ryerson,Garden District"


In [9]:
postcode_df = pd.merge(postcode_df,rejoined, 'inner', 'postcode').drop('neighbourhood', axis=1)
postcode_df

Unnamed: 0,postcode,borough,neighbourhood_list
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,Harbourfront
3,M6A,North York,"Lawrence Heights,Lawrence Manor"
4,M6A,North York,"Lawrence Heights,Lawrence Manor"
...,...,...,...
205,M8Z,Etobicoke,"Kingsway Park South West,Mimico NW,The Queensw..."
206,M8Z,Etobicoke,"Kingsway Park South West,Mimico NW,The Queensw..."
207,M8Z,Etobicoke,"Kingsway Park South West,Mimico NW,The Queensw..."
208,M8Z,Etobicoke,"Kingsway Park South West,Mimico NW,The Queensw..."


In [10]:
postcode_df.shape



(210, 3)

In [11]:
#looking ahead, it seems that many rows will be complete duplicates. 
# Let's drop them, and check the shape again.
postcode_df.drop_duplicates(inplace=True)
postcode_df.shape

(103, 3)

# Start of Question 2

Since the instructions describe geocoder and the google api as unreliable, I'll be using the csv file. 

In [12]:
long_lat = pd.read_csv("http://cocl.us/Geospatial_data")

In [13]:
long_lat.head()

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


### Merge the data onto our original dataset.
We should still have 103 rows afterward

In [14]:
# rename columns to simplify processing
long_lat.columns= ['postcode', 'latitude','longitude']
postcode_df = pd.merge(postcode_df, long_lat, how="left", on='postcode')
postcode_df.head()

Unnamed: 0,postcode,borough,neighbourhood_list,latitude,longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,Harbourfront,43.65426,-79.360636
3,M6A,North York,"Lawrence Heights,Lawrence Manor",43.718518,-79.464763
4,M7A,Downtown Toronto,Queen's Park,43.662301,-79.389494


In [15]:
postcode_df.shape

(103, 5)

# Start of Question 3
Explore and cluster the neighborhoods in Toronto. You can decide to work with only boroughs that contain the word Toronto and then replicate the same analysis we did to the New York City data. It is up to you.

Just make sure:

to add enough Markdown cells to explain what you decided to do and to report any observations you make.
to generate maps to visualize your neighborhoods and how they cluster together.
Once you are happy with your analysis, submit a link to the new Notebook on your Github repository. (3 marks)

In [16]:
center = [postcode_df['latitude'].median(), postcode_df['longitude'].median()]
toronto_map = folium.Map(location=center,
                         zoom_start=12)


In [17]:
center = [postcode_df['latitude'].median(), postcode_df['longitude'].median()]
toronto_map = folium.Map(location=center,
                         zoom_start=12)
for ix, postcode, borough, neighborhoods, lat, long in postcode_df.itertuples():
    label = f'{neighborhoods}, {borough}'
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker([lat, long],
                        radius=3,
                        popup=label,
                        color='blue',
                        fill=True,
                        fill_color='#3186cc',
                        fill_opacity=0.7,
                        parse_html=False ).add_to(toronto_map)
toronto_map

Setup foursquare Credentials:

In [18]:
import configparser
config = configparser.ConfigParser()
config.read_file(open('foursquare.env'))
CLIENT_ID = config.get('foursquare','client_id' )
CLIENT_SECRET = config.get('foursquare', 'client_secret')

VERSION = '20180605' # Foursquare API version
radius=500
limit=100
# lat = 43.721577   
# long =-79.51835
# url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(CLIENT_ID, CLIENT_SECRET, VERSION, lat, long, radius, limit)
# results = requests.get(url).json()
# results = json_normalize(results['response']['groups'][0]['items'])

In [19]:
@lru_cache(maxsize=None)
def get_nearby_venues(lat, lng, radius=500, limit=1000, client_id=CLIENT_ID, client_secret=CLIENT_SECRET, version=VERSION):
    url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(client_id, client_secret, version, lat, long, radius, limit)
    results = requests.get(url).json()
    try: 
        results = json_normalize(results['response']['groups'][0]['items'])
        # columns that we want, but may not exist
        expected = pd.DataFrame(None, columns=["venue.name",
                                               "venue.location.postalCode",
                                               "venue.location.lat",
                                               "venue.location.lng", 
                                               "venue.categories"])
        #insert into new dataframe, more or less
        venues = pd.concat([expected, results])[expected.columns]

        venues.columns = ['name', 'ven_postalcode', 'ven_latitude', 'ven_longitude','ven_categories',]
        return venues
    except: 
        return None



In [20]:
postcode_df.head()

Unnamed: 0,postcode,borough,neighbourhood_list,latitude,longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,Harbourfront,43.65426,-79.360636
3,M6A,North York,"Lawrence Heights,Lawrence Manor",43.718518,-79.464763
4,M7A,Downtown Toronto,Queen's Park,43.662301,-79.389494


In [21]:
venues = pd.DataFrame(data=None, columns=['merge_key',
                                          'name', 
                                          'ven_postalcode', 
                                          'ven_latitude', 
                                          'ven_longitude',
                                          'ven_categories',])

for ix, lat, lng in list(postcode_df[['latitude', 'longitude']].itertuples()):
    more_venues = get_nearby_venues(lat=lat, lng=lng)
    print(more_venues)
    if more_venues is not None:
        more_venues['merge_key'] = int(ix)
        venues= pd.concat([venues, more_venues])
    

        
venues.shape

of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  
of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  del sys.path[0]


                     name ven_postalcode  ven_latitude  ven_longitude  \
0        Pho Com Viet Nam        M3N 2V3     43.756631     -79.518336   
1               Pizza Hut        M3N 2V3     43.756340     -79.517818   
2                     KFC        M3N 2V3     43.756600     -79.518100   
3          The Beer Store        M3N 2V3     43.756094     -79.516239   
4                  Subway        M3N 2V3     43.756171     -79.518251   
5             Tim Hortons        M3N 1N1     43.754344     -79.527024   
6             Tim Hortons        M3N 2K1     43.756128     -79.516266   
7         Pho Mi Viet Hoa        M3N 2J5     43.751936     -79.515979   
8          Planet Fitness            NaN     43.757538     -79.519610   
9     Hwy 400 at Finch W.            NaN     43.754399     -79.526967   
10  Jian Hing Supermarket        M3N 2K2     43.756673     -79.518444   

                                       ven_categories  
0   [{'id': '4bf58dd8d48988d14a941735', 'name': 'V...  
1   [{'id':

                     name ven_postalcode  ven_latitude  ven_longitude  \
0               Bulk Barn            NaN     43.660939     -79.517773   
1  Humbertown Village Spa        M9A 3T7     43.661428     -79.519289   
2     Black's Photography        M9A 3T7     43.661490     -79.519317   

                                      ven_categories  
0  [{'id': '52f2ab2ebcbc57f1066b8b46', 'name': 'S...  
1  [{'id': '4bf58dd8d48988d1ed941735', 'name': 'S...  
2  [{'id': '4eb1bdf03b7b55596b4a7491', 'name': 'C...  
                                             name ven_postalcode  \
0                          Wallace C. Swanek Park            NaN   
1                                    Grattan Park            NaN   
2                                   Chicken Place            NaN   
3  Maple Leafs Movers North York : Moving Company        M9N 2P3   
4                    Level 100 Sports Productions        M9N 2J7   

   ven_latitude  ven_longitude  \
0     43.708896     -79.522648   
1     43.7

               name ven_postalcode  ven_latitude  ven_longitude  \
0      Petro-Canada        M3N 2L2     43.766586     -79.519870   
1            Rexall        M3N 2L3     43.766590     -79.519852   
2  Basketball Court            NaN     43.766837     -79.522100   
3              LCBO        M3N 3A1     43.759257     -79.519454   

                                      ven_categories  
0  [{'id': '4bf58dd8d48988d113951735', 'name': 'G...  
1  [{'id': '4bf58dd8d48988d10f951735', 'name': 'P...  
2  [{'id': '4bf58dd8d48988d1e1941735', 'name': 'B...  
3  [{'id': '4bf58dd8d48988d186941735', 'name': 'L...  
                    name ven_postalcode  ven_latitude  ven_longitude  \
0  TTC Bus 73 Royal York            NaN     43.677246     -79.519630   
1    Pasta International            NaN     43.672703     -79.519223   
2        Allanhurst Park            NaN     43.678968     -79.525004   
3          A Dog's Haven        M9A 4P5     43.672946     -79.517926   

                            

                   name ven_postalcode  ven_latitude  ven_longitude  \
0          St. Joseph's            NaN     43.804703     -79.523681   
1  North Toronto Paints        L4K 4L7     43.804160     -79.525980   

                                      ven_categories  
0  [{'id': '52f2ab2ebcbc57f1066b8b28', 'name': 'P...  
1  [{'id': '5454144b498ec1f095bff2f2', 'name': 'C...  
                         name ven_postalcode  ven_latitude  ven_longitude  \
0            Pho Com Viet Nam        M3N 2V3     43.756631     -79.518336   
1          Shoppers Drug Mart        M3N 2K1     43.756147     -79.515843   
2                   Pizza Hut        M3N 2V3     43.756340     -79.517818   
3                         KFC        M3N 2V3     43.756600     -79.518100   
4              The Beer Store        M3N 2V3     43.756094     -79.516239   
5                   No Frills        M3N 3A1     43.758178     -79.519680   
6   Popeyes Louisiana Kitchen        M3N 2K1     43.756604     -79.516047   
7    

                                  name ven_postalcode  ven_latitude  \
0                            Starbucks        M8X 2X3     43.645425   
1   Gourmet Bakery (Islington Station)            NaN     43.644919   
2                               Sobeys        M8X 2X9     43.645726   
3                               Gojima        M8X 1E9     43.645094   
4                           Lemongrass            NaN     43.645010   
5                                  A&W        M8X 1G2     43.645096   
6                                  A&W        M8X 2X4     43.645360   
7                         Thai Express        M8X 2X3     43.645427   
8                           Second Cup        M8X 2W8     43.645331   
9                               Subway            NaN     43.645014   
10                    GoodLife Fitness        M8X 2X2     43.645540   
11                         Tim Hortons        M8Z 5B5     43.645278   
12                       Mitzie's Jerk        M8X 1E9     43.645215   
13    

                                name ven_postalcode  ven_latitude  \
0        Power Yoga Canada Etobicoke            NaN     43.636592   
1                        Tim Hortons        M8Z 4R9     43.635973   
2                    TD Canada Trust        M8Z 4R6     43.633786   
3                Rocco's Plum Tomato            NaN     43.634898   
4                         Second Cup        M8Z 4R6     43.634262   
5                Rainbow Convenience            NaN     43.635901   
6                      Fresh & Tasty            M8Z     43.635899   
7                     Tom Riley Park            NaN     43.638972   
8  Armstrong & Nelson Eavestroughing        M8Z 2W1     43.637847   

   ven_longitude                                     ven_categories  
0     -79.520312  [{'id': '4bf58dd8d48988d102941735', 'name': 'Y...  
1     -79.520597  [{'id': '4bf58dd8d48988d1e0931735', 'name': 'C...  
2     -79.519649  [{'id': '4bf58dd8d48988d10a951735', 'name': 'B...  
3     -79.519951  [{'id': '4b

                         name ven_postalcode  ven_latitude  ven_longitude  \
0            Pho Com Viet Nam        M3N 2V3     43.756631     -79.518336   
1                   No Frills        M3N 3A1     43.758178     -79.519680   
2                   Pizza Hut        M3N 2V3     43.756340     -79.517818   
3          Shoppers Drug Mart        M3N 2K1     43.756147     -79.515843   
4                         KFC        M3N 2V3     43.756600     -79.518100   
5                Petro-Canada        M3N 2Z2     43.757187     -79.517778   
6   Popeyes Louisiana Kitchen        M3N 2K1     43.756604     -79.516047   
7              The Beer Store        M3N 2V3     43.756094     -79.516239   
8                      Subway        M3N 2V3     43.756171     -79.518251   
9                 Pizza Pizza        M3N 2H1     43.756186     -79.515460   
10             Planet Fitness            NaN     43.757538     -79.519610   
11                Tim Hortons        M3N 2K1     43.756128     -79.516266   

                          name ven_postalcode  ven_latitude  ven_longitude  \
0  Black Creek Pioneer Village        M3J 3P1     43.773393     -79.516991   
1                 Petro-Canada        M3N 2L2     43.766586     -79.519870   
2          Tri Star Restaurant        M3N 2L2     43.769336     -79.520250   
3    John Booth Memorial Arena            NaN     43.769549     -79.523248   

                                      ven_categories  
0  [{'id': '4deefb944765f83613cdba6e', 'name': 'H...  
1  [{'id': '4bf58dd8d48988d113951735', 'name': 'G...  
2  [{'id': '4bf58dd8d48988d145941735', 'name': 'C...  
3  [{'id': '4bf58dd8d48988d168941735', 'name': 'S...  
             name ven_postalcode  ven_latitude  ven_longitude  \
0       No Frills        M3N 3A1     43.758178     -79.519680   
1     Tim Hortons        M3N 2K2     43.757604     -79.518882   
2            LCBO        M3N 3A1     43.759257     -79.519454   
3  Planet Fitness            NaN     43.757538     -79.519610   

        

                          name ven_postalcode  ven_latitude  ven_longitude  \
0                   COBS Bread        M9A 3T7     43.664940     -79.520485   
1                    Bulk Barn            NaN     43.660939     -79.517773   
2           Shoppers Drug Mart        M9A 3T7     43.661721     -79.518881   
3                         LCBO        M9A 3T6     43.661298     -79.518150   
4                      Loblaws        M9A 3T7     43.661721     -79.518881   
5   Humbertown Shopping Centre        M9A 3T7     43.661797     -79.516861   
6          Black's Photography        M9A 3T7     43.661490     -79.519317   
7       Humbertown Village Spa        M9A 3T7     43.661428     -79.519289   
8               Baskin-Robbins            M9A     43.661654     -79.519190   
9            Alex Farm Produce            NaN     43.661528     -79.518899   
10                     Daisaky            NaN     43.662380     -79.519673   
11                  Second Cup            NaN     43.662273     

                    name ven_postalcode  ven_latitude  ven_longitude  \
0          LaRose Bakery            NaN     43.686835     -79.524436   
1            Pizza Pizza        M9P 1B2     43.685256     -79.523582   
2  Moxie's Classic Grill            NaN     43.686870     -79.524454   
3             Mac's Milk            NaN     43.685572     -79.523698   

                                      ven_categories  
0  [{'id': '4bf58dd8d48988d16a941735', 'name': 'B...  
1  [{'id': '4bf58dd8d48988d1ca941735', 'name': 'P...  
2  [{'id': '4bf58dd8d48988d1c4941735', 'name': 'R...  
3  [{'id': '4d954b0ea243a5684a65b473', 'name': 'C...  
                                  name ven_postalcode  ven_latitude  \
0                  Riviera Event Space        L4K 1W8     43.795887   
1                    Interchange Plaza            NaN     43.793336   
2                          Coffee Time        L4K 3R9     43.795984   
3                     Avenue Nightclub            NaN     43.796372   
4  Double

                                      name ven_postalcode  ven_latitude  \
0  Mastercard Centre For Hockey Excellence        M8V 3L1     43.603126   
1                          Egglicious Cafe        M8Z 5C9     43.606485   
2                              Tim Hortons        M8Z 5C9     43.606077   
3                       Pro Teach Baseball        M8V 3N7     43.602518   

   ven_longitude                                     ven_categories  
0     -79.519818  [{'id': '4bf58dd8d48988d168941735', 'name': 'S...  
1     -79.520693  [{'id': '4bf58dd8d48988d143941735', 'name': 'B...  
2     -79.520600  [{'id': '4bf58dd8d48988d1e0931735', 'name': 'C...  
3     -79.523000  [{'id': '4bf58dd8d48988d1e8941735', 'name': 'B...  
            name ven_postalcode  ven_latitude  ven_longitude  \
0    Wok of Fame            NaN     43.740249     -79.518762   
1  Country Style        M3N 1V9     43.737045     -79.523811   

                                      ven_categories  
0  [{'id': '4bf58dd8d48988

                                  name ven_postalcode  ven_latitude  \
0                            Starbucks        M8X 2X3     43.645425   
1   Gourmet Bakery (Islington Station)            NaN     43.644919   
2                               Sobeys        M8X 2X9     43.645726   
3                               Gojima        M8X 1E9     43.645094   
4                           Lemongrass            NaN     43.645010   
5                                  A&W        M8X 1G2     43.645096   
6                                  A&W        M8X 2X4     43.645360   
7                         Thai Express        M8X 2X3     43.645427   
8                           Second Cup        M8X 2W8     43.645331   
9                               Subway            NaN     43.645014   
10                    GoodLife Fitness        M8X 2X2     43.645540   
11                         Tim Hortons        M8Z 5B5     43.645278   
12                       Mitzie's Jerk        M8X 1E9     43.645215   
13    

(696, 6)

In [22]:
venues


Unnamed: 0,merge_key,name,ven_categories,ven_latitude,ven_longitude,ven_postalcode
0,0,Pho Com Viet Nam,"[{'id': '4bf58dd8d48988d14a941735', 'name': 'V...",43.756631,-79.518336,M3N 2V3
1,0,Pizza Hut,"[{'id': '4bf58dd8d48988d1ca941735', 'name': 'P...",43.756340,-79.517818,M3N 2V3
2,0,KFC,"[{'id': '4bf58dd8d48988d16e941735', 'name': 'F...",43.756600,-79.518100,M3N 2V3
3,0,The Beer Store,"[{'id': '5370f356bcbc57f1066c94c2', 'name': 'B...",43.756094,-79.516239,M3N 2V3
4,0,Subway,"[{'id': '4bf58dd8d48988d1c5941735', 'name': 'S...",43.756171,-79.518251,M3N 2V3
...,...,...,...,...,...,...
10,102,Royal Canadian Legion #210,"[{'id': '52e81612bcbc57f1066b7a33', 'name': 'S...",43.628855,-79.518903,M8Z 2H1
11,102,Islington Florist & Nursery,"[{'id': '4bf58dd8d48988d11b951735', 'name': 'F...",43.630156,-79.518718,
12,102,Value Village,"[{'id': '4bf58dd8d48988d101951735', 'name': 'T...",43.631269,-79.518238,M8Z 6A4
13,102,Kingsway Boxing Club,"[{'id': '4bf58dd8d48988d176941735', 'name': 'G...",43.627254,-79.526684,M8Z 2G6


In [23]:
 df = pd.merge(postcode_df, venues, left_index=True, right_on='merge_key')
#parse out categories:
df['ven_categories']=df.ven_categories.apply(lambda x: x[0]['shortName'])
df.to_csv('merged_data.csv')

In [24]:
merged_records = pd.read_csv('merged_data.csv',index_col=0)
merged_records.reset_index(inplace=True)



In [25]:
merged = pd.merge(merged_records, pd.get_dummies(merged_records["ven_categories"]),left_index=True, right_index=True)

In [26]:
cols = ['latitude',
 'longitude',
 'Apparel',
 'Athletics & Sports',
 'Auto Garage',
 'Bakery',
 'Bank',
 'Baseball Field',
 'Basketball Court',
 'Beer Store',
 'Breakfast',
 'Brewery',
 'Burgers',
 'Burritos',
 'Bus',
 'Bus Station',
 'Bus Stop',
 'Business Services',
 'Butcher',
 'Café',
 'Camera Store',
 'Caribbean',
 'Chinese',
 'Coffee Shop',
 'Construction',
 'Convenience Store',
 'Deli / Bodega',
 'Diner',
 'Discount Store',
 'Dive Bar',
 'Dog Run',
 'Dry Cleaner',
 'Fast Food',
 'Flower Shop',
 'Food & Drink',
 'Fried Chicken',
 'Furniture / Home',
 'Gas Station',
 'Gastropub',
 'Gluten-free',
 'Golf Course',
 'Grocery Store',
 'Gym',
 'Gym / Fitness',
 'Hardware',
 'Historic Site',
 'History Museum',
 'Home Services',
 'Ice Cream',
 'Indian',
 'Intersection',
 'Italian',
 'Japanese',
 'Latin American',
 'Liquor Store',
 'Locksmith',
 'Mall',
 'Metro',
 'Moving Target',
 'Music School',
 'Nightclub',
 'Nightlife',
 'Outdoors & Recreation',
 'Park',
 'Pharmacy',
 'Pizza',
 'Playground',
 'Plaza',
 'Pool',
 'Print Shop',
 'Pub',
 'Rental Car',
 'Restaurant',
 'Road',
 'Salon / Barbershop',
 'Sandwiches',
 'Skating Rink',
 'Snacks',
 'Soccer Field',
 'Social Club',
 'Spa',
 'Sports Bar',
 'Sports Club',
 'Supermarket',
 'Supplement Shop',
 'Sushi',
 'Tea Room',
 'Thai',
 'Theme Park',
 'Thrift / Vintage',
 'Video Games',
 'Vietnamese',
 'Wings',
 'Yoga Studio',
 'Zoo']


In [27]:
grouped = merged[cols].groupby(['latitude', 'longitude'])
means = grouped.mean()
means.columns = [c + '_vertical_mean' for c in means.columns]
sums = grouped.sum()
sums['size'] = sums.sum(axis=1)
for column in sums.columns[:-1]:
    sums[column] = sums[column] / sums['size']
sums.columns = [c + '_horizontal_mean' if c != 'size' else c for c in sums.columns] 

scores = pd.merge(sums, means, left_index=True, right_index=True)

In [28]:
from sklearn.preprocessing import StandardScaler
X = StandardScaler().fit_transform(X=scores)

In [29]:
from sklearn.cluster import KMeans
kmm = KMeans(n_clusters = 5)
labels = kmm.fit(X).predict(X)

In [30]:
ix = scores[[]]

In [31]:
ix['cluster']=labels
ix

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """Entry point for launching an IPython kernel.


Unnamed: 0_level_0,Unnamed: 1_level_0,cluster
latitude,longitude,Unnamed: 2_level_1
43.602414,-79.543484,1
43.605647,-79.501321,1
43.628841,-79.520999,4
43.628947,-79.394420,4
43.636258,-79.498509,1
...,...,...
43.799525,-79.318389,1
43.803762,-79.363452,2
43.806686,-79.194353,2
43.815252,-79.284577,1


In [32]:
labeled = pd.merge(postcode_df, ix, left_on=['latitude','longitude'], right_index=True)

In [33]:
labeled

Unnamed: 0,postcode,borough,neighbourhood_list,latitude,longitude,cluster
0,M3A,North York,Parkwoods,43.753259,-79.329656,1
2,M5A,Downtown Toronto,Harbourfront,43.654260,-79.360636,1
4,M7A,Downtown Toronto,Queen's Park,43.662301,-79.389494,1
5,M9A,Queen's Park,Queen's Park,43.667856,-79.532242,1
8,M4B,East York,"Woodbine Gardens,Parkview Hill",43.706397,-79.309937,1
...,...,...,...,...,...,...
91,M4W,Downtown Toronto,Rosedale,43.679563,-79.377529,1
93,M8W,Etobicoke,"Alderwood,Long Branch",43.602414,-79.543484,1
96,M4X,Downtown Toronto,"Cabbagetown,St. James Town",43.667967,-79.367675,1
97,M5X,Downtown Toronto,"First Canadian Place,Underground city",43.648429,-79.382280,0


In [37]:
colors = {0:'red',1:'orange', 2:'yellow',3:'green', 4:'blue' }
for ix, postcode, borough, neighborhoods, lat, long, cluster in labeled.itertuples():
    label = f'{neighborhoods}, {borough}'
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker([lat, long],
                        radius=3,
                        popup=label,
                        color=colors[cluster],
                        fill=True,
                        fill_color='#3186cc',
                        fill_opacity=0.7,
                        parse_html=False ).add_to(toronto_map)
toronto_map