# ASSIGNMENT:
To explore and cluster the neighborhoods in Toronto.

# PART 1 - Web Scraping & DataFrame Creation

In [1]:
import warnings
import requests
import pandas as pd
pd.set_option('display.max_colwidth', -1)
pd.set_option('display.max_columns', 250)
import numpy as np
from bs4 import BeautifulSoup  # Library for Web Scraping

### Code to scrape the given <a href="https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M">Wikipedia page</a>, using the BeautifulSoup package in python.

In [2]:
# Retrieve the HTML of the website
website_HTML = requests.get("https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M").text
soup = BeautifulSoup(website_HTML, "lxml")

### Retrieving the Table data from the HTML of the given Wikipedia page.

1. #### Retrieve the Table headers &lt;th&gt; for the pandas Dataframe column names

In [3]:
# Retrieve the table element from HTML
post_code_table = soup.find("table", {"class":"wikitable sortable"})
# Retrieve the table headers for the pandas Dataframe column headers
table_headers = post_code_table.findAll("th")
# Retrieve only text from the data and exclude the HTML tags
headers = []
for header in table_headers:
    headers.append(header.get_text().strip())
print(headers)

['Postcode', 'Borough', 'Neighbourhood']


2. #### Retrieve the Table rows &lt;tr&gt; and the Table cell data &lt;td&gt; for the pandas Dataframe contents

In [4]:
table_contents = []
# For each table row <tr> in all the table rows <tr>
for table_row in post_code_table.findAll("tr"):
    # Retrieve the table cells data along with tags
    table_cells = table_row.findAll("td")
    table_content = []
    # For each table cell <td> in all the table cells <td>
    for table_cell in table_cells:
        # Write each of the 3 table cells (store each table cell in a row) in list and strip tags, escape characters etc.
        table_content.append(table_cell.text.strip())
    # Write each list of 3 cells/items (store table rows) into another list named table_contents
    table_contents.append(table_content)
#print(table_contents)

3. Convert the above obtained list into a pandas DataFrame

   <b>Guidelines to design the dataframe:</b>
   1. The dataframe will consist of three columns: PostalCode, Borough, and Neighborhood
   2. Only process the cells that have an assigned borough. Ignore cells with a borough that is Not assigned.
   3. If a cell has a borough but a Not assigned neighborhood, then the neighborhood will be the same as the borough.
   4. More than one neighborhood can exist in one postal code area.

<b>3.1</b> The dataframe will consist of three columns: <b>PostalCode, Borough,</b> and <b>Neighborhood</b>

In [5]:
# Converting the table_contents list into a pandas DataFrame
df = pd.DataFrame(data=table_contents)
# Remove the first row as it is the table_row <tr> with the table_headers <th>
df.drop([0], axis=0, inplace=True)
# Reset the index back to default after row removal
df.reset_index(drop=True, inplace=True)
# Give names to the Dataframe columns using the headers list that we obtained earlier
df.columns = headers
# Rename the columns as per requirement
df.rename(columns={"Postcode":"PostalCode", "Neighbourhood":"Neighborhood"}, inplace=True)
df.head()

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront


<br>
<b>3.2</b> Only process the cells that have an assigned borough. <b>Ignore cells with a borough that is Not assigned.</b>

In [6]:
# Select rows from the Dataframe df that have a value assigned to the Borough feature/column
df = df[df["Borough"] != "Not assigned"]
# Reset index after row removals
df.reset_index(drop=True, inplace=True)
df.head(7)

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,Harbourfront
3,M5A,Downtown Toronto,Regent Park
4,M6A,North York,Lawrence Heights
5,M6A,North York,Lawrence Manor
6,M7A,Queen's Park,Not assigned


<br>
<b>3.3</b> <b>If a cell has a borough but a Not assigned neighborhood</b>, then <b>the neighborhood will be the same as the borough.</b>

In [7]:
# Create a pandas Series with Boolean type values of the Neighborhood column 
# and give a condition such that the rows with "Not assigned" are True in the Series 
neighborhood_na = df["Neighborhood"] == "Not assigned"
# Assign the values of the Borough column to the Neighborhood column where the
# Neighborhood is Not assigned
df.loc[neighborhood_na, "Neighborhood"] = df.loc[neighborhood_na, "Borough"]
df.head(7)

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,Harbourfront
3,M5A,Downtown Toronto,Regent Park
4,M6A,North York,Lawrence Heights
5,M6A,North York,Lawrence Manor
6,M7A,Queen's Park,Queen's Park


<br>
<b>3.4</b> <b>More than one neighborhood can exist in one postal code area.</b>

In [8]:
# We group the dataframe first by the PostalCode and then by the Borough
df = df.groupby(["PostalCode","Borough"], as_index=False).agg(lambda x: ",".join(x))

### Dataframe with 3 columns:
- PostalCode
- Borough
- Neighborhood

In [9]:
df.head()

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M1B,Scarborough,"Rouge,Malvern"
1,M1C,Scarborough,"Highland Creek,Rouge Hill,Port Union"
2,M1E,Scarborough,"Guildwood,Morningside,West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae


### Shape (rows, columns) of the DataFrame df

In [10]:
df.shape

(103, 3)

<br>

# PART 2 - Update the DataFrame with Latitude & Longitude Coordinates
In order to utilize the Foursquare location data, we need to get the latitude and the longitude coordinates of each neighborhood.

#### Geocoder package:
- We will use the Geocoder Python package for getting the latitude and the longitude coordinates of each neighborhood. But the problem with this Package is you have to be persistent sometimes in order to get the geographical coordinates of a given postal code.
- Given that this package can be very unreliable, in case you are not able to get the geographical coordinates of the neighborhoods using the Geocoder package, here is a link to a csv file that has the geographical coordinates of each postal code: http://cocl.us/Geospatial_data

In [11]:
# We will use the csv file using the given URL
dataset_url = "http://cocl.us/Geospatial_data"
df_coordinates = pd.read_csv(dataset_url)
df_coordinates.head(5)

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


- #### We need to concatenate or merge and join this dataset with the earlier one properly so that the correct values of coordinates are assigned to the specific neighborhood.

In [12]:
# Make a copy of your original dataframe df while setting your index as PostalCode from df
df_copy = df.set_index("PostalCode")
# Make a copy of your original dataframe df_coordinates while setting your index as Postal Code from df_coordinates
df_coordinates_copy = df_coordinates.set_index("Postal Code")
# Concatenate the copies of the dataframe by joining the dataframes by the PostalCode/index values
# We use inner join so that we get a intersection (one common index column) of keys/indices from both frames
df_toronto = pd.concat([df_copy,df_coordinates_copy], axis=1, join="inner")
df_toronto.head()

Unnamed: 0,Borough,Neighborhood,Latitude,Longitude
M1B,Scarborough,"Rouge,Malvern",43.806686,-79.194353
M1C,Scarborough,"Highland Creek,Rouge Hill,Port Union",43.784535,-79.160497
M1E,Scarborough,"Guildwood,Morningside,West Hill",43.763573,-79.188711
M1G,Scarborough,Woburn,43.770992,-79.216917
M1H,Scarborough,Cedarbrae,43.773136,-79.239476


In [13]:
# Before we can reset index and retain it as a column, give it a name, in this case we name it PostalCode
df_toronto.index.name = "PostalCode"
# Reset index and specify drop=False as we need the index column as a data column in the dataframe
df_toronto.reset_index(drop=False, inplace=True)

### Dataframe with 5 columns:
- PostalCode
- Borough
- Neighborhood
- Latitude
- Longitude

In [14]:
df_toronto.head()

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M1B,Scarborough,"Rouge,Malvern",43.806686,-79.194353
1,M1C,Scarborough,"Highland Creek,Rouge Hill,Port Union",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood,Morningside,West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476


<br>

# PART 3 - Explore and cluster the neighborhoods in Toronto
Let us utilize the Foursquare API to explore the neighborhoods and segment them.

In [15]:
# Import libraries
from geopy.geocoders import Nominatim
#!conda install -c anaconda folium -y
!conda install -c conda-forge folium=0.10.0 --yes
import folium

Solving environment: done

## Package Plan ##

  environment location: /opt/conda/envs/Python36

  added / updated specs: 
    - folium=0.10.0


The following packages will be downloaded:

    package                    |            build
    ---------------------------|-----------------
    folium-0.10.0              |             py_0          59 KB  conda-forge

The following packages will be UPDATED:

    folium: 0.5.0-py_0 conda-forge --> 0.10.0-py_0 conda-forge


Downloading and Extracting Packages
folium-0.10.0        | 59 KB     | ##################################### | 100% 
Preparing transaction: done
Verifying transaction: done
Executing transaction: done


## Visualizing Toronto and its Neighborhoods using Folium Maps

#### Get the latitude and longitude values of Toronto, CA

In [16]:
address_tca = "Toronto, CA"
# In order to define an instance of the geocoder, we need to define a user_agent. We will name our agent ca_explorer
geolocator = Nominatim(user_agent="ca_explorer")
location = geolocator.geocode(address_tca)
lati_toronto = location.latitude
long_toronto = location.longitude
print("The geographical coordinates of Toronto are {}, {}".format(lati_toronto, long_toronto))

The geographical coordinates of Toronto are 43.653963, -79.387207


####  Map of Toronto with neighborhoods superimposed on top

In [17]:
# Create a map using the location coordinates of Toronto found using geopy library in the above code cell
map_toronto = folium.Map(location = [lati_toronto, long_toronto], zoom_start=10)

# This code (for loop) is for placing labels over your map for boroughs and neighborhoods in Toronto
for lat, lng, borough, neighborhood in zip(df_toronto["Latitude"], df_toronto["Longitude"], df_toronto["Borough"], df_toronto["Neighborhood"]):
    label = "{}, {}".format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color="blue",
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_toronto)

map_toronto

> Click on each circle mark to reveal the name of the neighborhood and its respective borough.

In [18]:
# To check all the unique Boroughs 
df_toronto["Borough"].unique()

array(['Scarborough', 'North York', 'East York', 'East Toronto',
       'Central Toronto', 'Downtown Toronto', 'York', 'West Toronto',
       "Queen's Park", 'Mississauga', 'Etobicoke'], dtype=object)

#### Let's explore boroughs that contain the word Toronto and its neighborhoods

In [19]:
# We will work with only boroughs that contain the word Toronto
# Make a copy of your df_toronto dataframe and set the index as Borough
df_toronto_copy = df_toronto.set_index("Borough")
# Create a dataframe with rows that conatain the string "Toronto"
df_toronto_boroughs = df_toronto_copy.filter(like="Toronto", axis=0)
# Reset the index of your newly created dataframe
df_toronto_boroughs.reset_index(drop=False, inplace=True)
print(df_toronto_boroughs.shape)
df_toronto_boroughs.head()

(38, 5)


Unnamed: 0,Borough,PostalCode,Neighborhood,Latitude,Longitude
0,East Toronto,M4E,The Beaches,43.676357,-79.293031
1,East Toronto,M4K,"The Danforth West,Riverdale",43.679557,-79.352188
2,East Toronto,M4L,"The Beaches West,India Bazaar",43.668999,-79.315572
3,East Toronto,M4M,Studio District,43.659526,-79.340923
4,Central Toronto,M4N,Lawrence Park,43.72802,-79.38879


### Visualization of East, Central, Downtown and West Toronto Boroughs and their Neighborhoods in the Toronto map

In [20]:
# Lets visualize the East, Central, Downtown and West Toronto Boroughs and their Neighborhoods in the Toronto map
map_toronto_boroughs = folium.Map(location=[lati_toronto, long_toronto], zoom_start=12)

for lat_t, lng_t, postcode_t, borough_t, neighborhood_t in zip(df_toronto_boroughs["Latitude"], df_toronto_boroughs["Longitude"], df_toronto_boroughs["PostalCode"], df_toronto_boroughs["Borough"], df_toronto_boroughs["Neighborhood"]):
    label = "{},{},({})".format(neighborhood_t, borough_t, postcode_t)
    label = folium.Popup(label, parse_html=True)
    folium.Marker(
        [lat_t, lng_t],
        radius=5,
        popup=label,
        color="red",
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False
    ).add_to(map_toronto_boroughs)

map_toronto_boroughs

> Click on each marker to reveal the name of the neighborhood, its Borough and the Postal Code respectively.

## Using the Foursquare API
#### Use your Foursquare API Credentials to explore venues in each Borough in the df_toronto_boroughs dataframe

In [21]:
CLIENT_ID = 'xxxxxxxxxxxx' # your Foursquare ID
CLIENT_SECRET = 'xxxxxxxxxxxx' # your Foursquare Secret
VERSION = 'xxxxxxxx' # Foursquare API version

#### Exploring venues in each Borough in the df_toronto_boroughs within a radius of 500 meters

In [22]:
# LIMIT of number of venues returned by the Foursquare API
LIMIT = 50
radius = 500

# Make a list to hold all the venue information like venue name, latitude, longitude and category
venues = []

# To iterate over each borough and its neighborhood use a for loop
for lat_f, lng_f, postcode_f, borough_f, neighborhood_f in zip(df_toronto_boroughs["Latitude"], df_toronto_boroughs["Longitude"], df_toronto_boroughs["PostalCode"], df_toronto_boroughs["Borough"], df_toronto_boroughs["Neighborhood"]):
    # Create the GET request URL
    url = "https://api.foursquare.com/v2/venues/explore?client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}".format(
        CLIENT_ID,
        CLIENT_SECRET,
        VERSION,
        lat_f,
        lng_f,
        radius,
        LIMIT)
    
    # Request result from the GET url
    # requests.get(url).json() => will return all the information most of which is not required, so read specific data
    # json files are all about nested dictionaries and list, so we basically go into depth to reach the needed items
    # In Foursquare API, all the information is in the items key
    toronto_boroughs_info = requests.get(url).json()['response']['groups'][0]['items']
    
    for venue in toronto_boroughs_info:
        venues.append((
            # First append all the column names (PostalCode,Borough,Neighborhood,Latitude,Longitude)
            postcode_f,
            borough_f,
            neighborhood_f,
            lat_f,
            lng_f,
            # Then, append the required data from the json file
            venue['venue']['name'],
            venue['venue']['categories'][0]['name'],
            venue['venue']['location']['lat'],
            venue['venue']['location']['lng']))

In [23]:
# Convert the venues list into a pandas Dataframe
toronto_boroughs_venues = pd.DataFrame(data=venues)
# Give column names to your new dataframe
toronto_boroughs_venues.columns = ["PostalCode","Borough","Neighborhood","Latitude","Longitude","Venue","Venue Category","Venue Latitude","Venue Longitude"]
print(toronto_boroughs_venues.shape)
toronto_boroughs_venues.head()

(1163, 9)


Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude,Venue,Venue Category,Venue Latitude,Venue Longitude
0,M4E,East Toronto,The Beaches,43.676357,-79.293031,Glen Manor Ravine,Trail,43.676821,-79.293942
1,M4E,East Toronto,The Beaches,43.676357,-79.293031,The Big Carrot Natural Food Market,Health Food Store,43.678879,-79.297734
2,M4E,East Toronto,The Beaches,43.676357,-79.293031,Grover Pub and Grub,Pub,43.679181,-79.297215
3,M4E,East Toronto,The Beaches,43.676357,-79.293031,Upper Beaches,Neighborhood,43.680563,-79.292869
4,M4E,East Toronto,The Beaches,43.676357,-79.293031,Dip 'n Sip,Coffee Shop,43.678897,-79.297745


## Let's explore the venues for each borough in the df_toronto_boroughs

#### Borough 1 - East Toronto

In [24]:
# Let's see the venue names and their coressponding venue category/types in East Toronto
warnings.filterwarnings("ignore")
df_east_toronto = toronto_boroughs_venues[toronto_boroughs_venues["Borough"]=="East Toronto"]
df_east_toronto.drop(["PostalCode","Borough","Neighborhood","Latitude","Longitude","Venue Latitude","Venue Longitude"], axis=1, inplace=True)
df_east_toronto.rename(columns={"Venue":"Venue Name"}, inplace=True)
df_east_toronto = df_east_toronto.groupby(["Venue Category"], as_index=False).agg(lambda x: ",  ".join(x))
print(df_east_toronto.shape)
df_east_toronto.head(10)

(67, 2)


Unnamed: 0,Venue Category,Venue Name
0,American Restaurant,"Factory Girl, braised, Brooklyn Tavern"
1,Auto Workshop,Amin Car Repair Garage
2,Bakery,"Dough Bakeshop, Brick Street Breads, Bonjour Brioche"
3,Bank,TD Canada Trust
4,Bar,The Roy Public House
5,Bookstore,"Re: Reading, Queen Books"
6,Brewery,"Louis Cifer Brew Works, Godspeed Brewery, Saulter Street Brewery, Rorschach Brewing Co."
7,Bubble Tea Shop,Tsaa Tea Shop
8,Burger Joint,"Burger Stomper Gourmet Burger & Milkshake Bar, The Burger's Priest"
9,Burrito Place,"Burrito Bandidos, Chino Locos"


In [25]:
print("Number of Unique Venue Categories from East Toronto are {}".format(len(df_east_toronto["Venue Category"].unique())))

Number of Unique Venue Categories from East Toronto are 67


#### Borough 2 - Central Toronto

In [26]:
# Let's see the venue names and their coressponding venue category/types in Central Toronto
warnings.filterwarnings("ignore")
df_central_toronto = toronto_boroughs_venues[toronto_boroughs_venues["Borough"]=="Central Toronto"]
df_central_toronto.drop(["PostalCode","Borough","Neighborhood","Latitude","Longitude","Venue Latitude","Venue Longitude"], axis=1, inplace=True)
df_central_toronto.rename(columns={"Venue":"Venue Name"}, inplace=True)
df_central_toronto = df_central_toronto.groupby(["Venue Category"], as_index=False).agg(lambda x: ", ".join(x))
print(df_central_toronto.shape)
df_central_toronto.head(10)

(60, 2)


Unnamed: 0,Venue Category,Venue Name
0,American Restaurant,"Union Social Eatery, Rose & Sons"
1,BBQ Joint,Big Crow
2,Bagel Shop,Kiva's
3,Bookstore,Mabel's Fables
4,Breakfast Spot,Homeway Restaurant & Brunch
5,Brewery,Granite Brewery
6,Burger Joint,"A&W Canada, Madame Boeuf And Flea"
7,Bus Line,TTC Bus #162 - Lawrence-Donway
8,Café,"Thobors Boulangerie Patisserie Café, Meow Cat Cafe, Ezra's Pound, Dish Cooking Studio, Haute Coffee"
9,Chinese Restaurant,C'est Bon


In [27]:
print("Number of Unique Venue Categories from Central Toronto are {}".format(len(df_central_toronto["Venue Category"].unique())))

Number of Unique Venue Categories from Central Toronto are 60


#### Borough 3 - Downtown Toronto

In [28]:
# Let's see the venue names and their coressponding venue category/types in Downtown Toronto
warnings.filterwarnings("ignore")
df_downtown_toronto = toronto_boroughs_venues[toronto_boroughs_venues["Borough"]=="Downtown Toronto"]
df_downtown_toronto.drop(["PostalCode","Borough","Neighborhood","Latitude","Longitude","Venue Latitude","Venue Longitude"], axis=1, inplace=True)
df_downtown_toronto.rename(columns={"Venue":"Venue Name"}, inplace=True)
df_downtown_toronto = df_downtown_toronto.groupby(["Venue Category"], as_index=False).agg(lambda x: ", ".join(x))
print(df_downtown_toronto.shape)
df_downtown_toronto.head(10)

(180, 2)


Unnamed: 0,Venue Category,Venue Name
0,Airport,Billy Bishop Toronto City Airport (YTZ) (Billy Bishop Toronto City Airport)
1,Airport Food Court,Billy Bishop Café
2,Airport Gate,Gate 8
3,Airport Lounge,"Porter Lounge, Crew Room"
4,Airport Service,"HeliTours, ORNGE - Toronto Air Base"
5,Airport Terminal,"Air Canada Check-In Counter, Porter FBO Limited"
6,American Restaurant,"JOEY, Richmond Station, JOEY, Cactus Club Cafe, The Gabardine, Richmond Station, Cactus Club Cafe, Cactus Club Cafe, Richmond Station, Cactus Club Cafe"
7,Antique Shop,GW General
8,Aquarium,"Ripley's Aquarium of Canada, Ray Bay"
9,Art Gallery,"Arta Gallery, Ryerson Image Centre, St. Lawrence Market Plaza, St. Lawrence Market Plaza, The Power Plant, Design Exchange, Design Exchange, St. Lawrence Market Plaza, Design Exchange"


In [29]:
print("Number of Unique Venue Categories from Downtown Toronto are {}".format(len(df_downtown_toronto["Venue Category"].unique())))

Number of Unique Venue Categories from Downtown Toronto are 180


#### Borough 4 - West Toronto

In [30]:
# Let's see the venue names and their coressponding venue category/types in West Toronto
warnings.filterwarnings("ignore")
df_west_toronto = toronto_boroughs_venues[toronto_boroughs_venues["Borough"]=="West Toronto"]
df_west_toronto.drop(["PostalCode","Borough","Neighborhood","Latitude","Longitude","Venue Latitude","Venue Longitude"], axis=1, inplace=True)
df_west_toronto.rename(columns={"Venue":"Venue Name"}, inplace=True)
df_west_toronto = df_west_toronto.groupby(["Venue Category"], as_index=False).agg(lambda x: ", ".join(x))
print(df_west_toronto.shape)
df_west_toronto.head(10)

(83, 2)


Unnamed: 0,Venue Category,Venue Name
0,Antique Shop,SMASH
1,Art Gallery,Artscape Youngplace
2,Arts & Crafts Store,ARTiculations
3,Asian Restaurant,"Foxley Bistro, SoSo Food Club, Hanmoto"
4,Bakery,"Happy Bakery & Pastries, Nova Era Bakery, Venezia Bakery, Nova Era Bakery, Canada Bread Factory, Pascal's Baguette & Bagels"
5,Bank,"TD Canada Trust, TD Canada Trust"
6,Bar,"The Greater Good Bar, Reposado, The Communist's Daughter, Dakota Tavern, Get Well, apt 200, Pharmacy, Hole in the Wall, The Local Pub and Restaurant, A Dark Horse"
7,Bistro,Té
8,Bookstore,"Pandemonium, A Good Read, Book City (Bloor West)"
9,Boutique,Peace Collective


In [31]:
print("Number of Unique Venue Categories from West Toronto are {}".format(len(df_west_toronto["Venue Category"].unique())))

Number of Unique Venue Categories from West Toronto are 83


## Analyze venues in each Neighborhood

In [32]:
# Using the one hot encoding method, we'll analyze the venues
toronto_onehot_encoding = pd.get_dummies(toronto_boroughs_venues["Venue Category"], prefix="", prefix_sep="")
# Just using the above dataframe will only have the venue categories as columns which is not much informative
# We'll add PostalCode, Borough and Neighborhood columns to this one-hot encoded dataframe
toronto_onehot_encoding.insert(loc=0, column="PostalCode", value=toronto_boroughs_venues["PostalCode"])
toronto_onehot_encoding.insert(loc=1, column="Borough", value=toronto_boroughs_venues["Borough"])
toronto_onehot_encoding.drop(["Neighborhood"], axis=1, inplace=True)
toronto_onehot_encoding.insert(loc=2, column="Neighborhood", value=toronto_boroughs_venues["Neighborhood"])
print(toronto_onehot_encoding.shape)
toronto_onehot_encoding.head()

(1163, 217)


Unnamed: 0,PostalCode,Borough,Neighborhood,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,Antique Shop,Aquarium,Art Gallery,Art Museum,Arts & Crafts Store,Asian Restaurant,Auto Workshop,BBQ Joint,Baby Store,Bagel Shop,Bakery,Bank,Bar,Baseball Stadium,Basketball Stadium,Beach,Beer Bar,Beer Store,Bistro,Boat or Ferry,Bookstore,Boutique,Brazilian Restaurant,Breakfast Spot,Brewery,Bubble Tea Shop,Building,Burger Joint,Burrito Place,Bus Line,Butcher,Café,Cajun / Creole Restaurant,Camera Store,Caribbean Restaurant,Cheese Shop,Chinese Restaurant,Chocolate Shop,Church,Climbing Gym,Clothing Store,Cocktail Bar,Coffee Shop,College Arts Building,College Gym,College Rec Center,Colombian Restaurant,Comfort Food Restaurant,Comic Shop,Concert Hall,Convenience Store,Cosmetics Shop,Costume Shop,Coworking Space,Creperie,Cuban Restaurant,Cupcake Shop,Dance Studio,Deli / Bodega,Department Store,Dessert Shop,Dim Sum Restaurant,Diner,Discount Store,Dive Bar,Dog Run,Dumpling Restaurant,Eastern European Restaurant,Electronics Store,Ethiopian Restaurant,Event Space,Falafel Restaurant,Farmers Market,Fast Food Restaurant,Fish & Chips Shop,Fish Market,Flea Market,Flower Shop,Food,Food & Drink Shop,Food Court,Food Truck,Fountain,French Restaurant,Fried Chicken Joint,Fruit & Vegetable Store,Furniture / Home Store,Gaming Cafe,Garden,Garden Center,Gastropub,Gay Bar,General Entertainment,General Travel,Gift Shop,Gluten-free Restaurant,Gourmet Shop,Greek Restaurant,Grocery Store,Gym,Gym / Fitness Center,Harbor / Marina,Health Food Store,Historic Site,History Museum,Hobby Shop,Hostel,Hotel,Hotel Bar,Ice Cream Shop,Indian Restaurant,Indie Movie Theater,Intersection,Italian Restaurant,Japanese Restaurant,Jazz Club,Jewelry Store,Jewish Restaurant,Juice Bar,Korean Restaurant,Lake,Latin American Restaurant,Light Rail Station,Liquor Store,Lounge,Mac & Cheese Joint,Malay Restaurant,Market,Martial Arts Dojo,Men's Store,Metro Station,Mexican Restaurant,Middle Eastern Restaurant,Miscellaneous Shop,Modern European Restaurant,Monument / Landmark,Movie Theater,Museum,Music Store,Music Venue,New American Restaurant,Nightclub,Noodle House,Office,Opera House,Organic Grocery,Outdoor Sculpture,Park,Performing Arts Venue,Pet Store,Pharmacy,Pizza Place,Plane,Playground,Plaza,Poke Place,Portuguese Restaurant,Poutine Place,Pub,Ramen Restaurant,Record Shop,Recording Studio,Rental Car Location,Restaurant,Roof Deck,Sake Bar,Salad Place,Salon / Barbershop,Sandwich Place,Scenic Lookout,Sculpture Garden,Seafood Restaurant,Shoe Store,Shopping Mall,Skate Park,Skating Rink,Smoke Shop,Smoothie Shop,Snack Place,Spa,Speakeasy,Sporting Goods Shop,Sports Bar,Stadium,Stationery Store,Steakhouse,Supermarket,Supplement Shop,Sushi Restaurant,Swim School,Taco Place,Tailor Shop,Taiwanese Restaurant,Tanning Salon,Tea Room,Thai Restaurant,Theater,Theme Restaurant,Thrift / Vintage Store,Toy / Game Store,Trail,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Wine Bar,Yoga Studio
0,M4E,East Toronto,The Beaches,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0
1,M4E,East Toronto,The Beaches,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,M4E,East Toronto,The Beaches,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,M4E,East Toronto,The Beaches,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,M4E,East Toronto,The Beaches,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


## Frequency of each Venue Category

In [33]:
# Group the object (string) features/columns and then find the mean of the one-hot encoded (int) venue categories
toronto_venue_category_freq = toronto_onehot_encoding.groupby(["PostalCode","Borough","Neighborhood"]).mean().reset_index()
print(toronto_venue_category_freq.shape)
toronto_venue_category_freq.head(10)

(38, 217)


Unnamed: 0,PostalCode,Borough,Neighborhood,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,Antique Shop,Aquarium,Art Gallery,Art Museum,Arts & Crafts Store,Asian Restaurant,Auto Workshop,BBQ Joint,Baby Store,Bagel Shop,Bakery,Bank,Bar,Baseball Stadium,Basketball Stadium,Beach,Beer Bar,Beer Store,Bistro,Boat or Ferry,Bookstore,Boutique,Brazilian Restaurant,Breakfast Spot,Brewery,Bubble Tea Shop,Building,Burger Joint,Burrito Place,Bus Line,Butcher,Café,Cajun / Creole Restaurant,Camera Store,Caribbean Restaurant,Cheese Shop,Chinese Restaurant,Chocolate Shop,Church,Climbing Gym,Clothing Store,Cocktail Bar,Coffee Shop,College Arts Building,College Gym,College Rec Center,Colombian Restaurant,Comfort Food Restaurant,Comic Shop,Concert Hall,Convenience Store,Cosmetics Shop,Costume Shop,Coworking Space,Creperie,Cuban Restaurant,Cupcake Shop,Dance Studio,Deli / Bodega,Department Store,Dessert Shop,Dim Sum Restaurant,Diner,Discount Store,Dive Bar,Dog Run,Dumpling Restaurant,Eastern European Restaurant,Electronics Store,Ethiopian Restaurant,Event Space,Falafel Restaurant,Farmers Market,Fast Food Restaurant,Fish & Chips Shop,Fish Market,Flea Market,Flower Shop,Food,Food & Drink Shop,Food Court,Food Truck,Fountain,French Restaurant,Fried Chicken Joint,Fruit & Vegetable Store,Furniture / Home Store,Gaming Cafe,Garden,Garden Center,Gastropub,Gay Bar,General Entertainment,General Travel,Gift Shop,Gluten-free Restaurant,Gourmet Shop,Greek Restaurant,Grocery Store,Gym,Gym / Fitness Center,Harbor / Marina,Health Food Store,Historic Site,History Museum,Hobby Shop,Hostel,Hotel,Hotel Bar,Ice Cream Shop,Indian Restaurant,Indie Movie Theater,Intersection,Italian Restaurant,Japanese Restaurant,Jazz Club,Jewelry Store,Jewish Restaurant,Juice Bar,Korean Restaurant,Lake,Latin American Restaurant,Light Rail Station,Liquor Store,Lounge,Mac & Cheese Joint,Malay Restaurant,Market,Martial Arts Dojo,Men's Store,Metro Station,Mexican Restaurant,Middle Eastern Restaurant,Miscellaneous Shop,Modern European Restaurant,Monument / Landmark,Movie Theater,Museum,Music Store,Music Venue,New American Restaurant,Nightclub,Noodle House,Office,Opera House,Organic Grocery,Outdoor Sculpture,Park,Performing Arts Venue,Pet Store,Pharmacy,Pizza Place,Plane,Playground,Plaza,Poke Place,Portuguese Restaurant,Poutine Place,Pub,Ramen Restaurant,Record Shop,Recording Studio,Rental Car Location,Restaurant,Roof Deck,Sake Bar,Salad Place,Salon / Barbershop,Sandwich Place,Scenic Lookout,Sculpture Garden,Seafood Restaurant,Shoe Store,Shopping Mall,Skate Park,Skating Rink,Smoke Shop,Smoothie Shop,Snack Place,Spa,Speakeasy,Sporting Goods Shop,Sports Bar,Stadium,Stationery Store,Steakhouse,Supermarket,Supplement Shop,Sushi Restaurant,Swim School,Taco Place,Tailor Shop,Taiwanese Restaurant,Tanning Salon,Tea Room,Thai Restaurant,Theater,Theme Restaurant,Thrift / Vintage Store,Toy / Game Store,Trail,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Wine Bar,Yoga Studio
0,M4E,East Toronto,The Beaches,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2,0.0,0.0,0.0,0.0,0.0,0.0
1,M4K,East Toronto,"The Danforth West,Riverdale",0.0,0.0,0.0,0.0,0.0,0.0,0.02381,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02381,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02381,0.0,0.0,0.0,0.02381,0.02381,0.0,0.02381,0.0,0.0,0.0,0.0,0.0,0.0,0.02381,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.095238,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02381,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02381,0.0,0.02381,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02381,0.047619,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.214286,0.02381,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.047619,0.02381,0.0,0.0,0.071429,0.0,0.0,0.0,0.0,0.02381,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02381,0.0,0.0,0.0,0.0,0.0,0.0,0.02381,0.0,0.0,0.0,0.0,0.02381,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02381,0.0,0.0,0.02381,0.0,0.0,0.0,0.0,0.0,0.02381,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02381,0.0,0.0,0.0,0.0,0.0,0.02381
2,M4L,East Toronto,"The Beaches West,India Bazaar",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.055556,0.0,0.0,0.055556,0.055556,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.055556,0.055556,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.055556,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.055556,0.0,0.0,0.0,0.055556,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.055556,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.055556,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.055556,0.0,0.055556,0.0,0.111111,0.0,0.0,0.0,0.0,0.0,0.0,0.055556,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.055556,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.055556,0.0,0.0,0.055556,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,M4M,East Toronto,Studio District,0.0,0.0,0.0,0.0,0.0,0.0,0.051282,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.051282,0.025641,0.025641,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.025641,0.0,0.0,0.0,0.025641,0.0,0.0,0.0,0.0,0.0,0.0,0.102564,0.0,0.0,0.0,0.025641,0.025641,0.0,0.0,0.0,0.025641,0.0,0.076923,0.0,0.0,0.0,0.0,0.025641,0.0,0.0,0.025641,0.0,0.0,0.025641,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.025641,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.025641,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.025641,0.025641,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.025641,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.025641,0.0,0.0,0.0,0.051282,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.025641,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.025641,0.0,0.0,0.0,0.0,0.0,0.025641,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.025641,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.025641,0.0,0.0,0.025641,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.025641,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.025641,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.025641
4,M4N,Central Toronto,Lawrence Park,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,M4P,Central Toronto,Davisville North,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.111111,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.111111,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.111111,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.111111,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.111111,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.111111,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.111111,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.111111,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.111111,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,M4R,Central Toronto,North Toronto West,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.05,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.05,0.0,0.0,0.0,0.1,0.0,0.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.05,0.0,0.05,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.05,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.05,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.05,0.05,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.05,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.05,0.05,0.0,0.0,0.0,0.05,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.05,0.0,0.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.05
7,M4S,Central Toronto,Davisville,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.028571,0.0,0.0,0.0,0.028571,0.0,0.0,0.0,0.0,0.0,0.0,0.057143,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.057143,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.028571,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.085714,0.0,0.028571,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.028571,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.028571,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.028571,0.028571,0.0,0.028571,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.028571,0.0,0.0,0.057143,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.028571,0.0,0.0,0.028571,0.085714,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.057143,0.0,0.0,0.0,0.0,0.085714,0.0,0.0,0.028571,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.057143,0.0,0.0,0.0,0.0,0.0,0.0,0.057143,0.0,0.0,0.0,0.028571,0.0,0.0,0.0,0.0,0.0,0.0,0.0
8,M4T,Central Toronto,"Moore Park,Summerhill East",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,M4V,Central Toronto,"Deer Park,Forest Hill SE,Rathnelly,South Hill,Summerhill West",0.0,0.0,0.0,0.0,0.0,0.0,0.066667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.066667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.133333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.066667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.066667,0.066667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.066667,0.0,0.0,0.0,0.0,0.0,0.0,0.133333,0.0,0.0,0.0,0.0,0.066667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.066667,0.0,0.0,0.0,0.066667,0.0,0.066667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.066667,0.0,0.0


## Top 10 most Common Venue Categories in each Neighborhood

In [34]:
def return_most_common_venue_category(row, number_of_top_venue):
    row_categories = row.iloc[3:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    return row_categories_sorted.index.values[0:number_of_top_venue]

In [35]:
# To return top 10 venues
number_of_top_venue = 10

# number_suffix for 1"st", 2"nd", 3"rd" 
number_suffix = ["st", "nd", "rd"]

# create columns according to number of top venues
columns1 = ["Borough","Neighborhood"]
columns2 = []
for num in np.arange(number_of_top_venue):
    try:
        columns2.append("{}{} Most Common Venue".format((num+1), number_suffix[num]))
    except:
        columns2.append("{}th Most Common Venue".format(num+1))
        
# Create a new dataframe to hold most common venues
columns = columns1 + columns2
df_common_venue_category = pd.DataFrame(columns=columns)
# Drop "Borough" as we have already include the columns names, do the same for "Neighborhood" column also
df_common_venue_category.drop(["Borough"], axis=1, inplace=True)
df_common_venue_category.insert(loc=0, column="Borough", value=toronto_venue_category_freq["Borough"])
#df_common_venue_category["Borough"] = toronto_venue_category_freq["Borough"]
df_common_venue_category.drop(["Neighborhood"], axis=1, inplace=True)
df_common_venue_category.insert(loc=1, column="Neighborhood", value=toronto_venue_category_freq["Neighborhood"])
#df_common_venue_category["Neighborbood"] = toronto_venue_category_freq["Neighborhood"]

for num in np.arange(toronto_venue_category_freq.shape[0]):
    df_common_venue_category.iloc[num, 2:] = return_most_common_venue_category(toronto_venue_category_freq.iloc[num, 3:], number_of_top_venue)
    
#df_common_venue_category.sort_values(columns2, inplace=True)
df_common_venue_category

Unnamed: 0,Borough,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,East Toronto,The Beaches,Health Food Store,Trail,Coffee Shop,Pub,Yoga Studio,Dessert Shop,Event Space,Ethiopian Restaurant,Electronics Store,Eastern European Restaurant
1,East Toronto,"The Danforth West,Riverdale",Greek Restaurant,Coffee Shop,Italian Restaurant,Ice Cream Shop,Furniture / Home Store,Bubble Tea Shop,Sports Bar,Spa,Bookstore,Juice Bar
2,East Toronto,"The Beaches West,India Bazaar",Pizza Place,Ice Cream Shop,Pet Store,Pub,Movie Theater,Sandwich Place,Burrito Place,Burger Joint,Brewery,Fast Food Restaurant
3,East Toronto,Studio District,Café,Coffee Shop,Bakery,American Restaurant,Italian Restaurant,Clothing Store,Stationery Store,Middle Eastern Restaurant,Bookstore,Fish Market
4,Central Toronto,Lawrence Park,Dim Sum Restaurant,Park,Bus Line,Swim School,Yoga Studio,Event Space,Ethiopian Restaurant,Electronics Store,Eastern European Restaurant,Dumpling Restaurant
5,Central Toronto,Davisville North,Breakfast Spot,Clothing Store,Dog Run,Food & Drink Shop,Dance Studio,Hotel,Park,Gym,Sandwich Place,Ethiopian Restaurant
6,Central Toronto,North Toronto West,Clothing Store,Coffee Shop,Sporting Goods Shop,Yoga Studio,Restaurant,Rental Car Location,Diner,Chinese Restaurant,Salon / Barbershop,Mexican Restaurant
7,Central Toronto,Davisville,Dessert Shop,Pizza Place,Sandwich Place,Sushi Restaurant,Coffee Shop,Café,Restaurant,Italian Restaurant,Thai Restaurant,Seafood Restaurant
8,Central Toronto,"Moore Park,Summerhill East",Playground,Restaurant,Park,Gym,Gluten-free Restaurant,Deli / Bodega,Electronics Store,Eastern European Restaurant,Dumpling Restaurant,Dog Run
9,Central Toronto,"Deer Park,Forest Hill SE,Rathnelly,South Hill,Summerhill West",Pub,Coffee Shop,Pizza Place,Light Rail Station,Sports Bar,Restaurant,Supermarket,Sushi Restaurant,Bagel Shop,Fried Chicken Joint


## Clustering Neighbors using KMeans Algorithm

Let's run Kmeans to cluster the neighborhood into 3 clusters

In [36]:
# Import Libraries
# Builtin colormap module
import matplotlib.cm as cm
# This module includes functions and classes for color specification conversions, 
# and for mapping numbers to colors in a 1-D array of colors called a colormap.
import matplotlib.colors as colors
# import k-means from clustering stage
from sklearn.cluster import KMeans

In [37]:
# Specify number of clusters
k_clusters = 3

# Create a Dataframe with only venue categories
df_toronto_clustering = toronto_venue_category_freq.drop(["PostalCode","Borough","Neighborhood"], axis=1)

# Create a model and fit the data to generate clusters
k_means_clustering = KMeans(n_clusters=k_clusters, random_state=0).fit(df_toronto_clustering)

# Create a new dataframe by assigning df_toronto_boroughs (df that contains East, Central, Downtown and Wes Toronto)
# that we have created earlier
df_toronto_clustered = df_toronto_boroughs

# Add clustering labels
df_toronto_clustered.insert(loc=0, column="Cluster Label", value=k_means_clustering.labels_)

# Let's merge our "df_toronto_boroughs" dataframe with the "df_common_venue_category" dataframe so that we have the
# venue categories along with the each borough and neighborhood
df_toronto_clustered = df_toronto_clustered.join(df_common_venue_category.drop(["Neighborhood"], axis=1).set_index("Borough"), on="Borough")
#df_toronto_clustered.set_index("Cluster Label")
df_toronto_clustered

Unnamed: 0,Cluster Label,Borough,PostalCode,Neighborhood,Latitude,Longitude,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,1,East Toronto,M4E,The Beaches,43.676357,-79.293031,Health Food Store,Trail,Coffee Shop,Pub,Yoga Studio,Dessert Shop,Event Space,Ethiopian Restaurant,Electronics Store,Eastern European Restaurant
0,1,East Toronto,M4E,The Beaches,43.676357,-79.293031,Greek Restaurant,Coffee Shop,Italian Restaurant,Ice Cream Shop,Furniture / Home Store,Bubble Tea Shop,Sports Bar,Spa,Bookstore,Juice Bar
0,1,East Toronto,M4E,The Beaches,43.676357,-79.293031,Pizza Place,Ice Cream Shop,Pet Store,Pub,Movie Theater,Sandwich Place,Burrito Place,Burger Joint,Brewery,Fast Food Restaurant
0,1,East Toronto,M4E,The Beaches,43.676357,-79.293031,Café,Coffee Shop,Bakery,American Restaurant,Italian Restaurant,Clothing Store,Stationery Store,Middle Eastern Restaurant,Bookstore,Fish Market
0,1,East Toronto,M4E,The Beaches,43.676357,-79.293031,Light Rail Station,Yoga Studio,Spa,Gym / Fitness Center,Garden Center,Garden,Fast Food Restaurant,Farmers Market,Comic Shop,Park
1,1,East Toronto,M4K,"The Danforth West,Riverdale",43.679557,-79.352188,Health Food Store,Trail,Coffee Shop,Pub,Yoga Studio,Dessert Shop,Event Space,Ethiopian Restaurant,Electronics Store,Eastern European Restaurant
1,1,East Toronto,M4K,"The Danforth West,Riverdale",43.679557,-79.352188,Greek Restaurant,Coffee Shop,Italian Restaurant,Ice Cream Shop,Furniture / Home Store,Bubble Tea Shop,Sports Bar,Spa,Bookstore,Juice Bar
1,1,East Toronto,M4K,"The Danforth West,Riverdale",43.679557,-79.352188,Pizza Place,Ice Cream Shop,Pet Store,Pub,Movie Theater,Sandwich Place,Burrito Place,Burger Joint,Brewery,Fast Food Restaurant
1,1,East Toronto,M4K,"The Danforth West,Riverdale",43.679557,-79.352188,Café,Coffee Shop,Bakery,American Restaurant,Italian Restaurant,Clothing Store,Stationery Store,Middle Eastern Restaurant,Bookstore,Fish Market
1,1,East Toronto,M4K,"The Danforth West,Riverdale",43.679557,-79.352188,Light Rail Station,Yoga Studio,Spa,Gym / Fitness Center,Garden Center,Garden,Fast Food Restaurant,Farmers Market,Comic Shop,Park


## Visualize the Clusters

In [38]:
# Create a map with Toronto location coordinates
map_toronto_clusters = folium.Map(location = [lati_toronto, long_toronto], zoom_start=12)

# Set color scheme for the clusters
x = np.arange(k_clusters)
ys = [i + x + (i*x)**2 for i in range(k_clusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

for lat_c, lng_c, postcode_c, borough_c, neighborhood_c, cluster in zip(df_toronto_clustered["Latitude"], df_toronto_clustered["Longitude"], df_toronto_clustered["PostalCode"], df_toronto_clustered["Borough"], df_toronto_clustered["Neighborhood"], df_toronto_clustered["Cluster Label"]):
    label = "{}, {}, ({}) - Cluster {}".format(neighborhood_c, borough_c, postcode_c, cluster)
    label = folium.Popup(label, parse_html=True)
    folium.Marker(
    [lat_c, lng_c],
    radius=5,
    popup=label,
    color=rainbow[cluster-1],
    fill=True,
    fill_color=rainbow[cluster-1],
    fill_opacity=0.7).add_to(map_toronto_clusters)

map_toronto_clusters

## Number of areas in each cluster

In [39]:
cluster_count = df_toronto_clustered["Cluster Label"].value_counts()
#print(cluster_count)
print("Areas in Cluster 0: ",cluster_count[0])
print("Areas in Cluster 1: ",cluster_count[1])
print("Areas in Cluster 2: ",cluster_count[2])

Areas in Cluster 0:  45
Areas in Cluster 1:  412
Areas in Cluster 2:  9
