# Finding the ideal neighborhood for Apartments20

This notebook uses K-means clustering to identify neighborhoods in Toronto where a fictitious company, Apartments20, could build its new apartment building.

In [3]:
#import necessary libraries
from io import StringIO

import numpy as np
import pandas as pd
import requests

In [4]:
#Scrape the data from the wikipedia URL to a Pandas Dataframe

# Permalink to a fixed revision (oldid) of the page, so the table cannot change between runs.
wiki_url = 'https://en.wikipedia.org/w/index.php?title=List_of_postal_codes_of_Canada:_M&oldid=945633050'
response = requests.get(wiki_url)
response.raise_for_status()  # stop on an HTTP error instead of trying to parse an error page

# read_html returns a list with every table found in the markup; the table we want is the first.
# The markup is wrapped in StringIO because passing literal HTML strings to read_html is deprecated.
df = pd.read_html(StringIO(response.text))[0]
df.head()

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront


In [5]:
#Keep only the rows where both Borough and Neighbourhood have been assigned
has_borough = df.Borough != 'Not assigned'
has_neighbourhood = df.Neighbourhood != 'Not assigned'
df = df[has_borough & has_neighbourhood]
df.head()

Unnamed: 0,Postcode,Borough,Neighbourhood
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront
5,M6A,North York,Lawrence Heights
6,M6A,North York,Lawrence Manor


In [6]:
df.shape

(210, 3)

In [7]:
#Collapse neighborhoods that share a postal code into a single row.
#This is necessary because the geographical coordinates are keyed by postal code, not by neighborhood.
#Grouping by the Series (rather than the column name) keeps 'Postcode' available as a column to aggregate.
postcode_groups = df.groupby(df['Postcode'])
df_agg = postcode_groups.agg({
    'Postcode': 'first',
    'Borough': 'first',
    'Neighbourhood': ', '.join,  # comma-separated list of the neighbourhoods in the group
})

In [8]:
df_agg.head()

Unnamed: 0_level_0,Postcode,Borough,Neighbourhood
Postcode,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
M1B,M1B,Scarborough,"Rouge, Malvern"
M1C,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union"
M1E,M1E,Scarborough,"Guildwood, Morningside, West Hill"
M1G,M1G,Scarborough,Woburn
M1H,M1H,Scarborough,Cedarbrae


In [9]:
#Checking to make sure we don't have any null values
df_agg.isnull().values.any()

False

In [10]:
#Checking shape of our dataframe
df_agg.shape

(103, 3)

# Adding Coordinates to our Dataframe

In [11]:
#Reading coordinates data from csv into a pandas dataframe
df_coords = pd.read_csv('http://cocl.us/Geospatial_data')

In [12]:
df_coords.head()

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


In [13]:
df_coords.shape

(103, 3)

In [14]:
#Rename 'Postcode' in df_agg to 'Postal Code' so it matches df_coords,
#then join the two frames on that shared key (inner join, the pandas default).
df_agg = df_agg.rename(columns={'Postcode': 'Postal Code'})
df_merge = df_agg.merge(df_coords, on='Postal Code')

In [15]:
df_merge.head()

Unnamed: 0,Postal Code,Borough,Neighbourhood,Latitude,Longitude
0,M1B,Scarborough,"Rouge, Malvern",43.806686,-79.194353
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476


In [16]:
df_merge.shape

(103, 5)

# Pulling venue data from Foursquare

In [2]:
# The code was removed by Watson Studio for sharing.

In [18]:
#Function to return the venues within a specified radius, given a list of postcodes/neighborhoods and their coordinates.
#The radius defaults to 1,000 meters; pass radius=1609 to search within roughly 1 mile.
#Returns a dataframe with one row per venue returned for each location.
#Columns of the dataframe are: Neighborhood, Neighborhood Latitude, Neighborhood Longitude,
#Venue, Venue Latitude, Venue Longitude, Venue Category

def getNearbyVenues(names, latitudes, longitudes, radius=1000):
    """Query the Foursquare "explore" endpoint once per location and collect the venues.

    Parameters
    ----------
    names : iterable
        Label for each location (the notebook passes postal codes).
    latitudes, longitudes : iterable
        Coordinates of each location, in the same order as ``names``.
    radius : int, default 1000
        Search radius sent to the API as the ``radius`` parameter.

    Returns
    -------
    pandas.DataFrame
        One row per venue with the columns 'Neighborhood', 'Neighborhood Latitude',
        'Neighborhood Longitude', 'Venue', 'Venue Latitude', 'Venue Longitude' and
        'Venue Category'. The frame is empty (but still has these columns) when no
        locations are given or no venues are returned.

    Raises
    ------
    requests.HTTPError
        If the API answers with an error status.

    Notes
    -----
    Reads CLIENT_ID, CLIENT_SECRET, VERSION and LIMIT from notebook-level names,
    which are expected to be defined by an earlier cell (not visible here).
    """
    columns = ['Neighborhood',
               'Neighborhood Latitude',
               'Neighborhood Longitude',
               'Venue',
               'Venue Latitude',
               'Venue Longitude',
               'Venue Category']

    venue_rows = []
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)

        # make the GET request; requests builds and URL-encodes the query string
        response = requests.get(
            'https://api.foursquare.com/v2/venues/explore',
            params={
                'client_id': CLIENT_ID,
                'client_secret': CLIENT_SECRET,
                'v': VERSION,
                'll': '{},{}'.format(lat, lng),
                'radius': radius,
                'limit': LIMIT,
            },
        )
        response.raise_for_status()
        results = response.json()["response"]['groups'][0]['items']

        # keep only the relevant information for each nearby venue
        for item in results:
            venue = item['venue']
            # a venue may come back without any category; record None instead of raising
            categories = venue.get('categories') or []
            venue_rows.append((
                name,
                lat,
                lng,
                venue['name'],
                venue['location']['lat'],
                venue['location']['lng'],
                categories[0]['name'] if categories else None))

    # passing the column names to the constructor also works when no rows were collected
    return pd.DataFrame(venue_rows, columns=columns)

In [19]:
toronto_venues = getNearbyVenues(names=df_merge['Postal Code'],
                                   latitudes=df_merge['Latitude'],
                                   longitudes=df_merge['Longitude']
                                  )

M1B
M1C
M1E
M1G
M1H
M1J
M1K
M1L
M1M
M1N
M1P
M1R
M1S
M1T
M1V
M1W
M1X
M2H
M2J
M2K
M2L
M2M
M2N
M2P
M2R
M3A
M3B
M3C
M3H
M3J
M3K
M3L
M3M
M3N
M4A
M4B
M4C
M4E
M4G
M4H
M4J
M4K
M4L
M4M
M4N
M4P
M4R
M4S
M4T
M4V
M4W
M4X
M4Y
M5A
M5B
M5C
M5E
M5G
M5H
M5J
M5K
M5L
M5M
M5N
M5P
M5R
M5S
M5T
M5V
M5W
M5X
M6A
M6B
M6C
M6E
M6G
M6H
M6J
M6K
M6L
M6M
M6N
M6P
M6R
M6S
M7A
M7R
M7Y
M8V
M8W
M8X
M8Y
M8Z
M9A
M9B
M9C
M9L
M9M
M9N
M9P
M9R
M9V
M9W


In [20]:
print(toronto_venues.shape)
toronto_venues.head()

(4930, 7)


Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,M1B,43.806686,-79.194353,Wendy's,43.802008,-79.19808,Fast Food Restaurant
1,M1B,43.806686,-79.194353,Wendy’s,43.807448,-79.199056,Fast Food Restaurant
2,M1B,43.806686,-79.194353,Caribbean Wave,43.798558,-79.195777,Caribbean Restaurant
3,M1B,43.806686,-79.194353,Harvey's,43.80002,-79.198307,Restaurant
4,M1B,43.806686,-79.194353,Staples Morningside,43.800285,-79.196607,Paper / Office Supplies Store


# Data Prep for K-Means clustering

We will use One-Hot encoding to convert the categorical variable "Venue Category" into binary variables

In [92]:
#one hot encoding: one indicator column per venue category, one row per venue
venue_categories = toronto_venues[['Venue Category']]

# the empty prefix keeps the plain category names as column names;
# the postal code (held in the 'Neighborhood' column of toronto_venues) is appended as the last column
toronto_onehot = pd.get_dummies(venue_categories, prefix="", prefix_sep="").assign(
    **{'Postal Code': toronto_venues['Neighborhood']}
)

toronto_onehot.head()

Unnamed: 0,Accessories Store,Afghan Restaurant,Airport,Airport Lounge,American Restaurant,Amphitheater,Animal Shelter,Antique Shop,Aquarium,Art Gallery,...,Vietnamese Restaurant,Warehouse Store,Whisky Bar,Wine Bar,Wine Shop,Wings Joint,Women's Store,Yoga Studio,Zoo,Postal Code
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,M1B
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,M1B
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,M1B
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,M1B
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,M1B


In [93]:
toronto_onehot.shape

(4916, 330)

In [94]:
#Move Neighborhood/Postal Code to the front of the dataframe
# build the new column order: 'Postal Code' first, then every other column in its current order
ordered_columns = ['Postal Code'] + [col for col in toronto_onehot.columns if col != 'Postal Code']
toronto_onehot = toronto_onehot[ordered_columns]
toronto_onehot.head()

Unnamed: 0,Postal Code,Accessories Store,Afghan Restaurant,Airport,Airport Lounge,American Restaurant,Amphitheater,Animal Shelter,Antique Shop,Aquarium,...,Video Store,Vietnamese Restaurant,Warehouse Store,Whisky Bar,Wine Bar,Wine Shop,Wings Joint,Women's Store,Yoga Studio,Zoo
0,M1B,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,M1B,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,M1B,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,M1B,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,M1B,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [95]:
# Group the one-hot rows by postal code and average each indicator column.
# The mean of a 0/1 column is the share of that postal code's venues in the category,
# i.e. how prevalent each venue type is in each "neighborhood".
toronto_grouped = toronto_onehot.groupby('Postal Code', as_index=False).mean()
toronto_grouped.head()

Unnamed: 0,Postal Code,Accessories Store,Afghan Restaurant,Airport,Airport Lounge,American Restaurant,Amphitheater,Animal Shelter,Antique Shop,Aquarium,...,Video Store,Vietnamese Restaurant,Warehouse Store,Whisky Bar,Wine Bar,Wine Shop,Wings Joint,Women's Store,Yoga Studio,Zoo
0,M1B,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,M1C,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,M1E,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,M1G,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,M1H,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.034483,0.0,0.034483,0.0


In [96]:
#function to rank the venue categories of one row from most to least prevalent
def return_most_common_venues(row, num_top_venues):
    """Return the labels of the ``num_top_venues`` largest entries of ``row``.

    The first element of ``row`` is skipped (the notebook stores the postal code
    there); the remaining values are sorted in descending order and the index
    labels of the leading entries are returned as a NumPy array.

    Entries are taken purely by rank, so categories with a frequency of zero are
    included when the row has fewer non-zero categories than ``num_top_venues``.
    """
    return (
        row.iloc[1:]
        .sort_values(ascending=False)
        .head(num_top_venues)
        .index.values
    )

In [108]:
#create a dataframe to display the top 10 venues of each neighborhood
num_top_venues = 10

# ordinal suffixes for ranks 1-3; every later rank falls back to 'th'
indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Postal Code']
for ind in range(num_top_venues):
    # explicit bounds check instead of a bare except, so unrelated errors are not swallowed
    suffix = indicators[ind] if ind < len(indicators) else 'th'
    columns.append('{}{} Most Common Venue'.format(ind + 1, suffix))

# create a new dataframe with one row per postal code
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Postal Code'] = toronto_grouped['Postal Code']

# fill every column after 'Postal Code' with that row's most common venue categories
for ind in range(toronto_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(toronto_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted.head()

Unnamed: 0,Postal Code,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M1B,Fast Food Restaurant,Trail,Coffee Shop,Bank,Restaurant,Chinese Restaurant,Bakery,Paper / Office Supplies Store,Caribbean Restaurant,Greek Restaurant
1,M1C,Italian Restaurant,Breakfast Spot,Burger Joint,Park,Playground,Zoo,Electronics Store,Elementary School,Empanada Restaurant,Ethiopian Restaurant
2,M1E,Pizza Place,Fast Food Restaurant,Bank,Coffee Shop,Burger Joint,Greek Restaurant,Liquor Store,Sandwich Place,Supermarket,Juice Bar
3,M1G,Park,Coffee Shop,Mobile Phone Shop,Indian Restaurant,Fast Food Restaurant,Pharmacy,Chinese Restaurant,Ethiopian Restaurant,Dumpling Restaurant,Eastern European Restaurant
4,M1H,Coffee Shop,Bakery,Gas Station,Bank,Indian Restaurant,Athletics & Sports,Chinese Restaurant,Thai Restaurant,Fried Chicken Joint,Grocery Store


K-means will be run on the grouped frequency table (`toronto_grouped`); the top-venues dataframe above will then be used to interpret the resulting clusters.

# K Means Clustering

In [109]:
from sklearn.cluster import KMeans
kclusters = 8

# K-means needs numeric features only, so drop the postal code label.
# `columns=` is used because the positional axis argument of drop() was removed in pandas 2.0.
toronto_grouped_clustering = toronto_grouped.drop(columns='Postal Code')

# run k-means clustering
# n_init is pinned to 10 (the long-standing default) so results do not shift with newer
# scikit-learn releases, which changed the default; random_state makes the run repeatable.
kmeans = KMeans(n_clusters=kclusters, n_init=10, random_state=0).fit(toronto_grouped_clustering)

# check cluster labels generated for the first rows of the dataframe
kmeans.labels_[0:10]

array([5, 7, 0, 0, 5, 0, 5, 0, 0, 3], dtype=int32)

In [111]:
# add clustering labels to our sorted venues dataframe
# DataFrame.insert raises if the column already exists, so drop any previous copy first;
# this keeps the cell safe to re-run after k-means has been fitted again.
if 'Cluster Labels' in neighborhoods_venues_sorted.columns:
    neighborhoods_venues_sorted = neighborhoods_venues_sorted.drop(columns='Cluster Labels')
neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

In [112]:

#neighborhoods_venues_sorted = neighborhoods_venues_sorted.groupby('Cluster Labels', axis = 1)
neighborhoods_venues_sorted.head(10)

Unnamed: 0,Cluster Labels,Postal Code,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,5,M1B,Fast Food Restaurant,Trail,Coffee Shop,Bank,Restaurant,Chinese Restaurant,Bakery,Paper / Office Supplies Store,Caribbean Restaurant,Greek Restaurant
1,7,M1C,Italian Restaurant,Breakfast Spot,Burger Joint,Park,Playground,Zoo,Electronics Store,Elementary School,Empanada Restaurant,Ethiopian Restaurant
2,0,M1E,Pizza Place,Fast Food Restaurant,Bank,Coffee Shop,Burger Joint,Greek Restaurant,Liquor Store,Sandwich Place,Supermarket,Juice Bar
3,0,M1G,Park,Coffee Shop,Mobile Phone Shop,Indian Restaurant,Fast Food Restaurant,Pharmacy,Chinese Restaurant,Ethiopian Restaurant,Dumpling Restaurant,Eastern European Restaurant
4,5,M1H,Coffee Shop,Bakery,Gas Station,Bank,Indian Restaurant,Athletics & Sports,Chinese Restaurant,Thai Restaurant,Fried Chicken Joint,Grocery Store
5,0,M1J,Ice Cream Shop,Convenience Store,Coffee Shop,Sandwich Place,Fast Food Restaurant,Pizza Place,Bowling Alley,Restaurant,Grocery Store,Train Station
6,5,M1K,Chinese Restaurant,Coffee Shop,Fast Food Restaurant,Discount Store,Grocery Store,Bank,Asian Restaurant,Light Rail Station,Sandwich Place,Pharmacy
7,0,M1L,Intersection,Coffee Shop,Bus Line,Convenience Store,Bakery,Park,Mexican Restaurant,Fast Food Restaurant,Sandwich Place,Beer Store
8,0,M1M,Pizza Place,Ice Cream Shop,Beach,Sports Bar,Cajun / Creole Restaurant,Burger Joint,Park,Hardware Store,Electronics Store,Elementary School
9,3,M1N,Park,Restaurant,Café,Skating Rink,Thai Restaurant,General Entertainment,Diner,Dessert Shop,Gym,Photography Studio


In [59]:
#neighborhoods_venues_sorted['Cluster Labels'] = neighborhoods_venues_sorted['Cluster Labels'].astype('int')

pandas.core.frame.DataFrame

Now let's review the characteristics of each cluster to see which one most closely aligns with what Apartments20 is looking for.

In [113]:
cluster_0 = neighborhoods_venues_sorted[neighborhoods_venues_sorted['Cluster Labels'] == 0]
cluster_0.head()

Unnamed: 0,Cluster Labels,Postal Code,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
2,0,M1E,Pizza Place,Fast Food Restaurant,Bank,Coffee Shop,Burger Joint,Greek Restaurant,Liquor Store,Sandwich Place,Supermarket,Juice Bar
3,0,M1G,Park,Coffee Shop,Mobile Phone Shop,Indian Restaurant,Fast Food Restaurant,Pharmacy,Chinese Restaurant,Ethiopian Restaurant,Dumpling Restaurant,Eastern European Restaurant
5,0,M1J,Ice Cream Shop,Convenience Store,Coffee Shop,Sandwich Place,Fast Food Restaurant,Pizza Place,Bowling Alley,Restaurant,Grocery Store,Train Station
7,0,M1L,Intersection,Coffee Shop,Bus Line,Convenience Store,Bakery,Park,Mexican Restaurant,Fast Food Restaurant,Sandwich Place,Beer Store
8,0,M1M,Pizza Place,Ice Cream Shop,Beach,Sports Bar,Cajun / Creole Restaurant,Burger Joint,Park,Hardware Store,Electronics Store,Elementary School


In [114]:
cluster_1 = neighborhoods_venues_sorted[neighborhoods_venues_sorted['Cluster Labels'] == 1]
cluster_1.head(10)

Unnamed: 0,Cluster Labels,Postal Code,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
19,1,M2L,Park,Pool,Farm,Dumpling Restaurant,Eastern European Restaurant,Electronics Store,Elementary School,Empanada Restaurant,Ethiopian Restaurant,Event Space


In [115]:
cluster_2 = neighborhoods_venues_sorted[neighborhoods_venues_sorted['Cluster Labels'] == 2]
cluster_2.head()

Unnamed: 0,Cluster Labels,Postal Code,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
31,2,M3M,Vietnamese Restaurant,Baseball Field,Restaurant,Zoo,Farm,Eastern European Restaurant,Electronics Store,Elementary School,Empanada Restaurant,Ethiopian Restaurant


In [116]:
cluster_3 = neighborhoods_venues_sorted[neighborhoods_venues_sorted['Cluster Labels'] == 3]
cluster_3.head(10)

Unnamed: 0,Cluster Labels,Postal Code,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
9,3,M1N,Park,Restaurant,Café,Skating Rink,Thai Restaurant,General Entertainment,Diner,Dessert Shop,Gym,Photography Studio
29,3,M3K,Turkish Restaurant,Coffee Shop,Other Repair Shop,Sandwich Place,Chinese Restaurant,Liquor Store,Electronics Store,Italian Restaurant,Park,Gym
36,3,M4E,Pub,Coffee Shop,Beach,Pizza Place,Japanese Restaurant,Breakfast Spot,Caribbean Restaurant,Bar,Tea Room,Bakery
39,3,M4J,Coffee Shop,Café,Greek Restaurant,Pizza Place,Convenience Store,Park,Bar,Beer Bar,Fast Food Restaurant,Ethiopian Restaurant
40,3,M4K,Greek Restaurant,Coffee Shop,Café,Pub,Italian Restaurant,Pizza Place,Fast Food Restaurant,Furniture / Home Store,Ramen Restaurant,Bookstore
41,3,M4L,Indian Restaurant,Coffee Shop,Beach,Grocery Store,Café,Brewery,Park,Burrito Place,Bakery,Harbor / Marina
42,3,M4M,Coffee Shop,Bar,Café,Diner,Vietnamese Restaurant,Bakery,Brewery,American Restaurant,Italian Restaurant,French Restaurant
43,3,M4N,Park,Bookstore,Trail,Gym / Fitness Center,Coffee Shop,College Gym,Café,College Quad,Fast Food Restaurant,Farmers Market
44,3,M4P,Coffee Shop,Italian Restaurant,Dessert Shop,Café,Gym,Pizza Place,Pharmacy,Sushi Restaurant,Supermarket,Restaurant
45,3,M4R,Coffee Shop,Italian Restaurant,Skating Rink,Diner,Mexican Restaurant,Café,Park,Sushi Restaurant,Bakery,Jazz Club


In [117]:
cluster_4 = neighborhoods_venues_sorted[neighborhoods_venues_sorted['Cluster Labels'] == 4]
cluster_4.head()

Unnamed: 0,Cluster Labels,Postal Code,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
101,4,M9W,Hotel,Moving Target,Coffee Shop,Farm,Eastern European Restaurant,Electronics Store,Elementary School,Empanada Restaurant,Ethiopian Restaurant,Event Space


In [118]:
cluster_5 = neighborhoods_venues_sorted[neighborhoods_venues_sorted['Cluster Labels'] == 5]
cluster_5.head()

Unnamed: 0,Cluster Labels,Postal Code,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,5,M1B,Fast Food Restaurant,Trail,Coffee Shop,Bank,Restaurant,Chinese Restaurant,Bakery,Paper / Office Supplies Store,Caribbean Restaurant,Greek Restaurant
4,5,M1H,Coffee Shop,Bakery,Gas Station,Bank,Indian Restaurant,Athletics & Sports,Chinese Restaurant,Thai Restaurant,Fried Chicken Joint,Grocery Store
6,5,M1K,Chinese Restaurant,Coffee Shop,Fast Food Restaurant,Discount Store,Grocery Store,Bank,Asian Restaurant,Light Rail Station,Sandwich Place,Pharmacy
10,5,M1P,Restaurant,Coffee Shop,Pharmacy,Electronics Store,Chinese Restaurant,Furniture / Home Store,Fast Food Restaurant,Bakery,Asian Restaurant,Indian Restaurant
11,5,M1R,Pizza Place,Middle Eastern Restaurant,Intersection,Grocery Store,Burger Joint,Furniture / Home Store,Restaurant,Bar,Coffee Shop,Korean Restaurant


In [119]:
cluster_6 = neighborhoods_venues_sorted[neighborhoods_venues_sorted['Cluster Labels'] == 6]
cluster_6.head()

Unnamed: 0,Cluster Labels,Postal Code,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
90,6,M8Y,Park,Italian Restaurant,Eastern European Restaurant,Shopping Mall,Ice Cream Shop,Event Space,Dumpling Restaurant,Electronics Store,Elementary School,Empanada Restaurant


In [121]:
cluster_7 = neighborhoods_venues_sorted[neighborhoods_venues_sorted['Cluster Labels'] == 7]
cluster_7.head()

Unnamed: 0,Cluster Labels,Postal Code,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
1,7,M1C,Italian Restaurant,Breakfast Spot,Burger Joint,Park,Playground,Zoo,Electronics Store,Elementary School,Empanada Restaurant,Ethiopian Restaurant


In [None]:
# Attach the cluster label and top venues of each postal code to its coordinates (df_merge)
# so that the clusters can be displayed on a map.
# Left join: every row of df_merge is kept; a postal code that is missing from
# neighborhoods_venues_sorted gets NaN in the cluster and venue columns.
map_merge = df_merge.merge(neighborhoods_venues_sorted, how='left', on='Postal Code')


map_merge.head()

In [122]:
#Installing Folium for Map
!conda install -c conda-forge folium=0.5.0 --yes

Solving environment: done

## Package Plan ##

  environment location: /opt/conda/envs/Python36

  added / updated specs: 
    - folium=0.5.0


The following packages will be downloaded:

    package                    |            build
    ---------------------------|-----------------
    vincent-0.4.4              |             py_1          28 KB  conda-forge
    folium-0.5.0               |             py_0          45 KB  conda-forge
    ca-certificates-2019.11.28 |       hecc5488_0         145 KB  conda-forge
    certifi-2019.11.28         |   py36h9f0ad1d_1         149 KB  conda-forge
    openssl-1.1.1f             |       h516909a_0         2.1 MB  conda-forge
    altair-4.1.0               |             py_1         614 KB  conda-forge
    branca-0.4.0               |             py_0          26 KB  conda-forge
    python_abi-3.6             |          1_cp36m           4 KB  conda-forge
    ------------------------------------------------------------
                       

In [27]:
import folium
import matplotlib.cm as cm
import matplotlib.colors as colors

# coordinates the map is initially centred on
latitude = 43.6532
longitude = -79.3832

# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters: one hex colour per cluster,
# evenly spaced along matplotlib's rainbow colormap
colors_array = cm.rainbow(np.linspace(0, 1, kclusters))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
for lat, lon, poi, cluster in zip(map_merge['Latitude'], map_merge['Longitude'], map_merge['Postal Code'], map_merge['Cluster Labels']):
    # rows without a cluster label (NaN) cannot be coloured, so they are not drawn
    if pd.isna(cluster):
        continue

    # the label column may be stored as float when it contains NaN; use the integer label
    # both for the popup text and as the index into the colour list
    cluster = int(cluster)
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster],
        fill=True,
        fill_color=rainbow[cluster],
        fill_opacity=0.7).add_to(map_clusters)

map_clusters

ModuleNotFoundError: No module named 'folium'

In [None]:
#The next thing I have to do is sort the clusters themselves by Most common venues and then use that to recommend where Apartments20 should build the apartment building
