In [143]:
import pandas as pd
import numpy as np
import folium
import requests
from pandas.io.json import json_normalize
import numpy as np
from sklearn.cluster import KMeans
import matplotlib.cm as cm
import matplotlib.colors as colors

# Postal Code

In [2]:
url = 'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'
df = pd.read_html(url, header = 0)[0]

In [3]:
df.columns = ['PostalCode', 'Borough', 'Neighborhood']

In [4]:
#process the cells that have an assigned borough. Ignore cells with a borough that is Not assigned
df = df[df['Borough'] != 'Not assigned']

In [5]:
#combine into one row with the neighborhoods separated with a comma
post_neigh = df.groupby('PostalCode')['Neighborhood'].agg([('Neighborhood', ', '.join)])

In [6]:
df = df.drop('Neighborhood', axis = 1)

In [7]:
df = pd.merge(df, post_neigh, on = 'PostalCode', how = 'outer')

In [8]:
df = df.drop_duplicates()

In [9]:
#If a cell has a borough but a Not assigned neighborhood, then the neighborhood will be the same as the borough
df['Neighborhood'] = np.where(df['Neighborhood'] == 'Not assigned', df['Borough'], df['Neighborhood'])

# Coordinate Data

In [10]:
#coordinate points for postal codes
coord = pd.read_csv('Geospatial_Coordinates.csv')

In [11]:
coord.rename(columns={'Postal Code':'PostalCode'}, inplace=True)

In [12]:
df = pd.merge(df, coord, on = 'PostalCode', how = 'outer')

In [15]:
df.head()

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,"Harbourfront, Regent Park",43.65426,-79.360636
3,M6A,North York,"Lawrence Heights, Lawrence Manor",43.718518,-79.464763
4,M7A,Queen's Park,Queen's Park,43.662301,-79.389494


In [16]:
print('The dataframe has {} boroughs and {} neighborhoods.'.format(
        len(df['Borough'].unique()),
        df.shape[0]
    )
)

The dataframe has 11 boroughs and 103 neighborhoods.


# Create map

In [17]:
#Toronto coord is 43.6532° N, 79.3832° W
latitude = 43.6532
longitude = -79.3832

In [18]:
# create map of Toronto using latitude and longitude values
map_t = folium.Map(location=[latitude, longitude], zoom_start=10)

In [19]:
# add markers to map
for lat, lng, borough, neighborhood in zip(df['Latitude'], df['Longitude'], df['Borough'], df['Neighborhood']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_t)  

# Foursquare credentials

In [22]:
CLIENT_ID = 'ZI5BDOIOZIM2EUDPX2CZ1SUKG3KEICGZ52V1Y4QLWNM3D0EC' # your Foursquare ID
CLIENT_SECRET = 'MH4NXV4UIH5UYI2YLJLWSMUMOMDADD1H4P0UQLL2ZOVQWHKD' # your Foursquare Secret
VERSION = '20180605' # Foursquare API version

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: ZI5BDOIOZIM2EUDPX2CZ1SUKG3KEICGZ52V1Y4QLWNM3D0EC
CLIENT_SECRET:MH4NXV4UIH5UYI2YLJLWSMUMOMDADD1H4P0UQLL2ZOVQWHKD


# Explore

In [25]:
df.loc[1, 'Neighborhood']

'Victoria Village'

In [27]:
neighborhood_latitude = df.loc[1, 'Latitude'] # neighborhood latitude value
neighborhood_longitude = df.loc[1, 'Longitude'] # neighborhood longitude value

neighborhood_name = df.loc[1, 'Neighborhood'] # neighborhood name

print('Latitude and longitude values of {} are {}, {}.'.format(neighborhood_name, 
                                                               neighborhood_latitude, 
                                                               neighborhood_longitude))

Latitude and longitude values of Victoria Village are 43.725882299999995, -79.31557159999998.


# Explore for all neighborhoods

In [32]:
LIMIT = 100

def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius,
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [33]:
venues = getNearbyVenues(names = df['Neighborhood'],
                                   latitudes = df['Latitude'],
                                   longitudes = df['Longitude']
                        )

In [34]:
venues

Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Parkwoods,43.753259,-79.329656,Brookbanks Park,43.751976,-79.332140,Park
1,Parkwoods,43.753259,-79.329656,KFC,43.754387,-79.333021,Fast Food Restaurant
2,Parkwoods,43.753259,-79.329656,Variety Store,43.751974,-79.333114,Food & Drink Shop
3,Victoria Village,43.725882,-79.315572,Victoria Village Arena,43.723481,-79.315635,Hockey Arena
4,Victoria Village,43.725882,-79.315572,Tim Hortons,43.725517,-79.313103,Coffee Shop
5,Victoria Village,43.725882,-79.315572,Portugril,43.725819,-79.312785,Portuguese Restaurant
6,Victoria Village,43.725882,-79.315572,Eglinton Ave E & Sloane Ave/Bermondsey Rd,43.726086,-79.313620,Intersection
7,Victoria Village,43.725882,-79.315572,Pizza Nova,43.725824,-79.312860,Pizza Place
8,"Harbourfront, Regent Park",43.654260,-79.360636,Roselle Desserts,43.653447,-79.362017,Bakery
9,"Harbourfront, Regent Park",43.654260,-79.360636,Tandem Coffee,43.653559,-79.361809,Coffee Shop


In [36]:
venues.groupby('Neighborhood').count()

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
"Adelaide, King, Richmond",100,100,100,100,100,100
Agincourt,5,5,5,5,5,5
"Agincourt North, L'Amoreaux East, Milliken, Steeles East",3,3,3,3,3,3
"Albion Gardens, Beaumond Heights, Humbergate, Jamestown, Mount Olive, Silverstone, South Steeles, Thistletown",9,9,9,9,9,9
"Alderwood, Long Branch",8,8,8,8,8,8
"Bathurst Manor, Downsview North, Wilson Heights",18,18,18,18,18,18
Bayview Village,4,4,4,4,4,4
"Bedford Park, Lawrence Manor East",22,22,22,22,22,22
Berczy Park,57,57,57,57,57,57
"Birch Cliff, Cliffside West",4,4,4,4,4,4


In [35]:
print(venues.shape)

(2238, 7)


In [37]:
print('There are {} uniques categories.'.format(len(venues['Venue Category'].unique())))

There are 278 uniques categories.


In [38]:
venues['Venue Category'].unique()

array(['Park', 'Fast Food Restaurant', 'Food & Drink Shop',
       'Hockey Arena', 'Coffee Shop', 'Portuguese Restaurant',
       'Intersection', 'Bakery', 'Gym / Fitness Center', 'Spa',
       'Breakfast Spot', 'Restaurant', 'Pub', 'Historic Site',
       'Chocolate Shop', 'Farmers Market', 'Dessert Shop',
       'Performing Arts Venue', 'Mexican Restaurant', 'Café', 'Theater',
       'French Restaurant', 'Italian Restaurant', 'Event Space',
       'Yoga Studio', 'Cosmetics Shop', 'Shoe Store', 'Art Gallery',
       'Brewery', 'Electronics Store', 'Bank', 'Beer Store', 'Hotel',
       'Health Food Store', 'Antique Shop', 'Boutique',
       'Furniture / Home Store', 'Vietnamese Restaurant',
       'Accessories Store', 'Clothing Store', 'Fraternity House',
       "Women's Store", 'Miscellaneous Shop', 'Gym', 'Sushi Restaurant',
       'Creperie', 'Hobby Shop', 'Arts & Crafts Store', 'Burrito Place',
       'Persian Restaurant', 'Diner', 'Japanese Restaurant',
       'Wings Joint', 'Burg

In [39]:
# different possible names for coffee shops and offices
cafe_office_list = ['Coffee Shop', 'Dessert Shop', 'Café', 'Tech Startup', 'Coworking Space', 'Office']

In [47]:
venues = venues[venues['Venue Category'].isin(cafe_office_list)]

In [80]:
venues = venues.replace({'Dessert Shop': 'Coffee Shop', 'Café': 'Coffee Shop', 'Tech Startup': 'Office', 'Coworking Space': 'Office'})

In [148]:
venues[venues['Venue Category'] == 'Office'].count()

Neighborhood              5
Neighborhood Latitude     5
Neighborhood Longitude    5
Venue                     5
Venue Latitude            5
Venue Longitude           5
Venue Category            5
dtype: int64

In [121]:
venues['Venue Category'].unique()

array(['Coffee Shop', 'Office'], dtype=object)

In [124]:
venues[venues['Venue Category'] == 'Office']

Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
382,"Guildwood, Morningside, West Hill",43.763573,-79.188711,chatr Mobile,43.765917,-79.191672,Office
543,Central Bay Street,43.657952,-79.387383,Toronto Vegetarian Association,43.655953,-79.392854,Office
865,"Harbourfront East, Toronto Islands, Union Station",43.640816,-79.381752,TELUS Store,43.643312,-79.380999,Office
1145,"Design Exchange, Toronto Dominion Centre",43.647177,-79.381576,TELUS Store,43.643312,-79.380999,Office
1345,Studio District,43.659526,-79.340923,District 28,43.655174,-79.340598,Office


In [170]:
venues.head()

Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
4,Victoria Village,43.725882,-79.315572,Tim Hortons,43.725517,-79.313103,Coffee Shop
8,"Harbourfront, Regent Park",43.65426,-79.360636,Tandem Coffee,43.653559,-79.361809,Coffee Shop
19,"Harbourfront, Regent Park",43.65426,-79.360636,Starbucks,43.651327,-79.364329,Coffee Shop
20,"Harbourfront, Regent Park",43.65426,-79.360636,Sumach Espresso,43.658135,-79.359515,Coffee Shop
21,"Harbourfront, Regent Park",43.65426,-79.360636,Rooster Coffee,43.6519,-79.365609,Coffee Shop


In [156]:
venues.groupby(['Neighborhood','Venue Category']).count()

Unnamed: 0_level_0,Unnamed: 1_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude
Neighborhood,Venue Category,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
"Adelaide, King, Richmond",Coffee Shop,10,10,10,10,10
"Albion Gardens, Beaumond Heights, Humbergate, Jamestown, Mount Olive, Silverstone, South Steeles, Thistletown",Coffee Shop,1,1,1,1,1
"Alderwood, Long Branch",Coffee Shop,1,1,1,1,1
"Bathurst Manor, Downsview North, Wilson Heights",Coffee Shop,2,2,2,2,2
Bayview Village,Coffee Shop,1,1,1,1,1
"Bedford Park, Lawrence Manor East",Coffee Shop,3,3,3,3,3
Berczy Park,Coffee Shop,7,7,7,7,7
"Birch Cliff, Cliffside West",Coffee Shop,1,1,1,1,1
"Bloordale Gardens, Eringate, Markland Wood, Old Burnhamthorpe",Coffee Shop,1,1,1,1,1
"Brockton, Exhibition Place, Parkdale Village",Coffee Shop,4,4,4,4,4


# Analyze each neighborhood

In [83]:
# one hot encoding
onehot = pd.get_dummies(venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
onehot['Neighborhood'] = venues['Neighborhood'] 

# move neighborhood column to the first column
fixed_columns = [onehot.columns[-1]] + list(onehot.columns[:-1])
onehot = onehot[fixed_columns]

onehot.head()

Unnamed: 0,Neighborhood,Coffee Shop,Office
4,Victoria Village,1,0
8,"Harbourfront, Regent Park",1,0
19,"Harbourfront, Regent Park",1,0
20,"Harbourfront, Regent Park",1,0
21,"Harbourfront, Regent Park",1,0


In [84]:
onehot.shape

(310, 3)

In [85]:
grouped = onehot.groupby('Neighborhood').mean().reset_index()
grouped.head()

Unnamed: 0,Neighborhood,Coffee Shop,Office
0,"Adelaide, King, Richmond",1.0,0.0
1,"Albion Gardens, Beaumond Heights, Humbergate, ...",1.0,0.0
2,"Alderwood, Long Branch",1.0,0.0
3,"Bathurst Manor, Downsview North, Wilson Heights",1.0,0.0
4,Bayview Village,1.0,0.0


In [86]:
#print each neighborhood along with the top 5 most common venues

num_top_venues = 5

for hood in grouped['Neighborhood']:
    print("----"+hood+"----")
    temp = grouped[grouped['Neighborhood'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

----Adelaide, King, Richmond----
         venue  freq
0  Coffee Shop   1.0
1       Office   0.0


----Albion Gardens, Beaumond Heights, Humbergate, Jamestown, Mount Olive, Silverstone, South Steeles, Thistletown----
         venue  freq
0  Coffee Shop   1.0
1       Office   0.0


----Alderwood, Long Branch----
         venue  freq
0  Coffee Shop   1.0
1       Office   0.0


----Bathurst Manor, Downsview North, Wilson Heights----
         venue  freq
0  Coffee Shop   1.0
1       Office   0.0


----Bayview Village----
         venue  freq
0  Coffee Shop   1.0
1       Office   0.0


----Bedford Park, Lawrence Manor East----
         venue  freq
0  Coffee Shop   1.0
1       Office   0.0


----Berczy Park----
         venue  freq
0  Coffee Shop   1.0
1       Office   0.0


----Birch Cliff, Cliffside West----
         venue  freq
0  Coffee Shop   1.0
1       Office   0.0


----Bloordale Gardens, Eringate, Markland Wood, Old Burnhamthorpe----
         venue  freq
0  Coffee Shop   1.0
1       

# Cluster neighborhoods

In [173]:
venues2 = venues.drop('Venue Category', axis = 1)

In [176]:
venues2 = venues2.drop('Venue', axis = 1)

In [178]:
venues2.head()

Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue Latitude,Venue Longitude
4,Victoria Village,43.725882,-79.315572,43.725517,-79.313103
8,"Harbourfront, Regent Park",43.65426,-79.360636,43.653559,-79.361809
19,"Harbourfront, Regent Park",43.65426,-79.360636,43.651327,-79.364329
20,"Harbourfront, Regent Park",43.65426,-79.360636,43.658135,-79.359515
21,"Harbourfront, Regent Park",43.65426,-79.360636,43.6519,-79.365609


In [179]:
# run k-means clustering
kclusters = 5
grouped_clustering = venues2.drop('Neighborhood', 1)
kmeans = KMeans(n_clusters = kclusters, random_state = 0).fit(grouped_clustering)
kmeans.labels_[0:10]

array([1, 0, 0, 0, 0, 0, 0, 0, 0, 0], dtype=int32)

In [180]:
grouped_clustering.insert(0, 'Cluster Labels', kmeans.labels_)

In [181]:
grouped_clustering.head()

Unnamed: 0,Cluster Labels,Neighborhood Latitude,Neighborhood Longitude,Venue Latitude,Venue Longitude
4,1,43.725882,-79.315572,43.725517,-79.313103
8,0,43.65426,-79.360636,43.653559,-79.361809
19,0,43.65426,-79.360636,43.651327,-79.364329
20,0,43.65426,-79.360636,43.658135,-79.359515
21,0,43.65426,-79.360636,43.6519,-79.365609


In [190]:
joined = venues.merge(grouped_clustering, on = ['Venue Latitude', 'Venue Longitude'])

In [191]:
joined.head()

Unnamed: 0,Neighborhood,Neighborhood Latitude_x,Neighborhood Longitude_x,Venue,Venue Latitude,Venue Longitude,Venue Category,Cluster Labels,Neighborhood Latitude_y,Neighborhood Longitude_y
0,Victoria Village,43.725882,-79.315572,Tim Hortons,43.725517,-79.313103,Coffee Shop,1,43.725882,-79.315572
1,"Harbourfront, Regent Park",43.65426,-79.360636,Tandem Coffee,43.653559,-79.361809,Coffee Shop,0,43.65426,-79.360636
2,"Harbourfront, Regent Park",43.65426,-79.360636,Starbucks,43.651327,-79.364329,Coffee Shop,0,43.65426,-79.360636
3,"Harbourfront, Regent Park",43.65426,-79.360636,Sumach Espresso,43.658135,-79.359515,Coffee Shop,0,43.65426,-79.360636
4,"Harbourfront, Regent Park",43.65426,-79.360636,Rooster Coffee,43.6519,-79.365609,Coffee Shop,0,43.65426,-79.360636


In [192]:
coffee = joined[joined['Venue Category'] == 'Coffee Shop']

In [193]:
office = joined[joined['Venue Category'] == 'Office']

In [194]:
office.head()

Unnamed: 0,Neighborhood,Neighborhood Latitude_x,Neighborhood Longitude_x,Venue,Venue Latitude,Venue Longitude,Venue Category,Cluster Labels,Neighborhood Latitude_y,Neighborhood Longitude_y
240,"Guildwood, Morningside, West Hill",43.763573,-79.188711,chatr Mobile,43.765917,-79.191672,Office,3,43.763573,-79.188711
287,Central Bay Street,43.657952,-79.387383,Toronto Vegetarian Association,43.655953,-79.392854,Office,0,43.657952,-79.387383
400,"Harbourfront East, Toronto Islands, Union Station",43.640816,-79.381752,TELUS Store,43.643312,-79.380999,Office,0,43.640816,-79.381752
401,"Harbourfront East, Toronto Islands, Union Station",43.640816,-79.381752,TELUS Store,43.643312,-79.380999,Office,0,43.647177,-79.381576
402,"Design Exchange, Toronto Dominion Centre",43.647177,-79.381576,TELUS Store,43.643312,-79.380999,Office,0,43.640816,-79.381752


In [195]:
coffee.head()

Unnamed: 0,Neighborhood,Neighborhood Latitude_x,Neighborhood Longitude_x,Venue,Venue Latitude,Venue Longitude,Venue Category,Cluster Labels,Neighborhood Latitude_y,Neighborhood Longitude_y
0,Victoria Village,43.725882,-79.315572,Tim Hortons,43.725517,-79.313103,Coffee Shop,1,43.725882,-79.315572
1,"Harbourfront, Regent Park",43.65426,-79.360636,Tandem Coffee,43.653559,-79.361809,Coffee Shop,0,43.65426,-79.360636
2,"Harbourfront, Regent Park",43.65426,-79.360636,Starbucks,43.651327,-79.364329,Coffee Shop,0,43.65426,-79.360636
3,"Harbourfront, Regent Park",43.65426,-79.360636,Sumach Espresso,43.658135,-79.359515,Coffee Shop,0,43.65426,-79.360636
4,"Harbourfront, Regent Park",43.65426,-79.360636,Rooster Coffee,43.6519,-79.365609,Coffee Shop,0,43.65426,-79.360636


In [196]:
coffee.groupby(['Cluster Labels']).count()

Unnamed: 0_level_0,Neighborhood,Neighborhood Latitude_x,Neighborhood Longitude_x,Venue,Venue Latitude,Venue Longitude,Venue Category,Neighborhood Latitude_y,Neighborhood Longitude_y
Cluster Labels,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
0,508,508,508,508,508,508,508,508,508
1,40,40,40,40,40,40,40,40,40
2,29,29,29,29,29,29,29,29,29
3,4,4,4,4,4,4,4,4,4
4,6,6,6,6,6,6,6,6,6


In [197]:
office.groupby(['Cluster Labels']).count()

Unnamed: 0_level_0,Neighborhood,Neighborhood Latitude_x,Neighborhood Longitude_x,Venue,Venue Latitude,Venue Longitude,Venue Category,Neighborhood Latitude_y,Neighborhood Longitude_y
Cluster Labels,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
0,6,6,6,6,6,6,6,6,6
3,1,1,1,1,1,1,1,1,1


In [199]:
joined['Cluster Labels'] = joined['Cluster Labels'].astype(int)

# Visualize on map - clusters

In [201]:
joined

Unnamed: 0,Neighborhood,Neighborhood Latitude_x,Neighborhood Longitude_x,Venue,Venue Latitude,Venue Longitude,Venue Category,Cluster Labels,Neighborhood Latitude_y,Neighborhood Longitude_y
0,Victoria Village,43.725882,-79.315572,Tim Hortons,43.725517,-79.313103,Coffee Shop,1,43.725882,-79.315572
1,"Harbourfront, Regent Park",43.654260,-79.360636,Tandem Coffee,43.653559,-79.361809,Coffee Shop,0,43.654260,-79.360636
2,"Harbourfront, Regent Park",43.654260,-79.360636,Starbucks,43.651327,-79.364329,Coffee Shop,0,43.654260,-79.360636
3,"Harbourfront, Regent Park",43.654260,-79.360636,Sumach Espresso,43.658135,-79.359515,Coffee Shop,0,43.654260,-79.360636
4,"Harbourfront, Regent Park",43.654260,-79.360636,Rooster Coffee,43.651900,-79.365609,Coffee Shop,0,43.654260,-79.360636
5,"Harbourfront, Regent Park",43.654260,-79.360636,Arvo,43.649963,-79.361442,Coffee Shop,0,43.654260,-79.360636
6,"Harbourfront, Regent Park",43.654260,-79.360636,Cacao 70,43.650067,-79.360723,Coffee Shop,0,43.654260,-79.360636
7,"Harbourfront, Regent Park",43.654260,-79.360636,ODIN Cafe + Bar,43.656739,-79.356503,Coffee Shop,0,43.654260,-79.360636
8,"Harbourfront, Regent Park",43.654260,-79.360636,Dark Horse Espresso Bar,43.653081,-79.357078,Coffee Shop,0,43.654260,-79.360636
9,"Harbourfront, Regent Park",43.654260,-79.360636,Caffe Furbo,43.649970,-79.358849,Coffee Shop,0,43.654260,-79.360636


In [203]:
merged = joined

In [200]:
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

In [205]:
# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(merged['Venue Latitude'], merged['Venue Longitude'], merged['Venue'], merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

# Visualize on map - coffee shops and offices

In [132]:
final_df = venues[['Venue', 'Venue Latitude', 'Venue Longitude', 'Venue Category']]

In [133]:
final_df.head()

Unnamed: 0,Venue,Venue Latitude,Venue Longitude,Venue Category
4,Tim Hortons,43.725517,-79.313103,Coffee Shop
8,Tandem Coffee,43.653559,-79.361809,Coffee Shop
19,Starbucks,43.651327,-79.364329,Coffee Shop
20,Sumach Espresso,43.658135,-79.359515,Coffee Shop
21,Rooster Coffee,43.6519,-79.365609,Coffee Shop


In [134]:
final_df = final_df.replace({'Coffee Shop': 0, 'Office': 1})

In [135]:
final_df.head()

Unnamed: 0,Venue,Venue Latitude,Venue Longitude,Venue Category
4,Tim Hortons,43.725517,-79.313103,0
8,Tandem Coffee,43.653559,-79.361809,0
19,Starbucks,43.651327,-79.364329,0
20,Sumach Espresso,43.658135,-79.359515,0
21,Rooster Coffee,43.6519,-79.365609,0


In [138]:
final_df = final_df.drop_duplicates()

In [139]:
final_df[final_df['Venue Category'] == 1]

Unnamed: 0,Venue,Venue Latitude,Venue Longitude,Venue Category
382,chatr Mobile,43.765917,-79.191672,1
543,Toronto Vegetarian Association,43.655953,-79.392854,1
865,TELUS Store,43.643312,-79.380999,1
1345,District 28,43.655174,-79.340598,1


In [140]:
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(2)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

In [145]:
# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(final_df['Venue Latitude'], final_df['Venue Longitude'], final_df['Venue'], final_df['Venue Category']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

# Examine clusters

In [207]:
venues[venues['Venue'] == 'chatr Mobile']

Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
382,"Guildwood, Morningside, West Hill",43.763573,-79.188711,chatr Mobile,43.765917,-79.191672,Office
