In [1]:
import requests
import pandas as pd
import numpy as np
import folium

In [2]:
url  = "https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M"
page = requests.get(url)
if page.status_code == 200:
    print('Downloaded page successfully')
else:
    print('Unable to download. Error code: {}'.format(page.status_code))

Downloaded page successfully


In [3]:
import lxml
df_html = pd.read_html(url, header=0, na_values = ['Not assigned'])[0]
df_html.head()

Unnamed: 0,Postal code,Borough,Neighborhood
0,M1A,,
1,M2A,,
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Regent Park / Harbourfront


In [4]:
df_html.dropna(subset=['Borough'], inplace=True)

In [5]:
n_empty_neighborhood = df_html[df_html['Neighborhood'].isna()].shape[0]
print('Number of rows on which Neighborhood column is empty: {}'.format(n_empty_neighborhood))

Number of rows on which Neighborhood column is empty: 0


In [6]:
df_postalcode =pd.DataFrame( df_html.groupby(['Postal code','Borough']).Neighborhood.agg([('Neighborhood', ', '.join)]))
df_postalcode.reset_index(inplace=True)
df_postalcode.head(5)

Unnamed: 0,Postal code,Borough,Neighborhood
0,M1B,Scarborough,Malvern / Rouge
1,M1C,Scarborough,Rouge Hill / Port Union / Highland Creek
2,M1E,Scarborough,Guildwood / Morningside / West Hill
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae


In [7]:
df_html.dropna(subset=['Borough'], inplace=True)

In [8]:
print('The shape of the dataset:',df_postalcode.shape)

The shape of the dataset: (103, 3)


In [9]:
df_postalcode.to_csv('Toronto_data.csv')

In [10]:
url_csv = 'http://cocl.us/Geospatial_data'
df_coordinate = pd.read_csv(url_csv)
df_coordinate.head()

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


In [11]:
df_neighborhood = pd.read_csv('Toronto_data.csv',index_col=[0])
df_neighborhood.head()

Unnamed: 0,Postal code,Borough,Neighborhood
0,M1B,Scarborough,Malvern / Rouge
1,M1C,Scarborough,Rouge Hill / Port Union / Highland Creek
2,M1E,Scarborough,Guildwood / Morningside / West Hill
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae


In [12]:
df_coordinate.rename(columns={'Postal Code': 'Postal code'}, inplace=True)

In [13]:
df_neighborhood_coordinates = pd.merge(df_neighborhood, df_coordinate, on='Postal code')
df_neighborhood_coordinates.head()

Unnamed: 0,Postal code,Borough,Neighborhood,Latitude,Longitude
0,M1B,Scarborough,Malvern / Rouge,43.806686,-79.194353
1,M1C,Scarborough,Rouge Hill / Port Union / Highland Creek,43.784535,-79.160497
2,M1E,Scarborough,Guildwood / Morningside / West Hill,43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476


In [14]:
df_neighborhood_coordinates.to_csv('Toronto_data2.csv')

In [15]:
df = pd.read_csv('Toronto_data2.csv', index_col=0)
df.head()

Unnamed: 0,Postal code,Borough,Neighborhood,Latitude,Longitude
0,M1B,Scarborough,Malvern / Rouge,43.806686,-79.194353
1,M1C,Scarborough,Rouge Hill / Port Union / Highland Creek,43.784535,-79.160497
2,M1E,Scarborough,Guildwood / Morningside / West Hill,43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476


In [16]:
df.groupby('Borough').count()['Neighborhood']

Borough
Central Toronto      9
Downtown Toronto    19
East Toronto         5
East York            5
Etobicoke           12
Mississauga          1
North York          24
Scarborough         17
West Toronto         6
York                 5
Name: Neighborhood, dtype: int64

In [17]:
df_torontodata = df[df['Borough'].str.contains('Toronto')]
df_torontodata.reset_index(inplace=True)
df_torontodata.drop('index', axis=1, inplace=True)
df_torontodata.head()

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  errors=errors,


Unnamed: 0,Postal code,Borough,Neighborhood,Latitude,Longitude
0,M4E,East Toronto,The Beaches,43.676357,-79.293031
1,M4K,East Toronto,The Danforth West / Riverdale,43.679557,-79.352188
2,M4L,East Toronto,India Bazaar / The Beaches West,43.668999,-79.315572
3,M4M,East Toronto,Studio District,43.659526,-79.340923
4,M4N,Central Toronto,Lawrence Park,43.72802,-79.38879


In [18]:
print(df_torontodata.groupby('Borough').count()['Neighborhood'])

Borough
Central Toronto      9
Downtown Toronto    19
East Toronto         5
West Toronto         6
Name: Neighborhood, dtype: int64


In [19]:
boroughs = df_torontodata['Borough'].unique().tolist()

In [20]:
lat_toronto = df_torontodata['Latitude'].mean()
lon_toronto = df_torontodata['Longitude'].mean()
print('The geographical coordinates of Toronto: {}, {}'.format(lat_toronto, lon_toronto))

The geographical coordinates of Toronto: 43.66713498717948, -79.38987324871795


In [21]:
borough_color = {}
for borough in boroughs:
    borough_color[borough]= '#%02X%02X%02X' % tuple(np.random.choice(range(256), size=3))

In [22]:
toronto_map = folium.Map(location=[lat_toronto, lon_toronto], zoom_start=12)

for lat, lng, borough, neighborhood in zip(df_torontodata['Latitude'], 
                                           df_torontodata['Longitude'],
                                           df_torontodata['Borough'], 
                                           df_torontodata['Neighborhood']):
    label_text = borough + ' - ' + neighborhood
    label = folium.Popup(label_text)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color=borough_color[borough],
        fill_color=borough_color[borough],
        fill_opacity=0.7).add_to(toronto_map)  
    
toronto_map

In [23]:
CLIENT_ID = 'E5YCKOQX2TAQNEINECKNVOG1XO54PULEH0SMCJPV11ROEA30' 
CLIENT_SECRET = 'S1G5JXDQZCTC0NB15WW3244V4VDOOWLCAAGSGX5D4VMZBD20' 
VERSION = '20201704'
LIMIT = 100 
radius = 500 

In [24]:
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
    
        results = requests.get(url).json()["response"]['groups'][0]['items']
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])
   
    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [26]:
toronto_venue = getNearbyVenues(names=df_torontodata['Neighborhood'],
                                latitudes=df_torontodata['Latitude'],
                                longitudes=df_torontodata['Longitude'])

The Beaches
The Danforth West / Riverdale
India Bazaar / The Beaches West
Studio District
Lawrence Park
Davisville North
North Toronto West
Davisville
Moore Park / Summerhill East
Summerhill West / Rathnelly / South Hill / Forest Hill SE / Deer Park
Rosedale
St. James Town / Cabbagetown
Church and Wellesley
Regent Park / Harbourfront
Garden District, Ryerson
St. James Town
Berczy Park
Central Bay Street
Richmond / Adelaide / King
Harbourfront East / Union Station / Toronto Islands
Toronto Dominion Centre / Design Exchange
Commerce Court / Victoria Hotel
Roselawn
Forest Hill North & West
The Annex / North Midtown / Yorkville
University of Toronto / Harbord
Kensington Market / Chinatown / Grange Park
CN Tower / King and Spadina / Railway Lands / Harbourfront West / Bathurst Quay / South Niagara / Island airport
Stn A PO Boxes
First Canadian Place / Underground city
Christie
Dufferin / Dovercourt Village
Little Portugal / Trinity
Brockton / Parkdale Village / Exhibition Place
High Park / 

In [27]:
toronto_venue.shape

(1602, 7)

In [28]:
toronto_venue.groupby('Neighborhood').count()

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Berczy Park,56,56,56,56,56,56
Brockton / Parkdale Village / Exhibition Place,23,23,23,23,23,23
Business reply mail Processing CentrE,14,14,14,14,14,14
CN Tower / King and Spadina / Railway Lands / Harbourfront West / Bathurst Quay / South Niagara / Island airport,18,18,18,18,18,18
Central Bay Street,59,59,59,59,59,59
Christie,18,18,18,18,18,18
Church and Wellesley,73,73,73,73,73,73
Commerce Court / Victoria Hotel,100,100,100,100,100,100
Davisville,34,34,34,34,34,34
Davisville North,7,7,7,7,7,7


In [29]:
toronto_venue['Venue Category'].unique()[:100]

array(['Trail', 'Health Food Store', 'Pub', 'Neighborhood', 'Coffee Shop',
       'Asian Restaurant', 'Greek Restaurant', 'Cosmetics Shop',
       'Italian Restaurant', 'Ice Cream Shop', 'Yoga Studio', 'Brewery',
       'Fruit & Vegetable Store', 'Pizza Place', 'Bookstore',
       'Restaurant', 'Dessert Shop', 'Juice Bar', 'Bubble Tea Shop',
       'Spa', 'Diner', 'Grocery Store', 'Furniture / Home Store', 'Café',
       'Bakery', 'Caribbean Restaurant', 'Indian Restaurant',
       'Japanese Restaurant', 'American Restaurant', 'Frozen Yogurt Shop',
       'Lounge', 'Gym', 'Fish & Chips Shop', 'Fast Food Restaurant',
       'Sushi Restaurant', 'Liquor Store', 'Park', 'Pet Store',
       'Steakhouse', 'Burrito Place', 'Movie Theater', 'Sandwich Place',
       'Intersection', 'Fish Market', 'Gay Bar', 'Cheese Shop',
       'Middle Eastern Restaurant', 'Comfort Food Restaurant',
       'Thai Restaurant', 'Seafood Restaurant', 'Stationery Store',
       'Coworking Space', 'Wine Bar', 'Bar',

In [30]:
"Coffee Shop" in toronto_venue['Venue Category'].unique()

True

In [31]:
to_onehot = pd.get_dummies(toronto_venue[['Venue Category']], prefix="", prefix_sep="")

to_onehot['Neighborhoods'] = toronto_venue['Neighborhood'] 
fixed_columns = [to_onehot.columns[-1]] + list(to_onehot.columns[:-1])
to_onehot = to_onehot[fixed_columns]

print(to_onehot.shape)
to_onehot.head()

(1602, 229)


Unnamed: 0,Neighborhoods,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,Antique Shop,Aquarium,...,Theme Restaurant,Toy / Game Store,Trail,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Wine Bar,Women's Store,Yoga Studio
0,The Beaches,0,0,0,0,0,0,0,0,0,...,0,0,1,0,0,0,0,0,0,0
1,The Beaches,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,The Beaches,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,The Beaches,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,The Beaches,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [32]:
to_grouped = to_onehot.groupby(["Neighborhoods"]).mean().reset_index()

print(to_grouped.shape)
to_grouped

(39, 229)


Unnamed: 0,Neighborhoods,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,Antique Shop,Aquarium,...,Theme Restaurant,Toy / Game Store,Trail,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Wine Bar,Women's Store,Yoga Studio
0,Berczy Park,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.017857,0.0,0.0,0.0,0.0,0.0
1,Brockton / Parkdale Village / Exhibition Place,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,Business reply mail Processing CentrE,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,CN Tower / King and Spadina / Railway Lands / ...,0.055556,0.055556,0.055556,0.111111,0.166667,0.111111,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,Central Bay Street,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.016949,0.0,0.0,0.0,0.0,0.016949
5,Christie,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,Church and Wellesley,0.0,0.0,0.0,0.0,0.0,0.0,0.013699,0.0,0.0,...,0.013699,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.027397
7,Commerce Court / Victoria Hotel,0.0,0.0,0.0,0.0,0.0,0.0,0.04,0.0,0.0,...,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.01,0.0,0.0
8,Davisville,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.029412,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,Davisville North,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [33]:
len(to_grouped[to_grouped["Coffee Shop"] > 0])

29

In [34]:
to_icecream = to_grouped[["Neighborhoods","Coffee Shop"]]
to_icecream.head(9)

Unnamed: 0,Neighborhoods,Coffee Shop
0,Berczy Park,0.053571
1,Brockton / Parkdale Village / Exhibition Place,0.086957
2,Business reply mail Processing CentrE,0.0
3,CN Tower / King and Spadina / Railway Lands / ...,0.055556
4,Central Bay Street,0.220339
5,Christie,0.055556
6,Church and Wellesley,0.082192
7,Commerce Court / Victoria Hotel,0.1
8,Davisville,0.058824


In [35]:
from sklearn.cluster import KMeans
toclusters = 3

to_clustering = to_icecream.drop(["Neighborhoods"], 1)

kmeans = KMeans(n_clusters=toclusters, random_state=1)
kmeans.fit_transform(to_clustering)

kmeans.labels_[0:20]

array([0, 0, 1, 0, 2, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 1, 0, 0, 1, 1])

In [36]:
to_merged = to_icecream.copy()
to_merged["Cluster Labels"] = kmeans.labels_

In [37]:
to_merged.rename(columns={"Neighborhoods": "Neighborhood"}, inplace=True)
to_merged.head(5)

Unnamed: 0,Neighborhood,Coffee Shop,Cluster Labels
0,Berczy Park,0.053571,0
1,Brockton / Parkdale Village / Exhibition Place,0.086957,0
2,Business reply mail Processing CentrE,0.0,1
3,CN Tower / King and Spadina / Railway Lands / ...,0.055556,0
4,Central Bay Street,0.220339,2


In [38]:
to_merged = to_merged.join(toronto_venue.set_index("Neighborhood"), on="Neighborhood")
print(to_merged.shape)
to_merged.head()

(1602, 9)


Unnamed: 0,Neighborhood,Coffee Shop,Cluster Labels,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Berczy Park,0.053571,0,43.644771,-79.373306,LCBO,43.642944,-79.37244,Liquor Store
0,Berczy Park,0.053571,0,43.644771,-79.373306,The Keg Steakhouse + Bar - Esplanade,43.646712,-79.374768,Restaurant
0,Berczy Park,0.053571,0,43.644771,-79.373306,Fresh On Front,43.647815,-79.374453,Vegetarian / Vegan Restaurant
0,Berczy Park,0.053571,0,43.644771,-79.373306,Meridian Hall,43.646292,-79.376022,Concert Hall
0,Berczy Park,0.053571,0,43.644771,-79.373306,Starbucks,43.644285,-79.369771,Coffee Shop


In [39]:
to_merged.sort_values(["Cluster Labels"], inplace=True)
to_merged.head()

Unnamed: 0,Neighborhood,Coffee Shop,Cluster Labels,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Berczy Park,0.053571,0,43.644771,-79.373306,LCBO,43.642944,-79.37244,Liquor Store
28,Runnymede / Swansea,0.055556,0,43.651571,-79.48445,Goodfellas Wood Oven Pizza,43.648224,-79.486356,Italian Restaurant
28,Runnymede / Swansea,0.055556,0,43.651571,-79.48445,Amber European Restaurant,43.649946,-79.482009,French Restaurant
28,Runnymede / Swansea,0.055556,0,43.651571,-79.48445,Yumi Sushi,43.649891,-79.482404,Sushi Restaurant
28,Runnymede / Swansea,0.055556,0,43.651571,-79.48445,Falafel World,43.649801,-79.482728,Falafel Restaurant


In [40]:
map_clusters = folium.Map(location=[lat_toronto, lon_toronto],zoom_start=14)

markers_colors={}
markers_colors[0] = 'red'
markers_colors[1] = 'blue'
markers_colors[2] = 'green'
markers_colors[3] = 'yellow'
markers_colors[4] = 'cyan'
markers_colors[5] = 'black'
for lat, lon, cluster in zip(to_merged['Neighborhood Latitude'], to_merged['Neighborhood Longitude'], to_merged['Cluster Labels']):
    
    
    folium.features.CircleMarker(
        [lat, lon],
        radius=5,
       
        color =markers_colors[cluster],
        fill_color=markers_colors[cluster],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

AttributeError: module 'folium.features' has no attribute 'CircleMarker'

In [42]:
to_merged.loc[(to_merged['Cluster Labels'] ==0) & (to_merged['Venue Category'] == 'Coffee Shop') ]

Unnamed: 0,Neighborhood,Coffee Shop,Cluster Labels,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
28,Runnymede / Swansea,0.055556,0,43.651571,-79.484450,Wibke's Espresso Bar,43.649132,-79.484802,Coffee Shop
28,Runnymede / Swansea,0.055556,0,43.651571,-79.484450,Tim Hortons,43.648526,-79.485066,Coffee Shop
25,Richmond / Adelaide / King,0.095745,0,43.650571,-79.384568,Starbucks,43.646891,-79.381871,Coffee Shop
25,Richmond / Adelaide / King,0.095745,0,43.650571,-79.384568,Dineen @CommerceCourt,43.648251,-79.380127,Coffee Shop
25,Richmond / Adelaide / King,0.095745,0,43.650571,-79.384568,Starbucks,43.649028,-79.381593,Coffee Shop
...,...,...,...,...,...,...,...,...,...
13,"Garden District, Ryerson",0.090000,0,43.657162,-79.378937,Hailed Coffee,43.658833,-79.383684,Coffee Shop
13,"Garden District, Ryerson",0.090000,0,43.657162,-79.378937,Balzac's Coffee,43.657854,-79.379200,Coffee Shop
13,"Garden District, Ryerson",0.090000,0,43.657162,-79.378937,Nordstrom Ebar,43.654649,-79.380574,Coffee Shop
1,Brockton / Parkdale Village / Exhibition Place,0.086957,0,43.636847,-79.428191,Starbucks,43.639090,-79.427622,Coffee Shop


In [43]:
to_merged.loc[(to_merged['Cluster Labels'] ==1) & (to_merged['Venue Category'] == 'Coffee Shop') ]

Unnamed: 0,Neighborhood,Coffee Shop,Cluster Labels,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
19,Little Portugal / Trinity,0.02381,1,43.647927,-79.41975,Jimmy's Coffee,43.644521,-79.418908,Coffee Shop


RESULT :
By analyzing nearby venues, we can conclude that the cluster 1 does not have many coffee shops in that areas.
Thus, it would be suitable to select these locations for opening coffee shops.
Therefore, locations like Central Bay Street, Riverdale, The Beaches West, Commerce court will be good to open a new coffee Shop.

### End fo the file