In [1]:
import pandas as pd
import numpy as np
!pip install lxml



In [2]:
# Wikipedia page that lists the Canadian postal codes starting with "M".
url = 'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'

# read_html returns one DataFrame per HTML table found on the page; only the
# first one is kept (presumably the postal-code table -- verify if the page changes).
df = pd.read_html(io=url)[0]

# THE TABLE CONTAINS NO REPEATED POSTAL CODES. ALSO, WHENEVER THE NEIGHBORHOOD IS NOT ASSIGNED, THE BOROUGH IS NOT ASSIGNED EITHER, SO WE ONLY HAVE TO REMOVE THE ROWS WHOSE BOROUGH IS "Not assigned" FROM THE DATAFRAME

In [3]:
# Drop the rows whose borough is "Not assigned", keep only the three columns
# used later, and renumber the remaining rows from 0.
df = (
    df.loc[df["Borough"].ne("Not assigned"), ["Postal Code", "Borough", "Neighborhood"]]
      .reset_index(drop=True)
)

In [4]:
# Preview the first ten rows of the filtered table.
df.head(n=10)

Unnamed: 0,Postal Code,Borough,Neighborhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Regent Park, Harbourfront"
3,M6A,North York,"Lawrence Manor, Lawrence Heights"
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"
5,M9A,Etobicoke,Islington Avenue
6,M1B,Scarborough,"Malvern, Rouge"
7,M3B,North York,Don Mills
8,M4B,East York,"Parkview Hill, Woodbine Gardens"
9,M5B,Downtown Toronto,"Garden District, Ryerson"


In [5]:
# (number of rows, number of columns) left after removing unassigned boroughs.
df.shape

(103, 3)

In [6]:
# Download the coordinates file into the working directory as
# 'Geospatial_Coordinates.csv' (-q: no progress output, -O: output file name).
!wget -q -O 'Geospatial_Coordinates.csv' http://cocl.us/Geospatial_data

In [7]:
# Load the downloaded coordinates file into a DataFrame.
GC = pd.read_csv(filepath_or_buffer="Geospatial_Coordinates.csv")

### WE SORT BOTH DATAFRAMES BY POSTAL CODE SO THAT THE LATITUDE AND LONGITUDE CAN BE ATTACHED EASILY TO THE PREVIOUS DATAFRAME

In [8]:
# Order the coordinate rows by postal code (original index labels are kept).
GC = GC.sort_values(by="Postal Code")

In [9]:
# Preview the first five coordinate rows.
GC.head(n=5)

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


In [10]:
# Order the borough table by postal code and renumber its rows from 0.
df = (
    df.sort_values(by="Postal Code")
      .reset_index(drop=True)
)

In [11]:
# Preview the first five rows after sorting by postal code.
df.head(n=5)

Unnamed: 0,Postal Code,Borough,Neighborhood
0,M1B,Scarborough,"Malvern, Rouge"
1,M1C,Scarborough,"Rouge Hill, Port Union, Highland Creek"
2,M1E,Scarborough,"Guildwood, Morningside, West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae


In [12]:
# Attach the coordinates by matching on "Postal Code" instead of relying on
# the two frames sharing the same index labels. Assigning one DataFrame column
# to another aligns on the index, so the previous form was only correct when
# GC's index happened to line up row-for-row with df's; a key-based lookup is
# correct regardless of row order and is safe to re-run.
coords_by_code = GC.drop_duplicates(subset="Postal Code").set_index("Postal Code")
df["Latitude"] = df["Postal Code"].map(coords_by_code["Latitude"])
df["Longitude"] = df["Postal Code"].map(coords_by_code["Longitude"])

In [13]:
# Preview the first five rows, now including Latitude and Longitude.
df.head(n=5)

Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude
0,M1B,Scarborough,"Malvern, Rouge",43.806686,-79.194353
1,M1C,Scarborough,"Rouge Hill, Port Union, Highland Creek",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476


In [14]:
!conda install -c conda-forge geopy --yes
from geopy.geocoders import Nominatim

!conda install -c conda-forge folium=0.5.0 --yes
import folium

Collecting package metadata (current_repodata.json): done
Solving environment: done

# All requested packages already installed.

Collecting package metadata (current_repodata.json): done
Solving environment: done

# All requested packages already installed.



In [15]:
address = 'Toronto, TO'

# Geocode the address with Nominatim, identifying this client by user agent.
geolocator = Nominatim(user_agent="to_explorer")
location = geolocator.geocode(address)

# geocode() returns None when nothing matches; fail with a clear message
# instead of an AttributeError on the attribute accesses below.
if location is None:
    raise ValueError("Could not geocode address: {!r}".format(address))

latitude = location.latitude
longitude = location.longitude

In [16]:
# Base map centred on the geocoded coordinates.
map_toronto = folium.Map(
    location=[latitude, longitude],
    zoom_start=10,
)

# Bare last expression so the notebook renders the map.
map_toronto

# EXPLORING...

# I WANT TO FIND ALL BARS IN THE NEIGHBORHOOD "LITTLE PORTUGAL"

In [18]:
# Show every row whose neighborhood text mentions "Portugal".
df.loc[df['Neighborhood'].str.contains("Portugal")]

Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude
77,M6J,West Toronto,"Little Portugal, Trinity",43.647927,-79.41975


In [19]:
# Look the neighborhood up by name instead of hard-coding row label 77, which
# silently points at a different neighborhood as soon as the source table
# gains, loses, or reorders rows.
portugal_coords = df.loc[
    df["Neighborhood"].str.contains("Portugal", na=False),
    ["Latitude", "Longitude"],
]
if portugal_coords.empty:
    raise LookupError('No neighborhood containing "Portugal" was found')

# Use the first match, as the single-row lookup did before.
nbh_lat, nbh_long = portugal_coords.iloc[0]
print(nbh_lat, nbh_long)

43.647926700000006 -79.4197497


In [31]:
import os

LIMIT = 100   # sent as the `limit` query parameter
radius = 500  # sent as the `radius` query parameter

# CLIENT_ID, CLIENT_SECRET and VERSION are not defined in any visible cell, so
# on a fresh kernel this cell failed with NameError. Reuse them when an earlier
# cell already set them; otherwise read them from the environment so that the
# credentials never have to be written into the notebook.
try:
    CLIENT_ID, CLIENT_SECRET, VERSION
except NameError:
    CLIENT_ID = os.environ["FOURSQUARE_CLIENT_ID"]
    CLIENT_SECRET = os.environ["FOURSQUARE_CLIENT_SECRET"]
    VERSION = os.environ["FOURSQUARE_API_VERSION"]

# Venue "explore" request around the selected neighborhood's coordinates.
url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
    CLIENT_ID,
    CLIENT_SECRET,
    VERSION,
    nbh_lat,
    nbh_long,
    radius,
    LIMIT)

In [21]:
import requests
from pandas.io.json import json_normalize

# Bound the wait with a timeout (requests has none by default) and raise on
# an HTTP error status, so a failed call surfaces here rather than as a
# KeyError when the payload is unpacked later.
response = requests.get(url, timeout=10)
response.raise_for_status()
results = response.json()

In [22]:
# The venue entries live under the first group of the response payload.
venues = results['response']['groups'][0]['items']

# pandas.io.json.json_normalize is deprecated; the top-level
# pd.json_normalize is the supported entry point and flattens the nested
# JSON records into dotted column names in the same way.
nearby_venues = pd.json_normalize(venues)  # flatten JSON



  This is separate from the ipykernel package so we can avoid doing imports until


In [23]:
def get_category_type(row):
    """Return the name of the first category attached to a venue row.

    The category list is read from ``row['categories']`` and, when that key is
    absent, from ``row['venue.categories']``.

    Parameters
    ----------
    row : mapping-like
        Anything indexable by key (e.g. a pandas Series or a dict) whose
        category entry is a sequence of dicts with a ``'name'`` key.

    Returns
    -------
    The ``'name'`` of the first category, or ``None`` if the list is empty.

    Raises
    ------
    KeyError
        If neither ``'categories'`` nor ``'venue.categories'`` is present.
    """
    try:
        categories_list = row['categories']
    except KeyError:
        # Only a missing key triggers the fallback; any other error is a real
        # problem and must not be hidden (the previous bare ``except`` also
        # swallowed things like KeyboardInterrupt).
        categories_list = row['venue.categories']

    if len(categories_list) == 0:
        return None
    return categories_list[0]['name']

In [24]:
# Replace each venue's raw category list with the name of its first category
# (row-wise apply). This overwrites the column in place.
nearby_venues['venue.categories'] = nearby_venues.apply(
    get_category_type,
    axis="columns",
)

In [25]:
# Show name, address and category for the venues categorised as "Bar".
nearby_venues.loc[
    nearby_venues["venue.categories"] == "Bar",
    ["venue.name", "venue.location.address", "venue.categories"],
]

Unnamed: 0,venue.name,venue.location.address,venue.categories
11,Reposado,136 Ossington Ave.,Bar
15,Dakota Tavern,249 Ossington Ave.,Bar
18,The Communist's Daughter,1149 Dundas St. W,Bar
39,apt 200,1034 Queen St W,Bar


# CLUSTERS

In [26]:
from sklearn.cluster import KMeans

In [27]:
kclusters = 4  # number of clusters to fit

# Explicit copy: this frame gets an extra column later, and it should be an
# independent object rather than a selection taken from df.
df2 = df[["Latitude", "Longitude"]].copy()

# n_init is pinned because scikit-learn changed its default (10 -> 'auto');
# leaving it implicit makes the clustering depend on the installed version.
# random_state fixes the centroid initialisation for reproducible labels.
kmeans = KMeans(n_clusters=kclusters, n_init=10, random_state=0).fit(df2)

In [28]:
# Peek at the cluster label assigned to the first ten rows.
kmeans.labels_[:10]

array([2, 2, 2, 2, 2, 2, 2, 2, 2, 2], dtype=int32)

In [29]:
# DataFrame.insert raises ValueError when the column already exists, so
# re-running this cell used to fail. Drop any label column left over from a
# previous run first (a no-op on the first run), then add the labels as the
# leading column.
df2 = df2.drop(columns='Cluster Labels', errors='ignore')
df2.insert(0, 'Cluster Labels', kmeans.labels_)

In [32]:
import matplotlib.cm as cm
import matplotlib.colors as colors

map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# One hex colour per cluster, evenly spaced along the rainbow colormap.
rainbow = [colors.rgb2hex(rgba) for rgba in cm.rainbow(np.linspace(0, 1, kclusters))]

# Add one circle marker per row. Cluster labels run from 0 to kclusters - 1,
# so they index the colour list directly; the previous `cluster - 1` made
# label 0 wrap around to the last colour.
for lat, lon, poi, cluster in zip(df2['Latitude'], df2['Longitude'], df['Neighborhood'], df2['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster],
        fill=True,
        fill_color=rainbow[cluster],
        fill_opacity=0.7).add_to(map_clusters)

# Bare last expression so the notebook renders the map.
map_clusters

# FINISHED