# The Battle of Neighborhoods (Best Neighborhood in Etobicoke)

In [1]:
import pandas as pd
import requests
import numpy as np
import geocoder
import folium
import requests 
import matplotlib.cm as cm
import matplotlib.colors as colors
import json
import xml
import matplotlib.pyplot as plt
%matplotlib inline
import warnings
warnings.filterwarnings("ignore")

from pandas.io.json import json_normalize 
from sklearn.cluster import KMeans
from geopy.geocoders import Nominatim 
from bs4 import BeautifulSoup

pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)


### Extracting Data

Using BeautifulSoup to scrape the list of postal codes from the following Wikipedia page: https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M

In [2]:
# Download the Wikipedia page that lists Toronto ("M") postal codes and parse its HTML.
url = "https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M"
# A timeout keeps the cell from hanging forever if the server never answers.
response = requests.get(url, timeout=30)
# Fail here on an HTTP error rather than parsing an error page as if it were the table.
response.raise_for_status()
extracting_data = response.text
soup = BeautifulSoup(extracting_data, 'lxml')


### Converting the content of the postal code HTML table into a DataFrame

In [3]:
# Take the first <table> on the page and collect all of its data cells.
table_post = soup.find('table')
fields = table_post.find_all('td')

postalcode = []
borough = []
neighbourhood = []

# The cells are read as consecutive (postal code, borough, neighbourhood) triples.
# Stop at the last complete triple so a stray trailing cell cannot raise IndexError.
complete_cells = len(fields) - len(fields) % 3
for i in range(0, complete_cells, 3):
    postalcode.append(fields[i].text.strip())
    borough.append(fields[i+1].text.strip())
    neighbourhood.append(fields[i+2].text.strip())

df = pd.DataFrame({
    'Postalcode': postalcode,
    'Borough': borough,
    'Neighbourhood': neighbourhood,
})

# Shape before the unassigned boroughs are removed.
print(df.shape)

# Assign the result back: calling replace(..., inplace=True) on df['Borough'] is a
# chained in-place update, which does not modify df under pandas Copy-on-Write.
df['Borough'] = df['Borough'].replace('Not assigned', np.nan)
df.dropna(subset=['Borough'], inplace=True)
df.head()


(180, 3)


Unnamed: 0,Postalcode,Borough,Neighbourhood
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Regent Park, Harbourfront"
5,M6A,North York,"Lawrence Manor, Lawrence Heights"
6,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"


In [4]:
# Summary statistics (count / unique / top / freq) for each column of df.
df.describe()

Unnamed: 0,Postalcode,Borough,Neighbourhood
count,103,103,103
unique,103,10,99
top,M2J,North York,Downsview
freq,1,24,4


In [16]:
# Drop rows whose borough is 'Not assigned' (a no-op when they were already removed).
# The result is assigned back because an in-place replace on df['Borough'] is a
# chained update that does not reach df under pandas Copy-on-Write.
df['Borough'] = df['Borough'].replace('Not assigned', np.nan)
df.dropna(subset=['Borough'], inplace=True)

# Renumber rows 0..n-1; drop=True discards the old index instead of adding an
# 'index' column that would then have to be dropped again.
df1 = df.reset_index(drop=True)

# A neighbourhood listed as 'Not assigned' takes the name of its borough.
# (The previous code inserted the literal text "Borough" instead.)
unassigned = df1['Neighbourhood'] == 'Not assigned'
df1.loc[unassigned, 'Neighbourhood'] = df1.loc[unassigned, 'Borough']
df1.head()

Unnamed: 0,Postalcode,Borough,Neighbourhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.65512,-79.36264
1,M4A,North York,Victoria Village,43.72327,-79.45042
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.66253,-79.39188
3,M6A,North York,"Lawrence Manor, Lawrence Heights",43.66263,-79.52831
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.81139,-79.19662


In [20]:
# Keep only the rows whose borough name contains "Etobicoke".
is_etobicoke = df1['Borough'].str.contains('Etobicoke')
df2 = df1.loc[is_etobicoke]

# Renumber the subset from zero, then discard rows that still have missing values.
df4 = df2.reset_index(drop=True)
df3 = df4.dropna()
df3

Unnamed: 0,Postalcode,Borough,Neighbourhood,Latitude,Longitude
0,M9A,Etobicoke,"Islington Avenue, Humber Valley Village",43.70718,-79.31192
1,M9B,Etobicoke,"West Deane Park, Princess Gardens, Martin Grov...",43.64857,-79.57825
2,M9C,Etobicoke,"Eringate, Bloordale Gardens, Old Burnhamthorpe...",43.76944,-79.23892


In [6]:
def get_latilong(Borough, max_attempts=None):
    """Geocode a place in Toronto with the ArcGIS provider of ``geocoder``.

    Parameters
    ----------
    Borough : str
        Text placed in front of ", Toronto, Ontario" to build the query.
        Despite the name, this notebook also passes postal codes.
    max_attempts : int or None, optional
        Maximum number of lookups to try. ``None`` (the default) keeps
        retrying until the geocoder returns coordinates.

    Returns
    -------
    list
        The ``latlng`` value reported by the geocoder, i.e. ``[latitude, longitude]``.

    Raises
    ------
    RuntimeError
        If ``max_attempts`` is given and every attempt came back without coordinates.
    """
    query = '{}, Toronto, Ontario'.format(Borough)
    attempts = 0
    while max_attempts is None or attempts < max_attempts:
        attempts += 1
        lati_long_coords = geocoder.arcgis(query).latlng
        # latlng is None when the lookup produced no result; try again.
        if lati_long_coords is not None:
            return lati_long_coords
    raise RuntimeError(
        'No coordinates found for {!r} after {} attempt(s)'.format(query, attempts)
    )

get_latilong('Etobicoke')

[43.64436000000006, -79.56712999999996]

In [18]:
# Look up coordinates for every postal code in the Etobicoke frame.
postal_codes = df3['Postalcode']
coords = list(map(get_latilong, postal_codes.tolist()))

In [14]:
# Adding Columns Latitude & Longitude
df_coords = pd.DataFrame(coords, columns=['Latitude', 'Longitude'])
df['Latitude'] = df_coords['Latitude']
df['Longitude'] = df_coords['Longitude']
df1 = df[.reset_index(drop=True)]
df1

Unnamed: 0,Postalcode,Borough,Neighbourhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.65512,-79.36264
1,M4A,North York,Victoria Village,43.72327,-79.45042
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.66253,-79.39188
3,M6A,North York,"Lawrence Manor, Lawrence Heights",43.66263,-79.52831
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.81139,-79.19662
5,M9A,Etobicoke,"Islington Avenue, Humber Valley Village",43.70718,-79.31192
6,M1B,Scarborough,"Malvern, Rouge",43.65739,-79.37804
7,M3B,North York,Don Mills,43.65034,-79.55362
8,M4B,East York,"Parkview Hill, Woodbine Gardens",43.78574,-79.15875
9,M5B,Downtown Toronto,"Garden District, Ryerson",43.72168,-79.34352


In [9]:
import os

# Foursquare credentials. Prefer the FOURSQUARE_CLIENT_ID / FOURSQUARE_CLIENT_SECRET
# environment variables so that secrets do not have to live in the notebook.
# NOTE(review): the literal fallbacks below are published credentials; rotate them
# and delete the fallbacks once the environment variables are in place.
CLIENT_ID = os.environ.get('FOURSQUARE_CLIENT_ID', 'DR1MIWRU23GQVVWMXIDRF2W2VOTWIHVEF0I0P1OX2EJX2WX1') # your Foursquare ID
CLIENT_SECRET = os.environ.get('FOURSQUARE_CLIENT_SECRET', '3Q03BWILIZFKLACVW12NYZD5ZOVRAPY0UX0NB1IIV1X0J5V5') # your Foursquare Secret
VERSION = '20180604'  # value sent as the `v` query parameter
LIMIT = 30  # value sent as the `limit` query parameter

In [11]:
address = 'Etobicoke, Toronto'

# Resolve the address to coordinates with Nominatim.
geolocator = Nominatim(user_agent="foursquare_agent")
location = geolocator.geocode(address)
# geocode() returns None when nothing matches; fail with a clear message instead
# of an AttributeError on the attribute access below.
if location is None:
    raise ValueError('Nominatim returned no result for address: {!r}'.format(address))
latitude = location.latitude
longitude = location.longitude
print('The Geograpical Co-ordinate of Etobicoke, Toronto, Ontario, Canada are {}, {}.'.format(latitude, longitude))

The Geograpical Co-ordinate of Etobicoke, Toronto, Ontario, Canada are 43.6435559, -79.5656326.



### Map of Etobicoke, Toronto

In [21]:
# Base map centred on the geocoded Etobicoke coordinates.
map_E = folium.Map(location=[latitude, longitude], zoom_start=10)

# One circle marker per row of df3, with a "<neighbourhood>, <borough>" popup.
marker_rows = zip(df3['Latitude'], df3['Longitude'], df3['Borough'], df3['Neighbourhood'])
for lat, lng, borough, neighborhood in marker_rows:
    label = folium.Popup('{}, {}'.format(neighborhood, borough), parse_html=True)
    marker = folium.CircleMarker(
        [lat, lng],
        radius=3,
        popup=label,
        color='green',
        fill=True,
        fill_color='#3199cc',
        fill_opacity=0.3,
        parse_html=False,
    )
    marker.add_to(map_E)

map_E

In [25]:

# Query the Foursquare "explore" endpoint around the geocoded Etobicoke coordinates.
radius = 700
LIMIT = 100  # overrides the LIMIT set in the credentials cell
url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
    CLIENT_ID,
    CLIENT_SECRET,
    VERSION,
    latitude,
    longitude,
    radius,
    LIMIT)
# A timeout keeps the cell from hanging; raise_for_status() surfaces HTTP errors
# here rather than as a KeyError when the payload is unpacked later.
response = requests.get(url, timeout=30)
response.raise_for_status()
results = response.json()

In [26]:

# Pull the venue items out of the first group of the response and flatten the
# nested JSON into a table.
first_group = results['response']['groups'][0]
venues = first_group['items']
nearby_venues = json_normalize(venues)
nearby_venues.columns

Index(['referralId', 'reasons.count', 'reasons.items', 'venue.id',
       'venue.name', 'venue.location.address', 'venue.location.crossStreet',
       'venue.location.lat', 'venue.location.lng',
       'venue.location.labeledLatLngs', 'venue.location.distance',
       'venue.location.postalCode', 'venue.location.cc', 'venue.location.city',
       'venue.location.state', 'venue.location.country',
       'venue.location.formattedAddress', 'venue.categories',
       'venue.photos.count', 'venue.photos.groups',
       'venue.location.neighborhood'],
      dtype='object')

In [27]:
def get_category_type(row):
    """Return the name of the first category listed for a venue row.

    The category list is read from ``row['categories']``; if that key is
    missing, ``row['venue.categories']`` is used instead.

    Returns ``None`` when the list is empty. Raises ``KeyError`` when the
    row has neither key.
    """
    # Catch only the missing-key case so that unrelated errors are not hidden.
    try:
        categories_list = row['categories']
    except KeyError:
        categories_list = row['venue.categories']

    if len(categories_list) == 0:
        return None
    return categories_list[0]['name']


### Some Nearby Venues

In [28]:
# Keep only the venue name, its category list and its coordinates.
filtered_columns = [
    'venue.name',
    'venue.categories',
    'venue.location.lat',
    'venue.location.lng',
]
nearby_venues = nearby_venues[filtered_columns]
nearby_venues.head()

Unnamed: 0,venue.name,venue.categories,venue.location.lat,venue.location.lng
0,Tim Hortons,"[{'id': '4bf58dd8d48988d1e0931735', 'name': 'C...",43.644705,-79.567659
1,Farmer's Market Etobicoke,"[{'id': '4bf58dd8d48988d1fa941735', 'name': 'F...",43.643061,-79.566191
2,Loblaws,"[{'id': '4bf58dd8d48988d118951735', 'name': 'G...",43.643848,-79.560113
3,State & Main Kitchen & Bar,"[{'id': '4bf58dd8d48988d1c4941735', 'name': 'R...",43.645778,-79.560374
4,TD Canada Trust,"[{'id': '4bf58dd8d48988d10a951735', 'name': 'B...",43.645502,-79.560006


In [29]:
# Replace each venue's category list with the name of its first category.
first_category = nearby_venues.apply(get_category_type, axis=1)
nearby_venues['venue.categories'] = first_category

# Shorten the dotted column names to their last component.
nearby_venues.columns = [label.split(".")[-1] for label in nearby_venues.columns]

nearby_venues.head(10)

Unnamed: 0,name,categories,lat,lng
0,Tim Hortons,Coffee Shop,43.644705,-79.567659
1,Farmer's Market Etobicoke,Farmers Market,43.643061,-79.566191
2,Loblaws,Grocery Store,43.643848,-79.560113
3,State & Main Kitchen & Bar,Restaurant,43.645778,-79.560374
4,TD Canada Trust,Bank,43.645502,-79.560006
5,Burnhamthorpe and The West Mall,Intersection,43.644786,-79.567065
6,GoodLife Fitness Etobicoke East Mall and Burnh...,Gym,43.645403,-79.559604
7,Joe Fresh,Clothing Store,43.643911,-79.560126
8,Rabba,Convenience Store,43.647096,-79.563026
9,Four Seasons Place,Hotel,43.647128,-79.563009


### Top 5 Venue Categories

In [30]:
# Count venues per category and show the first five entries of the ranking.
a = pd.Series(nearby_venues['categories'])
category_counts = a.value_counts()
category_counts.head(5)

Park                  1
Coffee Shop           1
Recreation Center     1
Theater               1
Mexican Restaurant    1
Farmers Market        1
Bank                  1
Intersection          1
Grocery Store         1
Hotel                 1
Restaurant            1
Gym                   1
Café                  1
Convenience Store     1
Clothing Store        1
Name: categories, dtype: int64

In [39]:
def getNearbyVenues(names, latitudes, longitudes, radius=700):
    """Collect Foursquare "explore" venues around each given location.

    Parameters
    ----------
    names, latitudes, longitudes : iterables
        Parallel sequences walked together with ``zip``: a label and the
        coordinates sent as the ``ll`` query parameter.
    radius : int, optional
        Value sent as the ``radius`` query parameter (default 700).

    Returns
    -------
    pandas.DataFrame
        One row per returned venue, with the columns 'Neighborhood',
        'Neighborhood Latitude', 'Neighborhood Longitude', 'Venue',
        'Venue Latitude', 'Venue Longitude' and 'Venue Category'.
        'Venue Category' is None for a venue with an empty category list.
        The frame is empty, with the same columns, when nothing is found.

    Notes
    -----
    Reads the module-level CLIENT_ID, CLIENT_SECRET, VERSION and LIMIT.
    Prints each name as it is processed.
    """
    columns = ['Neighborhood',
               'Neighborhood Latitude',
               'Neighborhood Longitude',
               'Venue',
               'Venue Latitude',
               'Venue Longitude',
               'Venue Category']

    rows = []
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)

        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID,
            CLIENT_SECRET,
            VERSION,
            lat,
            lng,
            radius,
            LIMIT)

        # A timeout avoids hanging on one location; raise_for_status() reports HTTP
        # errors directly instead of as a KeyError while unpacking the payload.
        response = requests.get(url, timeout=30)
        response.raise_for_status()
        venue_results = response.json()["response"]['groups'][0]['items']

        # Keep only the relevant fields of each nearby venue.
        for v in venue_results:
            venue = v['venue']
            categories = venue['categories']
            # A venue may come back with no categories; do not index into an empty list.
            category_name = categories[0]['name'] if categories else None
            rows.append((
                name,
                lat,
                lng,
                venue['name'],
                venue['location']['lat'],
                venue['location']['lng'],
                category_name))

    # Passing the columns to the constructor also works when `rows` is empty;
    # assigning seven names to an empty frame afterwards raises ValueError.
    nearby_venues = pd.DataFrame(rows, columns=columns)

    return(nearby_venues)