In [59]:
import numpy as np # library to handle data in a vectorized manner

import pandas as pd # library for data analsysis
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

import json # library to handle JSON files

#!conda install -c conda-forge geopy --yes # uncomment this line if you haven't completed the Foursquare API lab
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

import requests # library to handle requests
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans

#!conda install -c conda-forge folium=0.5.0 --yes # uncomment this line if you haven't completed the Foursquare API lab
import folium # map rendering library

## The initial libraries needed are loaded

In [36]:
from bs4 import BeautifulSoup

import requests

# Download the Wikipedia page listing the Canadian postal codes starting with "M".
# A timeout stops the cell from hanging on a stalled connection, and
# raise_for_status() aborts on an HTTP error instead of parsing an error page.
wikipedia_response = requests.get(
    'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M',
    timeout=30,
)
wikipedia_response.raise_for_status()

wikipedia_link = wikipedia_response.text  # raw HTML of the page
soup = BeautifulSoup(wikipedia_link, 'lxml')

## Using the BeautifulSoup library the wikipedia page is scraped

In [37]:
# Take the first <table> element of the scraped page.
postal_codes = soup.find('table')
if postal_codes is None:
    # soup.find returns None when nothing matches; fail with a readable message
    # instead of an AttributeError on the next line.
    raise ValueError('No <table> element was found in the scraped page.')

# Every <td> cell of that table, in document order.
fields = postal_codes.find_all('td')

### The table is extracted from the page with `soup.find`, and a dataframe is then built from the extracted cells

In [38]:
col_names = ['PostalCode', 'Borough', 'Neighborhood']

# The table cells are consumed three at a time:
# postal code, borough, neighborhood.
postcode, borough, neighborhood = [], [], []
for start in range(0, len(fields), 3):
    code_cell, borough_cell, hood_cell = fields[start], fields[start + 1], fields[start + 2]
    postcode.append(code_cell.text.strip())
    borough.append(borough_cell.text.strip())
    neighborhood.append(hood_cell.text.strip())

# Assemble the frame column by column, in the order given by col_names.
CNpost_codes = pd.DataFrame(
    dict(zip(col_names, (postcode, borough, neighborhood))),
    columns=col_names,
)

CNpost_codes.head()

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront


### Rows whose borough is 'Not assigned' are removed

In [39]:
# Keep only the rows that carry a real borough (neither missing nor 'Not assigned').
# A boolean mask is used instead of `CNpost_codes['Borough'].replace(..., inplace=True)`:
# that form mutates an intermediate Series, which recent pandas warns about and
# which has no effect on the frame once Copy-on-Write is enabled.
has_borough = CNpost_codes['Borough'].notna() & (CNpost_codes['Borough'] != 'Not assigned')
CNpost_codes = CNpost_codes.loc[has_borough].copy()

CNpost_codes.head()

Unnamed: 0,PostalCode,Borough,Neighborhood
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront
5,M5A,Downtown Toronto,Regent Park
6,M6A,North York,Lawrence Heights


### The data are grouped by PostalCode and Borough
Multiple Neighborhoods that belong to the same PostalCode and Borough are displayed together.

Neighborhoods listed as 'Not assigned' are given the name of their borough.

In [40]:
# One row per (PostalCode, Borough); the neighborhoods sharing that pair are
# joined into a single comma-separated string.
nCNpost_codes = (
    CNpost_codes
    .groupby(['PostalCode', 'Borough'])['Neighborhood']
    .apply(', '.join)
    .reset_index()
)
nCNpost_codes.columns = col_names

# A neighborhood that is still 'Not assigned' takes the name of its borough.
# The assignment goes through .loc on the frame itself, so it does not rely on
# an in-place replace on a selected column (ineffective under Copy-on-Write),
# and no borough name has to be hard-coded.
unnamed = nCNpost_codes['Neighborhood'] == 'Not assigned'
nCNpost_codes.loc[unnamed, 'Neighborhood'] = nCNpost_codes.loc[unnamed, 'Borough']

nCNpost_codes

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M1B,Scarborough,"Rouge, Malvern"
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union"
2,M1E,Scarborough,"Guildwood, Morningside, West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae
5,M1J,Scarborough,Scarborough Village
6,M1K,Scarborough,"East Birchmount Park, Ionview, Kennedy Park"
7,M1L,Scarborough,"Clairlea, Golden Mile, Oakridge"
8,M1M,Scarborough,"Cliffcrest, Cliffside, Scarborough Village West"
9,M1N,Scarborough,"Birch Cliff, Cliffside West"


### The size of the dataframe is displayed below

In [41]:
# (rows, columns) of the grouped postal-code table
nCNpost_codes.shape

(103, 3)

### Reading in the geographic coordinates for the Canadian postal codes

In [42]:
# Load the coordinates table, then relabel its columns by position so the
# postal-code column carries the same name as in the scraped table.
geo_source = 'http://cocl.us/Geospatial_data'
CNpostal_geo = pd.read_csv(geo_source)
CNpostal_geo.columns = ['PostalCode', 'Latitude', 'Longitude']

## Inserting the new Latitude and Longitude columns to the previous table

In [43]:
# Attach latitude/longitude to each postal code; the inner join keeps only
# the codes present in both tables.
CNpostal_loc = nCNpost_codes.merge(CNpostal_geo, how='inner', on='PostalCode')

CNpostal_loc

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M1B,Scarborough,"Rouge, Malvern",43.806686,-79.194353
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476
5,M1J,Scarborough,Scarborough Village,43.744734,-79.239476
6,M1K,Scarborough,"East Birchmount Park, Ionview, Kennedy Park",43.727929,-79.262029
7,M1L,Scarborough,"Clairlea, Golden Mile, Oakridge",43.711112,-79.284577
8,M1M,Scarborough,"Cliffcrest, Cliffside, Scarborough Village West",43.716316,-79.239476
9,M1N,Scarborough,"Birch Cliff, Cliffside West",43.692657,-79.264848


## We will represent the boroughs only in Toronto
## For that we first need the coordinates of Toronto

In [44]:
ad = 'Toronto, Canada'

# Nominatim needs an application-specific user agent; current geopy releases
# refuse to build the geocoder with the default one.
geolocator = Nominatim(user_agent='toronto_neighborhood_explorer')
loc = geolocator.geocode(ad)
if loc is None:
    # geocode() returns None when the address cannot be resolved
    raise ValueError('Could not geocode address: {}'.format(ad))

latitude = loc.latitude
longitude = loc.longitude

print('The City of Toronto coordinates are {}, {}.'.format(latitude, longitude))

The City of Toronto coordinates are 43.653963, -79.387207.


## Using the latitudes and longitudes we can create a Toronto map

In [60]:
# Base map centred on the Toronto coordinates
map_tor = folium.Map(location=[latitude, longitude], zoom_start=10)

# add markers to map
# The loop targets use their own names so they do not overwrite the
# module-level `borough` / `neighborhood` lists built while scraping.
marker_rows = zip(
    CNpostal_loc['Latitude'],
    CNpostal_loc['Longitude'],
    CNpostal_loc['Borough'],
    CNpostal_loc['Neighborhood'],
)
for lat, lng, borough_name, neighborhood_name in marker_rows:
    popup_text = '{}, {}'.format(neighborhood_name, borough_name)
    popup = folium.Popup(popup_text, parse_html=True)
    # NOTE(review): parse_html is a Popup option; on CircleMarker it looks like
    # a no-op - confirm before removing it.
    folium.CircleMarker(
        [lat, lng],
        radius=3,
        popup=popup,
        color='green',
        fill=True,
        fill_color='#3199cc',
        fill_opacity=0.3,
        parse_html=False).add_to(map_tor)

map_tor

# Exploring Toronto neighborhoods

## Initializing with foursquare

In [57]:
import os

# Credentials are read from the environment so that real values never have to
# be typed into (and saved with) the notebook. The '...' placeholders are kept
# as fallbacks when the variables are not set.
CLIENT_ID = os.environ.get('FOURSQUARE_CLIENT_ID', '...') # your Foursquare ID
CLIENT_SECRET = os.environ.get('FOURSQUARE_CLIENT_SECRET', '...') # your Foursquare Secret
VERSION = '20180605' # Foursquare API version

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
# The secret is masked: cell output is stored in the notebook file and would
# otherwise leak the value to anyone the notebook is shared with.
print('CLIENT_SECRET:' + '*' * len(CLIENT_SECRET))

Your credentails:
CLIENT_ID: ...
CLIENT_SECRET:...


## Selecting Toronto neighborhoods

In [47]:
# Keep the rows whose borough name contains 'Toronto', renumbered from 0.
in_toronto = CNpostal_loc['Borough'].str.contains('Toronto')
OnlyTor = CNpostal_loc.loc[in_toronto].reset_index(drop=True)

OnlyTor

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M4E,East Toronto,The Beaches,43.676357,-79.293031
1,M4K,East Toronto,"The Danforth West, Riverdale",43.679557,-79.352188
2,M4L,East Toronto,"The Beaches West, India Bazaar",43.668999,-79.315572
3,M4M,East Toronto,Studio District,43.659526,-79.340923
4,M4N,Central Toronto,Lawrence Park,43.72802,-79.38879
5,M4P,Central Toronto,Davisville North,43.712751,-79.390197
6,M4R,Central Toronto,North Toronto West,43.715383,-79.405678
7,M4S,Central Toronto,Davisville,43.704324,-79.38879
8,M4T,Central Toronto,"Moore Park, Summerhill East",43.689574,-79.38316
9,M4V,Central Toronto,"Deer Park, Forest Hill SE, Rathnelly, South Hi...",43.686412,-79.400049


## Map of the Toronto-only neighborhoods

In [61]:
# create map of Toronto using latitude and longitude values
Tor_map = folium.Map(location=[latitude, longitude], zoom_start=10)

# add markers to map
for lat, lng, borough, neighborhood in zip(OnlyTor['Latitude'], OnlyTor['Longitude'], OnlyTor['Borough'], OnlyTor['Neighborhood']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=3,
        popup=label,
        color='green',
        fill=True,
        fill_color='#3199cc',
        fill_opacity=0.3,
        parse_html=False).add_to(Tor_map)  
    
Tor_map

## Exploring the 11th neighborhood

In [49]:
# Neighborhood stored under index label 10 of the Toronto-only frame
OnlyTor.loc[10, 'Neighborhood']

'Rosedale'

## Now extracting the longitude and latitude values of 'Rosedale'

In [50]:
# Scalar lookups on the row labelled 10 of the Toronto-only frame
n_name = OnlyTor.at[10, 'Neighborhood'] # neighbourhood name
n_latitude = OnlyTor.at[10, 'Latitude'] # neighbourhood latitude value
n_longitude = OnlyTor.at[10, 'Longitude'] # neighbourhood longitude value

coordinates_message = 'Latitude and longitude values of {} are {}, {}.'
print(coordinates_message.format(n_name, n_latitude, n_longitude))

Latitude and longitude values of Rosedale are 43.6795626, -79.37752940000001.


## Searching for the top 100 places within 500 m of Rosedale

In [58]:
LIMIT = 100   # maximum number of venues requested
radius = 500  # value sent as the `radius` query parameter

# Foursquare "explore" endpoint; the placeholders are filled positionally in
# the order: client id, client secret, version, lat, lng, radius, limit.
explore_template = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'
query_values = (CLIENT_ID, CLIENT_SECRET, VERSION, n_latitude, n_longitude, radius, LIMIT)
url = explore_template.format(*query_values)
url

'https://api.foursquare.com/v2/venues/explore?&client_id=...&client_secret=...&v=20180605&ll=43.6795626,-79.37752940000001&radius=500&limit=100'

In [52]:
# Query Foursquare. The timeout keeps the cell from hanging on a stalled
# connection, and raise_for_status() surfaces HTTP errors here rather than as a
# KeyError later when the payload is unpacked.
explore_response = requests.get(url, timeout=30)
explore_response.raise_for_status()

results = explore_response.json()
results

{'meta': {'code': 200, 'requestId': '5cbeb43d4c1f6743e2c76d6d'},
 'response': {'headerLocation': 'Rosedale',
  'headerFullLocation': 'Rosedale, Toronto',
  'headerLocationGranularity': 'neighborhood',
  'totalResults': 4,
  'suggestedBounds': {'ne': {'lat': 43.6840626045, 'lng': -79.37131878274371},
   'sw': {'lat': 43.675062595499995, 'lng': -79.38374001725632}},
  'groups': [{'type': 'Recommended Places',
    'name': 'recommended',
    'items': [{'reasons': {'count': 0,
       'items': [{'summary': 'This spot is popular',
         'type': 'general',
         'reasonName': 'globalInteractionReason'}]},
      'venue': {'id': '4aff2d47f964a520743522e3',
       'name': 'Rosedale Park',
       'location': {'address': '38 Scholfield Ave.',
        'crossStreet': 'at Edgar Ave.',
        'lat': 43.68232820227814,
        'lng': -79.37893434347683,
        'labeledLatLngs': [{'label': 'display',
          'lat': 43.68232820227814,
          'lng': -79.37893434347683}],
        'distance': 32

## Extracting the category of each venue

In [53]:
def get_category_type(row):
    """Return the name of the first category attached to a venue record.

    Parameters
    ----------
    row : mapping-like (e.g. dict or pandas Series)
        Holds the category list under ``'categories'`` or, failing that,
        under ``'venue.categories'``.

    Returns
    -------
    The ``'name'`` entry of the first category, or ``None`` when the record
    has no categories (empty list or missing value).

    Raises
    ------
    KeyError
        If neither ``'categories'`` nor ``'venue.categories'`` is present.
    """
    try:
        categories_list = row['categories']
    except KeyError:
        # Only a missing key triggers the fallback; any other error propagates
        # instead of being hidden by a bare except.
        categories_list = row['venue.categories']

    # A missing value (None, or a float NaN as a flattened frame may hold)
    # is handled like an empty list rather than failing on len().
    if categories_list is None or isinstance(categories_list, float):
        return None
    if len(categories_list) == 0:
        return None
    return categories_list[0]['name']

## Now cleaning the json and structuring into a dataframe

In [54]:
# Venue records of the first result group in the Foursquare response
venues = results['response']['groups'][0]['items']

# flatten JSON - the top-level pd.json_normalize replaces the deprecated
# pandas.io.json.json_normalize entry point
nearby_venues = pd.json_normalize(venues)

# filter columns; .copy() gives an independent frame so the column assignment
# below never writes into a view of the flattened data
filtered_columns = ['venue.name', 'venue.categories', 'venue.location.lat', 'venue.location.lng']
nearby_venues = nearby_venues.loc[:, filtered_columns].copy()

# filter the category for each row
nearby_venues['venue.categories'] = nearby_venues.apply(get_category_type, axis=1)

# clean columns: keep only the last dotted component ('venue.location.lat' -> 'lat')
nearby_venues.columns = [col.split(".")[-1] for col in nearby_venues.columns]

nearby_venues.head()

Unnamed: 0,name,categories,lat,lng
0,Rosedale Park,Playground,43.682328,-79.378934
1,Whitney Park,Park,43.682036,-79.373788
2,Alex Murray Parkette,Park,43.6783,-79.382773
3,Milkman's Lane,Trail,43.676352,-79.373842


## Checking the number of places returned

In [55]:
# Number of rows in the venues frame = number of venues returned
venue_count = len(nearby_venues)
print('{} venues were returned by Foursquare.'.format(venue_count))

4 venues were returned by Foursquare.
