<h1 align=center><font size = 5>Read and Clean Neighbourhood Data for Toronto</font></h1>

In [1]:
import numpy as np # library to handle data in a vectorized manner

import pandas as pd # library for data analsysis
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

import json # library to handle JSON files

!conda install -c conda-forge geopy --yes # uncomment this line if you haven't completed the Foursquare API lab
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

import requests # library to handle requests
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans

!conda install -c conda-forge folium=0.5.0 --yes # uncomment this line if you haven't completed the Foursquare API lab
import folium # map rendering library

print('Libraries imported.')

Solving environment: done

## Package Plan ##

  environment location: /opt/conda/envs/Python36

  added / updated specs: 
    - geopy


The following packages will be downloaded:

    package                    |            build
    ---------------------------|-----------------
    geographiclib-1.50         |             py_0          34 KB  conda-forge
    geopy-1.21.0               |             py_0          58 KB  conda-forge
    ca-certificates-2019.11.28 |       hecc5488_0         145 KB  conda-forge
    certifi-2019.11.28         |           py36_0         149 KB  conda-forge
    openssl-1.1.1d             |       h516909a_0         2.1 MB  conda-forge
    ------------------------------------------------------------
                                           Total:         2.5 MB

The following NEW packages will be INSTALLED:

    geographiclib:   1.50-py_0         conda-forge
    geopy:           1.21.0-py_0       conda-forge

The following packages will be UPDATED:

    ca-

#### Read contents of url using urllib library and transfer contents on a pandas dataframe with read_html <br>
Only keep rows for which __Borough__ is NOT "Not assigned"  
If __Neighbourhood__ is "Not assigned" set it to name of Borough


In [2]:
import urllib.request

url = "https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M"
content = urllib.request.urlopen(url).read()
df_list = pd.read_html(content)
df_t = df_list[0]
df_n= df_t[df_t.Borough != 'Not assigned']
df = df_n.reset_index(drop=True)
#df_n.head(10)

# iterate over rows and replace "Not assigned" Neighbourhood with Borough
for index in range(len(df)):
    if (df.loc[index, "Neighbourhood"] == 'Not assigned'): 
        df.loc[index,"Neighbourhood"] = df.loc[index, "Borough"]
        
df.head(10)
    


Unnamed: 0,Postcode,Borough,Neighbourhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,Harbourfront
3,M6A,North York,Lawrence Heights
4,M6A,North York,Lawrence Manor
5,M7A,Downtown Toronto,Queen's Park
6,M9A,Queen's Park,Queen's Park
7,M1B,Scarborough,Rouge
8,M1B,Scarborough,Malvern
9,M3B,North York,Don Mills North


In [3]:
df.shape

(210, 3)

#### Get latitude and longitude for PostalCodes

In [4]:
# Reaad from CSV file:
import pandas as pd

url = "http://cocl.us/Geospatial_data/Geospatial_Coordinates.csv"
df_codes = pd.read_csv(url)
df_codes.rename(columns = {'Postal Code': 'Postcode'}, inplace = True) # Rename Postal Code to Postcode
df_codes.head()


Unnamed: 0,Postcode,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


Add latitide and longitude columns to data frame

In [5]:
dfn = pd.merge(df, df_codes, on ='Postcode')
dfn.head(10)

Unnamed: 0,Postcode,Borough,Neighbourhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,Harbourfront,43.65426,-79.360636
3,M6A,North York,Lawrence Heights,43.718518,-79.464763
4,M6A,North York,Lawrence Manor,43.718518,-79.464763
5,M7A,Downtown Toronto,Queen's Park,43.662301,-79.389494
6,M9A,Queen's Park,Queen's Park,43.667856,-79.532242
7,M1B,Scarborough,Rouge,43.806686,-79.194353
8,M1B,Scarborough,Malvern,43.806686,-79.194353
9,M3B,North York,Don Mills North,43.745906,-79.352188


In [6]:
dfn.shape

(210, 5)

#### Let's narrow down to boroughs that have the word Toronto

In [31]:
toronto_nbhd = dfn[dfn['Borough'].str.contains('Toronto')]  
toronto_nbhd.reset_index(inplace = True, drop = True)
toronto_nbhd.head(10)

Unnamed: 0,Postcode,Borough,Neighbourhood,Latitude,Longitude
0,M5A,Downtown Toronto,Harbourfront,43.65426,-79.360636
1,M7A,Downtown Toronto,Queen's Park,43.662301,-79.389494
2,M5B,Downtown Toronto,Ryerson,43.657162,-79.378937
3,M5B,Downtown Toronto,Garden District,43.657162,-79.378937
4,M5C,Downtown Toronto,St. James Town,43.651494,-79.375418
5,M4E,East Toronto,The Beaches,43.676357,-79.293031
6,M5E,Downtown Toronto,Berczy Park,43.644771,-79.373306
7,M5G,Downtown Toronto,Central Bay Street,43.657952,-79.387383
8,M6G,Downtown Toronto,Christie,43.669542,-79.422564
9,M5H,Downtown Toronto,Adelaide,43.650571,-79.384568


Print number of neighbourhoods having the word "Toronto"

In [22]:
toronto_nbhd.shape

(74, 5)

### Now lets analyse the Downtown Toronto neighbourhood

In [23]:
address = 'Toronto, Canada'

geolocator = Nominatim(user_agent="Toronto_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Toronto are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Toronto are 43.653963, -79.387207.


#### Create a map of Toronto with neighborhoods superimposed

In [51]:
# create map of Toronto using latitude and longitude values
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=10)


# add markers to map
for lat, lng, borough, neighborhood in zip(toronto_nbhd['Latitude'], toronto_nbhd['Longitude'], toronto_nbhd['Borough'], toronto_nbhd['Neighbourhood']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_toronto)  

map_toronto
    

### Now we will use the Foursquare API to explore neighborhoods and segment them

#### Denine Foursquare Credentials

In [37]:
CLIENT_ID = 'OHO02Z4Z1LC5E2ATUXJQH1S2RJVI4OLQ1Z2RLB2U01V2J0RX' 
CLIENT_SECRET = 'CG4FDCUJ5CGAE3ZR550L4BFYVUICFVGIHRVNUPXZSZU4I43K' 
VERSION = '20180605' # Foursquare API version

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: OHO02Z4Z1LC5E2ATUXJQH1S2RJVI4OLQ1Z2RLB2U01V2J0RX
CLIENT_SECRET:CG4FDCUJ5CGAE3ZR550L4BFYVUICFVGIHRVNUPXZSZU4I43K


#### Let;s explore first neighborhood in our dataframe

In [33]:
toronto_nbhd.loc[0,'Neighbourhood']

'Harbourfront'

Get the neighborhood's latitude and longitude

In [35]:
neighborhood_latitude = toronto_nbhd.loc[0, 'Latitude'] # neighborhood latitude value
neighborhood_longitude = toronto_nbhd.loc[0, 'Longitude'] # neighborhood longitude value

neighborhood_name = toronto_nbhd.loc[0, 'Neighbourhood'] # neighborhood name

print('Latitude and longitude values of {} are {}, {}.'.format(neighborhood_name, 
                                                               neighborhood_latitude, 
                                                               neighborhood_longitude))

Latitude and longitude values of Harbourfront are 43.6542599, -79.3606359.


#### Now let's get the top 100 venues in Harbourfront 

In [38]:
lat = neighborhood_latitude
lng = neighborhood_longitude
radius = 500
LIMIT= 100
url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)

In [39]:
results = requests.get(url).json()
results

{'meta': {'code': 200, 'requestId': '5e435f2ec546f3001b87391b'},
 'response': {'suggestedFilters': {'header': 'Tap to show:',
   'filters': [{'name': 'Open now', 'key': 'openNow'}]},
  'headerLocation': 'Corktown',
  'headerFullLocation': 'Corktown, Toronto',
  'headerLocationGranularity': 'neighborhood',
  'totalResults': 46,
  'suggestedBounds': {'ne': {'lat': 43.6587599045, 'lng': -79.3544279001486},
   'sw': {'lat': 43.6497598955, 'lng': -79.36684389985142}},
  'groups': [{'type': 'Recommended Places',
    'name': 'recommended',
    'items': [{'reasons': {'count': 0,
       'items': [{'summary': 'This spot is popular',
         'type': 'general',
         'reasonName': 'globalInteractionReason'}]},
      'venue': {'id': '54ea41ad498e9a11e9e13308',
       'name': 'Roselle Desserts',
       'location': {'address': '362 King St E',
        'crossStreet': 'Trinity St',
        'lat': 43.653446723052674,
        'lng': -79.3620167174383,
        'labeledLatLngs': [{'label': 'display',
 

In [40]:
# function that extracts the category of the venue
def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return None
    else:
        return categories_list[0]['name']

In [44]:
venues = results['response']['groups'][0]['items']
    
nearby_venues = json_normalize(venues) # flatten JSON

# filter columns
filtered_columns = ['venue.name', 'venue.categories', 'venue.location.lat', 'venue.location.lng']
nearby_venues =nearby_venues.loc[:, filtered_columns]

# filter the category for each row
nearby_venues['venue.categories'] = nearby_venues.apply(get_category_type, axis=1)

# clean columns
nearby_venues.columns = [col.split(".")[-1] for col in nearby_venues.columns]

nearby_venues.head()

Unnamed: 0,name,categories,lat,lng
0,Roselle Desserts,Bakery,43.653447,-79.362017
1,Tandem Coffee,Coffee Shop,43.653559,-79.361809
2,Cooper Koo Family YMCA,Gym / Fitness Center,43.653191,-79.357947
3,Body Blitz Spa East,Spa,43.654735,-79.359874
4,Impact Kitchen,Restaurant,43.656369,-79.35698


In [42]:
print('{} venues were returned by Foursquare.'.format(nearby_venues.shape[0]))

46 venues were returned by Foursquare.


Number of unique categires

In [46]:
nearby_venues['categories'].nunique()  

28

#### Number of venues for each category

In [50]:
nearby_venues.groupby("categories")['name'].count()

categories
Antique Shop             1
Bakery                   3
Bank                     1
Beer Store               1
Breakfast Spot           2
Café                     3
Chocolate Shop           1
Coffee Shop              8
Cosmetics Shop           1
Dessert Shop             1
Electronics Store        1
Event Space              1
Farmers Market           1
French Restaurant        1
Gym / Fitness Center     1
Health Food Store        1
Historic Site            1
Hotel                    1
Ice Cream Shop           1
Mexican Restaurant       2
Park                     3
Performing Arts Venue    1
Pub                      3
Restaurant               2
Shoe Store               1
Spa                      1
Theater                  1
Yoga Studio              1
Name: name, dtype: int64