## Segmenting and Clustering Neighborhoods in Toronto Section 1

#### Install BeautifulSoup4 


In [11]:
#!conda install -c anaconda beautifulsoup4

In [10]:
!conda install -c conda-forge folium=0.5.0 --yes # uncomment this line if you haven't completed the Foursquare API lab

Solving environment: done

## Package Plan ##

  environment location: /opt/conda/envs/Python36

  added / updated specs: 
    - folium=0.5.0


The following packages will be downloaded:

    package                    |            build
    ---------------------------|-----------------
    folium-0.5.0               |             py_0          45 KB  conda-forge
    altair-4.1.0               |             py_1         614 KB  conda-forge
    vincent-0.4.4              |             py_1          28 KB  conda-forge
    branca-0.4.0               |             py_0          26 KB  conda-forge
    ------------------------------------------------------------
                                           Total:         713 KB

The following NEW packages will be INSTALLED:

    altair:  4.1.0-py_1 conda-forge
    branca:  0.4.0-py_0 conda-forge
    folium:  0.5.0-py_0 conda-forge
    vincent: 0.4.4-py_1 conda-forge


Downloading and Extracting Packages
folium-0.5.0         | 45 KB     | #####

In [3]:
#### Import BeautifulSoup from bs4

In [12]:
from bs4 import BeautifulSoup

#### Import lxml.html, requests, urllib.request, time, numpy, pandas and urlopen

In [13]:
import lxml.html
import requests
import urllib.request
import time
import numpy as np
import pandas as pd
from urllib.request import urlopen

#### Web scraping: use the BeautifulSoup package to fetch the information from the Wikipedia page

#### Fetch the table in the html for class: wikitable

In [14]:
# Wikipedia page listing the Canadian postal codes that start with "M".
url = 'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'

# Parse inside a `with` block so the HTTP response is closed as soon as
# BeautifulSoup has consumed it, instead of staying open until the kernel
# happens to garbage-collect it.
with urlopen(url) as html:
    soup = BeautifulSoup(html, 'html.parser')

# fetch the table in the html for class: wikitable
tables = soup.find_all('table', attrs={'class':'wikitable'})

#### The Program below transforms the data

#### Replaces / with ,

#### Replace empty Neighborhood with values in the Borough column

#### Drop the records whose Borough is "Not assigned"

In [15]:
# Column values scraped from the wikitable rows, collected as parallel lists.
PostalCodes = []
Boroughs = []
Neighborhoods = []

for table in tables:
    for row in table.find_all('tr'):
        cells = row.find_all('td')

        # Rows with fewer than three <td> cells are skipped; the first three
        # cells are read as postal code, borough and neighborhood.
        if len(cells) > 2:
            PostalCodes.append(cells[0].text.strip())
            Boroughs.append(cells[1].text.strip())
            # In the Neighborhood column: replace / with ,
            Neighborhoods.append(cells[2].text.strip().replace('/', ','))

# Build the frame in one step from the three lists.
df1 = pd.DataFrame({
    'PostalCode': PostalCodes,
    'Borough': Boroughs,
    'Neighborhood': Neighborhoods,
})

# Replace empty Neighborhood with the value in the Borough column of the same row.
# An explicit row-aligned mask is used instead of
# `df1.Neighborhood.replace('', df1['Borough'], inplace=True)`: passing a Series
# as the replacement value is not a supported element-wise fill, and the chained
# in-place call is not guaranteed to write back into df1.
df1['Neighborhood'] = df1['Neighborhood'].mask(df1['Neighborhood'] == '', df1['Borough'])

# Drop the records whose Borough is "Not assigned" (original index labels are kept).
df1 = df1[df1['Borough'] != 'Not assigned']

# Reset the index and assign to df2
df2 = df1.reset_index(drop=True)

df2

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Regent Park , Harbourfront"
3,M6A,North York,"Lawrence Manor , Lawrence Heights"
4,M7A,Downtown Toronto,"Queen's Park , Ontario Provincial Government"
5,M9A,Etobicoke,Islington Avenue
6,M1B,Scarborough,"Malvern , Rouge"
7,M3B,North York,Don Mills
8,M4B,East York,"Parkview Hill , Woodbine Gardens"
9,M5B,Downtown Toronto,"Garden District, Ryerson"


#### download the  Geospatial_Coordinates.csv 

In [16]:

# Download the postal-code coordinate table with the standard library rather than
# `!wget -q`: the shell call needs wget on the host and stays silent on failure,
# so the success message below was printed even when nothing had been downloaded.
# urlretrieve raises on network/HTTP errors, so the message is only reached on success.
urllib.request.urlretrieve('http://cocl.us/Geospatial_data', 'Geospatial_Coordinates.csv')
print('Data downloaded!')

Data downloaded!


### Read the csv file into a dataframe and merge with df2

In [17]:
# Coordinates per postal code, as downloaded to Geospatial_Coordinates.csv.
geo_df = pd.read_csv('Geospatial_Coordinates.csv')

# Inner join on the postal code. The previous how='right' kept coordinate rows
# that had no scraped match, leaving PostalCode/Borough/Neighborhood as NaN once
# the right-hand key column is dropped, and the row order of a right join is not
# the same across pandas versions. An inner join keeps only matched rows, in the
# order of df2, so later positional look-ups (row 0) stay stable.
result = pd.merge(
    df2,
    geo_df,
    left_on='PostalCode',
    right_on='Postal Code',
    how='inner',
).drop(['Postal Code'], axis=1)  # 'PostalCode' already carries the key

result1 = result.reset_index(drop=True)

# Raise the display limit so the whole frame is rendered below.
pd.set_option('display.max_rows', result1.shape[0]+1)
result1

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,"Regent Park , Harbourfront",43.65426,-79.360636
3,M6A,North York,"Lawrence Manor , Lawrence Heights",43.718518,-79.464763
4,M7A,Downtown Toronto,"Queen's Park , Ontario Provincial Government",43.662301,-79.389494
5,M9A,Etobicoke,Islington Avenue,43.667856,-79.532242
6,M1B,Scarborough,"Malvern , Rouge",43.806686,-79.194353
7,M3B,North York,Don Mills,43.745906,-79.352188
8,M4B,East York,"Parkview Hill , Woodbine Gardens",43.706397,-79.309937
9,M5B,Downtown Toronto,"Garden District, Ryerson",43.657162,-79.378937


#### Get the shape of the final dataframe

In [18]:
# (number of rows, number of columns) of the merged frame
result1.shape

(103, 5)

#### Get the Unique Boroughs and Neighborhood

In [19]:
# Distinct Borough values, and distinct non-null Neighborhood values.
borough_count = len(pd.unique(result1['Borough']))
neighborhood_count = result1['Neighborhood'].nunique()

summary = 'The dataframe has {} boroughs and {} Neighborhood.'.format(
    borough_count,
    neighborhood_count,
)
print(summary)

The dataframe has 10 boroughs and 98 Neighborhood.


#### Install conda-forge folium and import folium

In [20]:
#!conda install -c conda-forge folium=0.5.0 --yes # uncomment this line if you haven't completed the Foursquare API lab

#### Import Nominatim

In [23]:
from geopy.geocoders import Nominatim 

#### Use geopy library to get the latitude and longitude values of Toronto, ON

In [32]:
address = 'Toronto, ON'

geolocator = Nominatim(user_agent="ny_explorer")
location = geolocator.geocode(address)

# geocode() can come back with None when no match is found; fail with a clear
# message here instead of an AttributeError on `.latitude` below.
if location is None:
    raise ValueError('Could not geocode address: {!r}'.format(address))

# Coordinates used as the map centre in the cells that follow.
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Toronto are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Toronto are 43.6534817, -79.3839347.


## create map of ONTARIO , CA using latitude and longitude values

In [47]:
import folium # map rendering library

# Base map centred on the latitude/longitude obtained from the geocoder.
map_Ontario = folium.Map(location=[latitude, longitude], zoom_start=10)

# One circle marker per row of result1, with a "<neighborhood>, <borough>" popup.
marker_columns = ['Latitude', 'Longitude', 'Borough', 'Neighborhood']
for lat, lng, borough, neighborhood in result1[marker_columns].itertuples(index=False, name=None):
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=folium.Popup('{}, {}'.format(neighborhood, borough), parse_html=True),
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False,
    ).add_to(map_Ontario)

map_Ontario


## Create map of ONLY those Borough which contain TORONTO, ON using latitude and longitude values

In [48]:
# Keep only the rows whose Borough name contains "Toronto".
# na=False treats a missing Borough as "no match"; without it the mask would
# contain NaN and boolean indexing would fail.
toronto_data = result1[result1['Borough'].str.contains("Toronto", na=False)].reset_index(drop=True)

# create map of Toronto using latitude and longitude values
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=11)

# add one marker per remaining row; the popup shows the borough name
for lat, lng, borough in zip(toronto_data['Latitude'], toronto_data['Longitude'], toronto_data['Borough']):
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=folium.Popup(borough, parse_html=True),
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_toronto)

map_toronto


## Next, we are going to start utilizing the Foursquare API to explore the neighborhoods and segment them

## Define Foursquare Credentials and Version

## Print my Client ID, Client Secret and Credentials

In [49]:

# Define Foursquare credentials and API version.
#
# The client ID and secret are read from the environment so they never live in
# the notebook source or in its saved outputs. Set FOURSQUARE_CLIENT_ID and
# FOURSQUARE_CLIENT_SECRET before starting the kernel.
# NOTE(review): a key pair was previously committed in this cell in plain text;
# it should be treated as compromised and rotated.
import os

try:
    CLIENT_ID = os.environ['FOURSQUARE_CLIENT_ID'] # your Foursquare ID
    CLIENT_SECRET = os.environ['FOURSQUARE_CLIENT_SECRET'] # your Foursquare Secret
except KeyError as missing:
    raise RuntimeError(
        'Set the {} environment variable before running this cell.'.format(missing.args[0])
    ) from None
VERSION = '20180605' # Foursquare API version

# Confirm the credentials were picked up without echoing the secret itself.
print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + '*' * len(CLIENT_SECRET))




Your credentails:
CLIENT_ID: <redacted>
CLIENT_SECRET:<redacted>


## Let's explore the first neighborhood in our dataframe. Downtown Toronto

## Get the neighborhood's name. 

In [50]:


# Borough value of the first row (index label 0) of toronto_data
toronto_data.loc[0, 'Borough']


'Downtown Toronto'

In [51]:
# Read the name and coordinates from the first row of toronto_data.
Borough_name = toronto_data.at[0, 'Borough'] # borough name
Borough_latitude = toronto_data.at[0, 'Latitude'] # latitude value
Borough_longitude = toronto_data.at[0, 'Longitude'] # longitude value

coordinates_message = 'Latitude and longitude values of {} are {}, {}.'.format(
    Borough_name,
    Borough_latitude,
    Borough_longitude,
)
print(coordinates_message)


Latitude and longitude values of Downtown Toronto are 43.6542599, -79.3606359.


### limit of number of venues returned by Foursquare API  ~ 100

### Define radius ~ 500

### Explore the venue around DownTown Toronto

In [52]:

LIMIT = 100 # limit of number of venues returned by Foursquare API

radius = 500 # define radius

# Template for the venues/explore request; the placeholders are filled in the
# order: client id, client secret, version, latitude, longitude, radius, limit.
url_template = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'
request_location = (VERSION, Borough_latitude, Borough_longitude, radius, LIMIT)

# create URL (contains the real credentials; used for the request itself)
url = url_template.format(CLIENT_ID, CLIENT_SECRET, *request_location)

# Display the URL with the client secret masked, so the secret is not written
# into the notebook's saved output.
url_template.format(CLIENT_ID, '<redacted>', *request_location)


'https://api.foursquare.com/v2/venues/explore?&client_id=<redacted>&client_secret=<redacted>&v=20180605&ll=43.6542599,-79.3606359&radius=500&limit=100'

### Send the GET request and examine the results

In [53]:

# Send the GET request. A timeout keeps the cell from hanging indefinitely, and
# raise_for_status() turns an HTTP error response into an exception here rather
# than a confusing lookup error when the JSON is unpacked in a later cell.
response = requests.get(url, timeout=30)
response.raise_for_status()
results_toronto = response.json()



### Function that extracts the category of the venue

In [54]:

def get_category_type(row):
    """Return the name of the first category of a venue record.

    Parameters
    ----------
    row : mapping-like
        Any object indexable by key (for example a dict or a pandas Series).
        The category list is read from ``row['categories']``; when that key is
        missing it is read from ``row['venue.categories']`` instead.

    Returns
    -------
    The ``'name'`` entry of the first element of the category list, or ``None``
    when the list is empty.

    Raises
    ------
    KeyError
        If neither ``'categories'`` nor ``'venue.categories'`` is present.
    """
    try:
        categories_list = row['categories']
    except KeyError:
        # Only a missing key triggers the fallback; any other failure
        # (including KeyboardInterrupt) propagates instead of being swallowed.
        categories_list = row['venue.categories']

    if len(categories_list) == 0:
        return None
    return categories_list[0]['name']


### To identify the venues in Downtown Toronto using Foursquare

In [55]:

# json_normalize is exposed at the top level in newer pandas, and the
# pandas.io.json import path is deprecated there; fall back to the old location
# so the cell still runs on older installations.
try:
    from pandas import json_normalize
except ImportError:
    from pandas.io.json import json_normalize

# Venue records of the first group in the explore response.
venues = results_toronto['response']['groups'][0]['items']

nearby_venues = json_normalize(venues) # flatten JSON

# filter columns (explicit copy so the column assignment below writes to an
# independent frame)
filtered_columns = ['venue.name', 'venue.categories', 'venue.location.lat', 'venue.location.lng']
nearby_venues = nearby_venues.loc[:, filtered_columns].copy()

# replace each row's category list with the name of its first category
nearby_venues['venue.categories'] = nearby_venues.apply(get_category_type, axis=1)

# clean columns: keep only the part after the last dot (e.g. 'venue.location.lat' -> 'lat')
nearby_venues.columns = [col.split(".")[-1] for col in nearby_venues.columns]

nearby_venues


Unnamed: 0,name,categories,lat,lng
0,Roselle Desserts,Bakery,43.653447,-79.362017
1,Tandem Coffee,Coffee Shop,43.653559,-79.361809
2,Cooper Koo Family YMCA,Distribution Center,43.653249,-79.358008
3,Body Blitz Spa East,Spa,43.654735,-79.359874
4,Morning Glory Cafe,Breakfast Spot,43.653947,-79.361149
5,Impact Kitchen,Restaurant,43.656369,-79.35698
6,Corktown Common,Park,43.655618,-79.356211
7,Figs Breakfast & Lunch,Breakfast Spot,43.655675,-79.364503
8,The Distillery Historic District,Historic Site,43.650244,-79.359323
9,Dominion Pub and Kitchen,Pub,43.656919,-79.358967


### How many venues were returned by Foursquare around Downtown Toronto

In [56]:
# Number of rows in nearby_venues, i.e. venues listed in the table above.
venue_count = len(nearby_venues)
print('{} venues were returned by Foursquare for DownTown-Toronto'.format(venue_count))

45 venues were returned by Foursquare for DownTown Toronto
