## Peer-graded Assignment: Segmenting and Clustering Neighborhoods in Toronto

### Problem Statement 1:


### Importing important libraries

In [271]:
import numpy as np 
import pandas as pd
from geopy.geocoders import Nominatim           # convert an address into latitude and longitude values
from bs4 import BeautifulSoup                   # library for web-scrapping
import requests                                 # To pull the JSON database datas
import folium                                   # To show location data in the map

### Web-Scrapping using BeautifulSoup

In [272]:
Data_source = requests.get('https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M').text  #source of data 
soup = BeautifulSoup(Data_source, 'lxml')
table = soup.find('table')

### Data-Preperation and cleaning 

In [276]:
data = list()
for rows in table.find_all('tr'):
    
    row = rows.find_all('td')
    if row:
        postalcode = row[0].text.rstrip()
        borough = row[1].text.rstrip()
        neighborhood = row[2].text.rstrip()
        if borough != 'Not assigned':
            if neighborhood == 'Not assigned':             # if neighbourhood is "Not-assigned", take neighbourhood=borough
                neighborhood = borough
            data.append([postalcode, borough, neighborhood]) #Appending only the postalcose,borough and neighbourhood

col_head = list()
for cols in table.tr.find_all('th'):
    col_head.append(cols.text.strip())

### creating DataFrame

In [277]:
data = pd.DataFrame(data, columns = col_head)
data.rename(columns={'Postal Code': 'Postcode'}, inplace=True)
data.head(20)

Unnamed: 0,Postcode,Borough,Neighborhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Regent Park, Harbourfront"
3,M6A,North York,"Lawrence Manor, Lawrence Heights"
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"
5,M9A,Etobicoke,"Islington Avenue, Humber Valley Village"
6,M1B,Scarborough,"Malvern, Rouge"
7,M3B,North York,Don Mills
8,M4B,East York,"Parkview Hill, Woodbine Gardens"
9,M5B,Downtown Toronto,"Garden District, Ryerson"


In [238]:
#Shape of dataframe 
data.shape

(103, 3)

### Problem Statement 2:

### Reading Geospatial coordinates of the given postcodes

In [239]:
dfgeo = pd.read_csv("Geospatial_Coordinates.csv")
dfgeo.rename(columns={'Postal Code': 'Postcode'}, inplace=True)
dfgeo.head(5)

Unnamed: 0,Postcode,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


### Merging latitude and longitude datas with the previous dataframe

In [240]:
df = pd.merge(data, dfgeo, on="Postcode", how='left')
df.head()

Unnamed: 0,Postcode,Borough,Neighborhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636
3,M6A,North York,"Lawrence Manor, Lawrence Heights",43.718518,-79.464763
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494


### Problem Statement 3:

### Select only Toronto Neighborhoods

In [245]:
df = df[df['Borough'].str.contains('Toronto')]

df = df.reset_index(drop=True)
df.head(5)
df.shape  # Now there exist only those addresses or postcodes that are located in Toronto only 

(39, 5)

In [251]:
address = 'Toronto, Canada'                            # Address of the location which is to be examined

geolocator = Nominatim(user_agent="Toronto_Traveller")
location = geolocator.geocode(address)                 # To access the location of the given adress
latitude = location.latitude                           # To access the latitude of Toronto
longitude = location.longitude                         # To access the Longitude of toronto

print('The geograpical coordinate of Toronto is : {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Toronto is : 43.6534817, -79.3839347.


### Mapping Torronto Neighbourhoods

In [253]:
# create map of Toronto using latitude and longitude values
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=12)

# add markers to map
for lat, lng, borough, neighborhood in zip(df['Latitude'], df['Longitude'], df['Borough'], df['Neighborhood']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=8,
        popup=label,
        color='red',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.6,
        parse_html=False).add_to(map_toronto)  
    
map_toronto

### Exploring Toronto Neighbourhood Using Foursquare Dataset

In [254]:
CLIENT_ID="LPYCJIALAJDLBBT31E4ES13IPEWMCDYK5U3WBGXWBNGWM3AF"            # Foursquare ID of User
CLIENT_SECRET="2TRPJHGRCOFPWS1QVERZB1JXZ5DTBYOH0HLDKN2US5XXVPLQ"        # Foursquare client_secret of user
VERSION = '20180604'
print('My Foursquare credentails :')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

My Foursquare credentails :
CLIENT_ID: LPYCJIALAJDLBBT31E4ES13IPEWMCDYK5U3WBGXWBNGWM3AF
CLIENT_SECRET:2TRPJHGRCOFPWS1QVERZB1JXZ5DTBYOH0HLDKN2US5XXVPLQ


### crating URL so that data can be extracted from Foursquare into a JSON file or database

In [262]:
radius=800                 
LIMIT=100
neighborhood_latitude=df.loc[0,"Latitude"]
neighborhood_longitude=df.loc[0,"Longitude"]
neighbourhood_name = df.loc[0, 'Neighborhood']


url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
    CLIENT_ID, 
    CLIENT_SECRET, 
    VERSION, 
    neighborhood_latitude, 
    neighborhood_longitude, 
    radius, 
    LIMIT)

print("The Foursquare URL to extract the Toronto Neighbourhood data is :",url)

The Foursquare URL to extract the Toronto Neighbourhood data is : https://api.foursquare.com/v2/venues/explore?&client_id=LPYCJIALAJDLBBT31E4ES13IPEWMCDYK5U3WBGXWBNGWM3AF&client_secret=2TRPJHGRCOFPWS1QVERZB1JXZ5DTBYOH0HLDKN2US5XXVPLQ&v=20180604&ll=43.6542599,-79.3606359&radius=800&limit=100


### Loading data to a json file or database

In [263]:
results = requests.get(url).json()
results

{'meta': {'code': 200, 'requestId': '5eedb8f46d8c560028a353a1'},
 'response': {'suggestedFilters': {'header': 'Tap to show:',
   'filters': [{'name': 'Open now', 'key': 'openNow'}]},
  'headerLocation': 'Corktown',
  'headerFullLocation': 'Corktown, Toronto',
  'headerLocationGranularity': 'neighborhood',
  'totalResults': 85,
  'suggestedBounds': {'ne': {'lat': 43.661459907200005,
    'lng': -79.35070310023775},
   'sw': {'lat': 43.647059892799994, 'lng': -79.37056869976226}},
  'groups': [{'type': 'Recommended Places',
    'name': 'recommended',
    'items': [{'reasons': {'count': 0,
       'items': [{'summary': 'This spot is popular',
         'type': 'general',
         'reasonName': 'globalInteractionReason'}]},
      'venue': {'id': '54ea41ad498e9a11e9e13308',
       'name': 'Roselle Desserts',
       'location': {'address': '362 King St E',
        'crossStreet': 'Trinity St',
        'lat': 43.653446723052674,
        'lng': -79.3620167174383,
        'labeledLatLngs': [{'label

### function that extracts the category of the venue

In [264]:
def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return None
    else:
        return categories_list[0]['name']

In [265]:
venues = results['response']['groups'][0]['items']
nearby_venues = pd.json_normalize(venues)                        # flatten JSON

# filter columns
filtered_columns = ['venue.name', 'venue.categories', 'venue.location.lat', 'venue.location.lng']
nearby_venues =nearby_venues.loc[:, filtered_columns]

# filter the category for each row
nearby_venues['venue.categories'] = nearby_venues.apply(get_category_type, axis=1)

# clean columns
nearby_venues.columns = [col.split(".")[-1] for col in nearby_venues.columns]

nearby_venues["Neighbourhood"]=df.Neighborhood
nearby_venues["Neighbourhood_lat"]=df.Latitude
nearby_venues["Neighbourhood_lng"]=df.Longitude
nearby_venues.rename(columns={"lat":"Venue_lat","lng":"Venue_lng","name":"Venue"},inplace=True)
nearby_venues.head()

Unnamed: 0,Venue,categories,Venue_lat,Venue_lng,Neighbourhood,Neighbourhood_lat,Neighbourhood_lng
0,Roselle Desserts,Bakery,43.653447,-79.362017,"Regent Park, Harbourfront",43.65426,-79.360636
1,Tandem Coffee,Coffee Shop,43.653559,-79.361809,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494
2,Cooper Koo Family YMCA,Distribution Center,43.653249,-79.358008,"Garden District, Ryerson",43.657162,-79.378937
3,Corktown Common,Park,43.655618,-79.356211,St. James Town,43.651494,-79.375418
4,The Distillery Historic District,Historic Site,43.650244,-79.359323,The Beaches,43.676357,-79.293031


In [267]:
print("In the 800 radious ")
print('{} venues were returned by Foursquare.'.format(nearby_venues.shape[0]))
print('{} Unique categories of venues returned by Foursquare'.format(nearby_venues.categories.nunique()))

In the 800 radious 
85 venues were returned by Foursquare.
51 Unique categories of venues returned by Foursquare


### creating map to examine Venue

In [278]:
# create map of Toronto using the selected neighbourhood latitude and longitude values
map_tohood = folium.Map(location=[neighborhood_latitude, neighborhood_longitude], zoom_start=15.5)

#add a red circle marker to represent the selected neighborhood
folium.CircleMarker(
    [neighborhood_latitude, neighborhood_longitude],
    radius=12,
    color='red',
    popup= nearby_venues["Neighbourhood"],
    fill = True,
    fill_color = 'red',
    fill_opacity = 0.6
).add_to(map_tohood)



# add markers to map
for lat, lng, name, categories in zip(nearby_venues['Venue_lat'], nearby_venues['Venue_lng'], nearby_venues['Venue'], nearby_venues['categories']):
    label = '{}, {}'.format(name, categories)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=6,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3199cc',
        fill_opacity=0.3).add_to(map_tohood)  

map_tohood