# Part 1 - Read from wikipedia the Toronto Postcodes, Boroughs, and Neighbourhoods.

## Create a dataframe of the postcodes, boroughs, and list of all neighbourhoods in the borough.

In [1]:
!pip install bs4



### Define the location of the wikipedia postal code file.  Read using BeautifulSoup

In [2]:
from bs4 import BeautifulSoup # this module helps in web scrapping.
import requests  # this module helps us to download a web page
url="https://en.wikipedia.org/w/index.php?title=List_of_postal_codes_of_Canada:_M&direction=prev&oldid=926287641"
html_data  = requests.get(url).text
soup = BeautifulSoup(html_data, 'html5lib')

### Read the wikipedia table of all Toronto Postcodes, Boroughs, and Neighbourhoods.  Create the final format of Postcode, Borough, and list of neighbourhoods in each Borough.

In [3]:
#Read the html data table
import pandas as pd
tables = soup.find_all('table') # in html table is represented by the tag <table>
len(tables)

#Find the table in the html with the postal data
for index,table in enumerate(tables):
    if ("wikitable sortable" in str(table)):
        table_index = index

        table_content=tables[table_index]

toronto_codes = pd.DataFrame(columns=["Postcode","Borough","Neighbourhood"])
for row in table_content.findAll('tr'):
    cols = row.find_all('td') 
    if (cols != []):
        Postcode = cols[0].text
        Borough = cols[1].text
        Neighbourhood = cols[2].text.replace('\n', '')  # remove string \n off of Neighbourhood
        if (Neighbourhood=='Not assigned'):             # Reassign Not assigned values for Neighbourhood to the Borough value
            Neighbourhood=Borough
        if (Borough !='Not assigned'):                  # Do not process Borough values of Not assigned.
          toronto_codes = toronto_codes.append({"Postcode":Postcode, "Borough":Borough, "Neighbourhood":Neighbourhood}, ignore_index=True)


#Sort thru the codes to make a list of all Neighbourhood values for each Borough values
toronto_codes=toronto_codes.sort_values(by=["Postcode","Borough"])

#Create a new column called "Neighbourhoods" which is a list of each neighbourhod in the postcode, bourough
toronto_codes_nll = pd.DataFrame(columns=["Postcode","Borough","Neighbourhood"])
nh=''
cnt=0
prev_Postcode=''
prev_Borough=''
for ind in toronto_codes.index:        #Loop thru all records in dataframe
  cnt=cnt+1
  cnty=0
  for yyy in toronto_codes.index:      #Loop thru the dataframe again but beginning with the position of the outer loop
    cnty=cnty+1
    if cnt>cnty or toronto_codes['Postcode'][ind]==prev_Postcode and toronto_codes['Borough'][ind]==prev_Borough :
      dummy='Yes'
    elif toronto_codes['Postcode'][ind]==toronto_codes['Postcode'][yyy]     \
          and toronto_codes['Borough'][ind]==toronto_codes['Borough'][yyy]:
      if nh!='':
        nh=nh+', '+toronto_codes['Neighbourhood'][yyy]
      else:
       nh=toronto_codes['Neighbourhood'][yyy]
  if nh!='':
    toronto_codes_nll = toronto_codes_nll.append({"Postcode":toronto_codes['Postcode'][ind], \
                                                  "Borough":toronto_codes['Borough'][ind],   \
                                                  "Neighbourhood":nh}, ignore_index=True)
  nh=''
  prev_Postcode=toronto_codes['Postcode'][ind]
  prev_Borough=toronto_codes['Borough'][ind]

#toronto_codes_nll[toronto_codes_nll['Postcode']=='M9V'] 
toronto_codes_nll

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M1B,Scarborough,"Rouge, Malvern"
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union"
2,M1E,Scarborough,"Guildwood, Morningside, West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae
...,...,...,...
98,M9N,York,Weston
99,M9P,Etobicoke,Westmount
100,M9R,Etobicoke,"Kingsview Village, Martin Grove Gardens, Richv..."
101,M9V,Etobicoke,"Albion Gardens, Beaumond Heights, Humbergate, ..."


### Display the final shape of the Toronto dataframe.

In [4]:
toronto_codes_nll.shape

(103, 3)

# Part 2 - Get the latitude and longitude coordinates of the "Geospatial" dataset for Toronto postcodes.

### Read the file of latitudes and longitudes

In [5]:
geocode_dataset = pd.read_csv('Geospatial_Coordinates.csv')
geocode_dataset.rename(columns={"Postal Code": "Postcode"},inplace=True)
geocode_dataset.describe

<bound method NDFrame.describe of     Postcode   Latitude  Longitude
0        M1B  43.806686 -79.194353
1        M1C  43.784535 -79.160497
2        M1E  43.763573 -79.188711
3        M1G  43.770992 -79.216917
4        M1H  43.773136 -79.239476
..       ...        ...        ...
98       M9N  43.706876 -79.518188
99       M9P  43.696319 -79.532242
100      M9R  43.688905 -79.554724
101      M9V  43.739416 -79.588437
102      M9W  43.706748 -79.594054

[103 rows x 3 columns]>

### Merge the Toronto Postcodes with the "Geospatial" dataset into one dataframe for exploration.

In [6]:
toronto_codes_with_geoinfo=pd.merge(toronto_codes_nll, geocode_dataset, on='Postcode')
print(toronto_codes_with_geoinfo.shape)
toronto_codes_with_geoinfo.head(11)


(103, 5)


Unnamed: 0,Postcode,Borough,Neighbourhood,Latitude,Longitude
0,M1B,Scarborough,"Rouge, Malvern",43.806686,-79.194353
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476
5,M1J,Scarborough,Scarborough Village,43.744734,-79.239476
6,M1K,Scarborough,"East Birchmount Park, Ionview, Kennedy Park",43.727929,-79.262029
7,M1L,Scarborough,"Clairlea, Golden Mile, Oakridge",43.711112,-79.284577
8,M1M,Scarborough,"Cliffcrest, Cliffside, Scarborough Village West",43.716316,-79.239476
9,M1N,Scarborough,"Birch Cliff, Cliffside West",43.692657,-79.264848


# Part 3 - Explore and cluster the neighborhoods in Toronto.

### Create and display a map of Toronto and overlay with all the boroughs.

In [7]:
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values
address = 'Toronto'

geolocator = Nominatim(user_agent="ny_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Toronto are {}, {}.'.format(latitude, longitude))

import folium # map rendering library
# create map of Toronto using latitude and longitude values
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=10)

# add markers to map
for lat, lng, borough, neighborhood in zip(toronto_codes_with_geoinfo['Latitude'], toronto_codes_with_geoinfo['Longitude'], toronto_codes_with_geoinfo['Borough'], toronto_codes_with_geoinfo['Neighbourhood']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_toronto)  
map_toronto

The geograpical coordinate of Toronto are 43.6534817, -79.3839347.


### A nice map does display using notebooks.   I do not know why it does not display from github saying that "Make this Notebook Trusted to load map: File -> Trust Notebook".  I have gone into File -> Trust Notebook and the this is already a trusted Notebook.  To view the map, you will need to download from github.  The "Discussion Forums" say that github will not display/load maps.

# Define Foursquare Credentials and Version

### Explore neighourhood "Woburn"


In [8]:
CLIENT_ID = '1J4H1XCKHFROMH41OUQ2Q255QMUFLIOPLE1PFUFWITISG3XP' # your Foursquare ID
CLIENT_SECRET = 'AKPY1YQUGEI4KVDOTJ0APTV4FN015PPYDIPE1EGQI3ECZUNP' # your Foursquare Secret
VERSION = '20180605' # Foursquare API version
LIMIT = 100 # A default Foursquare API limit value

In [9]:
toronto_codes_with_geoinfo.loc[3, 'Neighbourhood']
neighbourhood_latitude = toronto_codes_with_geoinfo.loc[3, 'Latitude'] # neighborhood latitude value
neighbourhood_longitude = toronto_codes_with_geoinfo.loc[3, 'Longitude'] # neighborhood longitude value

neighbourhood_name = toronto_codes_with_geoinfo.loc[3, 'Neighbourhood'] # neighborhood name
print('Latitude and longitude values of {} are {}, {}.'.format(neighbourhood_name, 
                                                               neighbourhood_latitude, 
                                                               neighbourhood_longitude))
LIMIT = 100 # limit of number of venues returned by Foursquare API

radius = 500 # define radius

url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
    CLIENT_ID, 
    CLIENT_SECRET, 
    VERSION, 
    neighbourhood_latitude, 
    neighbourhood_longitude, 
    radius, 
    LIMIT)
url # display URL

results = requests.get(url).json()

Latitude and longitude values of Woburn are 43.7709921, -79.21691740000001.


In [10]:
# function that extracts the category of the venue
def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return None
    else:
        return categories_list[0]['name']

## Show Categories of Venues in "Woburn"

In [11]:
import requests # library to handle requests
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

venues = results['response']['groups'][0]['items']
    
nearby_venues = json_normalize(venues) # flatten JSON

# filter columns
filtered_columns = ['venue.name', 'venue.categories', 'venue.location.lat', 'venue.location.lng']
nearby_venues =nearby_venues.loc[:, filtered_columns]

# filter the category for each row
nearby_venues['venue.categories'] = nearby_venues.apply(get_category_type, axis=1)

# clean columns
nearby_venues.columns = [col.split(".")[-1] for col in nearby_venues.columns]

nearby_venues

  nearby_venues = json_normalize(venues) # flatten JSON


Unnamed: 0,name,categories,lat,lng
0,Starbucks,Coffee Shop,43.770037,-79.221156
1,Tim Hortons,Coffee Shop,43.770827,-79.223078
2,Korean Grill House,Korean BBQ Restaurant,43.770812,-79.214502
3,"El rey del cabrito, monterrey city mexico",Mexican Restaurant,43.7688,-79.2198


In [12]:
print('{} venues were returned by Foursquare.'.format(nearby_venues.shape[0]))

4 venues were returned by Foursquare.
