### 1. Start by creating a new Notebook for this assignment.

In [115]:
## import libs
from bs4 import BeautifulSoup
import requests
import pandas as pd
import csv
# !conda install -c conda-forge geopy --yes 
from geopy.geocoders import Nominatim
# !conda install -c conda-forge folium=0.5.0 --yes
import folium

Solving environment: done

# All requested packages already installed.

Solving environment: done

# All requested packages already installed.



### 2. Use the Notebook to build the code to scrape the following Wikipedia page, https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M, in order to obtain the data that is in the table of postal codes

In [116]:
# scrape the source html
source = requests.get('https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M').text
soup = BeautifulSoup(source, 'lxml')
table = soup.find('table', class_='wikitable sortable')
rows = table.find_all('tr')

# convert html to a double array (which can then be converted into a dataframe)
table_body = []
for tr in rows:
    tds = tr.find_all('td')
    if len(tds) > 0:
        row = [tr.text.replace('\n','') for tr in tds]
        table_body.append(row)

[['M1A', 'Not assigned', 'Not assigned'],
 ['M2A', 'Not assigned', 'Not assigned'],
 ['M3A', 'North York', 'Parkwoods'],
 ['M4A', 'North York', 'Victoria Village'],
 ['M5A', 'Downtown Toronto', 'Harbourfront'],
 ['M6A', 'North York', 'Lawrence Heights'],
 ['M6A', 'North York', 'Lawrence Manor'],
 ['M7A', 'Downtown Toronto', "Queen's Park"],
 ['M8A', 'Not assigned', 'Not assigned'],
 ['M9A', "Queen's Park", 'Not assigned'],
 ['M1B', 'Scarborough', 'Rouge'],
 ['M1B', 'Scarborough', 'Malvern'],
 ['M2B', 'Not assigned', 'Not assigned'],
 ['M3B', 'North York', 'Don Mills North'],
 ['M4B', 'East York', 'Woodbine Gardens'],
 ['M4B', 'East York', 'Parkview Hill'],
 ['M5B', 'Downtown Toronto', 'Ryerson'],
 ['M5B', 'Downtown Toronto', 'Garden District'],
 ['M6B', 'North York', 'Glencairn'],
 ['M7B', 'Not assigned', 'Not assigned'],
 ['M8B', 'Not assigned', 'Not assigned'],
 ['M9B', 'Etobicoke', 'Cloverdale'],
 ['M9B', 'Etobicoke', 'Islington'],
 ['M9B', 'Etobicoke', 'Martin Grove'],
 ['M9B', 'Et

### 3. transform the data into a pandas dataframe

In [117]:
## 3a. The dataframe will consist of three columns: PostalCode, Borough, and Neighborhood
table_heads = ['PostalCode', 'Borough', 'Neighborhood']
df_raw = pd.DataFrame(table_body, columns=table_heads)

## 3b. Only process the cells that have an assigned borough. Ignore cells with a borough that is Not assigned.
df = df_raw[df_raw.Borough != 'Not assigned']

## 3c. If a cell has a borough but a Not assigned neighborhood, then the neighborhood will be the same as the borough.
df.Neighborhood.replace('Not assigned', df.Borough, inplace=True)

## 3d. More than one neighborhood can exist in one postal code area. 
## These two rows will be combined into one row with the neighborhoods separated with a comma
df  = df.groupby('PostalCode').agg(lambda x : ', '.join(set(x))).reset_index()
df.head()

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M1B,Scarborough,"Malvern, Rouge"
1,M1C,Scarborough,"Rouge Hill, Highland Creek, Port Union"
2,M1E,Scarborough,"Morningside, West Hill, Guildwood"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae


In [118]:
## 3e. In the last cell of your notebook, use the .shape method to print the number of rows of your dataframe.
df.shape

(103, 3)

### 4. Use the csv file to create the dataframe. Here is a link to a csv file that has the geographical coordinates of each postal code: http://cocl.us/Geospatial_data

In [119]:
ll_raw = pd.read_csv('https://cocl.us/Geospatial_data')
df_ll = df.join(ll_raw.set_index('Postal Code'), on='PostalCode')
df_ll.head()

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M1B,Scarborough,"Malvern, Rouge",43.806686,-79.194353
1,M1C,Scarborough,"Rouge Hill, Highland Creek, Port Union",43.784535,-79.160497
2,M1E,Scarborough,"Morningside, West Hill, Guildwood",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476


### 5. Create a map with all the Boroughs of Toronto. 

In [120]:
# let's just use the Toronto Boroughs
toronto_df = df_ll[df_ll['Borough'].str.contains('Toronto')].reset_index().drop('index', axis=1)
print(toronto_df.shape)
toronto_df.head()

(39, 5)


Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M4E,East Toronto,The Beaches,43.676357,-79.293031
1,M4K,East Toronto,"The Danforth West, Riverdale",43.679557,-79.352188
2,M4L,East Toronto,"India Bazaar, The Beaches West",43.668999,-79.315572
3,M4M,East Toronto,Studio District,43.659526,-79.340923
4,M4N,Central Toronto,Lawrence Park,43.72802,-79.38879


In [121]:
# get Toronto Latitude and Longitude
address = 'Toronto, ON'
geolocator = Nominatim(user_agent="ny_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude

# create map of Toronto using latitude and longitude values
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=10)

In [122]:
# add markers to map
for lat, lng, postcode, borough, neighborhood in zip(toronto_df['Latitude'], toronto_df['Longitude'], toronto_df['PostalCode'], toronto_df['Borough'], toronto_df['Neighborhood']):
    label = f'{postcode} - {borough} : ({neighborhood})'
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_toronto)  
    
map_toronto

### 6. Explore the map
...I'm not really sure what 'explore' entails but it seems a shame to not use the foursquare api that we learned

In [123]:
CLIENT_ID = '' # your Foursquare ID
CLIENT_SECRET = '' # your Foursquare Secret
VERSION = '20180605' # Foursquare API version

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: 
CLIENT_SECRET:


In [124]:
# The code was removed by Watson Studio for sharing.

In [125]:
# let's find the top 3 venues in or near (within 5 km) each borough
# we'll usee the average latitude and longitude of the Toronto Boroughs as the center point and look for restraunts within 25 km
radius = 5 * 1000
section = 'topPicks'
limit = 3
results = []

for i, row in toronto_df.iterrows():
    url = 'https://api.foursquare.com/v2/venues/explore?client_id={}&client_secret={}&v={}&ll={},{}&radius={}&section={}&limit={}'.format(
        CLIENT_ID,
        CLIENT_SECRET,
        VERSION, 
        toronto_df.loc[i, 'Latitude'],
        toronto_df.loc[i, 'Longitude'],
        radius, 
        section,
        limit
    )
    results.append(requests.get(url).json())

In [127]:
#let's convert the venues into a dataframe
from pandas.io.json import json_normalize

venues = []
for e in results:
    items = e['response']['groups'][0]['items']
    venues.extend(items)

# convert json to dataframe
dfv = json_normalize(venues)

# let's just get columsn we care about and give them easier names to work with
dfv.rename(inplace=True, columns={
    'venue.name':'name', 
    'venue.categories':'category', 
    'venue.location.address':'address', 
    'venue.location.lat':'lat', 
    'venue.location.lng':'lon'
})

dfv = dfv[['name', 'address', 'lat', 'lon', 'category']]

# we've got several duplicates so let's drop them
dfv = dfv.groupby('name').first().reset_index()

# and let's make the category column more readable
for index, row in dfv.iterrows():
    dfv.loc[index, 'category'] = row['category'][0]['name']
    
dfv

Unnamed: 0,name,address,lat,lon,category
0,Amsterdam Brewery,45 Esander Dr,43.706404,-79.357162,Brewery
1,Ashbridges Bay Beaches,Ashbridges Bay Beaches,43.662673,-79.308647,Beach
2,Bar Isabel,797 College Street,43.654782,-79.420717,Tapas Restaurant
3,Batifole,744 Gerrard St E,43.666651,-79.347261,French Restaurant
4,Bellwoods Brewery,124 Ossington Ave,43.647097,-79.419955,Brewery
5,Blood Brothers Brewing,165 Geary Ave,43.669944,-79.436533,Brewery
6,Distillery Sunday Market,1 Trinity St,43.650075,-79.361832,Farmers Market
7,Indie Alehouse,2876 Dundas St W,43.665475,-79.46529,Gastropub
8,Istanbul Cafe & Espresso Bar,174 Eglinton Avenue East,43.707891,-79.393049,Café
9,Kew Gardens,2075 Queen Street East,43.669038,-79.298538,Park


In [128]:
# now let's make a map to pl;ot all the top locations we found
map_toronto_venues = folium.Map(location=[latitude, longitude], zoom_start=10)

# add markers to map
for lat, lon, name, category, address in zip(
    dfv['lat'], dfv['lon'], dfv['name'], dfv['category'], dfv['address']
):
    label = f'{name} - {category} : ({address})'
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_toronto_venues)  
    
map_toronto_venues