# Notebook for Capstone Project

# Import libraries

In [146]:
import pandas as pd
import numpy as npù
!pip install bs4

from bs4 import BeautifulSoup
import requests



# Get the html from url

In [147]:
# Download the Wikipedia page listing the Canadian postal codes that start with "M".
# 'identity' asks the server for an uncompressed response body.
headers = {'Accept-Encoding': 'identity'}
r = requests.get(
    'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M',
    headers=headers,
    timeout=30,  # never hang the notebook on a stalled connection
)
# Fail here, with a clear HTTP error, instead of parsing an error page later on.
r.raise_for_status()

# Use the BeautifulSoup library to parse the HTML and display it with the prettify method

In [None]:
# Parse the downloaded page into a navigable tree using the html5lib parser.
soup = BeautifulSoup(r.text, 'html5lib')

# Show only the beginning of the prettified document: printing the whole page
# floods the cell output and bloats the saved notebook.
print(soup.prettify()[:2000])

# Parse the HTML to extract the postal code, borough and neighborhood of each table cell, and create the ngbr_toronto dataframe

In [149]:
# Build one record per assigned postal code from the first table of the page.
# Each <td> is expected to hold the postal code in a <p> and the text
# "Borough(Neighborhood / Neighborhood ...)" in a <span>.
table = soup.find('table')
if table is None:
    raise ValueError('No <table> element found in the downloaded page')

table_contents = []
for td in table.find_all('td'):
    # Skip cells that lack the expected <p>/<span> structure instead of
    # crashing with an AttributeError on None.
    if td.span is None or td.p is None:
        continue

    span_text = td.span.text
    if span_text == 'Not assigned':
        continue

    parts = span_text.split('(')
    borough = parts[0]
    if len(parts) > 1:
        # Drop the closing parenthesis and turn " /" separators into commas.
        neighborhood = parts[1].strip(')').replace(' /', ',').replace(')', ' ').strip(' ')
    else:
        # No parenthesised neighborhood list: fall back to the borough name.
        neighborhood = borough.strip(' ')

    table_contents.append({
        'PostalCode': td.p.text[:3],
        'Borough': borough,
        'Neighborhood': neighborhood,
    })

# Explicit columns keep the frame well-formed even if no cell was parsed.
ngbr_toronto = pd.DataFrame(table_contents, columns=['PostalCode', 'Borough', 'Neighborhood'])

# Some borough cells arrive with extra text glued on (no separator in the
# page markup); map those exact strings to readable names.
borough_fixes = {
    'Downtown TorontoStn A PO Boxes25 The Esplanade': 'Downtown Toronto Stn A',
    'East TorontoBusiness reply mail Processing Centre969 Eastern': 'East Toronto Business',
    'EtobicokeNorthwest': 'Etobicoke Northwest',
    'East YorkEast Toronto': 'East York/East Toronto',
    'MississaugaCanada Post Gateway Processing Centre': 'Mississauga',
}
ngbr_toronto['Borough'] = ngbr_toronto['Borough'].replace(borough_fixes)

ngbr_toronto.head()

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Regent Park, Harbourfront"
3,M6A,North York,"Lawrence Manor, Lawrence Heights"
4,M7A,Queen's Park,Ontario Provincial Government


# Group by PostalCode (rows sharing a postal code are combined into one), then replace every 'Not Assigned' value in Neighborhood with the name of its Borough

In [150]:
# Collapse the frame to one row per postal code.
# - rename() is a no-op when the column is already called 'Postal Code',
#   so this cell can be re-run safely.
# - Neighborhoods sharing a postal code are joined with ", " (a plain
#   string sum would glue them together without any separator), and the
#   borough is taken once instead of being concatenated with itself.
ngbr_toronto = (
    ngbr_toronto
    .rename(columns={'PostalCode': 'Postal Code'})
    .groupby('Postal Code', as_index=False)
    .agg({'Borough': 'first', 'Neighborhood': ', '.join})
)

# A neighborhood marked as not assigned takes the name of its borough.
# The comparison ignores case and surrounding whitespace because the
# scraped page spells the marker 'Not assigned'.
not_assigned = ngbr_toronto['Neighborhood'].str.strip().str.lower() == 'not assigned'
ngbr_toronto['Neighborhood'] = ngbr_toronto['Neighborhood'].mask(not_assigned, ngbr_toronto['Borough'])

# Print the shape of the created dataframe

In [151]:
# `shape` is an attribute holding a (rows, columns) tuple, not a method;
# calling it raises "TypeError: 'tuple' object is not callable".
ngbr_toronto.shape

(103, 3)

# Merge the coordinate information

In [152]:
# Attach the latitude/longitude of every postal code.
# NOTE(review): '/content/...' is an absolute path (looks like a Colab
# upload location) — adjust it when running elsewhere.
coordinates_df = pd.read_csv('/content/Geospatial_Coordinates.csv')

# Drop coordinate columns left over from a previous run of this cell so that
# re-running it does not produce duplicated '_x'/'_y' suffixed columns.
coordinate_columns = [col for col in coordinates_df.columns if col != 'Postal Code']
ngbr_toronto = pd.merge(
    ngbr_toronto.drop(columns=coordinate_columns, errors='ignore'),
    coordinates_df,
    on='Postal Code',
)
ngbr_toronto.head(10)

Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude
0,M1B,Scarborough,"Malvern, Rouge",43.806686,-79.194353
1,M1C,Scarborough,"Rouge Hill, Port Union, Highland Creek",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476
5,M1J,Scarborough,Scarborough Village,43.744734,-79.239476
6,M1K,Scarborough,"Kennedy Park, Ionview, East Birchmount Park",43.727929,-79.262029
7,M1L,Scarborough,"Golden Mile, Clairlea, Oakridge",43.711112,-79.284577
8,M1M,Scarborough,"Cliffside, Cliffcrest, Scarborough Village West",43.716316,-79.239476
9,M1N,Scarborough,"Birch Cliff, Cliffside West",43.692657,-79.264848


# Create map with all coordinates

In [None]:
import folium

# Centre the map on the mean of the plotted coordinates and zoom in to city
# level; without a location the map opens on a world view at zoom 1 and the
# markers are barely visible.
map_center = [ngbr_toronto['Latitude'].mean(), ngbr_toronto['Longitude'].mean()]
map_toronto = folium.Map(location=map_center, zoom_start=10)

# add one circle marker per postal code, labelled "<neighborhood>, <borough>"
for lat, lng, borough, neighborhood in zip(ngbr_toronto['Latitude'], ngbr_toronto['Longitude'], ngbr_toronto['Borough'], ngbr_toronto['Neighborhood']):
    label = '{}, {}'.format(neighborhood, borough)
    # parse_html=True makes the popup treat the label as text, so quotes and
    # other special characters in names cannot break the generated page
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7).add_to(map_toronto)

# last expression of the cell: renders the interactive map
map_toronto

# Let's use the Foursquare API to explore the neighborhoods in the ngbr_toronto dataframe

## Foursquare access

In [159]:
import os

# Foursquare credentials are read from environment variables so that no
# secret is stored in the notebook or in its saved output.
# Set FOURSQUARE_CLIENT_ID, FOURSQUARE_CLIENT_SECRET and
# FOURSQUARE_ACCESS_TOKEN before starting the kernel.
# NOTE(review): the keys previously committed in this cell should be
# considered compromised and be regenerated.
CLIENT_ID = os.environ.get('FOURSQUARE_CLIENT_ID', '') # your Foursquare ID
CLIENT_SECRET = os.environ.get('FOURSQUARE_CLIENT_SECRET', '') # your Foursquare Secret
ACCESS_TOKEN = os.environ.get('FOURSQUARE_ACCESS_TOKEN', '') # your FourSquare Access Token
VERSION = '20180605' # Foursquare API version
LIMIT = 100 # A default Foursquare API limit value

# Report only whether each credential is available, never its value.
print('Your credentails:')
print('CLIENT_ID: ' + ('set' if CLIENT_ID else 'missing (export FOURSQUARE_CLIENT_ID)'))
print('CLIENT_SECRET:' + ('set' if CLIENT_SECRET else 'missing (export FOURSQUARE_CLIENT_SECRET)'))

Your credentails:
CLIENT_ID: <redacted>
CLIENT_SECRET:<redacted>


## Get the neighborhood with the most occurrences

In [170]:
# Find the Neighborhood entry that appears on the most rows, then keep only
# the first name when that entry is a comma-separated list.
neighborhood_counts = ngbr_toronto['Neighborhood'].value_counts()
most_common_entry = neighborhood_counts.idxmax()
max_occurences = most_common_entry.partition(',')[0]

## Get the top 50 venues for the neighborhood with the most occurrences

In [182]:
# Locate the row used as the centre of the venue search.
# Prefer rows whose Neighborhood equals `max_occurences` exactly; when the
# name was cut from a comma-separated entry, no row equals it, so fall back
# to rows whose first listed neighborhood matches.
neighborhood_names = ngbr_toronto['Neighborhood']
matching_rows = ngbr_toronto[neighborhood_names == max_occurences]
if matching_rows.empty:
    first_names = neighborhood_names.str.split(',').str[0].str.strip()
    matching_rows = ngbr_toronto[first_names == max_occurences]
if matching_rows.empty:
    raise ValueError('No row of ngbr_toronto matches neighborhood {!r}'.format(max_occurences))

# Several rows may match; the first one is used, as before.
neighborhood_latitude = matching_rows['Latitude'].iloc[0]
neighborhood_longitude = matching_rows['Longitude'].iloc[0]

limit = 50    # maximum number of venues to request
radius = 500  # value of the request's `radius` parameter

# Create the url
url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
    CLIENT_ID, 
    CLIENT_SECRET, 
    VERSION, 
    neighborhood_latitude, 
    neighborhood_longitude, 
    radius, 
    limit)

## Save the result in a json object

In [None]:
# Call the Foursquare endpoint and decode the JSON body.
# The timeout keeps the notebook from hanging on a stalled connection, and
# raise_for_status() turns an HTTP error (e.g. bad credentials) into an
# exception here instead of a confusing failure further down.
response = requests.get(url, timeout=30)
response.raise_for_status()
results = response.json()