In [91]:
#!conda install -c conda-forge beautifulsoup4
from bs4 import BeautifulSoup
#!conda install -c conda-forge lxml
import requests
from geopy.geocoders import Nominatim
#!conda install -c conda-forge folium
import folium
import pandas as pd
from pandas.io.json import json_normalize

### 1. Scrape the Wikipedia page to get data

In [92]:
# Download the Wikipedia page that lists the Canadian postal codes starting with "M"
# and parse it with the lxml parser.
wiki_response = requests.get(
    'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M',
    timeout=30,  # seconds; without a timeout a stalled connection would hang the cell
)
# Fail here on an HTTP error instead of silently parsing an error page.
wiki_response.raise_for_status()
source = wiki_response.text
soup = BeautifulSoup(source, 'lxml')

In [93]:
def _link_text(cell):
    """Return the text of the first <a> element inside `cell`, or None if the cell has no link."""
    link = cell.a
    return link.text if link is not None else None


# Locate the postal-code table and flatten it into its <td> cells.
table = soup.find('table', class_='wikitable sortable')
if table is None:
    raise ValueError("no table with class 'wikitable sortable' was found in the page")
data = table.find_all('td')

# The cells are read in groups of three: postcode, borough, neighborhood.
Postcode = [cell.text for cell in data[0::3]]
# Borough / neighborhood names are taken from the link inside the cell;
# a cell without a link is recorded as None.
Borough = [_link_text(cell) for cell in data[1::3]]
Neighborhood = [_link_text(cell) for cell in data[2::3]]

### 2. Create the dataframe

In [94]:
# Assemble the three scraped lists into one table, one column per list.
column_order = ['Postcode', 'Borough', 'Neighborhood']
df_data = dict(zip(column_order, (Postcode, Borough, Neighborhood)))
df = pd.DataFrame(df_data, columns=column_order)
df.shape

(288, 3)

#### Remove rows whose borough is Not assigned

In [95]:
# Keep only the rows that have a borough, then renumber the index from 0.
# Written as a non-mutating chain (no inplace=True) so the frame is replaced
# in one step rather than modified in place.
df = df.dropna(subset=['Borough']).reset_index(drop=True)
df.head()

Unnamed: 0,Postcode,Borough,Neighborhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,Harbourfront
3,M5A,Downtown Toronto,Regent Park
4,M6A,North York,Lawrence Heights


#### For rows whose neighborhood is Not assigned, use the borough name as the neighborhood

In [96]:
# Rows without a neighborhood name take the name of their borough instead.
no_neighborhood = df['Neighborhood'].isna()
missing_index = df.index[no_neighborhood.values]
df.loc[missing_index, 'Neighborhood'] = df.loc[missing_index, 'Borough']
df.head(12)

Unnamed: 0,Postcode,Borough,Neighborhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,Harbourfront
3,M5A,Downtown Toronto,Regent Park
4,M6A,North York,Lawrence Heights
5,M6A,North York,Lawrence Manor
6,M7A,Queen's Park,Queen's Park
7,M9A,Etobicoke,Islington Avenue
8,M1B,Scarborough,Rouge
9,M1B,Scarborough,Malvern


#### Combine multiple neighborhoods with the same postcode together, separated with ','

In [102]:
# Drop fully identical rows so a neighborhood is not repeated inside its group.
df = df.drop_duplicates()

# One row per (Postcode, Borough); the neighborhood names of each group are
# joined into a single ', '-separated string.
Toronto_Neighborhoods = (
    df.groupby(['Postcode', 'Borough'])['Neighborhood']
    .apply(', '.join)
    .reset_index()
)
Toronto_Neighborhoods.head(20)

Unnamed: 0,Postcode,Borough,Neighborhood
0,M1B,Scarborough,"Rouge, Malvern"
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union"
2,M1E,Scarborough,"Scarborough, Morningside, West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Scarborough
5,M1J,Scarborough,Scarborough Village
6,M1K,Scarborough,"Scarborough, Ionview, Kennedy Park"
7,M1L,Scarborough,"Clairlea, Golden Mile, Oakridge"
8,M1M,Scarborough,"Cliffcrest, Cliffside, Scarborough"
9,M1N,Scarborough,"Birch Cliff, Scarborough"


In [103]:
# (rows, columns) of the grouped table.
Toronto_Neighborhoods.shape

(100, 3)

In [31]:
# Show the 'Neighborhood' value of the row labelled 0 in `Toronto`.
# NOTE(review): `Toronto` is not created in any of the visible cells — confirm where it is defined.
Toronto.loc[0,'Neighborhood']

'East Toronto, Riverdale'

In [33]:
# Query the Foursquare venues/explore endpoint around the row labelled 0 of `Toronto`.
# NOTE(review): `Toronto`, CLIENT_ID, CLIENT_SECRET and VERSION are not defined in
# the visible cells — confirm they are set before this cell runs.
LATITUDE = Toronto.loc[0, 'Latitude']
LONGITUDE = Toronto.loc[0, 'Longitude']
RADIUS = 500
LIMIT = 100
url = 'https://api.foursquare.com/v2/venues/explore?client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
    CLIENT_ID,
    CLIENT_SECRET,
    VERSION,
    LATITUDE,
    LONGITUDE,
    RADIUS,
    LIMIT,
)
explore_response = requests.get(url, timeout=30)  # seconds; avoids hanging on a stalled connection
# Surface HTTP errors here rather than as a KeyError when `results` is indexed later.
explore_response.raise_for_status()
results = explore_response.json()

In [79]:
# Take the items of the first group in the response and flatten the nested
# JSON records into a table.
first_group = results['response']['groups'][0]
venues = first_group['items']
nearby_venues = json_normalize(venues)

In [80]:
def get_category_type(row):
    """Return the name of the first category listed for a venue row.

    `row` is indexed with the key 'venue.categories'; the value found there
    is expected to be a sequence of mappings that each have a 'name' key.
    Returns None when that sequence is empty.
    """
    venue_categories = row['venue.categories']
    return venue_categories[0]['name'] if len(venue_categories) > 0 else None

In [83]:
def getNearbyVenues(names, latitudes, longitudes, radius=500, LIMIT=100):
    """Collect the venues returned by Foursquare's venues/explore endpoint for each location.

    Parameters
    ----------
    names, latitudes, longitudes : iterables
        Walked in lockstep with ``zip``: one label and one coordinate pair per
        location. Each name is printed as it is processed.
    radius : value sent as the ``radius`` query parameter (default 500).
    LIMIT : value sent as the ``limit`` query parameter (default 100).

    Returns
    -------
    pandas.DataFrame
        One row per returned venue with the columns 'Neighborhood',
        'Neighborhood Latitude', 'Neighborhood Longitude', 'Venue',
        'Venue Latitude', 'Venue Longitude' and 'Venue Category'.
        A venue with an empty category list gets None as its category.
        When no venue is collected, an empty frame with these columns is returned.

    Raises
    ------
    requests.HTTPError
        If a request is answered with an HTTP error status.

    Notes
    -----
    Reads the module-level names CLIENT_ID, CLIENT_SECRET and VERSION.
    """
    columns = ['Neighborhood',
               'Neighborhood Latitude',
               'Neighborhood Longitude',
               'Venue',
               'Venue Latitude',
               'Venue Longitude',
               'Venue Category']
    rows = []
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)

        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID,
            CLIENT_SECRET,
            VERSION,
            lat,
            lng,
            radius,
            LIMIT)

        # make the GET request; fail on an HTTP error instead of on a later KeyError
        response = requests.get(url, timeout=30)  # seconds; avoids hanging on a stalled connection
        response.raise_for_status()
        items = response.json()["response"]['groups'][0]['items']

        # keep only the relevant information for each nearby venue
        for item in items:
            venue = item['venue']
            categories = venue['categories']
            rows.append((
                name,
                lat,
                lng,
                venue['name'],
                venue['location']['lat'],
                venue['location']['lng'],
                # a venue may come back without any category
                categories[0]['name'] if categories else None))

    # Passing the column names to the constructor also works when `rows` is empty;
    # assigning `.columns` afterwards raises on a frame that has no columns.
    nearby_venues = pd.DataFrame(rows, columns=columns)

    return(nearby_venues)

In [84]:
# Look up the venues around every row of `Toronto` (label, latitude, longitude).
toronto_venues = getNearbyVenues(
    Toronto['Neighborhood'],
    Toronto['Latitude'],
    Toronto['Longitude'],
)
toronto_venues.head()

The Beaches
East Toronto, Riverdale
East Toronto, India Bazaar
East Toronto
Lawrence Park
Central Toronto
Central Toronto
Central Toronto
Moore Park, Central Toronto
Deer Park, Central Toronto, Rathnelly, South Hill, Central Toronto
Rosedale
Cabbagetown, St. James Town
Church and Wellesley
Harbourfront, Regent Park
Downtown Toronto, Downtown Toronto
St. James Town
Berczy Park
Downtown Toronto
Downtown Toronto, Downtown Toronto, Downtown Toronto
Downtown Toronto, Toronto Islands, Union Station
Design Exchange, Toronto Dominion Centre
Commerce Court, Downtown Toronto
Central Toronto
Forest Hill North, Central Toronto
The Annex, Central Toronto, Yorkville
Downtown Toronto, University of Toronto
Chinatown, Grange Park, Kensington Market
CN Tower, Downtown Toronto, Downtown Toronto, Downtown Toronto, King and Spadina, Railway Lands, South Niagara
Downtown Toronto
First Canadian Place, Underground city
Downtown Toronto
Dovercourt Village, West Toronto
Little Portugal, Trinity
West Toronto, E

Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,The Beaches,43.676357,-79.293031,The Big Carrot Natural Food Market,43.678879,-79.297734,Health Food Store
1,The Beaches,43.676357,-79.293031,Grover Pub and Grub,43.679181,-79.297215,Pub
2,The Beaches,43.676357,-79.293031,Glen Manor Ravine,43.676821,-79.293942,Trail
3,The Beaches,43.676357,-79.293031,Upper Beaches,43.680563,-79.292869,Neighborhood
4,"East Toronto, Riverdale",43.679557,-79.352188,Pantheon,43.677621,-79.351434,Greek Restaurant


In [90]:
# For each neighborhood label, count the non-null entries in every other column.
toronto_venues.groupby('Neighborhood').count()

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Berczy Park,55,55,55,55,55,55
"CN Tower, Downtown Toronto, Downtown Toronto, Downtown Toronto, King and Spadina, Railway Lands, South Niagara",14,14,14,14,14,14
"Cabbagetown, St. James Town",46,46,46,46,46,46
Central Toronto,64,64,64,64,64,64
"Chinatown, Grange Park, Kensington Market",100,100,100,100,100,100
Church and Wellesley,87,87,87,87,87,87
"Commerce Court, Downtown Toronto",100,100,100,100,100,100
"Deer Park, Central Toronto, Rathnelly, South Hill, Central Toronto",14,14,14,14,14,14
"Design Exchange, Toronto Dominion Centre",100,100,100,100,100,100
"Dovercourt Village, West Toronto",20,20,20,20,20,20
