## Importing BeautifulSoup, requests and pandas libraries

In [101]:
from bs4 import BeautifulSoup # this module helps in web scrapping.
import requests  # this module helps us to download a web page
import pandas as pd

## Defining data source webpage from wikipedia

In [102]:
url='https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'

## Using requests to get web page text and beautifulsoup to format it to html

In [103]:
data  = requests.get(url).text 
soup = BeautifulSoup(data,'html5lib')

## Extracting tables from the soup

In [104]:
table_contents=[]  # Blank list to contain table data
table=soup.find('table')

## Extracting data from table and creating a list with the data

In [114]:
for row in table.findAll('td'):            ## iterating between all normal table cells - tag 'td'
    cell = {}                              ## creating blank dictionary which will contain each row data
    if row.span.text == 'Not assigned':    ## ignoring cells that contain 'Not assigned'
        pass
    else:
        cell['PostalCode'] = row.p.text[:3]              #extracting 3 characters of postal code from row
        cell['Borough'] = (row.span.text).split('(')[0]  #extracting text before first open bracket
        cell['Neighborhood'] = (((((row.span.text).split('(')[1]).strip(')').replace('/',',')).replace(')', ' ')).strip(' ')) 
        ## line above extracts element after first parenthesis and remove closing bracket then replace / and , ) for blanks
        table_contents.append(cell)  #adding the dictionary to table contents


## Transforming table_contents list into pandas Dataframe

In [142]:
df=pd.DataFrame(table_contents)  ##converting list to dataframe
df['Borough']=df['Borough'].replace({'Downtown TorontoStn A PO Boxes25 The Esplanade':'Downtown Toronto Stn A',
                                     'East TorontoBusiness reply mail Processing Centre969 Eastern':'East Toronto Business',
                                     'EtobicokeNorthwest':'Etobicoke Northwest','East YorkEast Toronto':'East York/East Toronto',
                                     'MississaugaCanada Post Gateway Processing Centre':'Mississauga'})

## line above clean few cells that were not extracted right

#df.drop_duplicates(subset = 'Neighborhood', keep='first', inplace= True) # dropping duplicates
df

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Regent Park , Harbourfront"
3,M6A,North York,"Lawrence Manor , Lawrence Heights"
4,M7A,Queen's Park,Ontario Provincial Government
...,...,...,...
98,M8X,Etobicoke,"The Kingsway , Montgomery Road , Old Mill North"
99,M4Y,Downtown Toronto,Church and Wellesley
100,M7Y,East Toronto Business,Enclave of M4L
101,M8Y,Etobicoke,"Old Mill South , King's Mill Park , Sunnylea ,..."


## Number of rows in the dataframe

In [143]:
print('The number of rows in the dataframe is: {}'.format(df.shape[0]))

The number of rows in the dataframe is: 103


## Installing geocoder

In [144]:
! pip install geocoder



## Obtaining Coordinates of postal codes -- Geocoder did not work -- never exiting loop

In [145]:
# import geocoder # import geocoder

# initialize your variable to None
# lat_lng_coords = None

# loop until you get the coordinates
# while(lat_lng_coords is None):
#  g = geocoder.google('{}, Toronto, Ontario'.format('M5G'))
#  lat_lng_coords = g.latlng

# latitude = lat_lng_coords[0]
# longitude = lat_lng_coords[1]

## Importing csv with coordinates

In [146]:
file = 'Geospatial_Coordinates.csv'
Geo_coord = pd.read_csv(file)
Geo_coord = Geo_coord.rename(columns={'Postal Code': 'PostalCode'})
Geo_coord

Unnamed: 0,PostalCode,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476
...,...,...,...
98,M9N,43.706876,-79.518188
99,M9P,43.696319,-79.532242
100,M9R,43.688905,-79.554724
101,M9V,43.739416,-79.588437


In [148]:
df = pd.merge(df, Geo_coord, how='left')
df

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,"Regent Park , Harbourfront",43.654260,-79.360636
3,M6A,North York,"Lawrence Manor , Lawrence Heights",43.718518,-79.464763
4,M7A,Queen's Park,Ontario Provincial Government,43.662301,-79.389494
...,...,...,...,...,...
98,M8X,Etobicoke,"The Kingsway , Montgomery Road , Old Mill North",43.653654,-79.506944
99,M4Y,Downtown Toronto,Church and Wellesley,43.665860,-79.383160
100,M7Y,East Toronto Business,Enclave of M4L,43.662744,-79.321558
101,M8Y,Etobicoke,"Old Mill South , King's Mill Park , Sunnylea ,...",43.636258,-79.498509


## Foursquare Credentials

In [1]:
CLIENT_ID = '@@' # your Foursquare ID
CLIENT_SECRET = '@@' # your Foursquare Secret
ACCESS_TOKEN = '@@' # your FourSquare Access Token
VERSION = '20180604'
LIMIT = 30
print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: @@
CLIENT_SECRET:@@


## Explore Etobicoke Borough -- 11 entries

In [213]:
Nhoods = df[df['Borough'] == 'Etobicoke']
Nhoods

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
5,M9A,Etobicoke,Islington Avenue,43.667856,-79.532242
11,M9B,Etobicoke,"West Deane Park , Princess Gardens , Martin Gr...",43.650943,-79.554724
17,M9C,Etobicoke,"Eringate , Bloordale Gardens , Old Burnhamthor...",43.643515,-79.577201
70,M9P,Etobicoke,Westmount,43.696319,-79.532242
77,M9R,Etobicoke,"Kingsview Village , St. Phillips , Martin Grov...",43.688905,-79.554724
88,M8V,Etobicoke,"New Toronto , Mimico South , Humber Bay Shores",43.605647,-79.501321
89,M9V,Etobicoke,"South Steeles , Silverstone , Humbergate , Jam...",43.739416,-79.588437
93,M8W,Etobicoke,"Alderwood , Long Branch",43.602414,-79.543484
98,M8X,Etobicoke,"The Kingsway , Montgomery Road , Old Mill North",43.653654,-79.506944
101,M8Y,Etobicoke,"Old Mill South , King's Mill Park , Sunnylea ,...",43.636258,-79.498509


# Map the 10 most popular venues on each  of the Etobicoke Neighborhoods

In [249]:
LIMIT = 10

## Loop: Define url get json file and convert to dataframe

In [264]:
## Define blank dataframe to contain all venues
col = ['name', 'categories', 'address', 'lat', 'lng', 'labeledLatLngs',
       'distance', 'postalCode', 'cc', 'city', 'state', 'country',
       'formattedAddress', 'crossStreet', 'id']
rdf = pd.DataFrame(columns = col)
rdf

Unnamed: 0,name,categories,address,lat,lng,labeledLatLngs,distance,postalCode,cc,city,state,country,formattedAddress,crossStreet,id


In [274]:
# function that extracts the category of the venue

def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return None
    else:
        return categories_list[0]['name']

# Loop starts selecting coordinates for each Borough
for i,rows in Nhoods.iterrows():
    lat = rows['Latitude']
    lon = rows['Longitude']
    
#getting 10 most popular venues on each Neighborhood of Etobicoke
    url = 'https://api.foursquare.com/v2/venues/explore?client_id={}&client_secret={}&ll={},{}&v={}&limit={}'.format(CLIENT_ID, CLIENT_SECRET, lat, lon, VERSION, LIMIT)
    results = requests.get(url).json() ## get data from foursquare
    items = results['response']['groups'][0]['items'] ## filter useful data
    items[0] 

## Process json file to dataframe
# tranforming json file into a pandas dataframe library
    from pandas.io.json import json_normalize
    dataframe = json_normalize(items) # flatten JSON

#filter columns
    filtered_columns = ['venue.name', 'venue.categories'] + [col for col in dataframe.columns if col.startswith('venue.location.')] + ['venue.id']
    dataframe_filtered = dataframe.loc[:, filtered_columns]

# filter the category for each row
    dataframe_filtered['venue.categories'] = dataframe_filtered.apply(get_category_type, axis=1)

# clean columns
    dataframe_filtered.columns = [col.split('.')[-1] for col in dataframe_filtered.columns]
    #print(dataframe_filtered.head())

#Store all results in the rdf dataframe
    rdf = rdf.append(dataframe_filtered)
rdf



Unnamed: 0,name,categories,address,lat,lng,labeledLatLngs,distance,postalCode,cc,city,state,country,formattedAddress,crossStreet,id,neighborhood
0,South St. Burger,Burger Joint,1020 Islington Ave,43.631314,-79.518408,"[{'label': 'display', 'lat': 43.63131374571208...",345,M8Z 6A4,CA,Etobicoke,ON,Canada,"[1020 Islington Ave, Etobicoke ON M8Z 6A4, Can...",,4bc9f9b6b6c49c7469688f91,
1,Wingporium,Wings Joint,1000 Islington Ave,43.630275,-79.518169,"[{'label': 'display', 'lat': 43.63027535508102...",278,M8Z 4P8,CA,Toronto,ON,Canada,"[1000 Islington Ave (at Titan Rd), Toronto ON ...",at Titan Rd,4b5fcadff964a520eecc29e3,
2,Power Yoga Canada Etobicoke,Yoga Studio,1092 Islington Ave,43.636592,-79.520312,"[{'label': 'display', 'lat': 43.63659220688171...",864,,CA,Etobicoke,ON,Canada,"[1092 Islington Ave, Etobicoke ON, Canada]",,4e38175dd4c0dc7ad2ec326a,
3,Fat Bastard Burrito Co.,Burrito Place,1180 The Queensway,43.622099,-79.521880,"[{'label': 'display', 'lat': 43.62209851449942...",753,,CA,Etobicoke,ON,Canada,"[1180 The Queensway, Etobicoke ON, Canada]",,4b6b2764f964a52034f62be3,
4,Dimpflmeier Factory,Bakery,20 Advance Rd.,43.633773,-79.529895,"[{'label': 'display', 'lat': 43.63377341289872...",902,M8Z 2S6,CA,Toronto,ON,Canada,[20 Advance Rd. (btwn Kipling Ave & Islington ...,btwn Kipling Ave & Islington Ave,4b62064df964a5202e312ae3,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5,Starbucks,Coffee Shop,829 The Queensway,43.624654,-79.508217,"[{'label': 'display', 'lat': 43.624654, 'lng':...",1130,M8Z 1N6,CA,Toronto,ON,Canada,"[829 The Queensway (Taymall Avenue), Toronto O...",Taymall Avenue,55fd4770498e7a8fbc9082fe,
6,Cineplex Cinemas,Movie Theater,1025 The Queensway,43.621251,-79.515346,"[{'label': 'display', 'lat': 43.62125133114305...",959,M8Z 6C7,CA,Toronto,ON,Canada,"[1025 The Queensway (at Islington Ave.), Toron...",at Islington Ave.,4b0073a7f964a520643e22e3,
7,Burrito Boyz,Burrito Place,1197 The Queensway,43.621791,-79.522601,"[{'label': 'display', 'lat': 43.62179051874228...",795,M8Z 1S8,CA,Toronto,ON,Canada,"[1197 The Queensway (at Kipling), Toronto ON M...",at Kipling,4d322a6d8c42a1cd1c10e65d,
8,The Pie Commission,Restaurant,935 The Queensway,43.623548,-79.512265,"[{'label': 'display', 'lat': 43.62354819663253...",917,M8Z 1P3,CA,Etobicoke,ON,Canada,"[935 The Queensway (at Canmotor Ave.), Etobico...",at Canmotor Ave.,520546a311d2e15052dc4061,


## Installing folium

In [276]:
! pip install folium==0.5.0



## Visualizing in a map

In [298]:
import folium # plotting library


venues_map = folium.Map(location=[43.667856, -79.532242], zoom_start=11) # generate map centred around Bourough

# Loop starts selecting coordinates for each Borough
for lat, long, label in zip(Nhoods.Latitude, Nhoods.Longitude, Nhoods.Neighborhood):
                            
#i,rows in Nhoods.iterrows():
#    lat = rows['Latitude']
#    lon = rows['Longitude']
#    Nhd = rows['Neighborhood']
    
# add Neighborhood center as a red circle mark
    folium.CircleMarker(
        [lat, long],
        radius=10,
        popup=label.partition(',')[0],
        fill=True,
        color='red',
        fill_color='red',
        fill_opacity=0.6
        ).add_to(venues_map)


# add popular spots to the map as blue circle markers
for lat, lng, lab in zip(rdf.lat, rdf.lng, rdf.name):
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup='popular',
        fill=True,
        color='blue',
        fill_color='blue',
        fill_opacity=0.6
        ).add_to(venues_map)

# display map
venues_map

# Observations:
#### Most of the popular venues of Mimico NW and Old Mill South neighborhoods are located along the Queensway road
#### Most of the popular venues of The Kinsway neighborhood are located near the intersection of Bloor Street West and Royal York Road
### Most of the popular venues on Etobicoke Borough are concentrated in the South. 

#### *Check readme if map is not displayed*