### Import all required libraries - numpy, pandas, json, requests, etc.

In [1]:
import numpy as np 
import pandas as pd
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

import json

!conda install -c conda-forge geopy --yes
from geopy.geocoders import Nominatim


import requests
from pandas.io.json import json_normalize

import matplotlib.cm as cm
import matplotlib.colors as colors

from sklearn.cluster import KMeans

!conda install -c conda-forge folium=0.5.0 --yes
import folium

print('Libraries imported.')

Fetching package metadata .............
Solving package specifications: .

# All requested packages already installed.
# packages in environment at /opt/conda/envs/DSX-Python35:
#
geopy                     1.17.0                     py_0    conda-forge
Fetching package metadata .............
Solving package specifications: .

# All requested packages already installed.
# packages in environment at /opt/conda/envs/DSX-Python35:
#
folium                    0.5.0                      py_0    conda-forge
Libraries imported.


### Scraping the wiki page using BeautifulSoup - import BeautifulSoup and install lxml

In [2]:
from bs4 import BeautifulSoup
!conda install -y lxml

Fetching package metadata ...........
Solving package specifications: .

# All requested packages already installed.
# packages in environment at /opt/conda/envs/DSX-Python35:
#
lxml                      4.2.5            py35hefd8a0e_0  


### Get the url and print after scraping using BeautifulSoup

In [3]:
webURL = requests.get('https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M').text
soup = BeautifulSoup(webURL,'lxml')
#print(soup.prettify()) # tested and then commented only to not show entire o/p to maintain readability

### Find the table wikitable sortable

In [4]:
postCodeTable = soup.find('table',{'class':'wikitable sortable'})
#postCodeTable

### using findall and a for loop run through the table populating the details using append based on the required conditions asked in the question

In [5]:
postalCode = []
borough = []
neighborhood = []

for row in postCodeTable.find_all('tr')[1:]:  # skipping first row for header
    boroughCell = row.find_all('td')[1] # processing boroughs - second coloumn first to skip non assigned ones 
    if (boroughCell.text.strip() != 'Not assigned'):
        borough.append(boroughCell.text.strip())
        postCell = row.find_all('td')[0]  #  first coloumn
        postalCode.append(postCell.text.strip())
        neighborhoodCell = row.find_all('td')[2] #third coloumn
        if (neighborhoodCell.text.strip() == 'Not assigned'):
            neighborhood.append(boroughCell.text.strip())
        else:
            neighborhood.append(neighborhoodCell.text.strip())

### Load into a pandas df and using groupby and join get the final required output

In [6]:
df = pd.DataFrame()
df['PostalCode'] = postalCode
df['Borough'] = borough
df['Neighborhood'] = neighborhood
df = df.groupby(['PostalCode', 'Borough'])['Neighborhood'].apply(', '.join).reset_index()
df

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M1B,Scarborough,"Rouge, Malvern"
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union"
2,M1E,Scarborough,"Guildwood, Morningside, West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae
5,M1J,Scarborough,Scarborough Village
6,M1K,Scarborough,"East Birchmount Park, Ionview, Kennedy Park"
7,M1L,Scarborough,"Clairlea, Golden Mile, Oakridge"
8,M1M,Scarborough,"Cliffcrest, Cliffside, Scarborough Village West"
9,M1N,Scarborough,"Birch Cliff, Cliffside West"


### df.shape to print number of rows of df including header

In [7]:
df.shape[0]

103

In [8]:
!conda install -c conda-forge geocoder --yes

Fetching package metadata .............
Solving package specifications: .

# All requested packages already installed.
# packages in environment at /opt/conda/envs/DSX-Python35:
#
geocoder                  1.38.1                     py_0    conda-forge


In [9]:
# geocoder latlong does not seem to work always, using the csv option instead

geospat_df = pd.read_csv('http://cocl.us/Geospatial_data')
geospat_df.head()

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


In [10]:
for column in df.columns[0:1]:
    for colGeo in geospat_df.columns[0:1]:
        if df[column][1] == geospat_df[colGeo][1]: 
            df['latitude'] = geospat_df["Latitude"]
            df['longitude'] = geospat_df["Longitude"]
df

Unnamed: 0,PostalCode,Borough,Neighborhood,latitude,longitude
0,M1B,Scarborough,"Rouge, Malvern",43.806686,-79.194353
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476
5,M1J,Scarborough,Scarborough Village,43.744734,-79.239476
6,M1K,Scarborough,"East Birchmount Park, Ionview, Kennedy Park",43.727929,-79.262029
7,M1L,Scarborough,"Clairlea, Golden Mile, Oakridge",43.711112,-79.284577
8,M1M,Scarborough,"Cliffcrest, Cliffside, Scarborough Village West",43.716316,-79.239476
9,M1N,Scarborough,"Birch Cliff, Cliffside West",43.692657,-79.264848


### Exploring Toronto neighbors using Boroughs with only Toronto in it 

In [11]:
toronto_df = df[df.Borough.str.contains('Toronto', na=False)].reset_index(drop=True)
toronto_df.head(50)

Unnamed: 0,PostalCode,Borough,Neighborhood,latitude,longitude
0,M4E,East Toronto,The Beaches,43.676357,-79.293031
1,M4K,East Toronto,"The Danforth West, Riverdale",43.679557,-79.352188
2,M4L,East Toronto,"The Beaches West, India Bazaar",43.668999,-79.315572
3,M4M,East Toronto,Studio District,43.659526,-79.340923
4,M4N,Central Toronto,Lawrence Park,43.72802,-79.38879
5,M4P,Central Toronto,Davisville North,43.712751,-79.390197
6,M4R,Central Toronto,North Toronto West,43.715383,-79.405678
7,M4S,Central Toronto,Davisville,43.704324,-79.38879
8,M4T,Central Toronto,"Moore Park, Summerhill East",43.689574,-79.38316
9,M4V,Central Toronto,"Deer Park, Forest Hill SE, Rathnelly, South Hi...",43.686412,-79.400049


In [12]:
address = 'Toronto, Canada'

geolocator = Nominatim()
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Toronto are {}, {}.'.format(latitude, longitude))



The geograpical coordinate of Toronto are 43.653963, -79.387207.


In [13]:
# create map of Toronto using latitude and longitude values
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=11)

# add markers to map
for lat, lng, label in zip(toronto_df['latitude'], toronto_df['longitude'], toronto_df['Neighborhood']):
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_toronto)  
    
map_toronto

### Expample Exploring using Foursquare API

In [14]:
# The code was removed by Watson Studio for sharing.

### Exploring some DownTown Toronto area - say Rosedale neighborhood in our frame

In [15]:
toronto_df.loc[10, 'Neighborhood']

'Rosedale'

### Get its lat and long

In [16]:
neighborhood_latitude = toronto_df.loc[10, 'latitude'] # neighborhood latitude value
neighborhood_longitude = toronto_df.loc[10, 'longitude'] # neighborhood longitude value

neighborhood_name = toronto_df.loc[10, 'Neighborhood'] # neighborhood name

print('Latitude and longitude values of {} are {}, {}.'.format(neighborhood_name, 
                                                               neighborhood_latitude, 
                                                               neighborhood_longitude))

Latitude and longitude values of Rosedale are 43.6795626, -79.37752940000001.


### Italian food near Rosedale within 500 mt radius

In [17]:
search_query = 'Italian'
radius = 500
print(search_query + ' .... OK!')

Italian .... OK!


In [18]:
url = 'https://api.foursquare.com/v2/venues/search?client_id={}&client_secret={}&ll={},{}&v={}&query={}&radius={}&limit={}'.format(CLIENT_ID, CLIENT_SECRET, latitude, longitude, VERSION, search_query, radius, LIMIT)
url

'https://api.foursquare.com/v2/venues/search?client_id=W5P1I5QOV4SJP351UBDWO0WWUZWJHGVTXICMOUR3ZVT1TNLS&client_secret=WUTXQYKW2H0RPWKYCD51WSBQJPHABGSWW3KN0XYTJ03XUXVX&ll=43.653963,-79.387207&v=20180604&query=Italian&radius=500&limit=30'

In [19]:
results = requests.get(url).json()
results

{'meta': {'code': 200, 'requestId': '5beaa3144c1f6717d390181d'},
 'response': {'venues': [{'categories': [{'icon': {'prefix': 'https://ss3.4sqi.net/img/categories_v2/food/italian_',
       'suffix': '.png'},
      'id': '4bf58dd8d48988d110941735',
      'name': 'Italian Restaurant',
      'pluralName': 'Italian Restaurants',
      'primary': True,
      'shortName': 'Italian'}],
    'hasPerk': False,
    'id': '51bf3866498e55ee55df8db0',
    'location': {'cc': 'CA',
     'city': 'Toronto',
     'country': 'Canada',
     'distance': 127,
     'formattedAddress': ['Toronto ON', 'Canada'],
     'labeledLatLngs': [{'label': 'display',
       'lat': 43.654991437465284,
       'lng': -79.38789662900189}],
     'lat': 43.654991437465284,
     'lng': -79.38789662900189,
     'state': 'ON'},
    'name': 'The Fresh Italian',
    'referralId': 'v-1542103828'},
   {'categories': [{'icon': {'prefix': 'https://ss3.4sqi.net/img/categories_v2/travel/embassy_',
       'suffix': '.png'},
      'id': '4b

In [20]:
# assign relevant part of JSON to venues
venues = results['response']['venues']

# tranform venues into a dataframe
dataframe = json_normalize(venues)
dataframe.head()

Unnamed: 0,categories,hasPerk,id,location.address,location.cc,location.city,location.country,location.crossStreet,location.distance,location.formattedAddress,location.labeledLatLngs,location.lat,location.lng,location.postalCode,location.state,name,referralId,venuePage.id
0,"[{'pluralName': 'Italian Restaurants', 'shortN...",False,51bf3866498e55ee55df8db0,,CA,Toronto,Canada,,127,"[Toronto ON, Canada]","[{'label': 'display', 'lat': 43.65499143746528...",43.654991,-79.387897,,ON,The Fresh Italian,v-1542103828,
1,"[{'pluralName': 'Embassies / Consulates', 'sho...",False,4bfc0289c3ba9521c00f9653,136 Beverley St,CA,Toronto,Canada,Dundas Street,555,"[136 Beverley St (Dundas Street), Toronto ON, ...","[{'label': 'display', 'lat': 43.65402694219784...",43.654027,-79.394104,,ON,Italian Consulate Toronto,v-1542103828,
2,"[{'pluralName': 'Italian Restaurants', 'shortN...",False,4f88cf84e4b002b90ab3b9b9,,CA,,Canada,,434,[Canada],"[{'label': 'display', 'lat': 43.65053979517576...",43.65054,-79.384603,,,LA's Italian + Bar,v-1542103828,
3,"[{'pluralName': 'Italian Restaurants', 'shortN...",False,526fe29411d2aeb3803013b0,"109 McCaul Street, Unit #42",CA,Toronto,Canada,Dundas Street West,288,"[109 McCaul Street, Unit #42 (Dundas Street We...","[{'label': 'display', 'lat': 43.653889, 'lng':...",43.653889,-79.390785,M5T 3K5,ON,The Fresh Italian Eatery,v-1542103828,
4,"[{'pluralName': 'Italian Restaurants', 'shortN...",False,573df789498e03dd8e54b166,595 Bay St,CA,Toronto,Canada,Dundas St,405,"[595 Bay St (Dundas St), Toronto ON M5G 2C2, C...","[{'label': 'display', 'lat': 43.65616, 'lng': ...",43.65616,-79.38319,M5G 2C2,ON,Mustachio Italian Eatery,v-1542103828,


### Keep information of interest and filter dataframe

In [21]:
# keep only columns that include venue name, and anything that is associated with location
filtered_columns = ['name', 'categories'] + [col for col in dataframe.columns if col.startswith('location.')] + ['id']
dataframe_filtered = dataframe.loc[:, filtered_columns]

# function that extracts the category of the venue
def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return None
    else:
        return categories_list[0]['name']

# filter the category for each row
dataframe_filtered['categories'] = dataframe_filtered.apply(get_category_type, axis=1)

# clean column names by keeping only last term
dataframe_filtered.columns = [column.split('.')[-1] for column in dataframe_filtered.columns]

dataframe_filtered

Unnamed: 0,name,categories,address,cc,city,country,crossStreet,distance,formattedAddress,labeledLatLngs,lat,lng,postalCode,state,id
0,The Fresh Italian,Italian Restaurant,,CA,Toronto,Canada,,127,"[Toronto ON, Canada]","[{'label': 'display', 'lat': 43.65499143746528...",43.654991,-79.387897,,ON,51bf3866498e55ee55df8db0
1,Italian Consulate Toronto,Embassy / Consulate,136 Beverley St,CA,Toronto,Canada,Dundas Street,555,"[136 Beverley St (Dundas Street), Toronto ON, ...","[{'label': 'display', 'lat': 43.65402694219784...",43.654027,-79.394104,,ON,4bfc0289c3ba9521c00f9653
2,LA's Italian + Bar,Italian Restaurant,,CA,,Canada,,434,[Canada],"[{'label': 'display', 'lat': 43.65053979517576...",43.65054,-79.384603,,,4f88cf84e4b002b90ab3b9b9
3,The Fresh Italian Eatery,Italian Restaurant,"109 McCaul Street, Unit #42",CA,Toronto,Canada,Dundas Street West,288,"[109 McCaul Street, Unit #42 (Dundas Street We...","[{'label': 'display', 'lat': 43.653889, 'lng':...",43.653889,-79.390785,M5T 3K5,ON,526fe29411d2aeb3803013b0
4,Mustachio Italian Eatery,Italian Restaurant,595 Bay St,CA,Toronto,Canada,Dundas St,405,"[595 Bay St (Dundas St), Toronto ON M5G 2C2, C...","[{'label': 'display', 'lat': 43.65616, 'lng': ...",43.65616,-79.38319,M5G 2C2,ON,573df789498e03dd8e54b166
5,Classic italian style pizza food truck,Food Truck,CNE Midway,CA,Toronto,Canada,,530,"[CNE Midway, Toronto ON, Canada]","[{'label': 'display', 'lat': 43.652144, 'lng':...",43.652144,-79.381118,,ON,4c787c9181bca0936180fa14
6,john's italian cafe,Italian Restaurant,27 Baldwin Street,CA,Toronto,Canada,,546,"[27 Baldwin Street, Toronto ON, Canada]","[{'label': 'display', 'lat': 43.65612672798775...",43.656127,-79.393301,,ON,53daae5b498e9c9597c19b23
7,Little Anthony's,Italian Restaurant,121 Richmond St. W,CA,Toronto,Canada,at York St.,462,"[121 Richmond St. W (at York St.), Toronto ON ...","[{'label': 'display', 'lat': 43.65029624519052...",43.650296,-79.384513,M5H 2K1,ON,4b846dd4f964a520dc3431e3
8,Sbarro,Pizza Place,220 Yonge St.,CA,Toronto,Canada,in Toronto Eaton Centre,526,"[220 Yonge St. (in Toronto Eaton Centre), Toro...","[{'label': 'display', 'lat': 43.655518, 'lng':...",43.655518,-79.38103,M5B 2H1,ON,4b4a2d09f964a520687d26e3


### Visualize Italian restaurants nearby

In [22]:
dataframe_filtered.name

0                         The Fresh Italian
1                 Italian Consulate Toronto
2                        LA's Italian + Bar
3                  The Fresh Italian Eatery
4                  Mustachio Italian Eatery
5    Classic italian style pizza food truck
6                       john's italian cafe
7                          Little Anthony's
8                                    Sbarro
Name: name, dtype: object

In [23]:
#Map around Rosedale 
venues_map = folium.Map(location=[neighborhood_latitude, neighborhood_longitude], zoom_start=12) 

# add a red circle marker to represent Rosedale  
folium.features.CircleMarker(
    [neighborhood_latitude, neighborhood_longitude],
    radius=10,
    color='red',
    popup='The Beach',
    fill = True,
    fill_color = 'red',
    fill_opacity = 0.6
).add_to(venues_map)

# add the Italian restaurants as blue circle markers
for lat, lng, label in zip(dataframe_filtered.lat, dataframe_filtered.lng, dataframe_filtered.categories):
    folium.features.CircleMarker(
        [lat, lng],
        radius=5,
        color='blue',
        popup=label,
        fill = True,
        fill_color='blue',
        fill_opacity=0.6
    ).add_to(venues_map)

# display map
venues_map