<H3>Hello, I will be using code comments instead of markdown cells</H3>
<h4>have a nice day!</h4>

In [1]:
# import needed libs
import sys
import pandas as pd
import numpy as np
import requests
from bs4 import BeautifulSoup
from tabulate import tabulate

In [2]:
#get URL
website_url = requests.get('https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M').text

#parse URL
soup = BeautifulSoup(website_url,'lxml')

#make html code pretty
soup.prettify()

#get table from code
My_table = soup.find('table',{'class':'wikitable sortable'})
df = pd.read_html(str(My_table))
df=df[0]

#naming the columns
df.columns = ['Postcode', 'Borough', 'Neighbourhood']

#dropping 1st row
df.drop(df.index[0], inplace=True)

#lets see what we got
print(df.head())
print(df.size)

  Postcode           Borough     Neighbourhood
1      M1A      Not assigned      Not assigned
2      M2A      Not assigned      Not assigned
3      M3A        North York         Parkwoods
4      M4A        North York  Victoria Village
5      M5A  Downtown Toronto      Harbourfront
867


In [3]:
#drop all rows containing "Not assigned" Neighbourhood
df.drop(df[df.Borough == 'Not assigned'].index, inplace=True)

#get rid of 'Not assigned Neighbourhoods
for index, row in df.iterrows():
    if row['Neighbourhood']=='Not assigned':
        row['Neighbourhood']=row['Borough']

#lets see what we got
df.head()

Unnamed: 0,Postcode,Borough,Neighbourhood
3,M3A,North York,Parkwoods
4,M4A,North York,Victoria Village
5,M5A,Downtown Toronto,Harbourfront
6,M5A,Downtown Toronto,Regent Park
7,M6A,North York,Lawrence Heights


In [4]:
#grouping by Boroughs and showing the result
res=df.groupby(['Postcode'],as_index=False).agg(lambda x: "%s" % ', '.join(x.unique())) 

#reset indexes in dataframe       
df.reset_index()

#lets see what we have as result
res

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M1B,Scarborough,"Rouge, Malvern"
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union"
2,M1E,Scarborough,"Guildwood, Morningside, West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae
5,M1J,Scarborough,Scarborough Village
6,M1K,Scarborough,"East Birchmount Park, Ionview, Kennedy Park"
7,M1L,Scarborough,"Clairlea, Golden Mile, Oakridge"
8,M1M,Scarborough,"Cliffcrest, Cliffside, Scarborough Village West"
9,M1N,Scarborough,"Birch Cliff, Cliffside West"


In [5]:
#lets see what shape do we have here
res.shape

(103, 3)

<h2>Part 2 - Fetching geodata</h2>

In [6]:
#installing GeoCoder anв import
!pip install geocoder
import geocoder

Collecting geocoder
  Downloading https://files.pythonhosted.org/packages/4f/6b/13166c909ad2f2d76b929a4227c952630ebaf0d729f6317eb09cbceccbab/geocoder-1.38.1-py2.py3-none-any.whl (98kB)
[K    100% |████████████████████████████████| 102kB 7.3MB/s ta 0:00:01
[?25hRequirement not upgraded as not directly required: future in /opt/conda/envs/DSX-Python35/lib/python3.5/site-packages (from geocoder)
Requirement not upgraded as not directly required: six in /opt/conda/envs/DSX-Python35/lib/python3.5/site-packages (from geocoder)
Collecting ratelim (from geocoder)
  Downloading https://files.pythonhosted.org/packages/f2/98/7e6d147fd16a10a5f821db6e25f192265d6ecca3d82957a4fdd592cad49c/ratelim-0.1.6-py2.py3-none-any.whl
Requirement not upgraded as not directly required: requests in /opt/conda/envs/DSX-Python35/lib/python3.5/site-packages (from geocoder)
Requirement not upgraded as not directly required: click in /opt/conda/envs/DSX-Python35/lib/python3.5/site-packages (from geocoder)
Requirement 

In [7]:
#define url
url = 'https://cocl.us/Geospatial_data'

#read coordinates from csv to dataframe
coordDF=pd.read_csv(url)

#watch new dataframe
coordDF.head()

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


In [8]:
#rename Postal Code column to Postcode
coordDF = coordDF.rename(columns={'Postal Code': 'Postcode'})
coordDF.head()

Unnamed: 0,Postcode,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


In [9]:
#merge 2 dataframes
rezdf = coordDF.merge(res, on='Postcode', how='left')
rezdf.head()

Unnamed: 0,Postcode,Latitude,Longitude,Borough,Neighbourhood
0,M1B,43.806686,-79.194353,Scarborough,"Rouge, Malvern"
1,M1C,43.784535,-79.160497,Scarborough,"Highland Creek, Rouge Hill, Port Union"
2,M1E,43.763573,-79.188711,Scarborough,"Guildwood, Morningside, West Hill"
3,M1G,43.770992,-79.216917,Scarborough,Woburn
4,M1H,43.773136,-79.239476,Scarborough,Cedarbrae


In [10]:
#change sequence of columns
rezdf = rezdf[['Postcode', 'Borough', 'Neighbourhood', 'Latitude', 'Longitude']]
rezdf.head()

Unnamed: 0,Postcode,Borough,Neighbourhood,Latitude,Longitude
0,M1B,Scarborough,"Rouge, Malvern",43.806686,-79.194353
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476


<h2>Part 3 - "Explore and cluster the neighborhoods in Scarborough, Toronto"</h2>
<h1 style="color:red;">Task: show on map all the police stations in 10 km radius where Borough == Scarborough</h1>
<p>I am making a video game about bank robbers in Scarborough, Toronto, so I need to know all the real police stations there to put them into my game.</p>

In [11]:
import requests # library to handle requests
import pandas as pd # library for data analsysis
import numpy as np # library to handle data in a vectorized manner
import random # library for random number generation

!conda install -c conda-forge geopy --yes 
from geopy.geocoders import Nominatim # module to convert an address into latitude and longitude values

# libraries for displaying images
from IPython.display import Image 
from IPython.core.display import HTML 
    
# tranforming json file into a pandas dataframe library
from pandas.io.json import json_normalize

!conda install -c conda-forge folium=0.5.0 --yes
import folium # plotting library

print('Folium installed')
print('Libraries imported.')

Fetching package metadata .............
Solving package specifications: .

Package plan for installation in environment /opt/conda/envs/DSX-Python35:

The following NEW packages will be INSTALLED:

    geographiclib: 1.49-py_0   conda-forge
    geopy:         1.18.1-py_0 conda-forge

geographiclib- 100% |################################| Time: 0:00:00  23.41 MB/s
geopy-1.18.1-p 100% |################################| Time: 0:00:00  36.70 MB/s
Fetching package metadata .............
Solving package specifications: .

Package plan for installation in environment /opt/conda/envs/DSX-Python35:

The following NEW packages will be INSTALLED:

    altair:  2.2.2-py35_1 conda-forge
    branca:  0.3.1-py_0   conda-forge
    folium:  0.5.0-py_0   conda-forge
    vincent: 0.4.4-py_1   conda-forge

altair-2.2.2-p 100% |################################| Time: 0:00:00  52.97 MB/s
branca-0.3.1-p 100% |################################| Time: 0:00:00  36.35 MB/s
vincent-0.4.4- 100% |###################

In [12]:
#Define Foursquare Credentials and Version and limit
CLIENT_ID = '5C0SBFWN5NJLFSCNXDUH2O2BRCF1SPBDEJKXB34CLS1JJZQK' # your Foursquare ID
CLIENT_SECRET = 'MKJLMSSBH4IIKRFZPS55TKIDVU1VMEJJ1ILYYOW0GHHN0QF2' # your Foursquare Secret
VERSION = '20180604'
LIMIT = 30

In [13]:
#take all the postcodes of Scarborough
scarcodes=rezdf.loc[rezdf['Borough'] == 'Scarborough']
scarcodes

Unnamed: 0,Postcode,Borough,Neighbourhood,Latitude,Longitude
0,M1B,Scarborough,"Rouge, Malvern",43.806686,-79.194353
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476
5,M1J,Scarborough,Scarborough Village,43.744734,-79.239476
6,M1K,Scarborough,"East Birchmount Park, Ionview, Kennedy Park",43.727929,-79.262029
7,M1L,Scarborough,"Clairlea, Golden Mile, Oakridge",43.711112,-79.284577
8,M1M,Scarborough,"Cliffcrest, Cliffside, Scarborough Village West",43.716316,-79.239476
9,M1N,Scarborough,"Birch Cliff, Cliffside West",43.692657,-79.264848


In [14]:
#lets define search query
search_query = 'police station'

#lets define radius = 10 km
radius = 10000

In [15]:
#lets assume that we will fetch police stations from the 10 km circle in the centre of Scarborough, which will be average of its Neighbourhood's Latitude & Longitude
center_lat=scarcodes.Latitude.sum() / scarcodes.Latitude.count()
center_lng=scarcodes.Longitude.sum() / scarcodes.Longitude.count()
print('Coordinates of the search area centre are: ', center_lat, center_lng)

Coordinates of the search area centre are:  43.7662288941 -79.2490852353


In [16]:
#define search URL
url = 'https://api.foursquare.com/v2/venues/search?client_id={}&client_secret={}&ll={},{}&v={}&query={}&radius={}&limit={}'.format(CLIENT_ID, CLIENT_SECRET, center_lat, center_lng, VERSION, search_query, radius, LIMIT)
url

'https://api.foursquare.com/v2/venues/search?client_id=5C0SBFWN5NJLFSCNXDUH2O2BRCF1SPBDEJKXB34CLS1JJZQK&client_secret=MKJLMSSBH4IIKRFZPS55TKIDVU1VMEJJ1ILYYOW0GHHN0QF2&ll=43.76622889411765,-79.24908523529412&v=20180604&query=police station&radius=10000&limit=30'

In [18]:
#results in JSON format
results = requests.get(url).json()
results

{'meta': {'code': 200, 'requestId': '5c6baf70351e3d13a8b09e66'},
 'response': {'venues': [{'categories': [{'icon': {'prefix': 'https://ss3.4sqi.net/img/categories_v2/travel/subway_',
       'suffix': '.png'},
      'id': '4bf58dd8d48988d1fd931735',
      'name': 'Metro Station',
      'pluralName': 'Metro Stations',
      'primary': True,
      'shortName': 'Metro'}],
    'hasPerk': False,
    'id': '4ae9b80ef964a520d0b521e3',
    'location': {'address': '2455 Eglinton Av E',
     'cc': 'CA',
     'city': 'Toronto',
     'country': 'Canada',
     'crossStreet': 'at Kennedy Rd',
     'distance': 3922,
     'formattedAddress': ['2455 Eglinton Av E (at Kennedy Rd)',
      'Toronto ON',
      'Canada'],
     'labeledLatLngs': [{'label': 'display',
       'lat': 43.73291798585667,
       'lng': -79.26498413085938}],
     'lat': 43.73291798585667,
     'lng': -79.26498413085938,
     'state': 'ON'},
    'name': 'Kennedy Subway/RT Station',
    'referralId': 'v-1550561136'},
   {'categories':

In [19]:
# assign relevant part of JSON to venues
police_venues = results['response']['venues']

# tranform venues into a dataframe
police_df = json_normalize(police_venues)

# keep only columns that include venue name, and anything that is associated with location
filtered_columns = ['name', 'categories'] + [col for col in police_df.columns if col.startswith('location.')] + ['id']
police_df_filtered = police_df.loc[:, filtered_columns]

# function that extracts the category of the venue
def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return None
    else:
        return categories_list[0]['name']

# filter the category for each row
police_df_filtered['categories'] = police_df_filtered.apply(get_category_type, axis=1)

# clean column names by keeping only last term
police_df_filtered.columns = [column.split('.')[-1] for column in police_df_filtered.columns]

#lets see what we got here
police_df_filtered

Unnamed: 0,name,categories,address,cc,city,country,crossStreet,distance,formattedAddress,labeledLatLngs,lat,lng,postalCode,state,id
0,Kennedy Subway/RT Station,Metro Station,2455 Eglinton Av E,CA,Toronto,Canada,at Kennedy Rd,3922,"[2455 Eglinton Av E (at Kennedy Rd), Toronto O...","[{'lng': -79.26498413085938, 'lat': 43.7329179...",43.732918,-79.264984,,ON,4ae9b80ef964a520d0b521e3
1,Scarborough Centre RT Station,Light Rail Station,300 Borough Dr,CA,Toronto,Canada,,1141,"[300 Borough Dr, Toronto ON M1P 4P5, Canada]","[{'lng': -79.25739079713821, 'lat': 43.7745413...",43.774541,-79.257391,M1P 4P5,ON,4b61e094f964a5209c282ae3
2,McCowan RT Station,Light Rail Station,1275 McCowan Rd,CA,Toronto,Canada,at Ellesmere Rd,986,"[1275 McCowan Rd (at Ellesmere Rd), Toronto ON...","[{'lng': -79.25177693367004, 'lat': 43.7748744...",43.774874,-79.251777,M1P 2K2,ON,4b17097bf964a5204dc123e3
3,Scarborough Centre Bus Station,Bus Station,300 Borough dr,CA,Scarborough,Canada,,1145,"[300 Borough dr, Scarborough ON, Canada]","[{'lng': -79.25774001667654, 'lat': 43.7744061...",43.774406,-79.25774,,ON,500b6cbde4b001d6c599ef3f
4,Lawrence East RT Station,Light Rail Station,2444 Lawrence Ave E,CA,Toronto,Canada,at Midland Ave.,2447,"[2444 Lawrence Ave E (at Midland Ave.), Toront...","[{'lng': -79.27019163966177, 'lat': 43.7503833...",43.750383,-79.270192,M1P 4S5,ON,4b0bdcc3f964a520363423e3
5,Victoria Park Subway Station,Metro Station,777 Victoria Park Ave,CA,Toronto,Canada,at Danforth,8558,"[777 Victoria Park Ave (at Danforth), Toronto ...","[{'lng': -79.28873509168625, 'lat': 43.6948826...",43.694883,-79.288735,M4B 2E2,ON,4b17084cf964a52038c123e3
6,Midland RT Station,Light Rail Station,2085 Midland Ave,CA,Toronto,Canada,,1900,"[2085 Midland Ave, Toronto ON M1P 3E4, Canada]","[{'lng': -79.27199006080627, 'lat': 43.7704662...",43.770466,-79.27199,M1P 3E4,ON,4b0d3007f964a520734423e3
7,Lawrence East RT Station - Parking Lot,Parking,,CA,Toronto,Canada,,2407,"[Toronto ON, Canada]","[{'lng': -79.26995158195496, 'lat': 43.7507224...",43.750722,-79.269952,,ON,51064cb9e4b048a9e1edef60
8,Main Street Subway Station,Metro Station,2550 Danforth Ave,CA,Toronto,Canada,at Main St,9570,"[2550 Danforth Ave (at Main St), Toronto ON M4...","[{'lng': -79.30166156422199, 'lat': 43.6891085...",43.689109,-79.301662,M4C 1L4,ON,4ae89c26f964a520d1b021e3
9,Kennedy Station - North Service Road Parking Lot,Parking,,CA,,Canada,,3810,[Canada],"[{'lng': -79.26346063613892, 'lat': 43.7336079...",43.733608,-79.263461,,,51076ca7e4b04498826987d4


In [20]:
venues_map = folium.Map(location=[center_lat,center_lng], zoom_start=12) # generate map centred around the search area centre

# add a red circle marker to represent the search area centre
folium.features.CircleMarker(
    [center_lat, center_lng],
    radius=5,
    color='red',
    popup='The police station search area centre',
    fill = True,
    fill_color = 'red',
    fill_opacity = 1
).add_to(venues_map)

# add the police venues as blue circle markers
for lat, lng, label in zip(police_df_filtered.lat, police_df_filtered.lng, police_df_filtered.categories):
    folium.features.CircleMarker(
        [lat, lng],
        radius=3,
        color='blue',
        popup=label,
        fill = True,
        fill_color='blue',
        fill_opacity=1
    ).add_to(venues_map)

# display map
venues_map

<h2>So, I have all the neccessary info I need for my "Ultimate Scarborough bankrobbers" video game!</h2>