## Import the Library and fetch data from Wiki. Remove NA rows

In [9]:
import pandas as pd
import numpy as np
import requests
from bs4 import BeautifulSoup

source=requests.get('https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M').text

raw_list = pd.read_html(source, header=0, attrs={"class":"wikitable sortable"})[0]
raw_list= pd.DataFrame(raw_list)
raw_list=raw_list.replace("Not assigned", np.nan)
raw_list.dropna(subset=['Borough'],axis=0, inplace=True)
raw_list['Neighbourhood'].fillna(raw_list['Borough'], inplace=True)
raw_list

Unnamed: 0,Postcode,Borough,Neighbourhood
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront
5,M5A,Downtown Toronto,Regent Park
6,M6A,North York,Lawrence Heights
7,M6A,North York,Lawrence Manor
8,M7A,Queen's Park,Queen's Park
10,M9A,Etobicoke,Islington Avenue
11,M1B,Scarborough,Rouge
12,M1B,Scarborough,Malvern


### Gouping the Neighbouthood based on Postal

In [10]:
raw_list_grouped=raw_list.groupby(['Postcode','Borough'])['Neighbourhood'].apply(lambda x: ", ".join(x.astype(str))).reset_index()
raw_list_grouped = raw_list_grouped.sample(frac=1).reset_index(drop=True)
raw_list_grouped.head()

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M6B,North York,Glencairn
1,M4A,North York,Victoria Village
2,M5P,Central Toronto,"Forest Hill North, Forest Hill West"
3,M1M,Scarborough,"Cliffcrest, Cliffside, Scarborough Village West"
4,M5V,Downtown Toronto,"CN Tower, Bathurst Quay, Island airport, Harbo..."


In [11]:
raw_list_grouped.shape

(103, 3)

In [12]:
import requests
import pandas as pd
geo_file=pd.read_csv('http://cocl.us/Geospatial_data')
geo_file.head()

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


In [13]:
toronto=raw_list_grouped.set_index('Postcode').join(geo_file.set_index('Postal Code'))

In [14]:
toronto.head()

Unnamed: 0_level_0,Borough,Neighbourhood,Latitude,Longitude
Postcode,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
M6B,North York,Glencairn,43.709577,-79.445073
M4A,North York,Victoria Village,43.725882,-79.315572
M5P,Central Toronto,"Forest Hill North, Forest Hill West",43.696948,-79.411307
M1M,Scarborough,"Cliffcrest, Cliffside, Scarborough Village West",43.716316,-79.239476
M5V,Downtown Toronto,"CN Tower, Bathurst Quay, Island airport, Harbo...",43.628947,-79.39442


In [15]:
print('The dataframe has {} boroughs and {} neighborhoods.'.format(
        len(toronto['Borough'].unique()),
        toronto.shape[0]
    )
)

The dataframe has 11 boroughs and 103 neighborhoods.


### Map Generation

In [16]:
import numpy as np # library to handle data in a vectorized manner

import pandas as pd # library for data analsysis
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

import json # library to handle JSON files

#!conda install -c conda-forge geopy --yes # uncomment this line if you haven't completed the Foursquare API lab
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

import requests # library to handle requests
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans

!conda install -c conda-forge folium=0.5.0 --yes 
import folium # map rendering library

print('Libraries imported.')

Solving environment: done

## Package Plan ##

  environment location: /opt/conda/envs/DSX-Python35

  added / updated specs: 
    - folium=0.5.0


The following packages will be downloaded:

    package                    |            build
    ---------------------------|-----------------
    branca-0.3.1               |             py_0          25 KB  conda-forge
    altair-2.2.2               |           py35_1         462 KB  conda-forge
    ca-certificates-2019.3.9   |       hecc5488_0         146 KB  conda-forge
    vincent-0.4.4              |             py_1          28 KB  conda-forge
    folium-0.5.0               |             py_0          45 KB  conda-forge
    certifi-2018.8.24          |        py35_1001         139 KB  conda-forge
    openssl-1.0.2r             |       h14c3975_0         3.1 MB  conda-forge
    ------------------------------------------------------------
                                           Total:         4.0 MB

The following NEW packages will

In [17]:
address = 'Toronto'

geolocator = Nominatim(user_agent="ny_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Toronto are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Toronto are 43.653963, -79.387207.


In [18]:
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=10)

for lat, lng, borough, neighbourhood in zip(toronto['Latitude'], toronto['Longitude'], toronto['Borough'], toronto['Neighbourhood']):
    label = '{}, {}'.format(neighbourhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_toronto)  
    
map_toronto

### playing with the data

In [19]:
mylist=toronto['Borough']
sub = 'Toronto'
listing=[sub in mystring for mystring in mylist]
TorontoDF_limited=toronto[listing].reset_index(drop=True)
TorontoDF_limited.head(12)

Unnamed: 0,Borough,Neighbourhood,Latitude,Longitude
0,Central Toronto,"Forest Hill North, Forest Hill West",43.696948,-79.411307
1,Downtown Toronto,"CN Tower, Bathurst Quay, Island airport, Harbo...",43.628947,-79.39442
2,Central Toronto,North Toronto West,43.715383,-79.405678
3,Downtown Toronto,"Harbourfront East, Toronto Islands, Union Station",43.640816,-79.381752
4,West Toronto,"High Park, The Junction South",43.661608,-79.464763
5,East Toronto,Studio District,43.659526,-79.340923
6,West Toronto,"Little Portugal, Trinity",43.647927,-79.41975
7,Downtown Toronto,"Ryerson, Garden District",43.657162,-79.378937
8,West Toronto,"Parkdale, Roncesvalles",43.64896,-79.456325
9,Downtown Toronto,Stn A PO Boxes 25 The Esplanade,43.646435,-79.374846


In [20]:
TorontoDF_limited.shape

(38, 4)

In [26]:


print('The new dataframe has {} boroughs and {} neighborhoods.'.format(
        len(TorontoDF_limited['Borough'].unique()),
        TorontoDF_limited.shape[0]
    )
)

CLIENT_ID = 'BUNOW5XLSZ2VMWASNKEHYYNLYPKAEEJ1G1QO05CJ34WN0JK4' # your Foursquare ID
CLIENT_SECRET = 'VTKQXRIOXCQV5OSTWBRZB1B5BDLWLRLCDB3OVQE2UAGMTECJ' # your Foursquare Secret
VERSION = '20192904' # Foursquare API version

TorontoDF_limited.loc[0, 'Neighbourhood']

neighborhood_latitude = TorontoDF_limited.loc[0, 'Latitude'] # neighborhood latitude value
neighborhood_longitude = TorontoDF_limited.loc[0, 'Longitude'] # neighborhood longitude value

neighborhood_name = TorontoDF_limited.loc[0, 'Neighbourhood'] # neighborhood name

print('Latitude and longitude values of {} are {}, {}.'.format(neighborhood_name, 
                                                               neighborhood_latitude, 
                                                               neighborhood_longitude))

The new dataframe has 4 boroughs and 38 neighborhoods.
Latitude and longitude values of Forest Hill North, Forest Hill West are 43.6969476, -79.41130720000001.


### finding the 100 nearby venus

In [27]:
LIMIT=100

def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            neighborhood_latitude, 
            neighborhood_longitude, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            neighborhood_latitude, 
            neighborhood_longitude, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [28]:
toronto_venues = getNearbyVenues(names=toronto['Neighbourhood'],
                                   latitudes=TorontoDF_limited['Latitude'],
                                   longitudes=TorontoDF_limited['Longitude']
                                  )

Glencairn
Victoria Village
Forest Hill North, Forest Hill West
Cliffcrest, Cliffside, Scarborough Village West
CN Tower, Bathurst Quay, Island airport, Harbourfront West, King and Spadina, Railway Lands, South Niagara
Bloordale Gardens, Eringate, Markland Wood, Old Burnhamthorpe
North Toronto West
Clairlea, Golden Mile, Oakridge
Islington Avenue
Harbourfront East, Toronto Islands, Union Station
Bathurst Manor, Downsview North, Wilson Heights
Cedarbrae
Woodbine Gardens, Parkview Hill
Humber Summit
High Park, The Junction South
Northwest
Studio District
Humber Bay Shores, Mimico South, New Toronto
Little Portugal, Trinity
Woodbine Heights
Ryerson, Garden District
Thorncliffe Park
Parkdale, Roncesvalles
Downsview West
Bayview Village
Willowdale South
Agincourt
Fairview, Henry Farm, Oriole
Queen's Park
Stn A PO Boxes 25 The Esplanade
Alderwood, Long Branch
Willowdale West
Cloverdale, Islington, Martin Grove, Princess Gardens, West Deane Park
Westmount
The Annex, North Midtown, Yorkville
No

In [30]:
len(TorontoDF_limited ['Neighbourhood'].unique())

38

In [31]:
print(toronto_venues.shape) #venues near Toronto latitude and longitude coordinates (radius 500 m)
toronto_venues.head()

(152, 7)


Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Glencairn,43.696948,-79.411307,Kay Gardner Beltline Trail,43.700726,-79.410101,Trail
1,Glencairn,43.696948,-79.411307,TTC Bus #14 Glencairn,43.700221,-79.410274,Bus Line
2,Glencairn,43.696948,-79.411307,Nikko Sushi Japenese Restaurant,43.700443,-79.407957,Sushi Restaurant
3,Glencairn,43.696948,-79.411307,Oliver jewelry,43.700374,-79.407644,Jewelry Store
4,Victoria Village,43.696948,-79.411307,Kay Gardner Beltline Trail,43.700726,-79.410101,Trail


In [33]:
toronto_venues.groupby('Neighborhood').count()

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Agincourt,4,4,4,4,4,4
"Alderwood, Long Branch",4,4,4,4,4,4
"Bathurst Manor, Downsview North, Wilson Heights",4,4,4,4,4,4
Bayview Village,4,4,4,4,4,4
"Bloordale Gardens, Eringate, Markland Wood, Old Burnhamthorpe",4,4,4,4,4,4
"CN Tower, Bathurst Quay, Island airport, Harbourfront West, King and Spadina, Railway Lands, South Niagara",4,4,4,4,4,4
Cedarbrae,4,4,4,4,4,4
"Clairlea, Golden Mile, Oakridge",4,4,4,4,4,4
"Cliffcrest, Cliffside, Scarborough Village West",4,4,4,4,4,4
"Cloverdale, Islington, Martin Grove, Princess Gardens, West Deane Park",4,4,4,4,4,4


In [34]:
print('There are {} uniques categories.'.format(len(toronto_venues['Venue Category'].unique())))

There are 4 uniques categories.


### Finding unique Venues and listing

In [35]:
toronto_onehot = pd.get_dummies(toronto_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
toronto_onehot['Neighborhood'] = toronto_venues['Neighborhood'] 

# move neighborhood column to the first column
#fixed_columns = [toronto_onehot.columns[-1]] + list(toronto_onehot.columns[:-1])
col_list = list(toronto_onehot.columns)
col_list.remove('Neighborhood')
fixed_columns = ['Neighborhood'] + col_list
toronto_onehot = toronto_onehot[fixed_columns]

toronto_onehot.head()

Unnamed: 0,Neighborhood,Bus Line,Jewelry Store,Sushi Restaurant,Trail
0,Glencairn,0,0,0,1
1,Glencairn,1,0,0,0
2,Glencairn,0,0,1,0
3,Glencairn,0,1,0,0
4,Victoria Village,0,0,0,1


In [36]:
toronto_grouped = toronto_onehot.groupby('Neighborhood').mean().reset_index()
toronto_grouped

Unnamed: 0,Neighborhood,Bus Line,Jewelry Store,Sushi Restaurant,Trail
0,Agincourt,0.25,0.25,0.25,0.25
1,"Alderwood, Long Branch",0.25,0.25,0.25,0.25
2,"Bathurst Manor, Downsview North, Wilson Heights",0.25,0.25,0.25,0.25
3,Bayview Village,0.25,0.25,0.25,0.25
4,"Bloordale Gardens, Eringate, Markland Wood, Ol...",0.25,0.25,0.25,0.25
5,"CN Tower, Bathurst Quay, Island airport, Harbo...",0.25,0.25,0.25,0.25
6,Cedarbrae,0.25,0.25,0.25,0.25
7,"Clairlea, Golden Mile, Oakridge",0.25,0.25,0.25,0.25
8,"Cliffcrest, Cliffside, Scarborough Village West",0.25,0.25,0.25,0.25
9,"Cloverdale, Islington, Martin Grove, Princess ...",0.25,0.25,0.25,0.25


In [37]:
num_top_venues = 5

for hood in toronto_grouped['Neighborhood']:
    print("----"+hood+"----")
    temp = toronto_grouped[toronto_grouped['Neighborhood'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

----Agincourt----
              venue  freq
0          Bus Line  0.25
1     Jewelry Store  0.25
2  Sushi Restaurant  0.25
3             Trail  0.25


----Alderwood, Long Branch----
              venue  freq
0          Bus Line  0.25
1     Jewelry Store  0.25
2  Sushi Restaurant  0.25
3             Trail  0.25


----Bathurst Manor, Downsview North, Wilson Heights----
              venue  freq
0          Bus Line  0.25
1     Jewelry Store  0.25
2  Sushi Restaurant  0.25
3             Trail  0.25


----Bayview Village----
              venue  freq
0          Bus Line  0.25
1     Jewelry Store  0.25
2  Sushi Restaurant  0.25
3             Trail  0.25


----Bloordale Gardens, Eringate, Markland Wood, Old Burnhamthorpe----
              venue  freq
0          Bus Line  0.25
1     Jewelry Store  0.25
2  Sushi Restaurant  0.25
3             Trail  0.25


----CN Tower, Bathurst Quay, Island airport, Harbourfront West, King and Spadina, Railway Lands, South Niagara----
              venue  freq
