# Part 1

### Import Libraries

In [185]:
import numpy as np # library to handle data in a vectorized manner

import pandas as pd # library for data analsysis
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

import json # library to handle JSON files

#!conda install -c conda-forge geopy --yes 
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values
import geocoder

import requests # library to handle requests
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans
from yellowbrick.cluster import KElbowVisualizer

#!conda install -c conda-forge folium=0.5.0 --yes 
import folium # map rendering library

print('Libraries imported.')

Libraries imported.


In [116]:
#Download the wiki table to a readable df format
df=pd.read_html("https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M")[0]
print(df.shape)
df.head()


(180, 3)


Unnamed: 0,Postal Code,Borough,Neighbourhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Regent Park, Harbourfront"



### Data Preprocessing

In [117]:
#Remove entries where Borough = "Not Assigned"
df = df[df.Borough != "Not assigned"]

In [118]:
#Check if any neighborhoods are still unassigned 
df=df.rename(columns={"Neighbourhood" : "Neighborhood"})
df

Unnamed: 0,Postal Code,Borough,Neighborhood
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Regent Park, Harbourfront"
5,M6A,North York,"Lawrence Manor, Lawrence Heights"
6,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"
8,M9A,Etobicoke,"Islington Avenue, Humber Valley Village"
9,M1B,Scarborough,"Malvern, Rouge"
11,M3B,North York,Don Mills
12,M4B,East York,"Parkview Hill, Woodbine Gardens"
13,M5B,Downtown Toronto,"Garden District, Ryerson"


In [119]:
#If neighborhoods still contain "Not Assigned", but have an associated Borough, run the code below to transform Neighborhood NA's to Burough name
df['Neighborhood'].replace({'Not assigned':df['Borough']}, inplace = True)

In [120]:
#Reset df index
df = df.reset_index(drop=True)
df.head(30)

Unnamed: 0,Postal Code,Borough,Neighborhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Regent Park, Harbourfront"
3,M6A,North York,"Lawrence Manor, Lawrence Heights"
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"
5,M9A,Etobicoke,"Islington Avenue, Humber Valley Village"
6,M1B,Scarborough,"Malvern, Rouge"
7,M3B,North York,Don Mills
8,M4B,East York,"Parkview Hill, Woodbine Gardens"
9,M5B,Downtown Toronto,"Garden District, Ryerson"


#### Check the shape of the data frame

In [121]:
df.shape

(103, 3)

In [175]:
### Convert Neighborhood strings to lists
lst_col='Neighborhood'

df = df.assign(**{lst_col:df[lst_col].str.split(',')})
# Check df 
df

### Explode Neighborhood lists to unique columns
df=df.explode('Neighborhood')


print(df.shape)
df
df = df.reset_index(drop=True)
df

(217, 3)


Unnamed: 0,Postal Code,Borough,Neighborhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,Regent Park
3,M5A,Downtown Toronto,Harbourfront
4,M6A,North York,Lawrence Manor
5,M6A,North York,Lawrence Heights
6,M7A,Downtown Toronto,Queen's Park
7,M7A,Downtown Toronto,Ontario Provincial Government
8,M9A,Etobicoke,Islington Avenue
9,M9A,Etobicoke,Humber Valley Village


### Append LatLng Coordinates to Area Code

In [13]:
#Rename df column (Wiki data has changed since V1)
df=df.rename(columns={'Post Code' : 'Postal Code'})

In [14]:
df_latlng=pd.read_csv("http://cocl.us/Geospatial_data")

In [12]:
df_latlng

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476
5,M1J,43.744734,-79.239476
6,M1K,43.727929,-79.262029
7,M1L,43.711112,-79.284577
8,M1M,43.716316,-79.239476
9,M1N,43.692657,-79.264848


In [13]:
df=df.merge(df_latlng, left_on = "Postal Code", right_on = "Postal Code")

In [14]:
df.head()

Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,Regent Park,43.65426,-79.360636
3,M5A,Downtown Toronto,Harbourfront,43.65426,-79.360636
4,M6A,North York,Lawrence Manor,43.718518,-79.464763


### Geocoder package unreliable. To be used with a later version

In [15]:
#import geocoder 

# initialize your variable to None
#lat_lng_coords = None

# loop until you get the coordinates
#while(lat_lng_coords is None):
   # g = geocoder.google('{}, Toronto, Ontario'.format(df['Postal Code']), maxRows = 103)
   # lat_lng_coords = g.latlng
    #latitude = lat_lng_coords[0]
    #longitude = lat_lng_coords[1]
    
#latitude = lat_lng_coords[0]
#longitude = lat_lng_coords[1]

# Part 3

In [16]:
address = 'Toronto, ON'

geolocator = Nominatim(user_agent="on_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Toronto, Canada are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Toronto, Canada are 43.6534817, -79.3839347.


In [17]:
# create map of Toronto using latitude and longitude values
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=10)

# add markers to map
for lat, lng, borough, neighborhood in zip(df['Latitude'], df['Longitude'], df['Borough'], df['Neighborhood']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_toronto)  
    
map_toronto

## Create dataframe for 'Toronto' Boroughs

#### Isolate Buroughs containing name "Toronto"

In [18]:
toronto_data = df[df['Borough'].str.contains('Toronto', regex = False)].reset_index(drop=True)
toronto_data.head()

Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude
0,M5A,Downtown Toronto,Regent Park,43.65426,-79.360636
1,M5A,Downtown Toronto,Harbourfront,43.65426,-79.360636
2,M7A,Downtown Toronto,Queen's Park,43.662301,-79.389494
3,M7A,Downtown Toronto,Ontario Provincial Government,43.662301,-79.389494
4,M5B,Downtown Toronto,Garden District,43.657162,-79.378937


In [19]:
toronto_data.describe

<bound method NDFrame.describe of    Postal Code           Borough  \
0          M5A  Downtown Toronto   
1          M5A  Downtown Toronto   
2          M7A  Downtown Toronto   
3          M7A  Downtown Toronto   
4          M5B  Downtown Toronto   
5          M5B  Downtown Toronto   
6          M5C  Downtown Toronto   
7          M4E      East Toronto   
8          M5E  Downtown Toronto   
9          M5G  Downtown Toronto   
10         M6G  Downtown Toronto   
11         M5H  Downtown Toronto   
12         M5H  Downtown Toronto   
13         M5H  Downtown Toronto   
14         M6H      West Toronto   
15         M6H      West Toronto   
16         M5J  Downtown Toronto   
17         M5J  Downtown Toronto   
18         M5J  Downtown Toronto   
19         M6J      West Toronto   
20         M6J      West Toronto   
21         M4K      East Toronto   
22         M4K      East Toronto   
23         M5K  Downtown Toronto   
24         M5K  Downtown Toronto   
25         M6K      West Toron

In [20]:
# create map of Toronto(burough) using latitude and longitude values
map_toronto_b = folium.Map(location=[latitude, longitude], zoom_start=11)

# add markers to map
for lat, lng, label in zip(toronto_data['Latitude'], toronto_data['Longitude'], toronto_data['Neighborhood']):
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_toronto_b)  
    
map_toronto_b

### Hide

In [21]:
#@hidden_cell
CLIENT_ID = '1P3HBU5ISMR5YVJ1JLUMNR0LDKKGCVTZVXT354TSIJHJGWEE' # your Foursquare ID
CLIENT_SECRET = 'FHGBAV33NAO3MVLJ1STYM042ZHTBOAWRKD041FSQDVT4GAAE' # your Foursquare Secret
VERSION = '20180605' # Foursquare API version

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: 1P3HBU5ISMR5YVJ1JLUMNR0LDKKGCVTZVXT354TSIJHJGWEE
CLIENT_SECRET:FHGBAV33NAO3MVLJ1STYM042ZHTBOAWRKD041FSQDVT4GAAE


In [22]:
### Open Foursquare Credentials
with open (r'C:\Users\schne\Desktop\IBM Data Science Certification\Python v2\Applied Data Science Capstone\fsquarecreds.json') as f:
    data = json.load(f)
    fsquare_creds = {'CLIENT_ID':data['CLIENT_ID'],'CLIENT_SECRET':data['CLIENT_SECRET'],'VERSION':data['VERSION']}


In [23]:
limit = 100
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            limit)
            
        # make the GET request
        results = requests.get(url).json()['response']['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [24]:
####code to run the above function on each neighborhood and create a new dataframe called *df_venues*

toronto_data_venues = getNearbyVenues(names=toronto_data['Neighborhood'],
                                   latitudes=toronto_data['Latitude'],
                                   longitudes=toronto_data['Longitude']
                                  )

Regent Park
 Harbourfront
Queen's Park
 Ontario Provincial Government
Garden District
 Ryerson
St. James Town
The Beaches
Berczy Park
Central Bay Street
Christie
Richmond
 Adelaide
 King
Dufferin
 Dovercourt Village
Harbourfront East
 Union Station
 Toronto Islands
Little Portugal
 Trinity
The Danforth West
 Riverdale
Toronto Dominion Centre
 Design Exchange
Brockton
 Parkdale Village
 Exhibition Place
India Bazaar
 The Beaches West
Commerce Court
 Victoria Hotel
Studio District
Lawrence Park
Roselawn
Davisville North
Forest Hill North & West
 Forest Hill Road Park
High Park
 The Junction South
North Toronto West
 Lawrence Park
The Annex
 North Midtown
 Yorkville
Parkdale
 Roncesvalles
Davisville
University of Toronto
 Harbord
Runnymede
 Swansea
Moore Park
 Summerhill East
Kensington Market
 Chinatown
 Grange Park
Summerhill West
 Rathnelly
 South Hill
 Forest Hill SE
 Deer Park
CN Tower
 King and Spadina
 Railway Lands
 Harbourfront West
 Bathurst Quay
 South Niagara
 Island airport
R

In [25]:
print(toronto_data_venues.shape)
toronto_data_venues.head()


(3246, 7)


Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Regent Park,43.65426,-79.360636,Roselle Desserts,43.653447,-79.362017,Bakery
1,Regent Park,43.65426,-79.360636,Tandem Coffee,43.653559,-79.361809,Coffee Shop
2,Regent Park,43.65426,-79.360636,Cooper Koo Family YMCA,43.653249,-79.358008,Distribution Center
3,Regent Park,43.65426,-79.360636,Body Blitz Spa East,43.654735,-79.359874,Spa
4,Regent Park,43.65426,-79.360636,Impact Kitchen,43.656369,-79.35698,Restaurant


In [26]:
toronto_data_venues.groupby('Neighborhood').count()

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Adelaide,100,100,100,100,100,100
Bathurst Quay,18,18,18,18,18,18
Cabbagetown,42,42,42,42,42,42
Chinatown,66,66,66,66,66,66
Deer Park,16,16,16,16,16,16
Design Exchange,100,100,100,100,100,100
Dovercourt Village,18,18,18,18,18,18
Exhibition Place,24,24,24,24,24,24
Forest Hill Road Park,4,4,4,4,4,4
Forest Hill SE,16,16,16,16,16,16


In [27]:
## Number of Unique Categoriges
print('There are {} uniques categories.'.format(len(toronto_data_venues['Venue Category'].unique())))

There are 234 uniques categories.


### Format Data for K-Means Clustering

In [None]:
# one hot encoding
toronto_data_onehot = pd.get_dummies(toronto_data_venues['Venue Category'], prefix="", prefix_sep=" ")

# add neighborhood column back to dataframe
toronto_data_onehot['Neighborhood'] = toronto_data_venues['Neighborhood'] 
toronto_data_onehot.head()

In [None]:
# move neighborhood column to the first column
fixed_columns = [toronto_data_onehot.columns[-1]] + list(toronto_data_onehot.columns[:-1])
print(fixed_columns)

toronto_data_onehot = toronto_data_onehot[fixed_columns]
toronto_data_onehot.head()

In [None]:
toronto_data_onehot.shape

In [None]:
toronto_data_grouped = toronto_data_onehot.groupby('Neighborhood').mean().reset_index()


In [None]:
toronto_data_grouped.shape

In [None]:
num_top_venues = 5

for hood in toronto_data_grouped['Neighborhood']:
    print("----"+hood+"----")
    temp = toronto_data_grouped[toronto_data_grouped['Neighborhood'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

In [None]:
# function to return most common venues
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

In [None]:
#grab the top 10 venues

num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))
columns

In [None]:
# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = toronto_data_grouped['Neighborhood']
neighborhoods_venues_sorted.head()

In [None]:
print(np.arange(toronto_data_grouped.shape[0]))
for ind in np.arange(toronto_data_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(toronto_data_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted.head()

In [None]:
toronto_data

## K-Means Clustering

In [None]:
# set number of clusters
kclusters = 5

toronto_data_grouped_clustering = toronto_data_grouped.drop('Neighborhood', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(toronto_data_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10] 

### Check for Optimal K (elbow Method)

In [None]:
# Instantiate the clustering model and visualizer
model = KMeans().fit(toronto_data_grouped_clustering)
visualizer = KElbowVisualizer(model, k=(1,15))

visualizer.fit(toronto_data_grouped_clustering)        # Fit the data to the visualizer
visualizer.show() 

#### Optimal K appears to be '5'. exponential nature of distortion makes for suspect of validity of using K-Means Clustinger

In [None]:
# set number of clusters
kclusters = 6

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10] 

In [None]:
# add clustering labels #Comment this cell when run
neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

In [None]:

toronto_data_merged = toronto_data

# merge toronto_grouped with toronto_data to add latitude/longitude for each neighborhood
toronto_data_merged = toronto_data_merged.join(neighborhoods_venues_sorted.set_index('Neighborhood'), on='Neighborhood')

toronto_data_merged.head() # check the last columns!

In [None]:
#Remove N/A Values
#toronto_data_merged=toronto_data_merged.dropna()

In [None]:
# check the data type
#toronto_data_merged['Cluster Labels']=toronto_data_merged['Cluster Labels'].astype(int)

In [None]:
toronto_data_merged.dtypes

In [None]:
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(toronto_data_merged['Latitude'], toronto_data_merged['Longitude'], toronto_data_merged['Neighborhood'], toronto_data_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' : Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

# Coffee and Quick Bites Cluster

In [None]:
toronto_data_merged.loc[toronto_data_merged['Cluster Labels'] == 0, toronto_data_merged.columns[[2] + list(range(5, toronto_data_merged.shape[1]))]]

# Outdoor and Recreation Cluster

In [None]:
toronto_data_merged.loc[toronto_data_merged['Cluster Labels'] == 1, toronto_data_merged.columns[[2] + list(range(5, toronto_data_merged.shape[1]))]]

# Random Cluster 1

In [None]:
toronto_data_merged.loc[toronto_data_merged['Cluster Labels'] == 2, toronto_data_merged.columns[[2] + list(range(5, toronto_data_merged.shape[1]))]]

# Air-Travel Cluster

In [None]:
toronto_data_merged.loc[toronto_data_merged['Cluster Labels'] == 3, toronto_data_merged.columns[[2] + list(range(5, toronto_data_merged.shape[1]))]]

# Random Cluster 3

In [None]:
toronto_data_merged.loc[toronto_data_merged['Cluster Labels'] == 4, toronto_data_merged.columns[[2] + list(range(5, toronto_data_merged.shape[1]))]]

In [20]:
#@hidden_cell
CLIENT_ID = '1P3HBU5ISMR5YVJ1JLUMNR0LDKKGCVTZVXT354TSIJHJGWEE' # your Foursquare ID
CLIENT_SECRET = 'FHGBAV33NAO3MVLJ1STYM042ZHTBOAWRKD041FSQDVT4GAAE' # your Foursquare Secret
VERSION = '20180605' # Foursquare API version

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: 1P3HBU5ISMR5YVJ1JLUMNR0LDKKGCVTZVXT354TSIJHJGWEE
CLIENT_SECRET:FHGBAV33NAO3MVLJ1STYM042ZHTBOAWRKD041FSQDVT4GAAE


In [21]:
### Open Foursquare Credentials
with open (r'C:\Users\schne\Desktop\IBM Data Science Certification\Python v2\Applied Data Science Capstone\fsquarecreds.json') as f:
    data = json.load(f)
    fsquare_creds = {'CLIENT_ID':data['CLIENT_ID'],'CLIENT_SECRET':data['CLIENT_SECRET'],'VERSION':data['VERSION']}


In [22]:
limit = 100
def getPopulationDensity(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            limit)
            
        # make the GET request
        results = requests.get(url).json()['response']['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [23]:
####code to run the above function on each neighborhood and create a new dataframe called *df_venues*

toronto_data_venues = getNearbyVenues(names=toronto_data['Neighborhood'],
                                   latitudes=toronto_data['Latitude'],
                                   longitudes=toronto_data['Longitude']
                                  )

Regent Park
 Harbourfront
Queen's Park
 Ontario Provincial Government
Garden District
 Ryerson
St. James Town
The Beaches
Berczy Park
Central Bay Street
Christie
Richmond
 Adelaide
 King
Dufferin
 Dovercourt Village
Harbourfront East
 Union Station
 Toronto Islands
Little Portugal
 Trinity
The Danforth West
 Riverdale
Toronto Dominion Centre
 Design Exchange
Brockton
 Parkdale Village
 Exhibition Place
India Bazaar
 The Beaches West
Commerce Court
 Victoria Hotel
Studio District
Lawrence Park
Roselawn
Davisville North
Forest Hill North & West
 Forest Hill Road Park
High Park
 The Junction South
North Toronto West
 Lawrence Park
The Annex
 North Midtown
 Yorkville
Parkdale
 Roncesvalles
Davisville
University of Toronto
 Harbord
Runnymede
 Swansea
Moore Park
 Summerhill East
Kensington Market
 Chinatown
 Grange Park
Summerhill West
 Rathnelly
 South Hill
 Forest Hill SE
 Deer Park
CN Tower
 King and Spadina
 Railway Lands
 Harbourfront West
 Bathurst Quay
 South Niagara
 Island airport
R

In [61]:
import requests

url = "https://www.arcgis.com/sharing/rest/oauth2/token"

payload = 'client_id=2fdnV2kg8P2y1h04&client_secret=c10dfd75ca354671b4dd874583e4dd84&grant_type=client_credentials&expiration=1209600'
headers = {
  'Content-Type': 'application/x-www-form-urlencoded'
}

response = requests.request("POST", url, headers=headers, data = payload)

print(response.text.encode('utf8'))




b'{"access_token":"HJfX6lDbwSEJ1DjswF0OeugKy0zXu7ka_j4-ec0yx2htarY055CTgfxJYjZD7KZNURWpZibI5TABgu0uEHTZHjb9ldIMHNYOD6knLeHKZBvJ_j-mymwkFQ45-Sras0MwL7qzQnokN4Snp2-W6WEXMw..","expires_in":1209600}'


In [None]:
limit = 100
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            limit)
           
        # make the GET request
        results = requests.get(url).json()['response']['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [70]:
rdurl='https://www.google.com/'

In [75]:
url = 'https://geoenrich.arcgis.com/arcgis/rest/services/World/geoenrichmentserver/GeoEnrichment/enrich?studyAreas=[{"geometry":{"x":-73.9712,"y":40.7831}}]&studyAreasOptions={"areaType":"RingBuffer","bufferUnits":"esriMeters","bufferRadii":[500]}&dataCollections=["KeyGlobalFacts", "KeyUSFacts","KeyCANFacts"]&f=json&token=bgIYJBKX6KM4HKxGkECP0H3uyqSaXKuUCn0I3rkL4nl6Dmwxtylir1XqYZ0NsYykCMhm8KqaLctacBmB8V0TFttSx4uFSsWsnjsYMoagpsDe3hM64mEfC_WMn7IAJsH315NrLj66kgThQe5mhzRbhQ..&inSR=4326&OutSR=4326&return Geometry=true'
results = requests.get(url).json()
results

{'results': [{'paramName': 'GeoEnrichmentResult',
   'dataType': 'GeoEnrichmentResult',
   'value': {'version': '2.0',
    'FeatureSet': [{'displayFieldName': '',
      'fieldAliases': {'ID': 'Id',
       'OBJECTID': 'Object Id',
       'sourceCountry': 'Country code',
       'areaType': 'Area type',
       'bufferUnits': 'Buffer units',
       'bufferUnitsAlias': 'Buffer units alias',
       'bufferRadii': 'Buffer radii',
       'aggregationMethod': 'Aggregation method',
       'populationToPolygonSizeRating': 'Population to polygon size rating for the country',
       'apportionmentConfidence': 'Apportionment confidence for the country',
       'HasData': 'Has data',
       'TOTPOP': 'Total Population',
       'TOTHH': 'Total Households',
       'AVGHHSZ': 'Average Household Size',
       'TOTMALES': 'Male Population',
       'TOTFEMALES': 'Female Population',
       'TOTPOP00': '2000 Total Population',
       'TOTPOP10': '2010 Total Population',
       'TOTPOP_CY': '2020 Total Popul

In [73]:
 # create the API request URL
url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
        CLIENT_ID, 
        CLIENT_SECRET, 
        VERSION, 
        lat, 
        lng, 
        radius, 
        limit)

{'error': {'code': 403,
  'message': 'Unable to process this request.',
  'details': None}}

In [76]:
toronto_data.sample(frac=1)

Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude
55,M5T,Downtown Toronto,Chinatown,43.653206,-79.400049
77,M7Y,East Toronto,South Central Letter Processing Plant Toronto,43.662744,-79.321558
30,M5L,Downtown Toronto,Commerce Court,43.648198,-79.379817
42,M5R,Central Toronto,The Annex,43.67271,-79.405678
18,M5J,Downtown Toronto,Toronto Islands,43.640816,-79.381752
3,M7A,Downtown Toronto,Ontario Provincial Government,43.662301,-79.389494
62,M5V,Downtown Toronto,CN Tower,43.628947,-79.39442
51,M6S,West Toronto,Swansea,43.651571,-79.48445
70,M5W,Downtown Toronto,Stn A PO Boxes,43.646435,-79.374846
26,M6K,West Toronto,Parkdale Village,43.636847,-79.428191


In [123]:
import pandas as pd
import requests
import geopy

from geopy.geocoders import ArcGIS




In [162]:
geolocator = geopy.geocoders.ArcGIS(username="WWSscript",password="Z3MWMrnEhXid4Z3",referer="https://geocode.arcgis.com/arcgis/rest/services/World/GeocodeServer",token_lifetime=5)
geolocator

<geopy.geocoders.arcgis.ArcGIS at 0x2577ce04cc0>

In [163]:
import geocoder

In [164]:
# initialize your variable to None
lat_lng_coords = None

In [165]:
from functools import partial
from geopy.extra.rate_limiter import RateLimiter
from tqdm import tqdm
tqdm.pandas()

In [217]:
geocode = RateLimiter(geolocator.geocode, min_delay_seconds=1)

In [213]:
for i iterations in LatLng

TypeError: first argument must be an iterable of pandas objects, you passed an object of type "Series"

In [214]:
df['Latitude']=df['Neighborhood'].geocode(pd.concat(df['Neighborhood'+df['Postal Code']),out_fields=('X'))))

TypeError: first argument must be an iterable of pandas objects, you passed an object of type "Series"

In [225]:
df['Longitude'] = geocode('{},{}'.format(df['Neighborhood'],df['Postal Code']),out_fields=('Y'))

RateLimiter caught an error, retrying (0/2 tries). Called with (*("0                                           Parkwoods\n1                                    Victoria Village\n2                                         Regent Park\n3                                        Harbourfront\n4                                      Lawrence Manor\n5                                    Lawrence Heights\n6                                        Queen's Park\n7                       Ontario Provincial Government\n8                                    Islington Avenue\n9                               Humber Valley Village\n10                                            Malvern\n11                                              Rouge\n12                                          Don Mills\n13                                      Parkview Hill\n14                                   Woodbine Gardens\n15                                    Garden District\n16                                            Ryerson

In [220]:

# initialize your variable to None
lat_lng_coords = None

while(lat_lng_coords is None):
    g = geocode('{},CAN'.format(delta))
    lat_lng_coords =g.latlng
latitude = lat_lng_coords[0]
longitude = lat_lng_coords[1]

RateLimiter caught an error, retrying (0/2 tries). Called with (*("0                                           Parkwoods\n1                                    Victoria Village\n2                                         Regent Park\n2                                        Harbourfront\n3                                      Lawrence Manor\n3                                    Lawrence Heights\n4                                        Queen's Park\n4                       Ontario Provincial Government\n5                                    Islington Avenue\n5                               Humber Valley Village\n6                                             Malvern\n6                                               Rouge\n7                                           Don Mills\n8                                       Parkview Hill\n8                                    Woodbine Gardens\n9                                     Garden District\n9                                             Ryerson

AttributeError: 'NoneType' object has no attribute 'latlng'

In [None]:
df2 = pd.read_json('https://localhost:8889/lab/tree/Downloads/Neighbourhoods.json'_

In [221]:
latitude

NameError: name 'latitude' is not defined

In [1]:
import arcgis.geocoding

In [2]:
from arcgis.gis import GIS
from arcgis.geocoding import Geocoder, get_geocoders

In [243]:
df

Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude
0,M3A,North York,Parkwoods,"(Park Woods, Pennsylvania, (39.89760000000007,...",
1,M4A,North York,Victoria Village,"(Victoria Village, Long Island, (23.0833300000...",
2,M5A,Downtown Toronto,Regent Park,"(Regent Park, Detroit, Michigan, (42.446940000...",
3,M5A,Downtown Toronto,Harbourfront,"(Harbourfront, Kowloon City, (22.3026741943858...",
4,M6A,North York,Lawrence Manor,"(Lawrence Manor, North York, Toronto, Ontario,...",
5,M6A,North York,Lawrence Heights,"(Lawrence Heights, North York, Toronto, Ontari...",
6,M7A,Downtown Toronto,Queen's Park,"(Queen'S Park, Yuen Long, (22.431482229659764,...",
7,M7A,Downtown Toronto,Ontario Provincial Government,"(Gouvernement Provincial, (49.683290000000056,...",
8,M9A,Etobicoke,Islington Avenue,"(The Avenue, Maryville, Newcastle, New South W...",
9,M9A,Etobicoke,Humber Valley Village,"(Humber Valley Village, Ontario, (43.665710000...",
