# All Three Sections are Contained within this Notebook

## Part 1- Scrape and Transform Toronto Data

In [72]:
import numpy as np
import pandas as pd

In [73]:
#Scrape data from Wikipedia
wiki_link='https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'
scrape=pd.read_html(wiki_link)[0]

In [74]:
#Ignore cells with no borough assigned
scrape=scrape[scrape.Borough!='Not assigned']

In [75]:
#Check for neighborhood not assigned in dataframe
scrape[scrape.Neighborhood=='Not assigned']

Unnamed: 0,Postal Code,Borough,Neighborhood


Since no neighborhods are unassigned neighborhoods can now be aggregated by Postal Code and Borough

In [76]:
toronto=scrape.groupby(["Postal Code","Borough"],as_index=False).agg(lambda x: ", ".join(x))

In [77]:
toronto.shape

(103, 3)

## Part 2- Geocoding Toronto postal codes

In [78]:
#Geospatial data from the link was downloaded and imported
geo_spat=pd.read_csv("Geospatial_Coordinates.csv")

In [79]:
toronto=toronto.merge(geo_spat,how="left",on=["Postal Code"])

In [80]:
#Test against coursera assignment page
test_list = ["M5G", "M2H", "M4B", "M1J", "M4G", "M4M", "M1R", "M9V", "M9L", "M5V", "M1B", "M5A"]
test=toronto[toronto["Postal Code"].isin(test_list)]
test['cat_sort']=pd.Categorical(
    test["Postal Code"], 
    categories=test_list, 
    ordered=True
)
test.sort_values(['cat_sort']).drop(columns=['cat_sort'])

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  import sys


Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude
57,M5G,Downtown Toronto,Central Bay Street,43.657952,-79.387383
17,M2H,North York,Hillcrest Village,43.803762,-79.363452
35,M4B,East York,"Parkview Hill, Woodbine Gardens",43.706397,-79.309937
5,M1J,Scarborough,Scarborough Village,43.744734,-79.239476
38,M4G,East York,Leaside,43.70906,-79.363452
43,M4M,East Toronto,Studio District,43.659526,-79.340923
11,M1R,Scarborough,"Wexford, Maryvale",43.750072,-79.295849
101,M9V,Etobicoke,"South Steeles, Silverstone, Humbergate, Jamest...",43.739416,-79.588437
96,M9L,North York,Humber Summit,43.756303,-79.565963
68,M5V,Downtown Toronto,"CN Tower, King and Spadina, Railway Lands, Har...",43.628947,-79.39442


Coordinates tally with frame on assignment page

## Part 3- Explore and Cluster Neighborhoods

In [123]:
import json
from geopy.geocoders import Nominatim
from pandas.io.json import json_normalize
import matplotlib.cm as cm
import matplotlib.colors as colors
from sklearn.cluster import KMeans
import folium
import requests

In [127]:
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    LIMIT=100
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [128]:
def get_lat_lng(address):
    
    try:
    
        location = geolocator.geocode('{}, Toronto, Ontario'.format(address))
        latitude = location.latitude
        longitude = location.longitude
            
        return latitude, longitude
    
    except:
        return None, None

In [82]:
address = 'Toronto, Ontario'

geolocator = Nominatim(user_agent="toronto_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Toronto are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Toronto are 43.6534817, -79.3839347.


### Map Toronto Neighborhods

In [83]:
# create map of Toronto using latitude and longitude values
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=10)

# add markers to map
for lat, lng, borough, neighborhood in zip(toronto['Latitude'], toronto['Longitude'], toronto['Borough'], toronto['Neighborhood']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7).add_to(map_toronto)  
    
map_toronto

### Use Foursquare API for exploration

#### Define Foursquare credentials

In [84]:
CLIENT_ID = 'D401AJGJPVCSSPFFCVAOAN2ZE2RRTF2OGQDP5OOTHJEXK5NR' # your Foursquare ID
CLIENT_SECRET = 'ETW1QCU54RFBEILWBMCJ11PVGNLUWZMCUWQ5AVGVEHC220VV' # your Foursquare Secret
VERSION = '20180604'

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: D401AJGJPVCSSPFFCVAOAN2ZE2RRTF2OGQDP5OOTHJEXK5NR
CLIENT_SECRET:ETW1QCU54RFBEILWBMCJ11PVGNLUWZMCUWQ5AVGVEHC220VV


In [86]:
#explode neighborhoods to rows
toronto.rename(columns={'Latitude':'Postal_Latitude',
                        'Longitude':'Postal Logitude'},inplace=True)
toronto.Neighborhood=toronto.Neighborhood.apply(lambda x: [y.strip() for y in x.split(',')])
toronto=toronto.set_index(list(toronto.drop(columns=['Neighborhood']).columns)).apply(pd.Series.explode).reset_index()

In [88]:
#Create address for geocoding
toronto['Address']=toronto.Neighborhood+', '+toronto.Borough

In [111]:
#get latitude and longitudes for neighborhoods
toronto=toronto.merge(toronto.Address.apply(lambda x: pd.Series(get_lat_lng(x))),left_index=True,right_index=True)

In [115]:
toronto.columns = ['Postal Code','Borough', 'Postal_Latitude','Postal Logitude','Neighborhood','Address',
                   'Neighborhood_lat','Neighborhood_lng']

In [118]:
#keep only neighborhoods that were geocoded
toronto.dropna(subset=['Neighborhood_lat','Neighborhood_lng'],inplace=True)

In [129]:
toronto_venues = getNearbyVenues(names=toronto['Neighborhood'],
                                   latitudes=toronto['Neighborhood_lat'],
                                   longitudes=toronto['Neighborhood_lng']
                              )

Malvern
Rouge
Rouge Hill
Port Union
Highland Creek
Guildwood
Morningside
West Hill
Woburn
Cedarbrae
Scarborough Village
Kennedy Park
Ionview
East Birchmount Park
Golden Mile
Clairlea
Oakridge
Cliffside
Cliffcrest
Scarborough Village West
Birch Cliff
Cliffside West
Dorset Park
Wexford Heights
Scarborough Town Centre
Wexford
Maryvale
Agincourt
Clarks Corners
Tam O'Shanter
Sullivan
Milliken
Agincourt North
Steeles East
L'Amoreaux East
Steeles West
L'Amoreaux West
Upper Rouge
Hillcrest Village
Fairview
Henry Farm
Oriole
Bayview Village
York Mills
Silver Hills
Willowdale
Newtonbrook
Willowdale
Willowdale East
York Mills West
Willowdale
Willowdale West
Parkwoods
Don Mills
Don Mills
Bathurst Manor
Wilson Heights
Downsview North
Northwood Park
York University
Downsview
Downsview
Downsview
Downsview
Victoria Village
Woodbine Gardens
Woodbine Heights
The Beaches
Leaside
Thorncliffe Park
East Toronto
Broadview North (Old East York)
The Danforth West
Riverdale
The Beaches West
Studio District
Lawr