# This notebook will be mainly used for the Applied Data Science capstone project

In [131]:
import pandas as pd
import numpy as np

# Segmenting and Clustering Neighborhoods in Toronto

## Scrape data from wikipedia

In [132]:
df = pd.read_html('https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M')[0]
df.head()


Unnamed: 0,Postal Code,Borough,Neighborhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Regent Park, Harbourfront"


## Clean data

In [133]:
# Delete cells with Borough that is Not assigned.
df = df[df.Borough != 'Not assigned']
df.head()

Unnamed: 0,Postal Code,Borough,Neighborhood
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Regent Park, Harbourfront"
5,M6A,North York,"Lawrence Manor, Lawrence Heights"
6,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"


In [134]:
# Combine neighbourhoods with multiple postal codes into one line

group = df.groupby(['Postal Code', 'Borough','Neighborhood'])
df = group.apply(pd.DataFrame)
print(df.loc[df['Postal Code'] == 'M5A'] )

  Postal Code           Borough               Neighborhood
4         M5A  Downtown Toronto  Regent Park, Harbourfront


In [66]:
# Set Neighbourhood = Borough for Not assigned Neighborhood
for i, row in df.iterrows():
    if (( row['Neighborhood'] == 'Not assigned') & ( row['Borough'] != 'Not assigned')):
        row['Neighborhood'] = row['Borough']
        print( row)

In [135]:
# Print number of rows using shape function
print( "Number of rows: ", df.shape[0])

Number of rows:  103


## Use csv file to get lat, long

In [136]:
# Read csv with lats, longs
df_geo = pd.read_csv("https://cocl.us/Geospatial_data/Geospatial_Coordinates.csv")
# print( "2: ", df_geo.head())

# Merge dataframes
df = df.merge(df_geo, left_on='Postal Code', right_on='Postal Code')
np.set_printoptions(linewidth=200)
df.head(12)

Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636
3,M6A,North York,"Lawrence Manor, Lawrence Heights",43.718518,-79.464763
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494
5,M9A,Etobicoke,"Islington Avenue, Humber Valley Village",43.667856,-79.532242
6,M1B,Scarborough,"Malvern, Rouge",43.806686,-79.194353
7,M3B,North York,Don Mills,43.745906,-79.352188
8,M4B,East York,"Parkview Hill, Woodbine Gardens",43.706397,-79.309937
9,M5B,Downtown Toronto,"Garden District, Ryerson",43.657162,-79.378937


## work with only boroughs that contain the word Toronto 

In [137]:
df = df[(df.Borough == 'Downtown Toronto') | (df.Borough == 'East Toronto') | (df.Borough == 'West Toronto') | (df.Borough == 'Central Toronto')]
df.head()

Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494
9,M5B,Downtown Toronto,"Garden District, Ryerson",43.657162,-79.378937
15,M5C,Downtown Toronto,St. James Town,43.651494,-79.375418
19,M4E,East Toronto,The Beaches,43.676357,-79.293031


## Create a map of Toronto with neighborhoods superimposed on top

### Prepare environment

In [138]:
import json # library to handle JSON files

In [None]:
import requests # library to handle requests
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

In [105]:
!conda install -c conda-forge folium=0.5.0 --yes
import folium # map rendering library

Solving environment: done

## Package Plan ##

  environment location: /opt/conda/envs/Python36

  added / updated specs: 
    - folium=0.5.0


The following packages will be downloaded:

    package                    |            build
    ---------------------------|-----------------
    ca-certificates-2020.4.5.2 |       hecda079_0         147 KB  conda-forge
    altair-4.1.0               |             py_1         614 KB  conda-forge
    branca-0.4.1               |             py_0          26 KB  conda-forge
    folium-0.5.0               |             py_0          45 KB  conda-forge
    certifi-2020.4.5.2         |   py36h9f0ad1d_0         152 KB  conda-forge
    python_abi-3.6             |          1_cp36m           4 KB  conda-forge
    vincent-0.4.4              |             py_1          28 KB  conda-forge
    openssl-1.1.1g             |       h516909a_0         2.1 MB  conda-forge
    ------------------------------------------------------------
                       

In [139]:

# create map of Toronto using latitude and longitude values
latitude = 43.6532
longitude = -79.3832
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=12)

# add markers to map
for lat, lng, borough, neighborhood in zip(df['Latitude'], df['Longitude'], df['Borough'], df['Neighborhood']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_toronto)  
    
map_toronto

## Define Foursquare Credentials and Version

In [140]:
CLIENT_ID = '4XBD0M3OC0UPKFXZAJSEOPK01OGQTKVVLEY0FZTTRADOUO3W' # your Foursquare ID
CLIENT_SECRET = 'OSWGGNB3XXRJE1EH2LF3JVLCFONCNQSRNGBEQTPBT0YMNFFJ' # your Foursquare Secret
VERSION = '20180605' # Foursquare API version

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: 4XBD0M3OC0UPKFXZAJSEOPK01OGQTKVVLEY0FZTTRADOUO3W
CLIENT_SECRET:OSWGGNB3XXRJE1EH2LF3JVLCFONCNQSRNGBEQTPBT0YMNFFJ


## explore the first neighborhood

### Get the first neighborhood's name.

In [141]:
df.head()
neighborhood_name = df.loc[2, 'Neighborhood']

print(neighborhood_name)

Regent Park, Harbourfront


### Get the neighborhood's latitude and longitude values

In [142]:
neighborhood_latitude = df.loc[2, 'Latitude'] # neighborhood latitude value
neighborhood_longitude = df.loc[2, 'Longitude'] # neighborhood longitude value

print('Latitude and longitude values of {} are {}, {}.'.format(neighborhood_name, 
                                                               neighborhood_latitude, 
                                                               neighborhood_longitude))

Latitude and longitude values of Regent Park, Harbourfront are 43.6542599, -79.3606359.


### get the top 100 venues 

In [143]:
LIMIT = 100 # limit of number of venues returned by Foursquare API
radius = 500 # define radius

url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
    CLIENT_ID, 
    CLIENT_SECRET, 
    VERSION, 
    neighborhood_latitude, 
    neighborhood_longitude, 
    radius, 
    LIMIT)
url # display URL

'https://api.foursquare.com/v2/venues/explore?&client_id=4XBD0M3OC0UPKFXZAJSEOPK01OGQTKVVLEY0FZTTRADOUO3W&client_secret=OSWGGNB3XXRJE1EH2LF3JVLCFONCNQSRNGBEQTPBT0YMNFFJ&v=20180605&ll=43.6542599,-79.3606359&radius=500&limit=100'

### Send the GET request and examine the resutls

In [144]:
results = requests.get(url).json()
results

{'meta': {'code': 200, 'requestId': '5ee848947f06d94cf867c86c'},
 'response': {'headerLocation': 'Corktown',
  'headerFullLocation': 'Corktown, Toronto',
  'headerLocationGranularity': 'neighborhood',
  'totalResults': 46,
  'suggestedBounds': {'ne': {'lat': 43.6587599045, 'lng': -79.3544279001486},
   'sw': {'lat': 43.6497598955, 'lng': -79.36684389985142}},
  'groups': [{'type': 'Recommended Places',
    'name': 'recommended',
    'items': [{'reasons': {'count': 0,
       'items': [{'summary': 'This spot is popular',
         'type': 'general',
         'reasonName': 'globalInteractionReason'}]},
      'venue': {'id': '54ea41ad498e9a11e9e13308',
       'name': 'Roselle Desserts',
       'location': {'address': '362 King St E',
        'crossStreet': 'Trinity St',
        'lat': 43.653446723052674,
        'lng': -79.3620167174383,
        'labeledLatLngs': [{'label': 'display',
          'lat': 43.653446723052674,
          'lng': -79.3620167174383}],
        'distance': 143,
       