# I used one notebook for all questions, please scroll down

# Question 1

###### Import the modules we need to get started

In [13]:
import pandas as pd
import requests
from bs4 import BeautifulSoup

In [14]:
web_url = requests.get('https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M').text

In [15]:
soup = BeautifulSoup(web_url,'lxml')

###### We can sort the html table using wikitable sortable

In [16]:
tbl=soup.find('table',{'class':'wikitable sortable'})

###### I'm going to convert the table to comma separated because I find it a little easier to work with

In [17]:
csv_tbl=""
for tr in tbl.find_all('tr'):
    row=""
    for tds in tr.find_all('td'):
        row=row+","+tds.text
    csv_tbl=csv_tbl+row[1:]

###### Write the data as a csv

In [18]:
csv_file=open("data.csv","wb")
csv_file.write(bytes(csv_tbl,encoding="ascii",errors="ignore"))

8709

###### The dataframe will consist of three columns: PostalCode, Borough, and Neighborhood

In [19]:
df=pd.read_csv('data.csv',header=None)
df.columns=["PostalCode", "Borough","Neighborhood"]
df.head()

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront


###### Only process the cells that have an assigned borough. Ignore cells with a borough that is Not assigned.

In [20]:
na = df[df['Borough']=='Not assigned'].index
df.drop(na, inplace=True)

In [21]:
df.head()

Unnamed: 0,PostalCode,Borough,Neighborhood
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront
5,M6A,North York,Lawrence Heights
6,M6A,North York,Lawrence Manor


###### If a cell has a borough but a Not assigned neighborhood, then the neighborhood will be the same as the borough.

In [22]:
df.loc[df['Neighborhood']=='Not assigned','Neighborhood']=df['Borough']

In [23]:
df.head()

Unnamed: 0,PostalCode,Borough,Neighborhood
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront
5,M6A,North York,Lawrence Heights
6,M6A,North York,Lawrence Manor


###### Group by postcode and borough and comma separate neighbourhoods

In [24]:
group=df.groupby(['PostalCode', 'Borough'], sort=False).agg(', '.join)

In [25]:
df2=group.reset_index()
df2.head(20)

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,Harbourfront
3,M6A,North York,"Lawrence Heights, Lawrence Manor"
4,M7A,Downtown Toronto,Queen's Park
5,M9A,Etobicoke,Islington Avenue
6,M1B,Scarborough,"Rouge, Malvern"
7,M3B,North York,Don Mills North
8,M4B,East York,"Woodbine Gardens, Parkview Hill"
9,M5B,Downtown Toronto,"Ryerson, Garden District"


###### In the last cell of your notebook, use the .shape method to print the number of rows of your dataframe.

In [26]:
df2.shape

(103, 3)

# Question 2

###### Use the Geocoder package or the csv file to create the following dataframe:

In [27]:
df_latlon = pd.read_csv('https://cocl.us/Geospatial_data')
df_latlon.columns=['PostalCode','Latitude','Longitude'] # have to rename the postal code column
df_latlon.head()

Unnamed: 0,PostalCode,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


###### Merge the coordinates into the borough/neighborhood dataframe.

In [28]:
merge_df = pd.merge(df2,df_latlon[['PostalCode','Latitude','Longitude']], on='PostalCode')
merge_df.head(15)

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,Harbourfront,43.65426,-79.360636
3,M6A,North York,"Lawrence Heights, Lawrence Manor",43.718518,-79.464763
4,M7A,Downtown Toronto,Queen's Park,43.662301,-79.389494
5,M9A,Etobicoke,Islington Avenue,43.667856,-79.532242
6,M1B,Scarborough,"Rouge, Malvern",43.806686,-79.194353
7,M3B,North York,Don Mills North,43.745906,-79.352188
8,M4B,East York,"Woodbine Gardens, Parkview Hill",43.706397,-79.309937
9,M5B,Downtown Toronto,"Ryerson, Garden District",43.657162,-79.378937


# Question 3

###### Import necessary Libraries

In [1]:
import numpy as np # library to handle data in a vectorized manner
import random # library for random number generation
import requests

#!conda install -c conda-forge geopy --yes 
from geopy.geocoders import Nominatim # module to convert an address into latitude and longitude values

# libraries for displaying images
from IPython.display import Image 
from IPython.core.display import HTML 
    
# tranforming json file into a pandas dataframe library
from pandas.io.json import json_normalize

#!conda install -c conda-forge folium=0.5.0 --yes
import folium # plotting library

print('Folium installed')
print('Libraries imported.')

Solving environment: done

## Package Plan ##

  environment location: /opt/conda/envs/Python36

  added / updated specs: 
    - geopy


The following packages will be downloaded:

    package                    |            build
    ---------------------------|-----------------
    openssl-1.1.1d             |       h516909a_0         2.1 MB  conda-forge
    geographiclib-1.50         |             py_0          34 KB  conda-forge
    certifi-2019.11.28         |   py36h9f0ad1d_1         149 KB  conda-forge
    ca-certificates-2019.11.28 |       hecc5488_0         145 KB  conda-forge
    python_abi-3.6             |          1_cp36m           4 KB  conda-forge
    geopy-1.21.0               |             py_0          58 KB  conda-forge
    ------------------------------------------------------------
                                           Total:         2.5 MB

The following NEW packages will be INSTALLED:

    geographiclib:   1.50-py_0         conda-forge
    geopy:           1

###### Define Foursquare Credentials and Version

In [55]:
CLIENT_ID = 'W03DEFIYSYUW3GCTD0ZGYPKADF0Q1DDPJYL1BUQ4S4I2TIP3' # your Foursquare ID
CLIENT_SECRET = 'TF0ZKC1WEP2XAUH3TO2PYBTKEKSW5DNZV0LRZHTOW0I1UKF3' # your Foursquare Secret
VERSION = '20200307'
LIMIT = 30
print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)
hide_toggle()

Your credentails:
CLIENT_ID: W03DEFIYSYUW3GCTD0ZGYPKADF0Q1DDPJYL1BUQ4S4I2TIP3
CLIENT_SECRET:TF0ZKC1WEP2XAUH3TO2PYBTKEKSW5DNZV0LRZHTOW0I1UKF3


NameError: name 'hide_toggle' is not defined

###### Explore and cluster the neighborhoods in Toronto. You can decide to work with only boroughs that contain the word Toronto and then replicate the same analysis we did to the New York City data.

In [30]:
address = 'Toronto, ON'

geolocator = Nominatim(user_agent="foursquare_agent")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print(latitude, longitude)

43.653963 -79.387207


###### Create a map to see how neighborhoors group together. (Map may not display correctly on GitHub)

In [33]:
map = folium.Map(location=[latitude, longitude], zoom_start=10)

# add markers to map
for lat, lng, Borough, Neighborhood in zip(merge_df['Latitude'], merge_df['Longitude'], merge_df['Borough'], merge_df['Neighborhood']):
    label = '{}, {}'.format(Neighborhood, Borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.6,
        parse_html=False).add_to(map)  
    
map

###### Pick a neighborhood. 10 was chosen at random.

In [38]:
merge_df.loc[10,'Neighborhood']

'Glencairn'

###### Get the long and lat for Glencairn

In [39]:
neighborhood_lat = merge_df.loc[10,'Latitude']
neighborhood_lon = merge_df.loc[10,'Longitude']

###### Borrow from the Foursquare lab, define the url

In [40]:
radius=500
LIMIT=50
url = 'https://api.foursquare.com/v2/venues/explore?client_id={}&client_secret={}&ll={},{}&v={}&radius={}&limit={}'.format(CLIENT_ID, CLIENT_SECRET, latitude, longitude, VERSION, radius, LIMIT)
url

'https://api.foursquare.com/v2/venues/explore?client_id=W03DEFIYSYUW3GCTD0ZGYPKADF0Q1DDPJYL1BUQ4S4I2TIP3&client_secret=TF0ZKC1WEP2XAUH3TO2PYBTKEKSW5DNZV0LRZHTOW0I1UKF3&ll=43.653963,-79.387207&v=20200307&radius=500&limit=50'

###### Send a GET request and examine the results

In [41]:
results = requests.get(url).json()
results

{'meta': {'code': 200, 'requestId': '5e6cde5c83525f001b11bb4b'},
 'response': {'suggestedFilters': {'header': 'Tap to show:',
   'filters': [{'name': 'Open now', 'key': 'openNow'}]},
  'headerLocation': 'Bay Street Corridor',
  'headerFullLocation': 'Bay Street Corridor, Toronto',
  'headerLocationGranularity': 'neighborhood',
  'totalResults': 73,
  'suggestedBounds': {'ne': {'lat': 43.6584630045, 'lng': -79.38099903084075},
   'sw': {'lat': 43.649462995499995, 'lng': -79.39341496915925}},
  'groups': [{'type': 'Recommended Places',
    'name': 'recommended',
    'items': [{'reasons': {'count': 0,
       'items': [{'summary': 'This spot is popular',
         'type': 'general',
         'reasonName': 'globalInteractionReason'}]},
      'venue': {'id': '5227bb01498e17bf485e6202',
       'name': 'Downtown Toronto',
       'location': {'lat': 43.65323167517444,
        'lng': -79.38529600606677,
        'labeledLatLngs': [{'label': 'display',
          'lat': 43.65323167517444,
          

###### Define information of interest and filter the dataframe

In [43]:
def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return None
    else:
        return categories_list[0]['name']

###### List 10 closests venues

In [54]:
venues = results['response']['groups'][0]['items']
    
nearby_venues = json_normalize(venues) # flatten JSON

# filter columns
filtered_columns = ['venue.name', 'venue.categories', 'venue.location.lat', 'venue.location.lng']
nearby_venues =nearby_venues.loc[:, filtered_columns]

# filter the category for each row
nearby_venues['venue.categories'] = nearby_venues.apply(get_category_type, axis=1)

# clean columns
nearby_venues.columns = [col.split(".")[-1] for col in nearby_venues.columns]

nearby_venues.head(10)

Unnamed: 0,name,categories,lat,lng
0,Downtown Toronto,Neighborhood,43.653232,-79.385296
1,Japango,Sushi Restaurant,43.655268,-79.385165
2,Poke Guys,Poke Place,43.654895,-79.385052
3,Rolltation,Japanese Restaurant,43.654918,-79.387424
4,Sansotei Ramen 三草亭,Ramen Restaurant,43.655157,-79.386501
5,Karine's,Breakfast Spot,43.653699,-79.390743
6,Manpuku まんぷく,Japanese Restaurant,43.653612,-79.390613
7,Fugo Desserts,Ice Cream Shop,43.654923,-79.387382
8,Chatime 日出茶太,Bubble Tea Shop,43.655542,-79.384684
9,The Library Specialty Coffee,Coffee Shop,43.654413,-79.390902
