In [66]:
import requests
import pandas as pd

### Using pandas to obtain the table in wikipedia article

In [67]:
website_url = requests.get("https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M").text
df = pd.read_html(website_url)[0]

### The table is saved in " df "

In [68]:
df.head()

Unnamed: 0,0,1,2
0,Postcode,Borough,Neighbourhood
1,M1A,Not assigned,Not assigned
2,M2A,Not assigned,Not assigned
3,M3A,North York,Parkwoods
4,M4A,North York,Victoria Village


### Renaming the dataframe column names

In [69]:
df.columns = df.iloc[0]

In [70]:
df.drop(index=0, inplace=True)

In [71]:
df.head()

Unnamed: 0,Postcode,Borough,Neighbourhood
1,M1A,Not assigned,Not assigned
2,M2A,Not assigned,Not assigned
3,M3A,North York,Parkwoods
4,M4A,North York,Victoria Village
5,M5A,Downtown Toronto,Harbourfront


### Dropping all the rows that do not have the Borough value assigned

In [72]:
indexList = df[(df["Borough"] == "Not assigned")].index

In [73]:
df.drop(index=indexList, inplace=True)

### Renaming rows with not assigned Neigbourhood values to their Borough names

In [74]:
df[df["Neighbourhood"] == "Not assigned"]

Unnamed: 0,Postcode,Borough,Neighbourhood


In [75]:
df.loc[10]["Neighbourhood"] = df.loc[10]["Borough"]

In [76]:
df.loc[10]

0
Postcode               M9A
Borough          Etobicoke
Neighbourhood    Etobicoke
Name: 10, dtype: object

### Using shape to print the number of rows and columns

In [77]:
df.shape

(210, 3)

In [78]:
df.head()

Unnamed: 0,Postcode,Borough,Neighbourhood
3,M3A,North York,Parkwoods
4,M4A,North York,Victoria Village
5,M5A,Downtown Toronto,Harbourfront
6,M6A,North York,Lawrence Heights
7,M6A,North York,Lawrence Manor


### Combining rows with the same Postcode values using group by 

In [79]:
df = df.groupby(["Postcode", "Borough"], sort=False).agg(','.join)
df.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Neighbourhood
Postcode,Borough,Unnamed: 2_level_1
M3A,North York,Parkwoods
M4A,North York,Victoria Village
M5A,Downtown Toronto,Harbourfront
M6A,North York,"Lawrence Heights,Lawrence Manor"
M7A,Downtown Toronto,Queen's Park


### Converting multi index dataframe to single index 

In [80]:
df = df.reset_index(level=[0,1])
df.head()

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,Harbourfront
3,M6A,North York,"Lawrence Heights,Lawrence Manor"
4,M7A,Downtown Toronto,Queen's Park


### Renaming Postcode to PostalCode

In [81]:
df.rename(columns = {"Postcode":"PostalCode"}, inplace=True)
df.head()

Unnamed: 0,PostalCode,Borough,Neighbourhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,Harbourfront
3,M6A,North York,"Lawrence Heights,Lawrence Manor"
4,M7A,Downtown Toronto,Queen's Park


### Checking the shape of the dataframe

In [82]:
df.shape

(103, 3)

### Getting the latitude and longitude of the postcode 

In [83]:
location_data = pd.read_csv(r"C:\Users\Prabhjeet\Downloads\Geospatial_Coordinates.csv")
location_data.rename(columns={"Postal Code" : "PostalCode"}, inplace=True)
location_data.head()

Unnamed: 0,PostalCode,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


In [84]:
location_data.shape

(103, 3)

### Merging df and location_data

In [85]:
df_combined = pd.merge(left=df, right=location_data, left_on='PostalCode', right_on='PostalCode')
df_combined.head()

Unnamed: 0,PostalCode,Borough,Neighbourhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,Harbourfront,43.65426,-79.360636
3,M6A,North York,"Lawrence Heights,Lawrence Manor",43.718518,-79.464763
4,M7A,Downtown Toronto,Queen's Park,43.662301,-79.389494


In [86]:
import folium
from geopy.geocoders import Nominatim

### Finding the coordinates of Toronto

In [87]:
geolocator = Nominatim(user_agent="toronto_explorer", timeout=3)
location = geolocator.geocode("Toronto")
latitude = location.latitude 
longitude = location.longitude
print("The latitude is {} and longitude is {}".format(latitude, longitude))

The latitude is 43.653963 and longitude is -79.387207


### Creating a map of Toronto with neighbourhoods superimposed

In [88]:
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=10)
for i, j in zip(df_combined["Latitude"], df_combined["Longitude"]):
    folium.CircleMarker([i, j], radius=5).add_to(map_toronto)
map_toronto

### Selecting only those rows which contain the Borough "Toronto"

In [89]:
df_toronto = df_combined[df_combined.Borough.str.contains("Toronto")]
df_toronto.reset_index(inplace=True, drop=True)
df_toronto.head()

Unnamed: 0,PostalCode,Borough,Neighbourhood,Latitude,Longitude
0,M5A,Downtown Toronto,Harbourfront,43.65426,-79.360636
1,M7A,Downtown Toronto,Queen's Park,43.662301,-79.389494
2,M5B,Downtown Toronto,"Ryerson,Garden District",43.657162,-79.378937
3,M5C,Downtown Toronto,St. James Town,43.651494,-79.375418
4,M4E,East Toronto,The Beaches,43.676357,-79.293031


### Visualizing the new dataframe

In [90]:
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=10)
for i, j in zip(df_toronto["Latitude"], df_toronto["Longitude"]):
    folium.CircleMarker([i, j], radius=5).add_to(map_toronto)
map_toronto

### Foursquare credentials and version

In [91]:
CLIENT_ID = 'I30QFOWBDCH5BSYJEBQGHON3RMAOQWO2HJPD1BKP4RRA1ATH' 
CLIENT_SECRET = 'S5B3U0GBMGGVPZ4VV3JHXEPY1Y4MGKBYG0C0ZHP1KVATPTZS' 
VERSION = '20180605'

### Exploring the first neighbourhood in the dataframe

In [92]:
df_toronto["Neighbourhood"][0]

'Harbourfront'

### Get the neighbourhood's latitude and longitude

In [93]:
neighborhood_latitude = df_toronto["Latitude"][0] 
neighborhood_longitude = df_toronto["Longitude"][0] 

### Get top 100 venues within a 500 meters radius of Harbourfront

In [94]:
#url = "https://api.foursquare.com/v2/venues/explore?ll={},{}&radius=500&client_id={}&client_secret={}&v={}&limit=200".format(neighborhood_latitude, neighborhood_longitude, CLIENT_ID, CLIENT_SECRET, VERSION)
#results = requests.get(url).json()

### Reusing functions from the previous notebook

In [95]:
# function that extracts the category of the venue
def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return None
    else:
        return categories_list[0]['name']

In [96]:
#from pandas.io.json import json_normalize
#venues = results['response']['groups'][0]['items']
    
#nearby_venues = json_normalize(venues) # flatten JSON

# filter columns
#filtered_columns = ['venue.name', 'venue.categories', 'venue.location.lat', 'venue.location.lng']
#nearby_venues =nearby_venues.loc[:, filtered_columns]

# filter the category for each row
#nearby_venues['venue.categories'] = nearby_venues.apply(get_category_type, axis=1)

# clean columns
#nearby_venues.columns = [col.split(".")[-1] for col in nearby_venues.columns]

#nearby_venues.head()