## Capstone Project - Part I

### Creating dataset from the Wikipedia page

*Import the necessary libraries, including BeautifulSoup*

In [1]:
import os

import folium
import numpy as np
import pandas as pd

In [2]:
import requests
from bs4 import BeautifulSoup

*Accessing the wiki page and using soup to find the table*

In [3]:
# Wikipedia page holding the postal-code table that is scraped below.
wikipage = 'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'
# The timeout keeps the cell from hanging on a stalled connection, and
# raise_for_status() reports an HTTP error here rather than as a confusing
# parsing failure in a later cell.
response = requests.get(wikipage, timeout=30)
response.raise_for_status()

In [4]:
# Parse the downloaded HTML and locate the table whose class attribute is
# 'wikitable sortable'.
soup = BeautifulSoup(response.text, 'html.parser')
table = soup.find('table', {'class': 'wikitable sortable'})
# find() returns None when nothing matches; fail here with a clear message
# instead of an AttributeError when the rows are extracted.
if table is None:
    raise ValueError("No 'wikitable sortable' table found at " + response.url)

*finding all rows based on html syntax*

In [5]:
# Collect every <tr> element of the table; the loop that fills the dataframe
# starts at index 1, so the first row is not treated as data.
rows_all = table.find_all('tr')

*Define the column names and create an empty dataframe that has only those column headings (no rows yet)*

In [6]:
# Column headings for the scraped table, followed by an empty frame that
# carries only those headings.
column_names = [
    'Postal Code',
    'Borough',
    'Neighborhood',
]
df = pd.DataFrame(data=None, columns=column_names)

*Populate the dataframe with the rows using for loop*

In [7]:
# Collect one record per table row and build the dataframe in a single call.
# DataFrame.append was removed in pandas 2.0, and appending inside a loop
# copies the whole frame on every iteration.
records = []
for tr in rows_all[1:]:  # skip the first row (column headings)
    cells = [td.text.replace('\n', '') for td in tr.find_all('td')]
    if len(cells) < 3:
        # A row without three <td> cells cannot fill all three columns.
        continue
    records.append({'Postal Code': cells[0],
                    'Borough': cells[1],
                    'Neighborhood': cells[2]})

df = pd.DataFrame(records, columns=column_names)

In [8]:
# Preview the first rows of the scraped table.
df.head()

Unnamed: 0,Postal Code,Borough,Neighborhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Regent Park, Harbourfront"


In [9]:
# (rows, columns) of the scraped table before any filtering.
df.shape

(180, 3)

*Drop the rows whose borough is 'Not assigned'*

In [10]:
# Remove, in place, every row whose borough is 'Not assigned', then renumber
# the remaining rows from 0.
unassigned_rows = df.index[df['Borough'] == 'Not assigned']
df.drop(index=unassigned_rows, inplace=True)
df.reset_index(drop=True, inplace=True)

In [11]:
# (rows, columns) after the 'Not assigned' boroughs were dropped.
df.shape

(103, 3)

*Make sure no neighborhood is left as 'Not assigned': where it is, use the borough name instead*

In [12]:
# Where a neighborhood is 'Not assigned', fall back to the borough name.
# A boolean row mask is used because DataFrame.replace matches cell values,
# not row conditions, so it cannot express "replace this row's neighborhood".
unassigned = df['Neighborhood'] == 'Not assigned'
df.loc[unassigned, 'Neighborhood'] = df.loc[unassigned, 'Borough']



In [13]:
# Inspect the first ten rows after the clean-up steps.
df.head(10)

Unnamed: 0,Postal Code,Borough,Neighborhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Regent Park, Harbourfront"
3,M6A,North York,"Lawrence Manor, Lawrence Heights"
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"
5,M9A,Etobicoke,"Islington Avenue, Humber Valley Village"
6,M1B,Scarborough,"Malvern, Rouge"
7,M3B,North York,Don Mills
8,M4B,East York,"Parkview Hill, Woodbine Gardens"
9,M5B,Downtown Toronto,"Garden District, Ryerson"


In [14]:
# (rows, columns) after the neighborhood clean-up.
df.shape

(103, 3)

##### Reading the CSV file with postal codes and their coordinates (http://cocl.us/Geospatial_data) into a dataframe

In [15]:
# Download the geospatial CSV into a dataframe and show its
# (rows, columns) shape.
df_zipcode = pd.read_csv(filepath_or_buffer='http://cocl.us/Geospatial_data')
df_zipcode.shape

(103, 3)

In [16]:
# Preview the first rows of the coordinate table.
df_zipcode.head()

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


##### Merge two dataframes based on values in 'Postal Code'

In [17]:
# Inner-join the scraped table with the coordinate table on 'Postal Code',
# then preview the combined frame.
df_neighborhood = df.merge(df_zipcode, how='inner', on='Postal Code')
df_neighborhood.head()

Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636
3,M6A,North York,"Lawrence Manor, Lawrence Heights",43.718518,-79.464763
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494


#### Use Foursquare to explore Downtown Toronto (Postal Code: M5A)

In [18]:
# Foursquare credentials are read from the environment so that they are never
# stored in the notebook or echoed into its saved output.
# NOTE(review): keys that were previously committed in this cell should be
# treated as leaked and rotated.
CLIENT_ID = os.environ.get('FOURSQUARE_CLIENT_ID')  # your Foursquare ID
CLIENT_SECRET = os.environ.get('FOURSQUARE_CLIENT_SECRET')  # your Foursquare Secret
if not CLIENT_ID or not CLIENT_SECRET:
    raise RuntimeError('Set the FOURSQUARE_CLIENT_ID and FOURSQUARE_CLIENT_SECRET '
                       'environment variables before running this cell.')
VERSION = '20200603'  # sent as the `v` parameter of the API requests
print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
# The secret itself is deliberately not printed.
print('CLIENT_SECRET: <hidden>')

Your credentails:
CLIENT_ID: YMX5TBXNFF0OEYWZ1ASKBOZTGOAMBZYB4Y34NZBWB30GZA34
CLIENT_SECRET: <hidden>


##### Exploring restaurants

In [19]:
# Settings used to build the Foursquare venue search request.
search_query = 'restaurant'  # value of the `query` parameter
radius = 20000               # value of the `radius` parameter
limit = 25                   # value of the `limit` parameter

In [20]:
# Query the Foursquare venue-search endpoint for restaurants around
# 43.654260, -79.360636 (the coordinates listed for postal code M5A).
url_restaurants = 'https://api.foursquare.com/v2/venues/search'
params_restaurants = {
    'client_id': CLIENT_ID,
    'client_secret': CLIENT_SECRET,
    'll': '{},{}'.format(43.654260, -79.360636),
    'v': VERSION,
    'query': search_query,
    'radius': radius,
    'limit': limit,
}
# Passing `params` lets requests URL-encode every value. The timeout and
# raise_for_status() turn a stalled or rejected request into an immediate,
# descriptive error instead of a KeyError on the JSON below.
response_restaurants = requests.get(url_restaurants, params=params_restaurants, timeout=30)
response_restaurants.raise_for_status()
eat_tor = response_restaurants.json()
eat_toronto = eat_tor['response']['venues']
Toronto_eats_df = pd.json_normalize(eat_toronto)
# reindex keeps exactly these five columns and fills any that are absent from
# the response with NaN, so an empty venue list does not raise a KeyError.
Toronto_eats = Toronto_eats_df.reindex(
    columns=['id', 'name', 'location.lat', 'location.lng', 'location.distance'])
Toronto_eats.head(10)

Unnamed: 0,id,name,location.lat,location.lng,location.distance
0,4ad4c05cf964a52006f620e3,Victoria's Restaurant,43.649298,-79.376431,1386
1,5d3127c715cfea0007e44d52,est Restaurant,43.658911,-79.349035,1068
2,4ada5d5bf964a520e92121e3,The Hot House Restaurant & Bar,43.648824,-79.373702,1213
3,4ae77362f964a52069ab21e3,Mi Mi Restaurant,43.666293,-79.349079,1631
4,4ad4c05cf964a520e1f520e3,Docks Restaurant & Night Club The,43.641806,-79.354171,1480
5,4ad4c05ff964a52048f720e3,Hemispheres Restaurant & Bistro,43.654884,-79.385931,2038
6,4ad4c05ff964a52018f720e3,Cottage Restaurant & Lounge,43.66277,-79.376894,1616
7,4b7b268cf964a52061542fe3,Tender Trap Restaurant,43.667724,-79.369485,1659
8,4bd47e6fcfa7b7139f2924da,Studio Restaurant,43.6615,-79.379319,1706
9,4ad4c060f964a5207bf720e3,Gio Rana's Really Really Nice Restaurant,43.663367,-79.330425,2635


##### Exploring shopping centers

In [21]:
# Query term for the shopping-centre search.
search_query1 = 'shopping centre'

In [22]:
# Query the Foursquare venue-search endpoint for shopping centres around
# 43.654260, -79.360636 (the coordinates listed for postal code M5A).
url_shops = 'https://api.foursquare.com/v2/venues/search'
params_shops = {
    'client_id': CLIENT_ID,
    'client_secret': CLIENT_SECRET,
    'll': '{},{}'.format(43.654260, -79.360636),
    'v': VERSION,
    'query': search_query1,
    'radius': radius,
    'limit': limit,
}
# Passing `params` lets requests URL-encode every value (the query term
# contains a space). The timeout and raise_for_status() turn a stalled or
# rejected request into an immediate, descriptive error instead of a KeyError
# on the JSON below.
response_shops = requests.get(url_shops, params=params_shops, timeout=30)
response_shops.raise_for_status()
shops_tor = response_shops.json()
shop_toronto = shops_tor['response']['venues']
Toronto_shops_df = pd.json_normalize(shop_toronto)
# reindex keeps exactly these five columns and fills any that are absent from
# the response with NaN, so an empty venue list does not raise a KeyError.
Toronto_shops = Toronto_shops_df.reindex(
    columns=['id', 'name', 'location.lat', 'location.lng', 'location.distance'])
Toronto_shops.head(10)

Unnamed: 0,id,name,location.lat,location.lng,location.distance
0,5044fe32e4b0c55b95cc2efd,York Mills Shopping Centre,43.747731,-79.384724,10584
1,4b71aa4bf964a520a4542de3,Chartwell Shopping Centre 集友商場,43.797768,-79.270853,17532
2,4f788be0e4b01af713f49d94,SkyCity Shopping Centre,43.80477,-79.288545,17729
3,4ef1f20d61af815c7919a64a,Upper Oakville Shopping Centre Head Office,43.654294,-79.340546,1618
4,4de175d9fa7651589ee7a14f,Milliken Wells Shopping Centre 愛登商場,43.825518,-79.276936,20217
5,4ad4c062f964a52002f820e3,Yorkdale Shopping Centre,43.725482,-79.452736,10854
6,5c55cbe28194fc002cc3f1fd,Riverdale Shopping Centre,43.668694,-79.34207,2194
7,4b157c6bf964a52091ad23e3,Galleria Shopping Centre,43.667592,-79.442053,6722
8,4adc8d0df964a520222d21e3,Square One Shopping Centre,43.593169,-79.644493,23863
9,4f1814f1e4b0259ede704386,Sancardia Shopping Centre,43.678378,-79.358412,2690


#### Using Folium to show the restaurants and shopping centers in the neighborhood

In [28]:
# Base map centred on the coordinates used for the Foursquare searches.
map_Toronto = folium.Map(location=[43.654260, -79.360636], zoom_start=15)

# Restaurants: default pin markers with the venue name in the popup.
for lat, lng, name in zip(Toronto_eats['location.lat'],
                          Toronto_eats['location.lng'],
                          Toronto_eats['name']):
    folium.Marker(
        [lat, lng],
        popup=folium.Popup('{}'.format(name), parse_html=True),
    ).add_to(map_Toronto)

# Shopping centres: small blue circle markers with the venue name in the popup.
for lat, lng, name in zip(Toronto_shops['location.lat'],
                          Toronto_shops['location.lng'],
                          Toronto_shops['name']):
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=folium.Popup('{}'.format(name), parse_html=True),
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
    ).add_to(map_Toronto)

# The last expression renders the map as the cell output.
map_Toronto


In [None]:
# NOTE(review): the unfinished scratch snippet that used to be in this cell was
# removed. It did not parse (unbalanced parentheses) and referred to names this
# notebook never defines (`row`, `f_map`); all markers are added to map_Toronto
# in the map cell.