# Coursera Capstone Week 3

In [139]:
from bs4 import BeautifulSoup
import urllib.request
import pandas as pd
import numpy as np

In [150]:
column_names = ['Postalcode', 'Borough', 'Neighborhood'] 
neighborhoods = pd.DataFrame(columns=column_names)

def make_soup(url):
    wikipage = urllib.request.urlopen(url)
    soupdata = BeautifulSoup(wikipage, "html.parser")
    return soupdata

soup = make_soup("https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M")


table = soup.find_all('table', class_="wikitable sortable")[0]


wikipedia2 = []
for item in table.findAll('tr'):
    wikidata = []
    for data in item.findAll('td'):
        wikidata.append(data.text)
    if len(wikidata) == 3:
        neighborhoods = neighborhoods.append({'Postalcode': wikidata[0],
                                          'Borough': wikidata[1],
                                          'Neighborhood': wikidata[2]}, ignore_index=True)

neighborhoods['Neighborhood'] = neighborhoods['Neighborhood'].map(lambda x: str(x)[:-1])
neighborhoods.head(10)

Unnamed: 0,Postalcode,Borough,Neighborhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront
5,M5A,Downtown Toronto,Regent Park
6,M6A,North York,Lawrence Heights
7,M6A,North York,Lawrence Manor
8,M7A,Queen's Park,Not assigned
9,M8A,Not assigned,Not assigned


In [141]:
df = neighborhoods[neighborhoods.Borough != 'Not assigned']

In [152]:
df.head(10)

Unnamed: 0,Postalcode,Borough,Neighborhood
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront
5,M5A,Downtown Toronto,Regent Park
6,M6A,North York,Lawrence Heights
7,M6A,North York,Lawrence Manor
8,M7A,Queen's Park,Not assigned
10,M9A,Etobicoke,Islington Avenue
11,M1B,Scarborough,Rouge
12,M1B,Scarborough,Malvern


In [153]:
df2 = df.groupby(['Postalcode','Borough'])['Neighborhood'].apply(lambda x: ", ".join(x.astype(str))).reset_index()
df3 = df2.sample(frac=1).reset_index(drop=True)
df3.head(10)

Unnamed: 0,Postalcode,Borough,Neighborhood
0,M9W,Etobicoke,Northwest
1,M6C,York,Humewood-Cedarvale
2,M1J,Scarborough,Scarborough Village
3,M4K,East Toronto,"The Danforth West, Riverdale"
4,M9M,North York,"Emery, Humberlea"
5,M4B,East York,"Woodbine Gardens, Parkview Hill"
6,M4M,East Toronto,Studio District
7,M2J,North York,"Fairview, Henry Farm, Oriole"
8,M6L,North York,"Maple Leaf Park, North Park, Upwood Park"
9,M5H,Downtown Toronto,"Adelaide, King, Richmond"


In [154]:
print(df3.shape)

(103, 3)


# Part 2
## Geo Data

In [155]:
url2="http://cocl.us/Geospatial_data"
geo_data=pd.read_csv(url2)
geo_data.head(10)

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476
5,M1J,43.744734,-79.239476
6,M1K,43.727929,-79.262029
7,M1L,43.711112,-79.284577
8,M1M,43.716316,-79.239476
9,M1N,43.692657,-79.264848


In [156]:
final_table = df3.set_index('Postalcode').join(geo_data.set_index('Postal Code'))
final_table = final_table.sample(frac=1).reset_index(drop=True)
final_table.head(20)

Unnamed: 0,Borough,Neighborhood,Latitude,Longitude
0,Central Toronto,"Forest Hill North, Forest Hill West",43.696948,-79.411307
1,West Toronto,"Brockton, Exhibition Place, Parkdale Village",43.636847,-79.428191
2,North York,"Flemingdon Park, Don Mills South",43.7259,-79.340923
3,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711
4,Etobicoke,"The Kingsway, Montgomery Road, Old Mill North",43.653654,-79.506944
5,North York,Victoria Village,43.725882,-79.315572
6,Downtown Toronto,"Harbourfront, Regent Park",43.65426,-79.360636
7,North York,Glencairn,43.709577,-79.445073
8,Downtown Toronto,Church and Wellesley,43.66586,-79.38316
9,Downtown Toronto,"Cabbagetown, St. James Town",43.667967,-79.367675


# Part 3
## Map

In [160]:
!conda install -c conda-forge geopy --yes 
from geopy.geocoders import Nominatim
import matplotlib.cm as cm
import matplotlib.colors as colors

from sklearn.cluster import KMeans

import folium

Solving environment: done

## Package Plan ##

  environment location: /anaconda3

  added / updated specs: 
    - geopy


The following packages will be downloaded:

    package                    |            build
    ---------------------------|-----------------
    geopy-1.17.0               |             py_0          49 KB  conda-forge
    geographiclib-1.49         |             py_0          32 KB  conda-forge
    ------------------------------------------------------------
                                           Total:          82 KB

The following NEW packages will be INSTALLED:

    geographiclib: 1.49-py_0   conda-forge
    geopy:         1.17.0-py_0 conda-forge


Downloading and Extracting Packages
geopy-1.17.0         | 49 KB     | ##################################### | 100% 
geographiclib-1.49   | 32 KB     | ##################################### | 100% 
Preparing transaction: done
Verifying transaction: done
Executing transaction: done


In [171]:
address = 'Toronto'

geolocator = Nominatim()
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Toronto are {}, {}.'.format(latitude, longitude))



The geograpical coordinate of Toronto are 43.653963, -79.387207.


In [177]:
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=11)

for lat, lng, label in zip(final_table['Latitude'], final_table['Longitude'], final_table['Neighborhood']):
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7).add_to(map_toronto)  
    
map_toronto