### Task: Segmenting and Clustering Neighborhoods in Toronto
#### By: Mácio Matheus Santos de Arruda

In [1]:
from bs4 import BeautifulSoup
import requests
import pandas as pd
import numpy as np
from geopy.geocoders import Nominatim 
import folium as f

#### Get Wikipedia content with BeautifulSoup

In [2]:
# Download the Wikipedia page at `url` and parse it with the lxml backend.
url = "https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M"
r = requests.get(url, timeout=30)  # bound the wait so a stalled connection cannot hang the kernel
r.raise_for_status()  # stop here on an HTTP error instead of parsing an error page
html = BeautifulSoup(r.content, "lxml")

#### Drop rows whose borough is "Not assigned" and reset the index

In [3]:
# Load the first <table> of the parsed page into a DataFrame.
table = html.find_all('table')[0]
# header=0 makes read_html consume the header row itself, and the columns are
# then named by position. This avoids relying on integer column labels plus a
# manual drop of the first row, which breaks when pandas infers the header.
df = pd.read_html(str(table), header=0)[0]
# Assumes the table has exactly three columns in this order; raises otherwise.
df.columns = ["PostalCode", "Borough", "Neighborhood"]
# Keep only rows with an assigned borough and renumber the index from 0.
df = df[df.Borough != "Not assigned"].reset_index(drop=True)

#### Group by postal code and borough, joining the neighborhood names with ', '

In [4]:
# Collapse rows that share a postal code and borough into a single row whose
# Neighborhood value is the ', '-joined string of the group's neighborhoods.
df = (
    df.groupby(['PostalCode', 'Borough'])['Neighborhood']
    .apply(lambda names: ", ".join(names.astype(str)))
    .reset_index()
)

## GET COORDINATES

In [5]:
# Load the coordinates CSV and label its three columns by position so the
# key column matches df's 'PostalCode'. (A later rename of 'Postal Code' was
# a no-op after this assignment and has been dropped.)
url = "http://cocl.us/Geospatial_data/Geospatial_Coordinates.csv"
coor = pd.read_csv(url)
coor.columns = ['PostalCode', 'Latitude', 'Longitude']
# Left join: every row of df is kept; postal codes absent from coor get NaN coordinates.
df2 = pd.merge(df, coor, how='left', on='PostalCode')

## Plotting the Folium map

In [6]:
# Look up Toronto's coordinates to centre the map.
# An explicit user_agent identifies this notebook to the Nominatim service;
# constructing Nominatim without one is deprecated/rejected by geopy.
geolocator = Nominatim(user_agent="toronto_neighborhoods_notebook")
loc = geolocator.geocode('Toronto')
if loc is None:
    # geocode returns None when nothing matches; fail with a clear message
    # rather than an AttributeError on the next line.
    raise ValueError("Nominatim returned no result for 'Toronto'")
lat = loc.latitude
long = loc.longitude
print(f'The coordinate of Toronto are {lat}, {long}.')

  """Entry point for launching an IPython kernel.


The coordinate of Toronto are 43.653963, -79.387207.


In [7]:
# Base map centred on the geocoded Toronto coordinates.
plot_map = f.Map(location=[lat, long], zoom_start=12)
# Add one circle marker per row of df2, using the neighborhood text as its popup.
for lt, lng, name in zip(df2['Latitude'], df2['Longitude'], df2['Neighborhood']):
    f.CircleMarker(
        [lt, lng],
        radius=6,
        # parse_html is a Popup argument; it is not a CircleMarker option,
        # so it is passed only here.
        popup=f.Popup(name, parse_html=True),
        color='blue',
        fill=True,
        fill_opacity=0.4,
    ).add_to(plot_map)
plot_map

#### Below is a screenshot of the plotted map, in case the previous cell does not render

![Folium map screenshot](https://raw.githubusercontent.com/macio-matheus/Coursera_Capstone/master/week3/screenshot_folium_map.png)