In [10]:
import requests
import numpy as np
import pandas as pd

In [2]:
wikipedia_link='https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'

page = requests.get(wikipedia_link).text

In [None]:
from bs4 import BeautifulSoup

soup = BeautifulSoup(page, 'lxml')
table = soup.find('table', class_='wikitable sortable')

In [200]:
# Get table headers and initialize data frame
header = table.find('tr')
lh = []
for header_name in header.find_all('th'):
    lh.append(header_name.text.strip())

df = pd.DataFrame(columns=lh)

# populate data frame; exclude first tr (index 0), which is header row
lall = np.empty((0,3), dtype = 'str')

for tr in table.find_all('tr')[1:]:
    lrow = []
    for td in tr.find_all('td'):
        lrow.append(td.text.strip())
    
    if lrow[0] in df['Postcode'].values:
        df.iloc[df.index[df['Postcode'] == lrow[0]][0],2] = df.iloc[df.index[df['Postcode'] == lrow[0]][0],2]+', '+lrow[2]
        
    else:
        
        if lrow[2] == 'Not assigned':
            lrow[2] = lrow[1]
        
        if lrow[1] != 'Not assigned':
            df = df.append(pd.DataFrame([lrow], columns = lh), ignore_index = True)
            #lall = np.append(lall, [lrow], axis = 0)
        
#df = df.append(pd.DataFrame(lall, columns = lh), ignore_index = True)
df[0:20]

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Harbourfront, Regent Park"
3,M6A,North York,"Lawrence Heights, Lawrence Manor"
4,M7A,Queen's Park,Queen's Park
5,M9A,Etobicoke,Islington Avenue
6,M1B,Scarborough,"Rouge, Malvern"
7,M3B,North York,Don Mills North
8,M4B,East York,"Woodbine Gardens, Parkview Hill"
9,M5B,Downtown Toronto,"Ryerson, Garden District"


In [203]:
df.shape

(103, 3)

In [208]:
url="http://cocl.us/Geospatial_data"
lls = pd.read_csv(url)
lls.head()

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


In [222]:
df = df.merge(lls, how='left', left_on='Postcode', right_on='Postal Code').drop('Postal Code', axis = 1)
df.head()

Unnamed: 0,Postcode,Borough,Neighbourhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,"Harbourfront, Regent Park",43.65426,-79.360636
3,M6A,North York,"Lawrence Heights, Lawrence Manor",43.718518,-79.464763
4,M7A,Queen's Park,Queen's Park,43.662301,-79.389494


In [229]:
from sklearn.cluster import KMeans
k_means = KMeans(init="k-means++", n_clusters=4, n_init=12)
k_means.fit(df[['Latitude', 'Longitude']])

KMeans(algorithm='auto', copy_x=True, init='k-means++', max_iter=300,
    n_clusters=4, n_init=12, n_jobs=1, precompute_distances='auto',
    random_state=None, tol=0.0001, verbose=0)

The clusters and corresponding colors are added to the data frame

In [269]:
df['Cluster'] = k_means.labels_
df['Color'] = 'color'
for i in range(0,df.Cluster.count()):
    if df.Cluster[i] == 0:
        df.loc[i:, 'Color'] = 'red'
    elif df.Cluster[i] == 1:
        df.loc[i:, 'Color'] = 'blue'
    elif df.Cluster[i] == 2:
        df.loc[i:, 'Color'] = 'green'
    else:
        df.loc[i:, 'Color'] = 'yellow'
df.head()

Unnamed: 0,Postcode,Borough,Neighbourhood,Latitude,Longitude,Cluster,Color
0,M3A,North York,Parkwoods,43.753259,-79.329656,2,green
1,M4A,North York,Victoria Village,43.725882,-79.315572,2,green
2,M5A,Downtown Toronto,"Harbourfront, Regent Park",43.65426,-79.360636,0,red
3,M6A,North York,"Lawrence Heights, Lawrence Manor",43.718518,-79.464763,3,yellow
4,M7A,Queen's Park,Queen's Park,43.662301,-79.389494,0,red


In [223]:
!conda install -c conda-forge folium=0.5.0 --yes
import folium

print('Folium installed and imported!')

Fetching package metadata .............
Solving package specifications: .

Package plan for installation in environment /opt/conda/envs/DSX-Python35:

The following NEW packages will be INSTALLED:

    altair:  2.2.2-py35_1 conda-forge
    branca:  0.3.1-py_0   conda-forge
    folium:  0.5.0-py_0   conda-forge
    vincent: 0.4.4-py_1   conda-forge

altair-2.2.2-p 100% |################################| Time: 0:00:00  45.79 MB/s
branca-0.3.1-p 100% |################################| Time: 0:00:00  36.43 MB/s
vincent-0.4.4- 100% |################################| Time: 0:00:00  41.86 MB/s
folium-0.5.0-p 100% |################################| Time: 0:00:00  46.13 MB/s
Folium installed and imported!


In [295]:
# San Francisco latitude and longitude values
latitude = 43.6532
longitude = -79.3832

# create map and display it
toronto_map = folium.Map(location=[latitude, longitude], zoom_start=12)

In [296]:
# instantiate a feature group for the incidents in the dataframe
boroughs = folium.map.FeatureGroup()

# loop through the 100 crimes and add each to the incidents feature group
for lat, long, color in zip(df.Latitude, df.Longitude, df.Color):
    boroughs.add_child(
        folium.features.CircleMarker(
            [lat, long],
            radius=5, # define how big you want the circle markers to be
            color = color,
        )
    )

# add incidents to map
toronto_map.add_child(boroughs)

In [301]:
from IPython.display import HTML, display

width, height = (400,500) # width and height of the displayed iFrame, in pixels

srcdoc = toronto_map._repr_html_().replace('"', '&quot;')
embed = HTML('<iframe srcdoc="{}" '
             'style="width: {}px; height: {}px; display:block; width: 50%; margin: 0 auto; '
             'border: none"></iframe>'.format(srcdoc, width, height))
embed