In [1]:
#!conda install beautifulsoup4

from bs4 import BeautifulSoup
import requests
import pandas as pd
import numpy as np
import csv

Collecting package metadata (current_repodata.json): ...working... done
Solving environment: ...working... done

# All requested packages already installed.



# Part 1

In [55]:
# Download the Wikipedia page listing the Canadian postal codes starting with "M".
# The timeout keeps the cell from hanging indefinitely on a stalled connection,
# and raise_for_status() stops on an HTTP error instead of silently parsing an
# error page as if it were the article.
response = requests.get(
    'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M',
    timeout=30,
)
response.raise_for_status()
data = response.text
soup = BeautifulSoup(data, 'lxml')

# Column accumulators; they are filled by the table-scraping loop.
postCode = []
boroughName = []
neighborhoodName = []

### Extracting the data, cleaning it, and presenting it in a DataFrame

In [56]:
# Start from empty lists so that re-running this cell does not append the
# same rows a second time.
postCode.clear()
boroughName.clear()
neighborhoodName.clear()

table = soup.find('table')  # first <table> element in the parsed page
if table is None:
    raise ValueError('No <table> element found in the downloaded page')

for row in table.find_all('tr'):  # iterate over the table rows
    cells = row.find_all('td')
    # Skip rows without data cells, and rows too short to provide all three
    # columns (indexing cells[2] on those would raise IndexError).
    if len(cells) < 3:
        continue
    # Drop the trailing newline character from each cell's text.
    postCode.append(cells[0].text.rstrip('\n'))
    boroughName.append(cells[1].text.rstrip('\n'))
    neighborhoodName.append(cells[2].text.rstrip('\n'))

In [57]:
# Assemble the three scraped columns into a single table.
df = pd.DataFrame(
    {
        "Postal Code": postCode,
        "Borough": boroughName,
        "Neighborhood": neighborhoodName,
    }
)

# Keep only the rows whose borough is assigned, renumbering the index from 0.
df2 = df.loc[df['Borough'].ne('Not assigned')].reset_index(drop=True)

# One row per (postal code, borough) pair: the neighbourhood names sharing a
# pair are concatenated into one comma-separated string.
df3 = df2.groupby(['Postal Code', 'Borough'], as_index=False).agg(', '.join)

df3.head()

Unnamed: 0,Postal Code,Borough,Neighborhood
0,M1B,Scarborough,"Malvern, Rouge"
1,M1C,Scarborough,"Rouge Hill, Port Union, Highland Creek"
2,M1E,Scarborough,"Guildwood, Morningside, West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae


### The cleaned DataFrame now has 103 rows

In [12]:
# Display the (rows, columns) dimensions of the cleaned table.
df3.shape

(103, 3)

# Part 2

In [58]:
import csv
import os

# Folder that holds the course data files. The default is the original
# hard-coded location; set the IBM_COURSE_DIR environment variable to run the
# notebook on another machine without editing this cell.
DATA_DIR = os.environ.get('IBM_COURSE_DIR', '/Users/Nikita/Desktop/IBM course')
os.chdir(DATA_DIR)  # the file below is opened relative to the working directory

# Geospatial_Coordinates.csv is the CSV file provided by IBM.
# The handle is deliberately left open: it is read and then closed in the
# cells that follow.
csv_file = open('Geospatial_Coordinates.csv', 'r')

In [59]:
# Rewind before reading: when this cell is run a second time the handle is
# already positioned at end-of-file and read_csv would find nothing to parse.
csv_file.seek(0)
GeoCoords = pd.read_csv(csv_file)  # the CSV file provided by IBM

# Attach the coordinates to each postal code. Any Latitude/Longitude columns
# left on df3 by an earlier run of this cell are dropped first, so a re-run
# does not produce suffixed duplicates (Latitude_x / Latitude_y). On the first
# run the drop is a no-op.
df3 = pd.merge(
    df3.drop(columns=['Latitude', 'Longitude'], errors='ignore'),
    GeoCoords,
    how='inner',
    on='Postal Code',
)

df3.head(12)

Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude
0,M1B,Scarborough,"Malvern, Rouge",43.806686,-79.194353
1,M1C,Scarborough,"Rouge Hill, Port Union, Highland Creek",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476
5,M1J,Scarborough,Scarborough Village,43.744734,-79.239476
6,M1K,Scarborough,"Kennedy Park, Ionview, East Birchmount Park",43.727929,-79.262029
7,M1L,Scarborough,"Golden Mile, Clairlea, Oakridge",43.711112,-79.284577
8,M1M,Scarborough,"Cliffside, Cliffcrest, Scarborough Village West",43.716316,-79.239476
9,M1N,Scarborough,"Birch Cliff, Cliffside West",43.692657,-79.264848


In [60]:
# Release the handle on Geospatial_Coordinates.csv now that it has been read.
csv_file.close()

# Part 3

In [61]:
#!conda install -c conda-forge geopy --yes 
from geopy.geocoders import Nominatim
import requests
import matplotlib.cm as cm
import matplotlib.colors as colors
from sklearn.cluster import KMeans
#!conda install -c conda-forge folium=0.5.0 --yes 
import folium 

Collecting package metadata (current_repodata.json): ...working... done
Solving environment: ...working... done

## Package Plan ##

  environment location: C:\Users\Nikita\anaconda3

  added / updated specs:
    - geopy


The following packages will be downloaded:

    package                    |            build
    ---------------------------|-----------------
    conda-4.9.2                |   py38haa244fe_0         3.1 MB  conda-forge
    geographiclib-1.50         |             py_0          34 KB  conda-forge
    geopy-2.1.0                |     pyhd3deb0d_0          64 KB  conda-forge
    python_abi-3.8             |           1_cp38           4 KB  conda-forge
    ------------------------------------------------------------
                                           Total:         3.2 MB

The following NEW packages will be INSTALLED:

  geographiclib      conda-forge/noarch::geographiclib-1.50-py_0
  geopy              conda-forge/noarch::geopy-2.1.0-pyhd3deb0d_0
  python_abi

In [62]:
# Get the coordinates of Toronto to centre the map on.
address = 'Toronto , Ontario'
geolocator = Nominatim(user_agent="toronto_explorer")
# An explicit timeout gives the geocoding service more time to answer than
# geopy's short default.
location = geolocator.geocode(address, timeout=10)
if location is None:
    # geocode() returns None when the address cannot be resolved; fail with a
    # clear message instead of an AttributeError on `.latitude` below.
    raise ValueError('Could not geocode address: {!r}'.format(address))
latitude = location.latitude
longitude = location.longitude
map_Toronto = folium.Map(location=[latitude, longitude], zoom_start=10)

# Add one circle marker per row of df3, with a "neighborhood, borough" popup.
for lat, lng, borough, neighborhood in zip(df3['Latitude'], df3['Longitude'], df3['Borough'], df3['Neighborhood']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_Toronto)

# Last expression of the cell: renders the map in the notebook.
map_Toronto