In [1]:
import numpy as np # library to handle data in a vectorized manner

import pandas as pd # library for data analysis
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

import json # library to handle JSON files

 # geopy is needed later in this notebook to geocode 'Toronto, Canada'
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

import requests # library to handle requests
from pandas.io.json import json_normalize # transform a JSON file into a pandas dataframe
# NOTE(review): newer pandas releases expose this as pandas.json_normalize -- confirm the installed version before changing the import

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans

# Beautiful Soup, used for web scraping
import bs4 as bs

import xml
!conda install -c conda-forge folium=0.5.0 --yes
 # the shell command above installs folium (map rendering library), pinned to 0.5.0;
 # folium itself is imported in the following cell

print('Libraries imported.')

Solving environment: done

## Package Plan ##

  environment location: /opt/conda/envs/Python36

  added / updated specs: 
    - folium=0.5.0


The following packages will be downloaded:

    package                    |            build
    ---------------------------|-----------------
    altair-4.1.0               |             py_1         614 KB  conda-forge
    vincent-0.4.4              |             py_1          28 KB  conda-forge
    openssl-1.1.1g             |       h516909a_0         2.1 MB  conda-forge
    branca-0.4.1               |             py_0          26 KB  conda-forge
    python_abi-3.6             |          1_cp36m           4 KB  conda-forge
    ca-certificates-2020.4.5.1 |       hecc5488_0         146 KB  conda-forge
    certifi-2020.4.5.1         |   py36h9f0ad1d_0         151 KB  conda-forge
    folium-0.5.0               |             py_0          45 KB  conda-forge
    ------------------------------------------------------------
                       

In [2]:
import folium

In [3]:
# Scrape the Toronto postal-code table from Wikipedia into the DataFrame `data`.
site_url = "https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M"

# Without a timeout a stalled connection would hang the cell indefinitely, and
# without raise_for_status() an HTTP error page would be parsed as if it were data.
res = requests.get(site_url, timeout=30)
res.raise_for_status()

BSoup = bs.BeautifulSoup(res.content, 'lxml')

# The postal-code listing is the first <table> on the page.
table = BSoup.find('table')
if table is None:
    raise ValueError('No <table> element found at {}'.format(site_url))

# read_html returns a list of DataFrames, one per <table> in the markup it is given.
df = pd.read_html(str(table))

# Use the parsed frame directly: serialising it to JSON and parsing it back adds
# nothing and can silently coerce dtypes or reorder columns.
data = df[0]

In [4]:
# Preview the first five scraped rows.
data.head(n=5)

Unnamed: 0,Borough,Neighborhood,Postal Code
0,Not assigned,,M1A
1,Not assigned,,M2A
2,North York,Parkwoods,M3A
3,North York,Victoria Village,M4A
4,Downtown Toronto,"Regent Park, Harbourfront",M5A


In [5]:
# Keep only the rows whose borough has actually been assigned.
has_borough = data['Borough'].ne('Not assigned')
data_selected = data[has_borough]
del has_borough  # temporary mask; keep the notebook namespace unchanged

In [6]:
# Collapse rows that share a borough and postal code into a single row,
# comma-joining the values of every remaining column.
data_selected = (
    data_selected
    .groupby(by=['Borough', 'Postal Code'], as_index=False)
    .agg(','.join)
)

In [7]:
# First five rows after grouping by borough and postal code.
data_selected.head(n=5)

Unnamed: 0,Borough,Postal Code,Neighborhood
0,Central Toronto,M4N,Lawrence Park
1,Central Toronto,M4P,Davisville North
2,Central Toronto,M4R,"North Toronto West, Lawrence Park"
3,Central Toronto,M4S,Davisville
4,Central Toronto,M4T,"Moore Park, Summerhill East"


In [8]:
# Where the neighborhood is the literal 'Not assigned', fall back to the borough name.
data_selected['Neighborhood'] = data_selected['Neighborhood'].mask(
    data_selected['Neighborhood'] == 'Not assigned',
    data_selected['Borough'],
)

In [9]:
# First five rows after replacing 'Not assigned' neighborhoods.
data_selected.head(n=5)

Unnamed: 0,Borough,Postal Code,Neighborhood
0,Central Toronto,M4N,Lawrence Park
1,Central Toronto,M4P,Davisville North
2,Central Toronto,M4R,"North Toronto West, Lawrence Park"
3,Central Toronto,M4S,Davisville
4,Central Toronto,M4T,"Moore Park, Summerhill East"


In [10]:
# (number of rows, number of columns) of the cleaned table.
(len(data_selected.index), len(data_selected.columns))

(103, 3)

<h2>Task 2: Add latitude and longitude to each postal code</h2>

In [11]:
# Download the CSV that maps each postal code to a latitude/longitude pair.
geo_url = "https://cocl.us/Geospatial_data"
geo_data = pd.read_csv(filepath_or_buffer=geo_url)

In [12]:
# First five rows of the geospatial lookup table.
geo_data.head(n=5)

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


In [13]:
# Column labels of the geospatial table (for a DataFrame, keys() is its column Index).
geo_data.keys()

Index(['Postal Code', 'Latitude', 'Longitude'], dtype='object')

In [14]:
# Attach coordinates to each postal code. This is an inner join, so postal codes
# missing from either table are dropped.
merged_data = data_selected.merge(geo_data, on='Postal Code')

In [15]:
# First five rows of the merged table.
merged_data.head(n=5)

Unnamed: 0,Borough,Postal Code,Neighborhood,Latitude,Longitude
0,Central Toronto,M4N,Lawrence Park,43.72802,-79.38879
1,Central Toronto,M4P,Davisville North,43.712751,-79.390197
2,Central Toronto,M4R,"North Toronto West, Lawrence Park",43.715383,-79.405678
3,Central Toronto,M4S,Davisville,43.704324,-79.38879
4,Central Toronto,M4T,"Moore Park, Summerhill East",43.689574,-79.38316


In [16]:
# Put the columns into the desired order: postal code first, coordinates last.
merged_data = merged_data.loc[
    :, ['Postal Code', 'Borough', 'Neighborhood', 'Latitude', 'Longitude']
]

In [17]:
# First five rows with the columns in their final order.
merged_data.head(n=5)

Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude
0,M4N,Central Toronto,Lawrence Park,43.72802,-79.38879
1,M4P,Central Toronto,Davisville North,43.712751,-79.390197
2,M4R,Central Toronto,"North Toronto West, Lawrence Park",43.715383,-79.405678
3,M4S,Central Toronto,Davisville,43.704324,-79.38879
4,M4T,Central Toronto,"Moore Park, Summerhill East",43.689574,-79.38316


<h2>Task 3: Map the Toronto neighborhoods</h2>

In [18]:
# Pair each row's latitude and longitude into a single (lat, lng) tuple column.
merged_data['Coordinates'] = [
    (latitude, longitude)
    for latitude, longitude in zip(merged_data['Latitude'], merged_data['Longitude'])
]

In [19]:
# First five rows including the new Coordinates column.
merged_data.head(n=5)

Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude,Coordinates
0,M4N,Central Toronto,Lawrence Park,43.72802,-79.38879,"(43.7280205, -79.3887901)"
1,M4P,Central Toronto,Davisville North,43.712751,-79.390197,"(43.7127511, -79.3901975)"
2,M4R,Central Toronto,"North Toronto West, Lawrence Park",43.715383,-79.405678,"(43.7153834, -79.40567840000001)"
3,M4S,Central Toronto,Davisville,43.704324,-79.38879,"(43.7043244, -79.3887901)"
4,M4T,Central Toronto,"Moore Park, Summerhill East",43.689574,-79.38316,"(43.6895743, -79.38315990000001)"


In [21]:
# Geocode the city with Nominatim to get the coordinates the map is centred on.
address = 'Toronto, Canada'

# Nominatim must be given an identifying user agent: geopy 1.x emits a
# deprecation warning when it is omitted, and geopy 2.x refuses to construct
# the geocoder without one.
geolocator = Nominatim(user_agent='toronto_neighborhood_explorer')
location = geolocator.geocode(address)

# geocode() returns None when the address cannot be resolved; fail with a
# clear message instead of an AttributeError on the next line.
if location is None:
    raise ValueError('Could not geocode address: {!r}'.format(address))

Latitude = location.latitude
Longitude = location.longitude
print('The geograpical coordinate of the City of Toronto are {}, {}.'.format(Latitude, Longitude))

The geograpical coordinate of the City of Toronto are 43.6534817, -79.3839347.


  app.launch_new_instance()


In [26]:
# Base map centred on the geocoded city coordinates.
map_toronto = folium.Map(location=[Latitude, Longitude], zoom_start=10)

# Draw one small circle per row of merged_data, with a "<neighborhood>, <borough>" popup.
for lat, lng, borough, neighborhood in zip(
    merged_data['Latitude'],
    merged_data['Longitude'],
    merged_data['Borough'],
    merged_data['Neighborhood'],
):
    # parse_html=True is passed so the label text is handled as text, not raw markup.
    label = folium.Popup(f'{neighborhood}, {borough}', parse_html=True)
    map_toronto.add_child(
        folium.CircleMarker(
            [lat, lng],
            radius=3,
            popup=label,
            color='green',
            fill=True,
            fill_color='#3199cc',
            fill_opacity=0.3,
            # NOTE(review): parse_html looks like a Popup option rather than a
            # CircleMarker one -- confirm whether it has any effect here.
            parse_html=False,
        )
    )

# Leave the map as the cell's last expression so the notebook renders it.
map_toronto

<h1>By Sudhamshu B N</h1>
