In [14]:
import numpy as np 

import pandas as pd
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

import json 

!conda install -c conda-forge geopy --yes  
from geopy.geocoders import Nominatim 

import requests # library to handle requests
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans

!conda install -c conda-forge folium=0.5.0 --yes  
import folium # map rendering library

from bs4 import BeautifulSoup # library to parse HTML and XML documents

print('Libraries imported.')

Solving environment: done

# All requested packages already installed.

Solving environment: done

## Package Plan ##

  environment location: /opt/conda/envs/DSX-Python35

  added / updated specs: 
    - folium=0.5.0


The following packages will be downloaded:

    package                    |            build
    ---------------------------|-----------------
    branca-0.3.1               |             py_0          25 KB  conda-forge
    folium-0.5.0               |             py_0          45 KB  conda-forge
    altair-2.2.2               |           py35_1         462 KB  conda-forge
    vincent-0.4.4              |             py_1          28 KB  conda-forge
    ------------------------------------------------------------
                                           Total:         560 KB

The following NEW packages will be INSTALLED:

    altair:  2.2.2-py35_1 conda-forge
    branca:  0.3.1-py_0   conda-forge
    folium:  0.5.0-py_0   conda-forge
    vincent: 0.4.4-py_1   conda-

### Initialize Dataframe

In [26]:
wiki_url = "https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M"
wiki_response = requests.get(wiki_url).text

### Load and explore the data

In [27]:
soup = BeautifulSoup(wiki_response, 'lxml')
# To keep the code compact we comment out the soup.prettify() result
#print(soup.prettify())


### Transform the data into a pandas dataframe

In [28]:
table = soup.find_all('table')[0] 
neighborhoods = pd.read_html(str(table))[0]
neighborhoods.head()

# To prevent futher conflicts correct the column name from 'Neighbourhood' to 'Neighborhood
neighborhoods.rename(columns={'Neighbourhood':'Neighborhood'}, inplace=True)

### Find our unique Borough names and how many units each of them has

In [29]:
print(neighborhoods['Borough'].value_counts())


KeyError: 'Borough'

### Remove rows where Borough and Neighborhood columns have 'Not assigned' values

In [None]:
neighborhoods = neighborhoods[neighborhoods['Borough']!='Not assigned']

# Reset the index
neighborhoods.reset_index(drop=True, inplace=True)
neighborhoods.head()

### Combine rows where more than one neighborhood exists in one postal code area into one comma separated row

In [None]:
# Format dataframe columns width to display Neighborhood column full length
pd.set_option('display.max_colwidth', -1)

# Group dataframe and combine rows where more than one neighborhood exists into one comma separated row.
neighborhoods = neighborhoods.groupby(['Postcode','Borough'])['Neighborhood'].apply(lambda x: ', '.join(x)).reset_index()

neighborhoods.head(12)

### Find rows where Neighborhood column has 'Not assigned' value and replace it with corresponding Borough value


In [None]:
borough_data = neighborhoods['Borough']
neighborhoods_data = neighborhoods['Neighborhood']

# Loop through the data
for ind, (borough_address, neighborhoods_address) in enumerate(zip(borough_data, neighborhoods_data)):
    if neighborhoods_address.strip() == "Not assigned":
        neighborhoods_data[ind] = borough_data[ind]

# Check that borough named 'Queen's Park' has the same named neighbourhood
print(pd.DataFrame(neighborhoods.iloc[83:88,]))

### Display the number of rows in neighborhoods dataframe

In [None]:
print(neighborhoods.shape)


## Assignment 2

In [12]:
# Read in Geospatial_Coordinates.csv file
geospatial_coordinates_df = pd.read_csv("Geospatial_Coordinates.csv")
print(neighborhoods.shape)
geospatial_coordinates_df.head()

FileNotFoundError: File b'Geospatial_Coordinates.csv' does not exist

In [None]:
neighborhoods.join(geospatial_coordinates_df)

# set_index on other_df to be key
neighborhoods = neighborhoods.join(geospatial_coordinates_df.set_index('Postal Code'), on='Postcode')
neighborhoods.head()

## Assignment 3

In [22]:
neighborhoods.drop(['Postcode'], axis=1, inplace=True)
neighborhoods.head()

ValueError: labels ['Postcode'] not contained in axis

In [23]:
address = 'Toronto'

geolocator = Nominatim(user_agent="toronto_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Toronto are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Toronto are 43.653963, -79.387207.


## Create a map of Toronto with neighborhoods superimposed on top

In [24]:
# create map of Toronto using latitude and longitude values
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=10)

# add markers to map
for lat, lng, borough, neighborhood in zip(neighborhoods['Latitude'], neighborhoods['Longitude'], neighborhoods['Borough'], neighborhoods['Neighborhood']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_toronto)
    
map_toronto


KeyError: 'Latitude'

In [25]:
central_toronto_data = neighborhoods[neighborhoods['Borough'] == 'Central Toronto'].reset_index(drop=True)
central_toronto_data.head()

KeyError: 'Borough'