## Import data from Wikipedia, clean it, and add latitude and longitude

In [None]:
## Import libraries
!conda install -c conda-forge wikipedia --yes

## Import libraries
import pandas as pd
import wikipedia as wp

## Import data using the Wikipedia API
html = wp.page("https://en.wikipedia.org/w/index.php?title=List_of_postal_codes_of_Canada:_M&oldid=945633050.").html().encode("UTF-8")
df = pd.read_html(html)[0]
df.head()


## Remove lines where the Borough is not assigned
df = df[df.Borough != 'Not assigned']

## Replace Neighbourhood = Not assigned with the Borough name

df.Neighbourhood = df.apply(lambda x: x['Borough'] if x['Neighbourhood']=='Not assigned' else x['Neighbourhood'], axis=1)

## Join the lines for Neighbourhoods

df = df.groupby(['Postcode','Borough'])['Neighbourhood'].apply(lambda x: ','.join(x)).reset_index()

## Load the data into a dataframe

postcode_data = pd.read_csv('http://cocl.us/Geospatial_data')

## Rename the column
postcode_data.rename(columns={'Postal Code': 'Postcode'}, inplace=True)

## Merge the information from the csv file into the main dataframe
df = pd.merge(df,
                 postcode_data[['Postcode', 'Latitude', 'Longitude']],
                 on='Postcode')
df.head()

Solving environment: | 

In [None]:
# Keep only the rows whose Borough name contains "Toronto".
# NOTE(review): the previous filter searched the comma-joined Neighbourhood
# column; the city name is presumably carried by the Borough values
# (e.g. "Downtown Toronto") -- confirm against the data.
df_toronto = df[df["Borough"].str.contains("Toronto")]

In [None]:
# Install the mapping (folium) and geocoding (geopy) packages from conda-forge.
!conda install -c conda-forge folium=0.5.0 --yes 
!conda install -c conda-forge geopy --yes # this line can be commented out if geopy is already installed (e.g. from the Foursquare API lab)

import folium # map rendering
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

In [None]:
# Geocode the city to obtain the coordinates on which the map is centred.
address = 'Toronto, CA'

geolocator = Nominatim(user_agent="toronto_explorer")
location = geolocator.geocode(address)
# geocode() yields None when the service finds no match; fail with a clear
# message instead of an AttributeError on the attribute access below.
if location is None:
    raise ValueError('Could not geocode address: {!r}'.format(address))
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Toronto are {}, {}.'.format(latitude, longitude))

map_toronto = folium.Map(location=[latitude, longitude], zoom_start=11)

# add one circle marker per row of df_toronto, labelled with its neighbourhood(s)
for lat, lng, label in zip(df_toronto['Latitude'], df_toronto['Longitude'], df_toronto['Neighbourhood']):
    # parse_html=True is set on the popup so the label text is not treated as markup
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_toronto)

# last expression of the cell: renders the map in the notebook
map_toronto

In [None]:
from sklearn.cluster import KMeans

In [None]:
# set number of clusters
# NOTE(review): with a single cluster every row receives label 0; raise this
# value to obtain an actual segmentation.
kclusters = 1

# Cluster on the geographic coordinates only. KMeans needs numeric features,
# and dropping just 'Neighbourhood' left the text columns 'Postcode' and
# 'Borough' in the frame (the positional axis argument to drop() is also
# rejected by recent pandas releases).
toronto_grouped_clustering = df[['Latitude', 'Longitude']]

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(toronto_grouped_clustering)

# check cluster labels generated for the first ten rows of the dataframe
kmeans.labels_[0:10]