In [12]:
import pandas as pd
import numpy as np
from geopy.geocoders import Nominatim
import folium
import matplotlib.cm as cm
import matplotlib.colors as colors

# Load the table that part 2 of the capstone saved to CSV.

df = pd.read_csv('Week3_Capstone_part2.csv')
# NOTE(review): a bare expression is only rendered when it is the last
# statement of a cell, so the mid-cell previews below produce no output
# unless this code is split into separate cells.
df.head()

# Keep only the rows whose Borough name contains 'Toronto'.
# na=False makes a missing Borough count as "no match"; without it a NaN in
# the column puts a NaN in the mask and the boolean indexing raises.

df_Toronto = df[df['Borough'].str.contains('Toronto', na=False)]
df_Toronto = df_Toronto.reset_index(drop=True)
df_Toronto.head()

print('the Toronto dataset has',df_Toronto.shape[0], 'rows and', df_Toronto.shape[1], 'columns')

# List the distinct Borough values and how often each occurs. A few
# non-standard spellings appear here; they are folded into the main boroughs
# when the 'Label' column is created.
df_Toronto['Borough'].value_counts()

# Create a new 'Label' column holding an integer cluster id derived from
# 'Borough'. Alternate spellings share the id of the borough they belong to.
borough_to_label = {
    'Downtown Toronto': 1,
    'Central Toronto': 2,
    'West Toronto': 3,
    'East Toronto': 4,
    'East Toronto Business': 4,
    'Downtown Toronto Stn A': 1,
    'East York/East Toronto': 4,
}
labels = df_Toronto['Borough'].map(borough_to_label)

# A borough missing from the mapping would otherwise leave a non-integer
# label that only fails later, when the label is used as a colour index.
# Stop here instead and name the boroughs that still need an id.
unmapped = sorted(df_Toronto.loc[labels.isna(), 'Borough'].unique())
if unmapped:
    raise ValueError(f'No cluster label defined for borough(s): {unmapped}')

# map() yields floats whenever it has to represent a miss; every row is
# known to be mapped at this point, so the cast to int is lossless.
df_Toronto['Label'] = labels.astype(int)
df_Toronto.head()

# Look up the latitude and longitude of Toronto with the Nominatim geocoder.
# This performs a network request.

address = 'Toronto'
geolocator = Nominatim(user_agent="toronto_explorer")
location = geolocator.geocode(address)

# geocode() returns None when the query matches nothing; report that
# directly instead of failing on the attribute access below.
if location is None:
    raise RuntimeError(f'Nominatim returned no result for address {address!r}')

latitude = location.latitude
longitude = location.longitude
print(f'The geograpical coordinate of Toronto are {latitude}, {longitude}.')


# Build a folium map centred on Toronto with one circle marker per row of
# df_Toronto, coloured by the row's cluster label.

# Distinct labels in ascending order; kclusters is how many there are.
cluster_ids = sorted(df_Toronto['Label'].unique().tolist())
kclusters = len(cluster_ids)

map_toronto = folium.Map(location=[latitude, longitude], zoom_start=11)

# One hex colour per cluster, evenly spaced along matplotlib's rainbow colormap.
colors_array = cm.rainbow(np.linspace(0, 1, kclusters))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# Key the colours by label rather than indexing with `label - 1`: the result
# is the same when the labels are exactly 1..k, and it still works when the
# labels present have gaps.
cluster_color = dict(zip(cluster_ids, rainbow))

for lat, lon, borough, cluster in zip(
        df_Toronto['Latitude'],
        df_Toronto['Longitude'],
        df_Toronto['Borough'],
        df_Toronto['Label']):
    # The popup shows this row's borough (not the whole Borough column) and its cluster id.
    label = folium.Popup(str(borough) + ' Cluster ' + str(cluster), parse_html=True)
    marker_color = cluster_color[cluster]
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=marker_color,
        fill=True,
        fill_color=marker_color,
        fill_opacity=0.7).add_to(map_toronto)

# Last expression of the cell: renders the map.
map_toronto



the Toronto dataset has 39 rows and 5 columns
The geograpical coordinate of Toronto are 43.6534817, -79.3839347.
