## Importing CSV for longitude and latitude values for neighborhoods in Toronto

In [35]:
latlon = pd.read_csv('https://cocl.us/Geospatial_data')
latlon.head()

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


In [40]:
#Merging the two dataframes and unifying based on Postal Code
df2 = pd.merge(df, latlon, on='Postal Code')
df2.head()

Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636
3,M6A,North York,"Lawrence Manor, Lawrence Heights",43.718518,-79.464763
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494


In [52]:
print('The dataframe has {} boroughs and {} neighborhoods.'.format(
        len(df2['Borough'].unique()),
        df2.shape[0]
    )
)

The dataframe has 10 boroughs and 103 neighborhoods.


In [58]:
df2.Borough.unique()

array(['North York', 'Downtown Toronto', 'Etobicoke', 'Scarborough',
       'East York', 'York', 'East Toronto', 'West Toronto',
       'Central Toronto', 'Mississauga'], dtype=object)

### Combining boroughs containing 'Toronto' as a singluar borough and removing Mississuaga to amalgamate and match the offical Toronto’s six boroughs.

In [62]:
df2 = df2[df2.Borough != 'Mississauga']
df2.loc[df2['Borough'].str.contains('Toronto'), 'Borough'] = 'Toronto'
df2.Borough.unique()

array(['North York', 'Toronto', 'Etobicoke', 'Scarborough', 'East York',
       'York'], dtype=object)

# Map of Toronto

In [73]:
toronto_map = folium.Map(location=[43.6532,-79.3832],zoom_start=10)

for lat,lng,borough,neighborhood in zip(df2['Latitude'],df2['Longitude'],df2['Borough'],df2['Neighborhood']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
    [lat,lng],
    radius=5,
    popup=label,
    color='blue',
    fill=True,
    fill_color='#ADD8E6',
    fill_opacity=0.5,
    parse_html=False).add_to(toronto_map)
toronto_map

## K means clustering of Toronto neighborhood

In [77]:
k=6 #6ix
toronto_clusters = df2.drop(['Postal Code','Borough','Neighborhood'],1)
kmeans = KMeans(n_clusters = k,random_state=0).fit(toronto_clusters)
kmeans.labels_
df2.insert(0, 'Cluster Labels', kmeans.labels_)

ValueError: cannot insert Cluster Labels, already exists

# Toronto clustered based on Neighborhoods

In [79]:
# create map
map_clusters = folium.Map(location=[43.6532,-79.3832],zoom_start=10)

# set color scheme for the clusters
x = np.arange(k)
ys = [i + x + (i*x)**2 for i in range(k)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, neighbourhood, cluster in zip(df2['Latitude'], df2['Longitude'], df2['Neighborhood'], df2['Cluster Labels']):
    label = folium.Popup(' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters