## Import necessary libraries

In [26]:
import pandas as pd
import numpy as np
import requests
from bs4 import BeautifulSoup as bs
import geocoder
from sklearn.cluster import KMeans
import folium

## Copy the url to a varible

In [27]:
url="https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M"

## Loading the html into DataFrame

In [28]:
df=pd.read_html(url)

df=df[0]

df=df[df.Borough != 'Not assigned']

df.head()

Unnamed: 0,Postal Code,Borough,Neighborhood
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Regent Park, Harbourfront"
5,M6A,North York,"Lawrence Manor, Lawrence Heights"
6,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"


## DataFrame shape 

In [29]:
df.shape

(103, 3)

### Borough DataFrame with Latitudes and Longitudes

In [31]:
postal_df=pd.read_csv("https://cocl.us/Geospatial_data")

df=df.merge(postal_df,on='Postal Code')

df.head()

Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636
3,M6A,North York,"Lawrence Manor, Lawrence Heights",43.718518,-79.464763
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494


In [32]:
world_map=folium.Map([df.iloc[0,-2],df.iloc[0,-1]],zoom_start=10)

In [33]:
from folium.plugins import MarkerCluster

marker_cluster=marker_cluster = MarkerCluster(name='1000 clustered icons',overlay=True,control=False,icon_create_function=None)

for x in zip(df.Latitude,df.Longitude,df.Borough):
    folium.CircleMarker([x[0],x[1]],popup=x[2],fill_color='green',fill_opacity=0.4).add_to(marker_cluster)
marker_cluster.add_to(world_map)

<folium.plugins.marker_cluster.MarkerCluster at 0x11b4e830>

## Cluster_Marker map of toronto without KMeans Clusters

In [34]:
toronto_map=world_map
toronto_map

## Foursquare Stuff

In [35]:
clientid='DNHZPDGXU4AS4MYZ4PLRHULAMRXWEYUOB4OYSJM4D0V0IVEQ'
clientsecret='J2EVYDRTIYIXWXOMFJK0C4V5NMWEZRSMRMDNXWWN4MO21V23'
version=20200628
radius=500
limit=100

## Funtion to get venues for each neighborhood

In [49]:
common_venues={}
def get_venues(latitude,longitude):
    
    ll=latitude,longitude

    url ='https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(clientid,clientsecret,version,ll[0],ll[1],radius,limit)
    venue_df=pd.json_normalize(requests.get(url).json()['response']['groups'][0]['items'])
    
    
    return get_features(venue_df)


## Function to get Required Features from Neighborhood Venues

In [50]:
features= ['venue.name', 'venue.categories', 'venue.location.lat', 'venue.location.lng']

def get_features(df):
    
    
    if df.values.tolist()!=[]:
        df=df[features]
        
        df.columns=['venue_name','venue_category','venue_lat','venue_lng']
        df.loc[:,'venue_category']=df.loc[:,'venue_category'].map(lambda x : x[0]['name'])

        return pd.get_dummies(df.venue_category).sum().sort_values(ascending=False)[:2].index.tolist()

    elif df.values.tolist()==[]:
        return [0,0]
    else:
        return 1
    

## Analysis and getting  Top 2 Frequent Venues for each Neighborhood

In [51]:
df.loc[:,'First_common_category']=0
df.loc[:,'Second_common_category']=0

In [52]:
for x in range(len(df)):
    ll=df.Latitude[x],df.Longitude[x]
    url ='https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(clientid,clientsecret,version,ll[0],ll[1],radius,limit)

    ls=get_features(pd.json_normalize(requests.get(url).json()['response']['groups'][0]['items']))
    
    if len(ls)==2:
        df.iloc[x,5]=ls[0]
        df.iloc[x,6]=ls[1]
    
   
    
    elif len(ls)==1:
        df.iloc[x,5]=ls[0]


    
    

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[item] = s


# KMeans Clustering

In [53]:
n_clusters=5
kmc=KMeans(n_clusters=n_clusters,random_state=0)

model=kmc.fit(pd.get_dummies(df))

#### Model Labels

In [41]:
df.loc[:,'Cluster_labels']=model.labels_

model.labels_

array([1, 1, 0, 1, 0, 4, 3, 1, 2, 0, 1, 4, 3, 1, 2, 0, 2, 4, 3, 2, 0, 2,
       3, 0, 0, 0, 3, 1, 1, 2, 0, 2, 3, 1, 1, 2, 0, 2, 3, 1, 1, 2, 0, 2,
       3, 1, 1, 2, 0, 1, 1, 3, 1, 1, 2, 1, 2, 1, 3, 1, 1, 2, 2, 2, 2, 3,
       1, 2, 2, 2, 4, 3, 1, 2, 2, 2, 2, 4, 3, 2, 0, 0, 3, 2, 0, 3, 2, 0,
       4, 4, 3, 0, 0, 4, 4, 3, 0, 0, 4, 0, 2, 4, 4])

In [42]:
import matplotlib.cm as cm
import matplotlib.colors as colors

#### Cluster Colors

In [43]:
colors_array=cm.viridis(np.linspace(0,1,n_clusters))
colors=[colors.rgb2hex(x) for x in colors_array]

colors

['#440154', '#3b528b', '#21918c', '#5ec962', '#fde725']

### Dataframe for Map

In [55]:
latlng_df=df.iloc[:,[3,4,7]]
col_df=pd.DataFrame([x for x in zip(range(5),colors)])
                     
map_df=pd.merge(latlng_df,col_df,left_on=latlng_df.loc[:,'Cluster_labels'],right_on=col_df.iloc[:,0])

map_df=map_df.rename({1:'color'},axis=1)

map_df=map_df.loc[:,['Latitude','Longitude','color']]


map_df

Unnamed: 0,Latitude,Longitude,color
0,43.753259,-79.329656,#3b528b
1,43.725882,-79.315572,#3b528b
2,43.718518,-79.464763,#3b528b
3,43.745906,-79.352188,#3b528b
4,43.709577,-79.445073,#3b528b
...,...,...,...
98,43.636966,-79.615819,#21918c
99,43.704324,-79.388790,#21918c
100,43.689574,-79.383160,#21918c
101,43.686412,-79.400049,#21918c


In [56]:
cluster_map=folium.Map([df.Latitude[0],df.Longitude[0]],zoom_start=11)

### Adding markers to map with Color for each cluster

In [57]:
for x in zip(map_df.Latitude,map_df.Longitude,map_df.color,df.Cluster_labels):
    folium.CircleMarker([x[0],x[1]],color=x[2],fill_color=x[2],fill_opacity=0.7,popup=x[3]).add_to(cluster_map)

## Map of Toronto with KMeans Clusters

In [58]:
cluster_map