# Clustering Transportation Availability in Mumbai

In [None]:
import pandas as pd
import numpy as np
from pandas.io.json import json_normalize
from geopy.geocoders import Nominatim
import geocoder
from bs4 import BeautifulSoup
from urllib.request import Request, urlopen
import sklearn
from sklearn.cluster import KMeans

# Gathering Neighborhood Data from Wikipedia

In [None]:
html = urlopen("https://en.wikipedia.org/wiki/Category:Neighbourhoods_in_Mumbai")
bsobj = BeautifulSoup(html,'lxml')

In [None]:
nb = []

for row in bsobj.find("div", {"class":"mw-category"}).findAll("li"):
    nb.append(row.text)
nb[0:5]

['List of neighbourhoods in Mumbai',
 'Aarey Forest',
 'Agripada',
 'Altamount Road',
 'Amboli, Mumbai']

In [None]:
df = pd.DataFrame(nb[1:], columns = ['Neighborhood'])
df.head()

Unnamed: 0,Neighborhood
0,Aarey Forest
1,Agripada
2,Altamount Road
3,"Amboli, Mumbai"
4,Amrut Nagar


In [None]:
df.shape

(134, 1)

In [None]:
def getlatlong(nb_name):
    ll = None
    while(ll is None):
        g = geocoder.arcgis('{}, Maharashtra, India'.format(nb_name))
        ll = g.latlng
        return ll

In [None]:
ll = [getlatlong(nb) for nb in df['Neighborhood'].tolist()]

# Assigning Latitude and Longitude Values to the neighborhoods

In [None]:
df_ll = pd.DataFrame(ll, columns=['Latitude', 'Longitude'])
df['Latitude'] = df_ll['Latitude']
df['Longitude'] = df_ll['Longitude']
df.head(7)

Unnamed: 0,Neighborhood,Latitude,Longitude
0,Aarey Forest,17.6636,75.89782
1,Agripada,18.97628,72.82615
2,Altamount Road,18.964334,72.807842
3,"Amboli, Mumbai",19.12906,72.84644
4,Amrut Nagar,18.9938,73.9926
5,Antop Hill,19.02614,72.86645
6,Anushakti Nagar,19.04283,72.92734


In [None]:
#checking for null values in Dataset
df.isna().sum()

Neighborhood    0
Latitude        0
Longitude       0
dtype: int64

# Using Foursquare API to extract dataset

In [None]:
client_id = 'Y4WOPW5U1DAAEODI3EXPAMHK3ZQ3YRYPGAP2DCEN4OPIAFH1'
client_secret = 'DGEQIS2EMJTCFSDWZVHWRVTNN05TACMXMER5GQRNJFSUPXUR'
ver = '20210501'
lim = 100

In [None]:
import requests
radius = 1000

venues = []

for lat, long, neighborhood in zip(df['Latitude'], df['Longitude'], df['Neighborhood']):

    # create the API request URL
    url = "https://api.foursquare.com/v2/venues/explore?client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}".format(client_id,client_secret,ver,lat,long,radius,lim)

venues = []

for lat, long, neighborhood in zip(df['Latitude'], df['Longitude'], df['Neighborhood']):

    # create the API request URL
    url = "https://api.foursquare.com/v2/venues/explore?client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}".format(client_id,client_secret,ver,lat,long,radius,lim)

    # make the GET request
    results = requests.get(url).json()['response']['groups'][0]['items']

    # return only relevant information for each nearby venue
    for venue in results:
        venues.append((
            neighborhood,
            lat,
            long,
            venue['venue']['name'],
            venue['venue']['location']['lat'],
            venue['venue']['location']['lng'],
            venue['venue']['categories'][0]['name']))

    # make the GET request
    results = requests.get(url).json()["response"]['groups'][0]['items']

    # return only relevant information for each nearby venue
    for venue in results:
        venues.append((
            neighborhood,
            lat,
            long,
            venue['venue']['name'],
            venue['venue']['location']['lat'],
            venue['venue']['location']['lng'],
            venue['venue']['categories'][0]['name']))

In [None]:
df_venue = pd.DataFrame(venues)
df_venue.columns=['Neighborhood','Latitude','Longitude','VenueName','VenueLat','VenueLng','VenueCategory']
print(df_venue.shape)
df_venue.head(40)

(9194, 7)


Unnamed: 0,Neighborhood,Latitude,Longitude,VenueName,VenueLat,VenueLng,VenueCategory
0,Aarey Forest,17.6636,75.89782,Hotel Kamat,17.662984,75.901033,Indian Restaurant
1,Aarey Forest,17.6636,75.89782,cafe chokolade,17.6671,75.900976,Café
2,Aarey Forest,17.6636,75.89782,Cafe Cloud,17.661355,75.906268,Snack Place
3,Aarey Forest,17.6636,75.89782,Solapur Railway Station,17.664291,75.893319,Train Station
4,Aarey Forest,17.6636,75.89782,Hotel Kamat,17.662984,75.901033,Indian Restaurant
5,Aarey Forest,17.6636,75.89782,cafe chokolade,17.6671,75.900976,Café
6,Aarey Forest,17.6636,75.89782,Cafe Cloud,17.661355,75.906268,Snack Place
7,Aarey Forest,17.6636,75.89782,Solapur Railway Station,17.664291,75.893319,Train Station
8,Agripada,18.97628,72.82615,Celejor,18.975844,72.823679,Bakery
9,Agripada,18.97628,72.82615,Tote On The Turf,18.980266,72.820294,Nightclub


# Using one-hot encoding to quantify availabilty of venues

In [None]:
df_oh = pd.get_dummies(df_venue[['VenueCategory']], prefix="", prefix_sep="")
for col in df_oh.columns.values.tolist():
    print(col)
    df_oh[col] = df_oh[col].astype('float')
    print(df_oh[col].dtype)
    print('---------------------------')

ATM
float64
---------------------------
Airport
float64
---------------------------
American Restaurant
float64
---------------------------
Antique Shop
float64
---------------------------
Aquarium
float64
---------------------------
Arcade
float64
---------------------------
Art Gallery
float64
---------------------------
Arts & Crafts Store
float64
---------------------------
Asian Restaurant
float64
---------------------------
Athletics & Sports
float64
---------------------------
Australian Restaurant
float64
---------------------------
Automotive Shop
float64
---------------------------
BBQ Joint
float64
---------------------------
Bagel Shop
float64
---------------------------
Bakery
float64
---------------------------
Bank
float64
---------------------------
Bar
float64
---------------------------
Basketball Court
float64
---------------------------
Beach
float64
---------------------------
Bed & Breakfast
float64
---------------------------
Beer Bar
float64
--------------------

float64
---------------------------
Punjabi Restaurant
float64
---------------------------
Racetrack
float64
---------------------------
Recreation Center
float64
---------------------------
Residential Building (Apartment / Condo)
float64
---------------------------
Resort
float64
---------------------------
Restaurant
float64
---------------------------
Road
float64
---------------------------
Roof Deck
float64
---------------------------
Salad Place
float64
---------------------------
Salon / Barbershop
float64
---------------------------
Sandwich Place
float64
---------------------------
Scenic Lookout
float64
---------------------------
Seafood Restaurant
float64
---------------------------
Shawarma Place
float64
---------------------------
Shoe Store
float64
---------------------------
Shop & Service
float64
---------------------------
Shopping Mall
float64
---------------------------
Skate Park
float64
---------------------------
Skating Rink
float64
---------------------------


In [None]:
#making neighborhood the first column
df_oh['Neighborhood'] = df_venue['Neighborhood']
fix_cols = [df_oh.columns[-1]] + list(df_oh.columns[:-1])
df_oh = df_oh[fix_cols]
print(df_oh.shape)
df_oh.head(10)

(9194, 231)


Unnamed: 0,Neighborhood,Zoo,ATM,Airport,American Restaurant,Antique Shop,Aquarium,Arcade,Art Gallery,Arts & Crafts Store,...,Tree,Vegetarian / Vegan Restaurant,Video Game Store,Waterfront,Whisky Bar,Wine Bar,Wine Shop,Wings Joint,Women's Store,Yoga Studio
0,Aarey Forest,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,Aarey Forest,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,Aarey Forest,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,Aarey Forest,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,Aarey Forest,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,Aarey Forest,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,Aarey Forest,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7,Aarey Forest,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
8,Agripada,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,Agripada,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [None]:
dff = df_oh.groupby('Neighborhood').sum().reset_index()
dff.head(20)

Unnamed: 0,Neighborhood,Zoo,ATM,Airport,American Restaurant,Antique Shop,Aquarium,Arcade,Art Gallery,Arts & Crafts Store,...,Tree,Vegetarian / Vegan Restaurant,Video Game Store,Waterfront,Whisky Bar,Wine Bar,Wine Shop,Wings Joint,Women's Store,Yoga Studio
0,Aarey Forest,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,Agripada,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,Altamount Road,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,...,0.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0
3,"Amboli, Mumbai",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,Antop Hill,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,Anushakti Nagar,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,Asalfa,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7,Badhwar Park,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
8,Baiganwadi,0.0,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,Ballard Estate,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [None]:
dff.dropna(inplace=True)
dff.shape

(125, 231)

# Exploring the number of different amenities

In [None]:
len(dff[dff['Airport'] > 0])

1

In [None]:
len(dff[dff['Train Station']>0])

46

In [None]:
len(dff[dff['Bus Station']>0])

14

In [None]:
len(dff[dff['Bus Line']>0])

1

In [None]:
len(dff[dff['Light Rail Station']>0])

2

In [None]:
len(dff[dff['Metro Station']>0])

1

In [None]:
len(dff[dff['Train']>0])

2

In [None]:
len(dff[dff['Harbor / Marina']>0])

12

In [None]:
len(dff[dff['Boat or Ferry']>0])

4

In [None]:
len(dff[dff['Bike Rental / Bike Share']>0])

1

In [None]:
len(dff[dff['Travel & Transport']>0])

1

In [None]:
len(dff[dff['Gym']>0])

32

In [None]:
len(dff[dff['ATM']>0])

2

# Creating columns for clustering metrics

In [None]:
total = []
transport = []
t = dff.sum(axis=1)
total=t.to_numpy()
for i in dff.index:
    tt = dff['Airport'].loc[i] + dff['Train Station'].loc[i] + dff['Bus Station'].loc[i] + dff['Bus Line'].loc[i] + dff['Light Rail Station'].loc[i] + dff['Metro Station'].loc[i] + dff['Train'].loc[i] + dff['Harbor / Marina'].loc[i] + dff['Boat or Ferry'].loc[i] + dff['Bike Rental / Bike Share'].loc[i] + dff['Travel & Transport'].loc[i]
    transport.append(tt)
print(total[0:10])
print("---------------------------------------------------")
transport[0:10]

[  8.  58. 130.  56.  24.  14.  42. 200.  12. 138.]
---------------------------------------------------


[2.0, 2.0, 2.0, 2.0, 2.0, 4.0, 2.0, 0.0, 2.0, 4.0]

Here, we have made two arrays:<br> 1) total: containing total number of amenities in a neighborhood<br>
2) transport: number of public transportation infrastructure available.

In [None]:
dffs = pd.DataFrame(dff['Neighborhood'])
dffs['Total Amenities'] = total
dffs['Transport Amenities'] = transport
dffs.head()

Unnamed: 0,Neighborhood,Total Amenities,Transport Amenities
0,Aarey Forest,8.0,2.0
1,Agripada,58.0,2.0
2,Altamount Road,130.0,2.0
3,"Amboli, Mumbai",56.0,2.0
4,Antop Hill,24.0,2.0


We will be using two metrics to classify the neighborhoods:<br>
1) Total number of transport amenities<br>
2) Availability Factor, which is equal to the number of transport amenities divided by the number of total amenities in the neighborhood.<br>

In [None]:
# calculating availability factor
af=[]
for i in dffs.index:
    afv = dffs['Transport Amenities'].loc[i]/dffs['Total Amenities'].loc[i]
    af.append(afv)
af[0:5]

[0.25,
 0.034482758620689655,
 0.015384615384615385,
 0.03571428571428571,
 0.08333333333333333]

In [None]:
dffs['Availability Factor'] = af
dffs.head()

Unnamed: 0,Neighborhood,Total Amenities,Transport Amenities,Availability Factor
0,Aarey Forest,8.0,2.0,0.25
1,Agripada,58.0,2.0,0.034483
2,Altamount Road,130.0,2.0,0.015385
3,"Amboli, Mumbai",56.0,2.0,0.035714
4,Antop Hill,24.0,2.0,0.083333


In [None]:
dffs.describe()

Unnamed: 0,Total Amenities,Transport Amenities,Availability Factor
count,125.0,125.0,125.0
mean,73.552,1.824,0.053866
std,57.127963,2.300295,0.126837
min,2.0,0.0,0.0
25%,24.0,0.0,0.0
50%,62.0,2.0,0.013158
75%,110.0,4.0,0.046512
max,200.0,14.0,1.0


# K Means Clustering on the basis of Transportational Amenities

In [None]:
x1 = dffs[['Transport Amenities']]
km = KMeans(n_clusters=5, init='k-means++', random_state=1)
y = km.fit_predict(x1)
print(km.labels_)

[0 0 0 0 0 1 0 2 0 1 0 2 2 0 1 2 4 0 0 4 1 0 2 0 2 4 2 1 2 2 0 2 0 0 2 1 2
 2 4 2 2 0 2 1 1 0 4 2 0 2 0 1 2 0 2 2 2 2 2 2 2 2 0 0 4 0 1 1 0 0 2 4 2 2
 0 1 1 2 2 3 2 1 1 2 1 2 2 2 2 2 2 2 0 2 2 2 0 2 2 2 1 1 4 2 2 0 4 2 2 2 2
 2 0 1 4 1 2 2 0 2 2 2 0 2 4]


In [None]:
dffs['Cluster 1'] = km.labels_
y = dffs['Cluster 1']
y.head()

0    0
1    0
2    0
3    0
4    0
Name: Cluster 1, dtype: int32

In [None]:
#calculating accuracy
from sklearn import metrics
from sklearn.metrics import pairwise_distances
print("Silhouette Score for Cluster 1:", metrics.silhouette_score(x1,y))

Silhouette Score for Cluster 1: 0.9800000000000001


# Creating new dataframe

In [None]:
dffs = dffs.join(df.set_index('Neighborhood'), on = 'Neighborhood')
dffs.sort_values('Cluster 1', inplace=True)
dffs.reset_index(inplace=True)
dffs.head()

Unnamed: 0,index,Neighborhood,Total Amenities,Transport Amenities,Availability Factor,Cluster 1,Latitude,Longitude
0,0,Aarey Forest,8.0,2.0,0.25,0,17.6636,75.89782
1,32,Dadar,2.0,2.0,1.0,0,18.79968,73.04897
2,33,Dadar Parsi Colony,134.0,2.0,0.014925,0,19.01914,72.85126
3,41,Dongri,12.0,2.0,0.166667,0,19.28333,72.78333
4,45,Four Bungalows,124.0,2.0,0.016129,0,19.12638,72.8242


In [None]:
dffs.drop('index', axis=1, inplace = True)
dffs.head()

Unnamed: 0,Neighborhood,Total Amenities,Transport Amenities,Availability Factor,Cluster 1,Latitude,Longitude
0,Aarey Forest,8.0,2.0,0.25,0,17.6636,75.89782
1,Dadar,2.0,2.0,1.0,0,18.79968,73.04897
2,Dadar Parsi Colony,134.0,2.0,0.014925,0,19.01914,72.85126
3,Dongri,12.0,2.0,0.166667,0,19.28333,72.78333
4,Four Bungalows,124.0,2.0,0.016129,0,19.12638,72.8242


In [None]:
td = dffs[['Neighborhood','Transport Amenities','Cluster 1']].sort_values('Transport Amenities',ascending=False).reset_index()
td.drop('index', axis=1, inplace=True)
td.head()

Unnamed: 0,Neighborhood,Transport Amenities,Cluster 1
0,"Mandvi, Mumbai",14.0,3
1,Lohar Chawl,8.0,4
2,Zaveri Bazaar,6.0,4
3,Byculla,6.0,4
4,Churchgate,6.0,4


# Exploring Cluster 1

### 0-Cluster

In [None]:
td[td['Cluster 1']==0]

Unnamed: 0,Neighborhood,Transport Amenities,Cluster 1
32,Dadar,2.0,0
33,Aarey Forest,2.0,0
34,Antop Hill,2.0,0
35,Tardeo,2.0,0
36,Baiganwadi,2.0,0
37,"Mahalaxmi, Mumbai",2.0,0
38,"Land's End, Bandra",2.0,0
39,Nepean Sea Road,2.0,0
40,Currey Road railway station,2.0,0
41,Kherwadi,2.0,0


We can see cluster 0 has areas with 2 pulic transportation facilities

### 1-Cluster

In [None]:
td[td['Cluster 1']==1]

Unnamed: 0,Neighborhood,Transport Amenities,Cluster 1
12,Pydhonie,4.0,1
13,"Matunga Road, Mumbai",4.0,1
14,Bori Bunder,4.0,1
15,Kopar Road,4.0,1
16,"Maheshwari Udyan, Mumbai",4.0,1
17,Mahim,4.0,1
18,Anushakti Nagar,4.0,1
19,Princess Street (Mumbai),4.0,1
20,Hindu Colony,4.0,1
21,Marine Lines,4.0,1


We can see 1-Cluster has neighborhoods with 4 public transportation amenities

### 2-Cluster

In [None]:
td[td['Cluster 1']==2]

Unnamed: 0,Neighborhood,Transport Amenities,Cluster 1
65,D.N. Nagar,0.0,2
66,Kala Ghoda,0.0,2
67,Kajuwadi,0.0,2
68,Dagdi Chawl,0.0,2
69,"Hiranandani Gardens, Mumbai",0.0,2
70,Bangur Nagar,0.0,2
71,Gorai,0.0,2
72,Manori,0.0,2
73,Kamathipura,0.0,2
74,Gokuldham,0.0,2


We can see 2-Cluster has neighborhoods with no public transportation amenities

### 3-Cluster

In [None]:
td[td['Cluster 1']==3]

Unnamed: 0,Neighborhood,Transport Amenities,Cluster 1
0,"Mandvi, Mumbai",14.0,3


We can see 3-Cluster has a neighborhood with 14 public transportation amenities

### 4-Cluster

In [None]:
td[td['Cluster 1']==4]

Unnamed: 0,Neighborhood,Transport Amenities,Cluster 1
1,Lohar Chawl,8.0,4
2,Zaveri Bazaar,6.0,4
3,Byculla,6.0,4
4,Churchgate,6.0,4
5,Royal Opera House (Mumbai),6.0,4
6,Dharavi,6.0,4
7,"Chandanwadi, Mumbai",6.0,4
8,Sewri,6.0,4
9,Ghodapdeo,6.0,4
10,Khotachiwadi,6.0,4


We can see 4-Cluster has neighborhoods with 6-8 public transportation amenities.

# K Means Clustering on the basis of availability factor

In [None]:
x2=dffs[['Availability Factor']]
km = KMeans(n_clusters=3, init='k-means++', random_state=1)
y = km.fit_predict(x2)
km.labels_

array([2, 1, 0, 2, 0, 0, 0, 0, 2, 0, 2, 0, 0, 0, 2, 0, 0, 0, 0, 2, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 2, 0, 0, 2, 0, 0, 0,
       0, 0, 2, 2, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 2, 2, 0, 0, 0, 2, 1, 2, 0, 0, 0, 0])

In [None]:
dffs['Cluster 2'] = km.labels_
y = dffs['Cluster 2']
y.head()

0    2
1    1
2    0
3    2
4    0
Name: Cluster 2, dtype: int32

In [None]:
from sklearn.metrics import pairwise_distances
from sklearn import metrics
print("Silhouette Score for Cluster 2 is:", metrics.silhouette_score(x2,y))

Silhouette Score for Cluster 2 is: 0.8128126573589791


# Updating Dataframe

In [None]:
dffs = dffs[['Neighborhood','Latitude','Longitude','Total Amenities','Transport Amenities','Availability Factor','Cluster 1','Cluster 2']]
dffs.head()

Unnamed: 0,Neighborhood,Latitude,Longitude,Total Amenities,Transport Amenities,Availability Factor,Cluster 1,Cluster 2
0,Aarey Forest,17.6636,75.89782,8.0,2.0,0.25,0,2
1,Dadar,18.79968,73.04897,2.0,2.0,1.0,0,1
2,Dadar Parsi Colony,19.01914,72.85126,134.0,2.0,0.014925,0,0
3,Dongri,19.28333,72.78333,12.0,2.0,0.166667,0,2
4,Four Bungalows,19.12638,72.8242,124.0,2.0,0.016129,0,0


# Exploration of Cluster-2

In [None]:
td = dffs[['Neighborhood','Availability Factor','Cluster 2']].sort_values('Availability Factor', ascending=False).reset_index()
td.drop('index',axis=1,inplace=True)
td.head()

Unnamed: 0,Neighborhood,Availability Factor,Cluster 2
0,Dadar,1.0,1
1,Sewri,0.75,1
2,Kherwadi,0.333333,2
3,Anushakti Nagar,0.285714,2
4,Ferry Wharf,0.285714,2


### 0-Cluster

In [None]:
td[td['Cluster 2']==0]

Unnamed: 0,Neighborhood,Availability Factor,Cluster 2
18,Thakkar Bappa Colony,0.105263,0
19,Lalbaug,0.100000,0
20,Dharavi,0.096774,0
21,Antop Hill,0.083333,0
22,Thakurdwar,0.062500,0
...,...,...,...
120,"Shastri Nagar, Goregaon",0.000000,0
121,Shivaji Park,0.000000,0
122,Shivaji Park Residential Zone,0.000000,0
123,Prabhadevi,0.000000,0


0-Cluster comprises of neighborhoods that have an availability factor in the range 0-0.1053

### 1-Cluster

In [None]:
td[td['Cluster 2']==1]

Unnamed: 0,Neighborhood,Availability Factor,Cluster 2
0,Dadar,1.0,1
1,Sewri,0.75,1


1-Cluster comprises of neighborhoods that have an availability factor in the range 0.75-1.0

### 2-Cluster

In [None]:
td[td['Cluster 2']==2]

Unnamed: 0,Neighborhood,Availability Factor,Cluster 2
2,Kherwadi,0.333333,2
3,Anushakti Nagar,0.285714,2
4,Ferry Wharf,0.285714,2
5,"Mandvi, Mumbai",0.259259,2
6,Aarey Forest,0.25,2
7,Koliwada,0.2,2
8,Lallubhai Compound,0.2,2
9,Cotton Green,0.181818,2
10,Dava Bazaar,0.181818,2
11,Baiganwadi,0.166667,2


2-Cluster comprises of neighborhoods that have an availability factor in the range 0.1111-0.3333

# Visualizing Cluster 1

In [None]:
import folium
lat = 19.7515
lng = 75.7139
map_final = folium.Map(location = [lat,lng], zoom_start=7)

# colors
colors = ['yellow','orange','lawngreen','red','sienna']

for lat,lng,nb,clust in zip(dffs['Latitude'],dffs['Longitude'],dffs['Neighborhood'],dffs['Cluster 1']):
    lab = folium.Popup(str(nb) + 'Cluster:' + str(clust), parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=8,
        popup=lab,
        color=colors[clust],
        fill=True,
        fill_color=colors[clust],
        fill_opacity=0.5).add_to(map_final)
map_final

# Visualizing Cluster 2

In [None]:
import folium
lat = 19.7515
lng = 75.7139
map_final = folium.Map(location = [lat,lng], zoom_start=7)

# colors
colors = ['royalblue','indigo','navy']

for lat,lng,nb,clust in zip(dffs['Latitude'],dffs['Longitude'],dffs['Neighborhood'],dffs['Cluster 2']):
    lab = folium.Popup(str(nb) + 'Cluster:' + str(clust), parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=8,
        popup=lab,
        color=colors[clust],
        fill=True,
        fill_color=colors[clust],
        fill_opacity=0.5).add_to(map_final)
map_final