<h1>Importing Libraries</h1>

In [None]:
import numpy as np
import pandas as pd # library for data analsysis
pd.set_option("display.max_columns", None)
pd.set_option("display.max_rows", None)

import json # library to handle JSON files

!conda install -c conda-forge geopy --yes
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

import requests # library to handle requests
from bs4 import BeautifulSoup # library to parse HTML and XML documents

from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans

!conda install -c conda-forge folium=0.5.0 --yes
import folium # map rendering library

print("Libraries imported.")

<h1>Scrapping web data</h1>

In [3]:
data = requests.get("https://en.wikipedia.org/wiki/Category:Suburbs_of_Mumbai").text
soup = BeautifulSoup(data, 'html.parser')

In [4]:
neighborhoodList = []

In [5]:
for row in soup.find_all("div", class_="mw-category")[0].findAll("li"):
    neighborhoodList.append(row.text)

In [6]:
Ml_df = pd.DataFrame({"Neighborhood": neighborhoodList})

Ml_df.head()

Unnamed: 0,Neighborhood
0,Andheri
1,Anushakti Nagar
2,Baiganwadi
3,Bandra
4,Bhandup


In [7]:
Ml_df.shape

(42, 1)

<h1>Get the geographical coordinates</h1>

In [8]:
!pip install geocoder
import geocoder
def get_latlng(neighborhood):
    # initialize your variable to None
    lat_lng_coords = None
    # loop until you get the coordinates
    while(lat_lng_coords is None):
        g = geocoder.arcgis('{}, Mumbai, India'.format(neighborhood))
        lat_lng_coords = g.latlng
    return lat_lng_coords

Collecting geocoder
[?25l  Downloading https://files.pythonhosted.org/packages/4f/6b/13166c909ad2f2d76b929a4227c952630ebaf0d729f6317eb09cbceccbab/geocoder-1.38.1-py2.py3-none-any.whl (98kB)
[K     |████████████████████████████████| 102kB 9.5MB/s ta 0:00:011
Collecting ratelim (from geocoder)
  Downloading https://files.pythonhosted.org/packages/f2/98/7e6d147fd16a10a5f821db6e25f192265d6ecca3d82957a4fdd592cad49c/ratelim-0.1.6-py2.py3-none-any.whl
Installing collected packages: ratelim, geocoder
Successfully installed geocoder-1.38.1 ratelim-0.1.6


In [11]:
coords = [ get_latlng(neighborhood) for neighborhood in Ml_df["Neighborhood"].tolist() ]

In [12]:
coords

[[19.118459378296492, 72.84176321065843],
 [19.042830000000038, 72.92734000000007],
 [19.062940000000026, 72.92663000000005],
 [19.054370000000063, 72.84017000000006],
 [19.145560000000046, 72.94856000000004],
 [19.229360000000042, 72.85751000000005],
 [19.208660000000066, 72.82612000000006],
 [19.06218000000007, 72.90241000000003],
 [19.250030000000038, 72.85907000000003],
 [19.224690000000066, 72.86605000000003],
 [19.212750000000028, 73.08324000000005],
 [19.00534722389655, 72.85580272012932],
 [19.08652321008152, 72.90900774216628],
 [19.164550000000077, 72.84946000000008],
 [18.959290000000067, 72.83108000000004],
 [19.137920000000065, 72.84941000000003],
 [19.014920000000075, 72.84522000000004],
 [18.953937419095155, 72.82036732944775],
 [19.21198153260436, 72.83757275783374],
 [19.131380000000036, 72.93568000000005],
 [19.127580000000023, 72.82539000000008],
 [19.064980000000048, 72.88069000000007],
 [19.21198153260436, 72.83757275783374],
 [19.048530000000028, 72.93222000000003

In [13]:
df_coords = pd.DataFrame(coords, columns=['Latitude', 'Longitude'])

In [14]:
Ml_df['Latitude'] = df_coords['Latitude']
Ml_df['Longitude'] = df_coords['Longitude']

In [15]:
print(Ml_df.shape)
Ml_df

(42, 3)


Unnamed: 0,Neighborhood,Latitude,Longitude
0,Andheri,19.118459,72.841763
1,Anushakti Nagar,19.04283,72.92734
2,Baiganwadi,19.06294,72.92663
3,Bandra,19.05437,72.84017
4,Bhandup,19.14556,72.94856
5,Borivali,19.22936,72.85751
6,Charkop,19.20866,72.82612
7,Chembur,19.06218,72.90241
8,Dahisar,19.25003,72.85907
9,Devipada,19.22469,72.86605


In [16]:
Ml_df.to_csv("Ml_df.csv", index=False)

<h1>Create a map of Mumbai with neighborhoods on top</h1>

In [17]:
address = 'Mumbai, India'

geolocator = Nominatim(user_agent="my-application")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Mumbai, India {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Mumbai, India 18.9387711, 72.8353355.


In [18]:
map_Ml = folium.Map(location=[latitude, longitude], zoom_start=11)

# add markers to map
for lat, lng, neighborhood in zip(Ml_df['Latitude'], Ml_df['Longitude'], Ml_df['Neighborhood']):
    label = '{}'.format(neighborhood)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7).add_to(map_Ml)  
    
map_Ml

In [19]:
map_Ml.save('map_Ml.html')

<h1>Using Foursquare to get neighborhood venues</h1>

In [20]:
CLIENT_ID = 'GYRPOICLY4F3THPAA2LIM3V4UVDC1KFZ35U3QDOR20VCPIUW' # your Foursquare ID
CLIENT_SECRET = 'KX1KY0DRXZWRAD1Y232QP11544JMFNE2IGOW4KT4TMRE5TCG' # your Foursquare Secret
VERSION = '20180605' # Foursquare API version

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: GYRPOICLY4F3THPAA2LIM3V4UVDC1KFZ35U3QDOR20VCPIUW
CLIENT_SECRET:KX1KY0DRXZWRAD1Y232QP11544JMFNE2IGOW4KT4TMRE5TCG


In [21]:
radius = 2000
LIMIT = 100

venues = []

for lat, long, neighborhood in zip(Ml_df['Latitude'], Ml_df['Longitude'], Ml_df['Neighborhood']):
    
    # create the API request URL
    url = "https://api.foursquare.com/v2/venues/explore?client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}".format(
        CLIENT_ID,
        CLIENT_SECRET,
        VERSION,
        lat,
        long,
        radius, 
        LIMIT)
    
    # make the GET request
    results = requests.get(url).json()["response"]['groups'][0]['items']
    
    # return only relevant information for each nearby venue
    for venue in results:
        venues.append((
            neighborhood,
            lat, 
            long, 
            venue['venue']['name'], 
            venue['venue']['location']['lat'], 
            venue['venue']['location']['lng'],  
            venue['venue']['categories'][0]['name']))

In [22]:
venues_df = pd.DataFrame(venues)

# define the column names
venues_df.columns = ['Neighborhood', 'Latitude', 'Longitude', 'VenueName', 'VenueLatitude', 'VenueLongitude', 'VenueCategory']

print(venues_df.shape)
venues_df.head()

(2670, 7)


Unnamed: 0,Neighborhood,Latitude,Longitude,VenueName,VenueLatitude,VenueLongitude,VenueCategory
0,Andheri,19.118459,72.841763,Merwans Cake shop,19.1193,72.845418,Bakery
1,Andheri,19.118459,72.841763,Radha Krishna Veg Restaurant,19.11513,72.84306,Indian Restaurant
2,Andheri,19.118459,72.841763,Naturals,19.111204,72.837255,Ice Cream Shop
3,Andheri,19.118459,72.841763,Temptations,19.113767,72.841337,Ice Cream Shop
4,Andheri,19.118459,72.841763,Narayan Sandwich,19.121398,72.85027,Sandwich Place


In [23]:
venues_df.groupby(["Neighborhood"]).count()

Unnamed: 0_level_0,Latitude,Longitude,VenueName,VenueLatitude,VenueLongitude,VenueCategory
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Andheri,100,100,100,100,100,100
Anushakti Nagar,16,16,16,16,16,16
Baiganwadi,8,8,8,8,8,8
Bandra,100,100,100,100,100,100
Bhandup,24,24,24,24,24,24
Borivali,100,100,100,100,100,100
Charkop,53,53,53,53,53,53
Chembur,100,100,100,100,100,100
Dahisar,66,66,66,66,66,66
Devipada,88,88,88,88,88,88


In [24]:
print('There are {} uniques categories.'.format(len(venues_df['VenueCategory'].unique())))

There are 196 uniques categories.


<h4> Let's find out unique venues</h4>

In [25]:
venues_df['VenueCategory'].unique()[:50]

array(['Bakery', 'Indian Restaurant', 'Ice Cream Shop', 'Sandwich Place',
       'Fast Food Restaurant', 'Pub', 'Falafel Restaurant', 'Juice Bar',
       'Pizza Place', 'Coffee Shop', 'Multiplex', 'Seafood Restaurant',
       'Breakfast Spot', 'Chinese Restaurant', 'Snack Place',
       'Maharashtrian Restaurant', 'Café', 'American Restaurant',
       'Cocktail Bar', 'Bar', 'BBQ Joint', 'Diner', 'Lounge',
       'Asian Restaurant', 'Electronics Store', 'Spa', 'Department Store',
       "Women's Store", 'Residential Building (Apartment / Condo)',
       'Gym / Fitness Center', 'Smoke Shop', 'Food Truck',
       'Athletics & Sports', 'Vegetarian / Vegan Restaurant', 'Park',
       'Liquor Store', 'Fish Market', 'Martial Arts Dojo', 'Tea Room',
       'Dessert Shop', 'Food', 'Plaza', 'Bus Station', 'Sports Bar',
       'Platform', 'Food & Drink Shop', 'Music Venue', 'Tunnel',
       'Fried Chicken Joint', 'Gym'], dtype=object)

In [26]:
"Neighborhood" in venues_df['VenueCategory'].unique()

True

<h1>Analysing neighborhoods</h1>

In [27]:
# one hot encoding
Ml_onehot = pd.get_dummies(venues_df[['VenueCategory']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
Ml_onehot['Neighborhoods'] = venues_df['Neighborhood'] 

# move neighborhood column to the first column
fixed_columns = [Ml_onehot.columns[-1]] + list(Ml_onehot.columns[:-1])
Ml_onehot = Ml_onehot[fixed_columns]

print(Ml_onehot.shape)
Ml_onehot.head()

(2670, 197)


Unnamed: 0,Neighborhoods,ATM,Afghan Restaurant,American Restaurant,Antique Shop,Arcade,Art Gallery,Arts & Crafts Store,Asian Restaurant,Athletics & Sports,BBQ Joint,Bagel Shop,Bakery,Bank,Bar,Basketball Court,Beach,Bed & Breakfast,Beer Garden,Big Box Store,Bike Rental / Bike Share,Bistro,Bookstore,Bowling Alley,Brazilian Restaurant,Breakfast Spot,Brewery,Bridal Shop,Building,Burger Joint,Burrito Place,Bus Station,Cafeteria,Café,Camera Store,Cheese Shop,Chinese Restaurant,Clothing Store,Cocktail Bar,Coffee Shop,College Auditorium,College Gym,Comedy Club,Comfort Food Restaurant,Concert Hall,Convenience Store,Cosmetics Shop,Coworking Space,Cricket Ground,Cupcake Shop,Dance Studio,Deli / Bodega,Department Store,Design Studio,Dessert Shop,Dhaba,Dim Sum Restaurant,Diner,Donut Shop,Electronics Store,English Restaurant,Event Space,Factory,Falafel Restaurant,Farm,Farmers Market,Fast Food Restaurant,Fish & Chips Shop,Fish Market,Flea Market,Flower Shop,Food,Food & Drink Shop,Food Court,Food Truck,French Restaurant,Fried Chicken Joint,Furniture / Home Store,Gaming Cafe,Garden,Garden Center,Gastropub,General College & University,General Entertainment,German Restaurant,Goan Restaurant,Golf Course,Gourmet Shop,Grocery Store,Gym,Gym / Fitness Center,Gym Pool,Harbor / Marina,Hawaiian Restaurant,Historic Site,History Museum,Hockey Arena,Hookah Bar,Hot Dog Joint,Hotel,Hotel Bar,Hotel Pool,Ice Cream Shop,Indian Restaurant,Indie Movie Theater,Indoor Play Area,Irani Cafe,Italian Restaurant,Japanese Restaurant,Jewelry Store,Juice Bar,Korean Restaurant,Light Rail Station,Lingerie Store,Liquor Store,Lounge,Maharashtrian Restaurant,Market,Martial Arts Dojo,Mediterranean Restaurant,Men's Store,Metro Station,Mexican Restaurant,Middle Eastern Restaurant,Miscellaneous Shop,Mobile Phone Shop,Modern European Restaurant,Molecular Gastronomy Restaurant,Mountain,Movie Theater,Mughlai Restaurant,Multicuisine Indian Restaurant,Multiplex,Music Store,Music Venue,Neighborhood,New American Restaurant,Nightclub,Noodle House,Office,Opera House,Outdoors & Recreation,Park,Performing Arts Venue,Pet Store,Pharmacy,Pizza Place,Platform,Playground,Plaza,Pool,Pub,Punjabi Restaurant,Recording Studio,Recreation Center,Residential Building (Apartment / Condo),Resort,Restaurant,Road,Roof Deck,Salad Place,Salon / Barbershop,Sandwich Place,Scenic Lookout,Sculpture Garden,Seafood Restaurant,Shoe Store,Shop & Service,Shopping Mall,Smoke Shop,Snack Place,Soccer Field,South American Restaurant,South Indian Restaurant,Spa,Sporting Goods Shop,Sports Bar,Sports Club,Stadium,Steakhouse,Supermarket,Sushi Restaurant,Tea Room,Thai Restaurant,Theater,Theme Park,Toll Booth,Toy / Game Store,Track,Trail,Train,Train Station,Tunnel,Vegetarian / Vegan Restaurant,Wine Bar,Wine Shop,Women's Store
0,Andheri,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,Andheri,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,Andheri,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,Andheri,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,Andheri,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [28]:
Ml_grouped = Ml_onehot.groupby(["Neighborhoods"]).mean().reset_index()

print(Ml_grouped.shape)
Ml_grouped

(41, 197)


Unnamed: 0,Neighborhoods,ATM,Afghan Restaurant,American Restaurant,Antique Shop,Arcade,Art Gallery,Arts & Crafts Store,Asian Restaurant,Athletics & Sports,BBQ Joint,Bagel Shop,Bakery,Bank,Bar,Basketball Court,Beach,Bed & Breakfast,Beer Garden,Big Box Store,Bike Rental / Bike Share,Bistro,Bookstore,Bowling Alley,Brazilian Restaurant,Breakfast Spot,Brewery,Bridal Shop,Building,Burger Joint,Burrito Place,Bus Station,Cafeteria,Café,Camera Store,Cheese Shop,Chinese Restaurant,Clothing Store,Cocktail Bar,Coffee Shop,College Auditorium,College Gym,Comedy Club,Comfort Food Restaurant,Concert Hall,Convenience Store,Cosmetics Shop,Coworking Space,Cricket Ground,Cupcake Shop,Dance Studio,Deli / Bodega,Department Store,Design Studio,Dessert Shop,Dhaba,Dim Sum Restaurant,Diner,Donut Shop,Electronics Store,English Restaurant,Event Space,Factory,Falafel Restaurant,Farm,Farmers Market,Fast Food Restaurant,Fish & Chips Shop,Fish Market,Flea Market,Flower Shop,Food,Food & Drink Shop,Food Court,Food Truck,French Restaurant,Fried Chicken Joint,Furniture / Home Store,Gaming Cafe,Garden,Garden Center,Gastropub,General College & University,General Entertainment,German Restaurant,Goan Restaurant,Golf Course,Gourmet Shop,Grocery Store,Gym,Gym / Fitness Center,Gym Pool,Harbor / Marina,Hawaiian Restaurant,Historic Site,History Museum,Hockey Arena,Hookah Bar,Hot Dog Joint,Hotel,Hotel Bar,Hotel Pool,Ice Cream Shop,Indian Restaurant,Indie Movie Theater,Indoor Play Area,Irani Cafe,Italian Restaurant,Japanese Restaurant,Jewelry Store,Juice Bar,Korean Restaurant,Light Rail Station,Lingerie Store,Liquor Store,Lounge,Maharashtrian Restaurant,Market,Martial Arts Dojo,Mediterranean Restaurant,Men's Store,Metro Station,Mexican Restaurant,Middle Eastern Restaurant,Miscellaneous Shop,Mobile Phone Shop,Modern European Restaurant,Molecular Gastronomy Restaurant,Mountain,Movie Theater,Mughlai Restaurant,Multicuisine Indian Restaurant,Multiplex,Music Store,Music Venue,Neighborhood,New American Restaurant,Nightclub,Noodle House,Office,Opera House,Outdoors & Recreation,Park,Performing Arts Venue,Pet Store,Pharmacy,Pizza Place,Platform,Playground,Plaza,Pool,Pub,Punjabi Restaurant,Recording Studio,Recreation Center,Residential Building (Apartment / Condo),Resort,Restaurant,Road,Roof Deck,Salad Place,Salon / Barbershop,Sandwich Place,Scenic Lookout,Sculpture Garden,Seafood Restaurant,Shoe Store,Shop & Service,Shopping Mall,Smoke Shop,Snack Place,Soccer Field,South American Restaurant,South Indian Restaurant,Spa,Sporting Goods Shop,Sports Bar,Sports Club,Stadium,Steakhouse,Supermarket,Sushi Restaurant,Tea Room,Thai Restaurant,Theater,Theme Park,Toll Booth,Toy / Game Store,Track,Trail,Train,Train Station,Tunnel,Vegetarian / Vegan Restaurant,Wine Bar,Wine Shop,Women's Store
0,Andheri,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.02,0.01,0.01,0.0,0.02,0.0,0.05,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.04,0.0,0.0,0.05,0.0,0.02,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.01,0.0,0.0,0.02,0.0,0.02,0.0,0.0,0.0,0.01,0.0,0.0,0.02,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.05,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.04,0.16,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.01,0.02,0.01,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.05,0.0,0.0,0.0,0.0,0.05,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.03,0.0,0.0,0.04,0.0,0.0,0.0,0.01,0.05,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.01
1,Anushakti Nagar,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0625,0.0,0.0625,0.0,0.0,0.125,0.0,0.0,0.0625,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0625,0.0,0.0,0.0,0.0,0.0625,0.0625,0.0,0.0625,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0625,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0625,0.0,0.0,0.0,0.0,0.0625,0.0,0.0625,0.0,0.0625,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0625,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0625,0.0,0.0,0.0,0.0
2,Baiganwadi,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.125,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.125,0.0,0.125,0.0,0.125,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.125,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.125,0.0,0.0,0.0,0.0,0.125,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.125,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,Bandra,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.03,0.0,0.01,0.01,0.06,0.0,0.05,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.01,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.08,0.0,0.0,0.02,0.0,0.0,0.02,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.03,0.0,0.0,0.05,0.0,0.0,0.0,0.01,0.0,0.0,0.01,0.0,0.0,0.0,0.01,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.02,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.02,0.09,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.03,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.01,0.0,0.0,0.04,0.0,0.0,0.0,0.0,0.04,0.0,0.0,0.0,0.0,0.0,0.01,0.01,0.0,0.02,0.0,0.02,0.0,0.0,0.05,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.01,0.0,0.0,0.01,0.0,0.0,0.0,0.01,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,Bhandup,0.0,0.0,0.0,0.0,0.041667,0.0,0.0,0.041667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.041667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.041667,0.0,0.0,0.041667,0.0,0.0,0.041667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.041667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.041667,0.208333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.083333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.083333,0.0,0.0,0.0,0.0,0.041667,0.0,0.0,0.0,0.0,0.0,0.083333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.041667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.041667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.083333,0.0,0.0,0.0,0.0,0.0
5,Borivali,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.08,0.0,0.0,0.05,0.03,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.03,0.0,0.01,0.0,0.0,0.02,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.04,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.02,0.01,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.07,0.11,0.01,0.0,0.0,0.01,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.04,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.01,0.0,0.02,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.05,0.0,0.0,0.0,0.0,0.04,0.01,0.0,0.01,0.0,0.01,0.01,0.0,0.03,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.03,0.0,0.0,0.0
6,Charkop,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.037736,0.0,0.037736,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.018868,0.0,0.0,0.0,0.0,0.0,0.018868,0.0,0.037736,0.0,0.0,0.037736,0.0,0.0,0.075472,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.018868,0.0,0.0,0.0,0.018868,0.018868,0.0,0.0,0.0,0.0,0.0,0.0,0.113208,0.0,0.0,0.0,0.0,0.018868,0.0,0.0,0.018868,0.0,0.018868,0.0,0.018868,0.018868,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.018868,0.037736,0.018868,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.018868,0.0,0.037736,0.037736,0.0,0.0,0.0,0.037736,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.056604,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.018868,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.018868,0.0,0.0,0.018868,0.056604,0.0,0.0,0.018868,0.0,0.018868,0.0,0.0,0.0,0.0,0.0,0.018868,0.0,0.0,0.0,0.0,0.0,0.018868,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.018868,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7,Chembur,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.04,0.0,0.0,0.0,0.02,0.0,0.03,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.04,0.0,0.0,0.04,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.07,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.01,0.0,0.0,0.01,0.0,0.01,0.02,0.04,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.19,0.0,0.0,0.0,0.01,0.0,0.0,0.01,0.0,0.01,0.0,0.0,0.03,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.01,0.0,0.0,0.04,0.01,0.01,0.01,0.01,0.0,0.01,0.0,0.0,0.0,0.0,0.04,0.0,0.0,0.0,0.0,0.02,0.0,0.01,0.02,0.0,0.0,0.01,0.02,0.01,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.02,0.0,0.0,0.0
8,Dahisar,0.0,0.0,0.0,0.0,0.015152,0.0,0.0,0.015152,0.015152,0.030303,0.0,0.0,0.0,0.030303,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.015152,0.0,0.045455,0.0,0.0,0.090909,0.015152,0.0,0.030303,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.030303,0.0,0.030303,0.0,0.0,0.015152,0.0,0.0,0.015152,0.0,0.0,0.0,0.0,0.0,0.060606,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.030303,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.030303,0.015152,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.015152,0.0,0.060606,0.075758,0.015152,0.0,0.0,0.015152,0.0,0.0,0.015152,0.0,0.0,0.0,0.030303,0.030303,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.015152,0.0,0.0,0.0,0.015152,0.030303,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.045455,0.0,0.0,0.0,0.0,0.030303,0.0,0.0,0.015152,0.0,0.0,0.0,0.0,0.015152,0.015152,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.015152,0.0,0.0,0.0,0.0,0.0,0.0,0.015152,0.0,0.0,0.0,0.0,0.0
9,Devipada,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.011364,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.011364,0.0,0.0,0.0,0.056818,0.0,0.0,0.034091,0.034091,0.0,0.022727,0.0,0.0,0.0,0.0,0.0,0.011364,0.0,0.0,0.0,0.0,0.0,0.0,0.034091,0.0,0.0,0.0,0.0,0.022727,0.0,0.022727,0.0,0.0,0.0,0.0,0.0,0.0,0.056818,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.011364,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.022727,0.011364,0.0,0.0,0.0,0.011364,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.079545,0.125,0.022727,0.0,0.0,0.0,0.0,0.0,0.011364,0.0,0.0,0.0,0.0,0.045455,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.022727,0.0,0.011364,0.0,0.0,0.0,0.0,0.022727,0.0,0.0,0.011364,0.045455,0.0,0.0,0.011364,0.0,0.0,0.0,0.0,0.0,0.011364,0.0,0.022727,0.0,0.0,0.0,0.0,0.045455,0.011364,0.0,0.022727,0.0,0.011364,0.0,0.0,0.034091,0.0,0.0,0.0,0.0,0.0,0.011364,0.0,0.011364,0.0,0.0,0.0,0.0,0.0,0.011364,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.022727,0.0,0.0,0.0


<h4>Creating a data frame with shoping mall</h4>

In [29]:
Ml_mall = Ml_grouped[["Neighborhoods","Shopping Mall"]]

In [30]:
Ml_mall.head()

Unnamed: 0,Neighborhoods,Shopping Mall
0,Andheri,0.0
1,Anushakti Nagar,0.0
2,Baiganwadi,0.0
3,Bandra,0.0
4,Bhandup,0.041667


<h1>Clustering Neighborhoods</h1>

In [31]:
# set number of clusters
kclusters = 3

Ml_clustering = Ml_mall.drop(["Neighborhoods"], 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(Ml_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10]

array([0, 0, 0, 0, 1, 2, 0, 2, 0, 0], dtype=int32)

In [32]:
# create a new dataframe that includes the cluster as well as the top 10 venues for each neighborhood.
Ml_merged = Ml_mall.copy()

# add clustering labels
Ml_merged["Cluster Labels"] = kmeans.labels_

In [33]:
Ml_merged.rename(columns={"Neighborhoods": "Neighborhood"}, inplace=True)
Ml_merged.head()

Unnamed: 0,Neighborhood,Shopping Mall,Cluster Labels
0,Andheri,0.0,0
1,Anushakti Nagar,0.0,0
2,Baiganwadi,0.0,0
3,Bandra,0.0,0
4,Bhandup,0.041667,1


In [41]:
# merge toronto_grouped with toronto_data to add latitude/longitude for each neighborhood
Ml_merged = Ml_merged.join(Ml_df.set_index("Neighborhood"), on="Neighborhood")

print(Ml_merged.shape)
Ml_merged.head() # check the last columns!

(41, 5)


Unnamed: 0,Neighborhood,Shopping Mall,Cluster Labels,Latitude,Longitude
0,Andheri,0.0,0,19.118459,72.841763
1,Anushakti Nagar,0.0,0,19.04283,72.92734
2,Baiganwadi,0.0,0,19.06294,72.92663
3,Bandra,0.0,0,19.05437,72.84017
4,Bhandup,0.041667,1,19.14556,72.94856


In [38]:
# sort the results by Cluster Labels
print(Ml_merged.shape)
Ml_merged.sort_values(["Cluster Labels"], inplace=True)
Ml_merged

(41, 5)


Unnamed: 0,Neighborhood,Shopping Mall,Cluster Labels,Latitude,Longitude
0,Andheri,0.0,0,19.118459,72.841763
34,Tilak Nagar (Mumbai),0.0,0,18.99616,72.85279
31,"Sion, Mumbai",0.0,0,19.04359,72.86412
30,Shil Phata,0.0,0,19.14658,73.04005
27,Mumbra,0.0,0,19.19054,73.02266
25,Mira Road,0.0,0,19.280032,72.867932
24,"Matharpacady, Mumbai",0.0,0,19.04492,72.867205
23,Mankhurd,0.0,0,19.04853,72.93222
22,Mahavir Nagar (Kandivali),0.0,0,19.211982,72.837573
21,Kurla,0.0,0,19.06498,72.88069


In [42]:
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i+x+(i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(Ml_merged['Latitude'], Ml_merged['Longitude'], Ml_merged['Neighborhood'], Ml_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' - Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

In [43]:
# save the map as HTML file
map_clusters.save('map_clusters.html')

<h1>Examining Clusters</h1>

<h4>Cluster 0</h4>

In [44]:
Ml_merged.loc[Ml_merged['Cluster Labels'] == 0]

Unnamed: 0,Neighborhood,Shopping Mall,Cluster Labels,Latitude,Longitude
0,Andheri,0.0,0,19.118459,72.841763
1,Anushakti Nagar,0.0,0,19.04283,72.92734
2,Baiganwadi,0.0,0,19.06294,72.92663
3,Bandra,0.0,0,19.05437,72.84017
6,Charkop,0.0,0,19.20866,72.82612
8,Dahisar,0.0,0,19.25003,72.85907
9,Devipada,0.0,0,19.22469,72.86605
10,Dombivli,0.0,0,19.21275,73.08324
11,Eastern Suburbs (Mumbai),0.0,0,19.005347,72.855803
14,Grant Road,0.0,0,18.95929,72.83108


<h4>Cluster 1</h1>

In [45]:
Ml_merged.loc[Ml_merged['Cluster Labels'] == 1]

Unnamed: 0,Neighborhood,Shopping Mall,Cluster Labels,Latitude,Longitude
4,Bhandup,0.041667,1,19.14556,72.94856
19,Kanjurmarg,0.038462,1,19.13138,72.93568
26,Mulund,0.041096,1,19.17183,72.95565
33,Thakur village,0.032258,1,19.2102,72.87541
35,Uttan,0.047619,1,26.86634,80.93884
36,Vashi,0.03,1,19.08465,72.90481
37,Vikhroli,0.03,1,19.11109,72.92781


<h4>Cluster 2</h4>

In [47]:
Ml_merged.loc[Ml_merged['Cluster Labels'] == 2]

Unnamed: 0,Neighborhood,Shopping Mall,Cluster Labels,Latitude,Longitude
5,Borivali,0.01,2,19.22936,72.85751
7,Chembur,0.01,2,19.06218,72.90241
12,Ghatkopar,0.012346,2,19.086523,72.909008
13,Goregaon,0.02,2,19.16455,72.84946
28,Pestom sagar,0.012048,2,19.07064,72.90217
29,Seven Bungalows,0.01,2,19.131342,72.816342
32,"Sonapur, Bhandup",0.023256,2,19.16394,72.93544
38,Wadala,0.020619,2,19.0172,72.85817
39,Western Suburbs (Mumbai),0.012658,2,19.19701,72.82768
40,Worli,0.02,2,19.00744,72.81688


<h4>Observations</h4>
<p>We can see cluster 0 has no shopping malls, cluster 1 has most of shoping malls and cluster 2 has moderate shopping malls. So. it would be better to start shopping mall in cluster 0.</p>
<p>But lets see other categories which would be potential ventures.</p>

In [48]:
venues_df['VenueCategory'].value_counts()

Indian Restaurant                           375
Café                                        144
Fast Food Restaurant                        140
Coffee Shop                                 113
Pizza Place                                 107
Ice Cream Shop                               91
Chinese Restaurant                           86
Restaurant                                   72
Bar                                          62
Lounge                                       55
Dessert Shop                                 54
Bakery                                       53
Pub                                          51
Snack Place                                  47
Vegetarian / Vegan Restaurant                47
Sandwich Place                               47
Train Station                                41
Gym / Fitness Center                         40
Asian Restaurant                             36
Italian Restaurant                           36
Seafood Restaurant                      

In [49]:
num_top_venues = 5

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
areaColumns = ['Neighborhoods']
freqColumns = []
for ind in np.arange(num_top_venues):
    try:
        freqColumns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        freqColumns.append('{}th Most Common Venue'.format(ind+1))
columns = areaColumns+freqColumns

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhoods'] = Ml_grouped['Neighborhoods']

for ind in np.arange(Ml_grouped.shape[0]):
    row_categories = Ml_grouped.iloc[ind, :].iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    neighborhoods_venues_sorted.iloc[ind, 1:] = row_categories_sorted.index.values[0:num_top_venues]

# neighborhoods_venues_sorted.sort_values(freqColumns, inplace=True)
print(neighborhoods_venues_sorted.shape)
neighborhoods_venues_sorted

(41, 6)


Unnamed: 0,Neighborhoods,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
0,Andheri,Indian Restaurant,Snack Place,Pizza Place,Gym / Fitness Center,Bar
1,Anushakti Nagar,Chinese Restaurant,Park,Pub,Bus Station,Food
2,Baiganwadi,Platform,Gym,Park,Coffee Shop,Food Truck
3,Bandra,Indian Restaurant,Café,Bakery,Dessert Shop,Bar
4,Bhandup,Indian Restaurant,Pizza Place,Train Station,Multiplex,Restaurant
5,Borivali,Indian Restaurant,Café,Ice Cream Shop,Restaurant,Chinese Restaurant
6,Charkop,Fast Food Restaurant,Coffee Shop,Lounge,Pizza Place,Indian Restaurant
7,Chembur,Indian Restaurant,Fast Food Restaurant,Pizza Place,Café,Restaurant
8,Dahisar,Chinese Restaurant,Indian Restaurant,Fast Food Restaurant,Ice Cream Shop,Café
9,Devipada,Indian Restaurant,Ice Cream Shop,Fast Food Restaurant,Café,Lounge


In [50]:
neighborhoods_venues_sorted['1st Most Common Venue'].value_counts(normalize = True)

Indian Restaurant       0.682927
Fast Food Restaurant    0.073171
Café                    0.073171
Chinese Restaurant      0.048780
Restaurant              0.024390
Shop & Service          0.024390
Coffee Shop             0.024390
Platform                0.024390
ATM                     0.024390
Name: 1st Most Common Venue, dtype: float64

In [51]:
neighborhoods_venues_sorted['2nd Most Common Venue'].value_counts(normalize = True)

Fast Food Restaurant    0.170732
Café                    0.146341
Pizza Place             0.121951
Indian Restaurant       0.097561
Coffee Shop             0.073171
Ice Cream Shop          0.073171
Pub                     0.048780
Train Station           0.048780
Restaurant              0.024390
Bakery                  0.024390
Plaza                   0.024390
Bookstore               0.024390
Snack Place             0.024390
Park                    0.024390
Gym                     0.024390
Food & Drink Shop       0.024390
Bar                     0.024390
Name: 2nd Most Common Venue, dtype: float64

In [52]:
neighborhoods_venues_sorted['3rd Most Common Venue'].value_counts(normalize = True)

Fast Food Restaurant    0.170732
Pizza Place             0.146341
Café                    0.097561
Coffee Shop             0.097561
Ice Cream Shop          0.073171
Bakery                  0.048780
Snack Place             0.048780
Multiplex               0.048780
Pub                     0.048780
Lounge                  0.024390
Indian Restaurant       0.024390
Train Station           0.024390
Chinese Restaurant      0.024390
Seafood Restaurant      0.024390
Park                    0.024390
Gym / Fitness Center    0.024390
Clothing Store          0.024390
Art Gallery             0.024390
Name: 3rd Most Common Venue, dtype: float64

In [53]:
neighborhoods_venues_sorted['4th Most Common Venue'].value_counts(normalize = True)

Coffee Shop                      0.146341
Chinese Restaurant               0.121951
Café                             0.097561
Fast Food Restaurant             0.097561
Vegetarian / Vegan Restaurant    0.097561
Restaurant                       0.073171
Ice Cream Shop                   0.073171
Pizza Place                      0.048780
Dessert Shop                     0.024390
Diner                            0.024390
Italian Restaurant               0.024390
Brewery                          0.024390
Gym / Fitness Center             0.024390
Multiplex                        0.024390
Train Station                    0.024390
Bakery                           0.024390
Bus Station                      0.024390
Hookah Bar                       0.024390
Name: 4th Most Common Venue, dtype: float64

In [115]:
neighborhoods_venues_sorted['5th Most Common Venue'].value_counts(normalize = True)

Restaurant                       0.097561
Train Station                    0.097561
Coffee Shop                      0.097561
Ice Cream Shop                   0.073171
Pizza Place                      0.073171
Café                             0.048780
Pub                              0.048780
Vegetarian / Vegan Restaurant    0.048780
Dessert Shop                     0.048780
Clothing Store                   0.024390
Food Truck                       0.024390
Seafood Restaurant               0.024390
Indian Restaurant                0.024390
Bakery                           0.024390
Gym                              0.024390
Multiplex                        0.024390
Bar                              0.024390
Hotel                            0.024390
Tea Room                         0.024390
Factory                          0.024390
Sports Bar                       0.024390
Food & Drink Shop                0.024390
Diner                            0.024390
Italian Restaurant               0

<p>We can see Mumbai's most common venue is Indian Restuarant. We can select Indian Restuarant as other category but there are 375 Indian Restuarants in Mumbai. So, it won't be nice to start another Indian Restuarant because it would get huge competition from other Indian Restuarants. When we see Ice Cream Shop it is one of the top five most common venues. So, let's explore this category.</p>

In [116]:
len(Ml_grouped[Ml_grouped["Ice Cream Shop"] > 0])

31

In [54]:
Ml_ice = Ml_grouped[["Neighborhoods","Ice Cream Shop"]]

In [55]:
Ml_ice.head()

Unnamed: 0,Neighborhoods,Ice Cream Shop
0,Andheri,0.04
1,Anushakti Nagar,0.0
2,Baiganwadi,0.0
3,Bandra,0.02
4,Bhandup,0.041667


In [56]:
# set number of clusters
kclusters = 3

kl_clustering = Ml_ice.drop(["Neighborhoods"], 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(kl_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10]

array([0, 1, 1, 1, 0, 2, 0, 1, 0, 2], dtype=int32)

In [57]:
# create a new dataframe that includes the cluster as well as the top 10 venues for each neighborhood.
Ml_merged = Ml_ice.copy()

# add clustering labels
Ml_merged["Cluster Labels"] = kmeans.labels_

In [59]:
Ml_merged.rename(columns={"Neighborhoods": "Neighborhood"}, inplace=True)
Ml_merged.head()

Unnamed: 0,Neighborhood,Ice Cream Shop,Cluster Labels
0,Andheri,0.04,0
1,Anushakti Nagar,0.0,1
2,Baiganwadi,0.0,1
3,Bandra,0.02,1
4,Bhandup,0.041667,0


In [67]:
# merge toronto_grouped with toronto_data to add latitude/longitude for each neighborhood
Ml_merged = Ml_merged.join(Ml_df.set_index("Neighborhood"), on="Neighborhood")

print(Ml_merged.shape)
Ml_merged.head() # check the last columns!

ValueError: columns overlap but no suffix specified: Index(['Latitude', 'Longitude'], dtype='object')

In [61]:
# sort the results by Cluster Labels
print(Ml_merged.shape)
Ml_merged.sort_values(["Cluster Labels"], inplace=True)
Ml_merged

(41, 5)


Unnamed: 0,Neighborhood,Ice Cream Shop,Cluster Labels,Latitude,Longitude
0,Andheri,0.04,0,19.118459,72.841763
38,Wadala,0.030928,0,19.0172,72.85817
35,Uttan,0.047619,0,26.86634,80.93884
33,Thakur village,0.048387,0,19.2102,72.87541
32,"Sonapur, Bhandup",0.046512,0,19.16394,72.93544
29,Seven Bungalows,0.03,0,19.131342,72.816342
26,Mulund,0.041096,0,19.17183,72.95565
25,Mira Road,0.055556,0,19.280032,72.867932
22,Mahavir Nagar (Kandivali),0.026667,0,19.211982,72.837573
39,Western Suburbs (Mumbai),0.037975,0,19.19701,72.82768


In [62]:
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i+x+(i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(Ml_merged['Latitude'], Ml_merged['Longitude'], Ml_merged['Neighborhood'], Ml_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' - Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

In [63]:
map_clusters.save('map_clusters.html')

<h4>Cluster </h4>

In [64]:
Ml_merged.loc[Ml_merged['Cluster Labels'] == 0]

Unnamed: 0,Neighborhood,Ice Cream Shop,Cluster Labels,Latitude,Longitude
0,Andheri,0.04,0,19.118459,72.841763
38,Wadala,0.030928,0,19.0172,72.85817
35,Uttan,0.047619,0,26.86634,80.93884
33,Thakur village,0.048387,0,19.2102,72.87541
32,"Sonapur, Bhandup",0.046512,0,19.16394,72.93544
29,Seven Bungalows,0.03,0,19.131342,72.816342
26,Mulund,0.041096,0,19.17183,72.95565
25,Mira Road,0.055556,0,19.280032,72.867932
22,Mahavir Nagar (Kandivali),0.026667,0,19.211982,72.837573
39,Western Suburbs (Mumbai),0.037975,0,19.19701,72.82768


<h4>Cluster 1</h4>

In [65]:
Ml_merged.loc[Ml_merged['Cluster Labels'] == 1]

Unnamed: 0,Neighborhood,Ice Cream Shop,Cluster Labels,Latitude,Longitude
27,Mumbra,0.0,1,19.19054,73.02266
1,Anushakti Nagar,0.0,1,19.04283,72.92734
37,Vikhroli,0.01,1,19.11109,72.92781
36,Vashi,0.02,1,19.08465,72.90481
2,Baiganwadi,0.0,1,19.06294,72.92663
34,Tilak Nagar (Mumbai),0.0,1,18.99616,72.85279
3,Bandra,0.02,1,19.05437,72.84017
31,"Sion, Mumbai",0.014925,1,19.04359,72.86412
30,Shil Phata,0.0,1,19.14658,73.04005
40,Worli,0.02,1,19.00744,72.81688


<h4>Cluster 2</h4>

In [66]:
Ml_merged.loc[Ml_merged['Cluster Labels'] == 2]

Unnamed: 0,Neighborhood,Ice Cream Shop,Cluster Labels,Latitude,Longitude
5,Borivali,0.07,2,19.22936,72.85751
9,Devipada,0.079545,2,19.22469,72.86605
10,Dombivli,0.111111,2,19.21275,73.08324
15,Jogeshwari,0.1,2,19.13792,72.84941


<h1>Observations:</h1>
<p>Cluster 0 has moderate Ice Cream Shops, Cluster 1 has no Ice Cream Shops and Cluster 2 has most of Ice Cream Shops. So, it is better to start Ice Cream Shop in Cluster 1 and most of areas in Cluster 1 are good areas and they are with no Ice Cream Shops. So, it is better to start Ice Cream Shops in these cities. Ice Cream Shops are most visted places than Shopping Malls in Mumbai. So, it is better to start Ice Cream Shop than Shopping mall.</p>
