# Importing the required libraries

In [1]:
import os

import folium
import pandas as pd
import requests
from bs4 import BeautifulSoup
from sklearn.cluster import KMeans

# Web scraping

In [2]:
# Download the Wikipedia page that lists Delhi's districts and parse it.
url = 'https://simple.wikipedia.org/wiki/List_of_districts_in_Delhi'
# A timeout keeps the cell from hanging forever on a stalled connection.
response = requests.get(url, timeout=30)
# Fail loudly on an HTTP error instead of parsing an error page.
response.raise_for_status()
page = response.text
soup = BeautifulSoup(page, 'html.parser')
# Preview only the start of the document; printing the entire page floods
# the notebook output.
print(soup.prettify()[:1000])

<!DOCTYPE html>
<html class="client-nojs" dir="ltr" lang="en">
 <head>
  <meta charset="utf-8"/>
  <title>
   List of districts in Delhi - Simple English Wikipedia, the free encyclopedia
  </title>
  <script>
   document.documentElement.className="client-js";RLCONF={"wgBreakFrames":!1,"wgSeparatorTransformTable":["",""],"wgDigitTransformTable":["",""],"wgDefaultDateFormat":"dmy","wgMonthNames":["","January","February","March","April","May","June","July","August","September","October","November","December"],"wgRequestId":"35a9f4cf-cab6-4361-a6a5-cbf9fbaa1873","wgCSPNonce":!1,"wgCanonicalNamespace":"","wgCanonicalSpecialPageName":!1,"wgNamespaceNumber":0,"wgPageName":"List_of_districts_in_Delhi","wgTitle":"List of districts in Delhi","wgCurRevisionId":6890806,"wgRevisionId":6890806,"wgArticleId":598674,"wgIsArticle":!0,"wgIsRedirect":!1,"wgAction":"view","wgUserName":null,"wgUserGroups":["*"],"wgCategories":["Pages with citations lacking titles","CS1 errors: external links","Pages using 

# Cleaning the data into a dataframe

In [11]:
# Collect the title attribute of every link in the first "wikitable" on the page.
district_table = soup.find_all('table', class_='wikitable')[0]
lists = [link.get('title') for link in district_table.find_all('a')]

# Entries that are linked from the table but must not end up in the result.
excluded_titles = {'Alipur (Delhi)', 'Shahdara District'}

# Only the first 56 links are considered (same cut-off as before); slicing
# avoids an IndexError when the table holds fewer links than that.
# dict.fromkeys de-duplicates while keeping first-seen order, so the result is
# identical on every run (a set would yield an arbitrary order).
l = list(dict.fromkeys(
    title
    for title in lists[:56]
    if title is not None                   # link without a title attribute
    and 'not yet started' not in title     # title marks the article as "not yet started"
    and title not in excluded_titles
))
l

['North Delhi',
 'South Delhi',
 'East Delhi',
 'North West Delhi',
 'West Delhi',
 'North East Delhi',
 'Shahdara',
 'South West Delhi',
 'Central Delhi',
 'New Delhi',
 'South East Delhi']

In [12]:
# Wrap the list of titles in a single-column DataFrame and preview the first rows.
df = pd.DataFrame(data=l)
df.head(n=5)

Unnamed: 0,0
0,North Delhi
1,South Delhi
2,East Delhi
3,North West Delhi
4,West Delhi


In [13]:
# Rebuild a clean 0..n-1 index, then give the single column a readable label.
df = df.reset_index(drop=True)
df.columns = pd.Index(['Neighborhood'])
df

Unnamed: 0,Neighborhood
0,North Delhi
1,South Delhi
2,East Delhi
3,North West Delhi
4,West Delhi
5,North East Delhi
6,Shahdara
7,South West Delhi
8,Central Delhi
9,New Delhi


In [14]:
from geopy.geocoders import Nominatim

# Finding the locations of all the neighborhoods

In [15]:
# Geocode every neighborhood with Nominatim and collect the coordinates.
# One client is created up front and reused for all lookups.
geolocator = Nominatim(user_agent='india_explorer')
coordinates = []
for hood in df['Neighborhood']:
    address = hood + ",Delhi,india"
    location = geolocator.geocode(address)
    if location is None:
        # geocode() yields None when nothing matches; stop with a clear message
        # instead of an AttributeError on `.latitude`.
        raise ValueError('Could not geocode address: {!r}'.format(address))
    coordinates.append({'latitude': location.latitude,
                        'longitude': location.longitude})

# Build the frame once from the collected records: DataFrame.append was removed
# in pandas 2.0, and growing a frame row by row copies it on every iteration.
# Rows stay in the same order as df, which the later positional join relies on.
locdf = pd.DataFrame(coordinates, columns=['latitude', 'longitude'])

In [16]:
# Display the geocoded latitude/longitude pairs collected in locdf.
locdf

Unnamed: 0,latitude,longitude
0,28.614179,77.202266
1,28.485169,77.19638
2,28.620477,77.309181
3,28.614179,77.202266
4,28.647952,77.085565
5,28.723308,77.266857
6,28.673333,77.289025
7,28.586448,76.979153
8,28.698548,77.219391
9,28.614179,77.202266


In [17]:
# Attach the coordinates to the neighborhood names by matching row index
# (left join, so every row of df is kept).
merge = df.merge(locdf, how='left', left_index=True, right_index=True)

In [18]:
# Display the neighborhood names together with their coordinates.
merge

Unnamed: 0,Neighborhood,latitude,longitude
0,North Delhi,28.614179,77.202266
1,South Delhi,28.485169,77.19638
2,East Delhi,28.620477,77.309181
3,North West Delhi,28.614179,77.202266
4,West Delhi,28.647952,77.085565
5,North East Delhi,28.723308,77.266857
6,Shahdara,28.673333,77.289025
7,South West Delhi,28.586448,76.979153
8,Central Delhi,28.698548,77.219391
9,New Delhi,28.614179,77.202266


In [19]:
# Geocode the city itself; latitude/longitude are used below as the map centre.
address = 'Delhi, India'
geolocator = Nominatim(user_agent="my-application")
location = geolocator.geocode(address)
latitude, longitude = location.latitude, location.longitude
message = 'The geograpical coordinate of Delhi, India {}, {}.'
print(message.format(latitude, longitude))

The geograpical coordinate of Delhi, India 28.6517178, 77.2219388.


In [20]:

# create map of Delhi using latitude and longitude values
# (named map_delhi so the built-in map() is not shadowed for the rest of the session)
map_delhi = folium.Map(location=[latitude, longitude], zoom_start=11)

# add one circle marker per neighborhood, with its name as the popup text
for lat, lng, neighborhood in zip(merge['latitude'], merge['longitude'], merge['Neighborhood']):
    label = folium.Popup(str(neighborhood), parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7).add_to(map_delhi)

# last expression of the cell: renders the map
map_delhi

In [21]:
# Foursquare API credentials are read from the environment so that secrets are
# never stored in the notebook. Set FOURSQUARE_CLIENT_ID and
# FOURSQUARE_CLIENT_SECRET before starting the kernel; a missing variable
# raises KeyError here rather than failing later inside an API call.
# NOTE(review): the key pair that used to be hard-coded in this cell has been
# exposed and should be revoked and replaced.
CLIENT_ID = os.environ['FOURSQUARE_CLIENT_ID']  # your Foursquare ID
CLIENT_SECRET = os.environ['FOURSQUARE_CLIENT_SECRET']  # your Foursquare Secret
VERSION = '20200524'  # sent as the API "v" parameter
LIMIT = 10  # sent as the API "limit" parameter

In [24]:


def get_nearby_venues(names,latitudes,longitudes,radius=2000):
    """Query the Foursquare ``venues/explore`` endpoint around each location.

    One request is made per (name, latitude, longitude) triple obtained by
    zipping the three inputs. The module-level CLIENT_ID, CLIENT_SECRET,
    VERSION and LIMIT values are sent with every request.

    Parameters
    ----------
    names, latitudes, longitudes : iterable
        Neighborhood names and the coordinates to search around; they are
        zipped together, so iteration stops at the shortest one.
    radius : int, default 2000
        Passed through as the API ``radius`` parameter.

    Returns
    -------
    pandas.DataFrame
        One row per returned venue with the columns 'Neighborhood',
        'latitude', 'longitude', 'venue', 'venue.latitude',
        'venue.longitude' and 'venue.category'. When no venue is returned the
        frame is empty but still has these columns.

    Raises
    ------
    requests.HTTPError
        If the API answers with an HTTP error status.
    """
    columns = ['Neighborhood', 'latitude', 'longitude', 'venue',
               'venue.latitude', 'venue.longitude', 'venue.category']
    rows = []
    for name, lat, lng in zip(names, latitudes, longitudes):
        # Let requests build and encode the query string instead of
        # formatting the credentials into the URL by hand.
        response = requests.get(
            'https://api.foursquare.com/v2/venues/explore',
            params={
                'client_id': CLIENT_ID,
                'client_secret': CLIENT_SECRET,
                'v': VERSION,
                'll': '{},{}'.format(lat, lng),
                'radius': radius,
                'limit': LIMIT,
            },
            timeout=30,
        )
        response.raise_for_status()
        groups = response.json()['response'].get('groups') or []
        items = groups[0]['items'] if groups else []
        for item in items:
            venue = item['venue']
            # A venue may come without any category; record None rather than
            # failing on categories[0].
            categories = venue.get('categories') or []
            rows.append((
                name,
                lat,
                lng,
                venue['name'],
                venue['location']['lat'],
                venue['location']['lng'],
                categories[0]['name'] if categories else None,
            ))
    # Passing the column names to the constructor also works for zero rows,
    # where assigning `.columns` afterwards raises a length-mismatch error.
    return pd.DataFrame(rows, columns=columns)

In [31]:
# Fetch the venues around every neighborhood (default search radius) and show them.
venue_data = get_nearby_venues(
    names=merge['Neighborhood'],
    latitudes=merge['latitude'],
    longitudes=merge['longitude'],
)
venue_data


Unnamed: 0,Neighborhood,latitude,longitude,venue,venue.latitude,venue.longitude,venue.category
0,North Delhi,28.614179,77.202266,Gurudwara Sri Rakabganj Sahibji,28.618296,77.205269,Spiritual Center
1,North Delhi,28.614179,77.202266,Pandey Paan,28.622249,77.201075,Smoke Shop
2,North Delhi,28.614179,77.202266,Hauz Khas Social,28.613939,77.209021,Music Venue
3,North Delhi,28.614179,77.202266,Tamra,28.620543,77.218174,Restaurant
4,North Delhi,28.614179,77.202266,Nehru Memorial Museum And Library | नेहरू स्मा...,28.603001,77.199182,History Museum
5,North Delhi,28.614179,77.202266,Amreli,28.606609,77.188631,Café
6,North Delhi,28.614179,77.202266,Spa At Shangri-La,28.620869,77.218289,Spa
7,North Delhi,28.614179,77.202266,Jakoi,28.605239,77.187581,Northeast Indian Restaurant
8,North Delhi,28.614179,77.202266,Indira Gandhi Memorial Museum | इंदिरा गांधी स...,28.599932,77.206100,History Museum
9,North Delhi,28.614179,77.202266,Masala Library,28.618814,77.218090,Indian Restaurant


# Analyze Each Neighborhood

In [32]:
# One indicator column per venue category (column name = category name).
category_dummies = pd.get_dummies(venue_data[['venue.category']], prefix='', prefix_sep='')
# A category that is itself called "Neighborhood" would collide with the label
# column added below (the old assignment silently overwrote it and then moved
# the wrong column to the front), so drop it explicitly if present.
category_dummies = category_dummies.drop(columns='Neighborhood', errors='ignore')
# Put the neighborhood label first, followed by the indicator columns.
delhi_onehot = pd.concat([venue_data[['Neighborhood']], category_dummies], axis=1)
delhi_onehot.head()

Unnamed: 0,Neighborhood,ATM,Athletics & Sports,BBQ Joint,Bakery,Big Box Store,Café,Chinese Restaurant,Clothing Store,Coffee Shop,...,Pizza Place,Pub,Resort,Restaurant,Sandwich Place,Smoke Shop,Spa,Spiritual Center,Tibetan Restaurant,Train Station
0,North Delhi,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,1,0,0
1,North Delhi,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,1,0,0,0,0
2,North Delhi,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,North Delhi,0,0,0,0,0,0,0,0,0,...,0,0,0,1,0,0,0,0,0,0
4,North Delhi,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [28]:
# Average the indicator columns per neighborhood, i.e. the share of each
# category among that neighborhood's venues; 'Neighborhood' stays a regular column.
delhi = delhi_onehot.groupby('Neighborhood', as_index=False).mean()
delhi

Unnamed: 0,Neighborhood,ATM,Athletics & Sports,BBQ Joint,Bakery,Big Box Store,Café,Chinese Restaurant,Clothing Store,Coffee Shop,...,Pizza Place,Pub,Resort,Restaurant,Sandwich Place,Smoke Shop,Spa,Spiritual Center,Tibetan Restaurant,Train Station
0,Central Delhi,0.0,0.0,0.0,0.1,0.0,0.3,0.1,0.0,0.2,...,0.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.1,0.0
1,East Delhi,0.0,0.0,0.0,0.0,0.0,0.2,0.0,0.0,0.0,...,0.1,0.0,0.0,0.0,0.1,0.0,0.0,0.0,0.0,0.0
2,New Delhi,0.0,0.0,0.0,0.0,0.0,0.1,0.0,0.0,0.0,...,0.0,0.0,0.0,0.1,0.0,0.1,0.1,0.1,0.0,0.0
3,North Delhi,0.0,0.0,0.0,0.0,0.0,0.1,0.0,0.0,0.0,...,0.0,0.0,0.0,0.1,0.0,0.1,0.1,0.1,0.0,0.0
4,North East Delhi,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,North West Delhi,0.0,0.0,0.0,0.0,0.0,0.1,0.0,0.0,0.0,...,0.0,0.0,0.0,0.1,0.0,0.1,0.1,0.1,0.0,0.0
6,Shahdara,0.0,0.0,0.1,0.0,0.1,0.1,0.0,0.0,0.0,...,0.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.1
7,South Delhi,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.1,0.2,0.1,0.0,0.0,0.0,0.0,0.0,0.0
8,South East Delhi,0.0,0.0,0.0,0.0,0.0,0.2,0.1,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.1
9,South West Delhi,0.2,0.2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.2,0.0,0.0,0.0


In [36]:
# Number of neighborhoods whose 'Café' share is greater than zero.
int((delhi['Café'] > 0).sum())

7

In [37]:
# Keep only the neighborhood name and its 'Café' share.
delhi_indian = delhi.loc[:, ['Neighborhood', 'Café']]
delhi_indian

Unnamed: 0,Neighborhood,Café
0,Central Delhi,0.3
1,East Delhi,0.2
2,New Delhi,0.1
3,North Delhi,0.1
4,North East Delhi,0.0
5,North West Delhi,0.1
6,Shahdara,0.1
7,South Delhi,0.0
8,South East Delhi,0.2
9,South West Delhi,0.0


# Clustering the neighborhoods

In [38]:
kcluster = 3
# Cluster on the café share only. Selecting the feature column explicitly
# (instead of dropping 'Neighborhood') keeps label or coordinate columns out of
# the features when this cell is run again later in the session.
delhi_clustering = delhi_indian[['Café']]
# n_init is given explicitly because newer scikit-learn releases changed its default.
kmeans = KMeans(n_clusters=kcluster, n_init=10, random_state=0).fit(delhi_clustering)
# insert() raises if the column already exists, so drop labels left over from
# an earlier run before adding the new ones as the first column.
if 'Cluster' in delhi_indian.columns:
    delhi_indian = delhi_indian.drop(columns='Cluster')
delhi_indian.insert(0, 'Cluster', kmeans.labels_)


In [39]:
# Display each neighborhood with its assigned cluster label.
delhi_indian

Unnamed: 0,Cluster,Neighborhood,Café
0,0,Central Delhi,0.3
1,0,East Delhi,0.2
2,2,New Delhi,0.1
3,2,North Delhi,0.1
4,1,North East Delhi,0.0
5,2,North West Delhi,0.1
6,2,Shahdara,0.1
7,1,South Delhi,0.0
8,0,South East Delhi,0.2
9,1,South West Delhi,0.0


In [40]:
# Add the coordinates to the clustered frame. Selecting the three base columns
# first makes the cell safe to re-run: merging a frame that already carries
# latitude/longitude would otherwise produce suffixed *_x / *_y duplicates.
delhi_indian = delhi_indian[['Cluster', 'Neighborhood', 'Café']].merge(merge, on='Neighborhood')
delhi_indian

Unnamed: 0,Cluster,Neighborhood,Café,latitude,longitude
0,0,Central Delhi,0.3,28.698548,77.219391
1,0,East Delhi,0.2,28.620477,77.309181
2,2,New Delhi,0.1,28.614179,77.202266
3,2,North Delhi,0.1,28.614179,77.202266
4,1,North East Delhi,0.0,28.723308,77.266857
5,2,North West Delhi,0.1,28.614179,77.202266
6,2,Shahdara,0.1,28.673333,77.289025
7,1,South Delhi,0.0,28.485169,77.19638
8,0,South East Delhi,0.2,28.544441,77.272873
9,1,South West Delhi,0.0,28.586448,76.979153


In [41]:
import matplotlib.cm as cm
import matplotlib.colors as colors
import numpy as np
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters: one evenly spaced rainbow colour per cluster
# (the palette size is simply kcluster)
colors_array = cm.rainbow(np.linspace(0, 1, kcluster))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
for lat, lon, poi, cluster in zip(delhi_indian['latitude'], delhi_indian['longitude'], delhi_indian['Neighborhood'], delhi_indian['Cluster']):
    label = folium.Popup(str(poi) + ' - Cluster ' + str(cluster), parse_html=True)
    # cluster - 1 keeps the existing colour assignment; for cluster 0 the
    # index -1 wraps around to the last colour of the palette.
    marker_color = rainbow[cluster - 1]
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=marker_color,
        fill=True,
        fill_color=marker_color,
        fill_opacity=0.7).add_to(map_clusters)

# last expression of the cell: renders the map
map_clusters

# analyzing each cluster

In [42]:
# Rows whose cluster label is 0.
delhi_indian[delhi_indian['Cluster'].eq(0)]


Unnamed: 0,Cluster,Neighborhood,Café,latitude,longitude
0,0,Central Delhi,0.3,28.698548,77.219391
1,0,East Delhi,0.2,28.620477,77.309181
8,0,South East Delhi,0.2,28.544441,77.272873


In [43]:
# Rows whose cluster label is 1.
delhi_indian[delhi_indian['Cluster'].eq(1)]


Unnamed: 0,Cluster,Neighborhood,Café,latitude,longitude
4,1,North East Delhi,0.0,28.723308,77.266857
7,1,South Delhi,0.0,28.485169,77.19638
9,1,South West Delhi,0.0,28.586448,76.979153
10,1,West Delhi,0.0,28.647952,77.085565


In [44]:
# Rows whose cluster label is 2.
delhi_indian[delhi_indian['Cluster'].eq(2)]


Unnamed: 0,Cluster,Neighborhood,Café,latitude,longitude
2,2,New Delhi,0.1,28.614179,77.202266
3,2,North Delhi,0.1,28.614179,77.202266
5,2,North West Delhi,0.1,28.614179,77.202266
6,2,Shahdara,0.1,28.673333,77.289025
