#### Before we get the data and start exploring it, let's download all the dependencies that we will need.


In [1]:
#!pip install geopy
from geopy.geocoders import Nominatim # module to convert an address into latitude and longitude values

#! pip install folium==0.5.0
import folium # plotting library

import requests # library to handle requests
from bs4 import BeautifulSoup
import pandas as pd # library for data analsysis
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

import numpy as np # library to handle data in a vectorized manner
import random # library for random number generation

import json # library to handle JSON files
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans

print("Libraries Imported")

Libraries Imported


#### In order to define an instance of the geocoder, we need to define a user_agent. We will name our agent my_agent, as shown below.

In [2]:
address = 'New Delhi, IN'

geolocator = Nominatim(user_agent="my_agent")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print("The latitude and longitude of New Delhi are {},{}.".format(latitude, longitude)) 

The latitude and longitude of New Delhi are 28.6138954,77.2090057.


#### Importing the datatset

The dataset here is availabe on kaggle. It returns the neighborhood names with their borough, latitude and longitude. The dataset (delhi) consists of 185 rows and 4 columns out of which around 20 return nan for latitude and longitude values.we then replace these missing values with values that are avaialable online.


In [3]:
delhi = pd.read_csv('D:\my_notebook\Coursera\delhi_dataSet.csv')
delhi.drop('Unnamed: 0', axis=1, inplace=True)
print(delhi.shape)
delhi.head()

(185, 4)


Unnamed: 0,Borough,Neighborhood,latitude,longitude
0,North West Delhi,Adarsh Nagar,28.614192,77.071541
1,North West Delhi,Ashok Vihar,28.699453,77.184826
2,North West Delhi,Azadpur,28.707657,77.175547
3,North West Delhi,Bawana,28.79966,77.032885
4,North West Delhi,Begum Pur,,


#### Finding  any missing values since missing values cannot be visualized using folium.

In [4]:
delhi[delhi.isna().any(axis=1)].head()

Unnamed: 0,Borough,Neighborhood,latitude,longitude
4,North West Delhi,Begum Pur,,
16,North West Delhi,Rohini Sub City,,
24,North Delhi,Ghantewala,,
25,North Delhi,Gulabi Bagh,,
35,North Delhi,Sadar Bazaar,,


In [5]:
delhi.iloc[[4,16,25,35,42,52,53,64,74,90,109,117,129,141,144,145,162,174,180,182],[2]]=[28.72714,28.7383,28.40291,28.658813,28.6656,
                                                                                      28.6818,28.7259,28.637235,28.576834,28.6640,
                                                                                      28.5571,28.5755,28.4994,28.602124,28.609675,
                                                                                      28.354278,28.6007,28.6057,28.6329415,28.6196]

In [6]:
delhi.iloc[[4,16,25,35,42,52,53,64,74,90,109,117,129,141,144,145,162,174,180,182],[3]]=[77.05948,77.0822,77.11318,77.216742,77.2168,
                                                                                       77.2579,77.2628,77.182634,77.205884,77.2712,
                                                                                       77.2369,77.2274,77.2918,77.240709,77.089542,
                                                                                       77.0257044,77.1031,77.2942,77.1058448,77.0550]

In [7]:
delhi[delhi.isna().any(axis=1)].head()

Unnamed: 0,Borough,Neighborhood,latitude,longitude
24,North Delhi,Ghantewala,,
65,Central Delhi,Sadar Bazaar,,


In [8]:
delhi.dropna(inplace=True)
delhi.reset_index(drop=True,inplace=True)
print(delhi.shape)
delhi.head()

(183, 4)


Unnamed: 0,Borough,Neighborhood,latitude,longitude
0,North West Delhi,Adarsh Nagar,28.614192,77.071541
1,North West Delhi,Ashok Vihar,28.699453,77.184826
2,North West Delhi,Azadpur,28.707657,77.175547
3,North West Delhi,Bawana,28.79966,77.032885
4,North West Delhi,Begum Pur,28.72714,77.05948


In [9]:
print('The dataframe has {} boroughs and {} neighborhoods.'.format(
        len(delhi['Borough'].unique()),
        delhi.shape[0])
    )

The dataframe has 9 boroughs and 183 neighborhoods.


In [10]:
print(delhi['Borough'].unique())

['North West Delhi' 'North Delhi' 'North East Delhi' 'Central Delhi'
 'New Delhi' 'East Delhi' 'South Delhi' 'South West Delhi' 'West Delhi']


In [11]:
delhi.groupby('Borough').count()

Unnamed: 0_level_0,Neighborhood,latitude,longitude
Borough,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Central Delhi,7,7,7
East Delhi,19,19,19
New Delhi,10,10,10
North Delhi,26,26,26
North East Delhi,13,13,13
North West Delhi,18,18,18
South Delhi,49,49,49
South West Delhi,21,21,21
West Delhi,20,20,20


#### Folium library in Python helps us analyze the location and geospatial data with ease and lets us create interactive maps.
Creating a map of Delhi with neighborhoods superimposed on top of it.

In [12]:
map_Delhi = folium.Map(location=[latitude,longitude], zoom_start=10, min_zoom=8,max_zoom=13)

# add markers

for lat,long,borough,neighborhood in zip(delhi['latitude'],delhi['longitude'],delhi['Borough'],delhi['Neighborhood']):
    label = '{},{}'.format(neighborhood,borough)
    label = folium.Popup(label,parse_html=True)
    folium.CircleMarker(
        [lat,long],
        radius=5,
        popup=label,
        color='red',
        fill=True,
        parse_html=False).add_to(map_Delhi)

map_Delhi


Let's simplify the above map and segment and cluster only the neighborhoods in South Delhi since it returns the maximum number of neighborhoods.
So let's slice the original dataframe and create a new dataframe of the South Delhi data.

In [13]:
south_data = delhi[delhi['Borough'] =='South Delhi'].reset_index(drop=True)
south_data.head()

Unnamed: 0,Borough,Neighborhood,latitude,longitude
0,South Delhi,Alaknanda,28.529336,77.251632
1,South Delhi,Chhattarpur,28.507007,77.175417
2,South Delhi,Chittaranjan Park,28.538752,77.249249
3,South Delhi,Dayanand Colony,28.5622,77.247613
4,South Delhi,Defence Colony,28.571791,77.23201


Getting the geographical coordinates of South Delhi.

In [14]:
address = 'South Delhi, IN'

geolocator = Nominatim(user_agent="my_agent")
location = geolocator.geocode(address)
Latitude = location.latitude
Longitude = location.longitude
print('The geograpical coordinate of South Delhi are {}, {}.'.format(Latitude, Longitude))

The geograpical coordinate of South Delhi are 28.485169399999997, 77.19637972381611.


#### Visualizing the neighborhoods of South Delhi.

In [23]:
map_SouthDelhi = folium.Map(location=[Latitude,Longitude], zoom_start=11, min_zoom=8,max_zoom=12)

# add markers

for lat,long,borough,neighborhood in zip(south_data['latitude'],south_data['longitude'],south_data['Borough'],south_data['Neighborhood']):
    label = '{},{}'.format(neighborhood,borough)
    label = folium.Popup(label,parse_html=True)
    folium.CircleMarker(
        [lat,long],
        radius=5,
        popup=label,
        color='red',
        fill=True,
        parse_html=False).add_to(map_SouthDelhi)

map_SouthDelhi


Defining Foursquare Credentials and Version.

In [16]:
CLIENT_ID = 'F5A2QJBCX32T0KJN1AVNINKFDPV1SHVFOEVMMKWNVU2OMBFY' # your Foursquare ID
CLIENT_SECRET = 'AG430MTEUD3BRJ0OVL1HA2QFXGQTJVF15FRABR1I2WGJRHVE' # your Foursquare Secret
ACCESS_TOKEN = 'MIDEFYCQJKUGEHLRDLM5WHNFUYD1VUN033W1WWU4NLSSSJCA' # your FourSquare Access Token
VERSION = '20210423' # Foursquare API version
LIMIT = 100# A default Foursquare API limit value
radius = 500

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: F5A2QJBCX32T0KJN1AVNINKFDPV1SHVFOEVMMKWNVU2OMBFY
CLIENT_SECRET:AG430MTEUD3BRJ0OVL1HA2QFXGQTJVF15FRABR1I2WGJRHVE


In [17]:
# Get the neighborhood's name.

#west_data.loc[1, 'Neighborhood']

#Get the neighborhood's latitude and longitude values.

#neighborhood_latitude = west_data.loc[1, 'latitude'] # neighborhood latitude value
#neighborhood_longitude = west_data.loc[1, 'longitude'] # neighborhood longitude value
#neighborhood_name = west_data.loc[1, 'Neighborhood'] # neighborhood name

#print('Latitude and longitude values of {} are {}, {}.'.format(neighborhood_name, 
                                                               #neighborhood_latitude, 
                                                               #neighborhood_longitude))
        
#Limit = 100
#radius = 500
#url = 'https://api.foursquare.com/v2/venues/search?client_id={}&client_secret={}&ll={},{}&oauth_token={}&v={}&radius={}&limit={}'.format(CLIENT_ID, CLIENT_SECRET,Latitude, Longitude,ACCESS_TOKEN, VERSION,radius, LIMIT)
#results = requests.get(url).json()
#results
# assign relevant part of JSON to venues
#venues = results['response']['venues']

# tranform venues into a dataframe
#dataframe = pd.json_normalize(venues)
#dataframe.head()
#filtered_columns = ['name', 'categories', 'location.address']
#dataframe_filtered = dataframe.loc[:, filtered_columns]

# function that extracts the category of the venue
#def get_category_type(row):
#    try:
#        categories_list = row['categories']
#   except:
#        categories_list = row['venue.categories']
        
#    if len(categories_list) == 0:
#        return None
#    else:
#        return categories_list[0]['name']

# filter the category for each row
#dataframe_filtered['categories'] = dataframe_filtered.apply(get_category_type, axis=1)
#dataframe_filtered = dataframe_filtered.rename(columns={'location.address': 'Address'})
#dataframe_filtered.head(20)

In [18]:
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [19]:
southdelhi_venues = getNearbyVenues(names = south_data['Neighborhood'],
                                   latitudes = south_data['latitude'],
                                   longitudes = south_data['longitude'])

Alaknanda
Chhattarpur
Chittaranjan Park
Dayanand Colony
Defence Colony
East of Kailash
Friends Colony
Govindpuri
Greater Kailash
Green Park
Gulmohar Park
Hauz Khas
Hauz Khas Village
Jaitpur
Jamroodpur Village
Jangpura
Jasola
Jor Bagh
Kailash Colony
Kalkaji
Khanpur
Khirki Village
Kotla Mubarakpur
Lajpat Nagar
Lodi Colony
Maharani Bagh
Malviya Nagar
Mehrauli
Neeti Bagh
Nehru Place
Netaji Nagar
New Friends Colony
Nizamuddin West
Okhla
Pulpehaladpur
Safdarjung Enclave
Sainik Farm
Saket
Sangam Vihar
Sarita Vihar
Sarojini Nagar
Sarvodaya Enclave
Shahpur Jat
Siri Fort
South Extension
Sriniwaspuri
Sundar Nagar[1]
Tughlaqabad
Badarpur


In [25]:
southdelhi_venues.shape

(498, 7)

### the getNearbyVenues function rturn 498 venues  for South Delhi.

In [27]:
southdelhi_venues.head()

Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Alaknanda,28.529336,77.251632,City Of Joy,28.532685,77.253003,Restaurant
1,Alaknanda,28.529336,77.251632,Qureshi's,28.52783,77.246837,BBQ Joint
2,Alaknanda,28.529336,77.251632,Qureshi Kabab,28.52775,77.246966,BBQ Joint
3,Alaknanda,28.529336,77.251632,"Cafe Coffee Day, Alaknanda",28.527905,77.250254,Coffee Shop
4,Alaknanda,28.529336,77.251632,Subway,28.526833,77.252821,New American Restaurant


In [28]:
southdelhi_venues.tail()

Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
493,Tughlaqabad,28.511192,77.262327,Tughlaqabad Fort,28.511298,77.261902,Castle
494,Badarpur,28.503638,77.301976,Big Boss Tattoo Studio,28.503961,77.301828,Spa
495,Badarpur,28.503638,77.301976,Faridabad Railway Station,28.5046,77.299299,Train Station
496,Badarpur,28.503638,77.301976,Tughlaqabad Metro Station,28.502379,77.299231,Light Rail Station
497,Badarpur,28.503638,77.301976,HDFC Bank ATM,28.500883,77.301707,ATM
