In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.cm as cm
import matplotlib.colors as colors
import requests
import io
import folium
import geopy
from geopy.geocoders import Nominatim

#### Let' scrape a wikipedia(which needs serious donations, please donate) page to get the list of cities in telangana

In [2]:
tables=pd.read_html("https://en.wikipedia.org/wiki/List_of_cities_in_Telangana_by_population")
tables=tables[0]
tables

Unnamed: 0,Rank,City,District,Type,Population (2011),Ref
0,1,Hyderabad,Hyderabad,Greater M.Corp,6993262,[2]
1,2,Warangal,Warangal Urban,Greater M.Corp,704570,[3][4]
2,3,Nizamabad,Nizamabad,M.Corp,311152,[2]
3,4,Khammam,Khammam,M.Corp,305000,[5]
4,5,Karimnagar,Karimnagar,M.Corp,289821,[2]
5,6,Ramagundam,Peddapalli,M.Corp,242979,[2]
6,7,Mahabubnagar,Mahabubnagar,M,222573,[6]
7,8,Nalgonda,Nalgonda,M,154326,[7][2]
8,9,Adilabad,Adilabad,M,117167,[8]
9,10,Siddipet,Siddipet,M,111358,[9]


In [3]:
df_cities=tables.loc[:,['City','District']]
df_cities

Unnamed: 0,City,District
0,Hyderabad,Hyderabad
1,Warangal,Warangal Urban
2,Nizamabad,Nizamabad
3,Khammam,Khammam
4,Karimnagar,Karimnagar
5,Ramagundam,Peddapalli
6,Mahabubnagar,Mahabubnagar
7,Nalgonda,Nalgonda
8,Adilabad,Adilabad
9,Siddipet,Siddipet


#### Let's get the geographical location of the cities

In [8]:
city_latitude=[]
city_longitude=[]

for i in df_cities.index:
    address = df_cities.loc[i,"City"]+", "+df_cities.loc[i,"District"]+", India"
    geolocator = Nominatim(user_agent="telangana_explorer")
    location = geolocator.geocode(address)
    city_latitude.append(location.latitude)
    city_longitude.append(location.longitude)

print(city_latitude)
print(city_longitude)

[17.3616079, 17.9806094, 18.6732693, 17.2465351, 18.4346438, 18.7615156, 16.745847599999998, 17.0504406, 19.6759452, 18.1017739, 17.15271675, 16.8709882]
[78.4746286, 79.5982115, 78.0978477, 80.1500326, 79.1322648, 79.4787848, 77.99366805028575, 79.2669235, 78.5339895, 78.8520128, 79.61976164399894, 79.5613977]


In [9]:
df_cities['Latitude']=city_latitude
df_cities

Unnamed: 0,City,District,Latitude
0,Hyderabad,Hyderabad,17.361608
1,Warangal,Warangal Urban,17.980609
2,Nizamabad,Nizamabad,18.673269
3,Khammam,Khammam,17.246535
4,Karimnagar,Karimnagar,18.434644
5,Ramagundam,Peddapalli,18.761516
6,Mahabubnagar,Mahabubnagar,16.745848
7,Nalgonda,Nalgonda,17.050441
8,Adilabad,Adilabad,19.675945
9,Siddipet,Siddipet,18.101774


In [10]:
df_cities['Longitude']=city_longitude
df_cities

Unnamed: 0,City,District,Latitude,Longitude
0,Hyderabad,Hyderabad,17.361608,78.474629
1,Warangal,Warangal Urban,17.980609,79.598212
2,Nizamabad,Nizamabad,18.673269,78.097848
3,Khammam,Khammam,17.246535,80.150033
4,Karimnagar,Karimnagar,18.434644,79.132265
5,Ramagundam,Peddapalli,18.761516,79.478785
6,Mahabubnagar,Mahabubnagar,16.745848,77.993668
7,Nalgonda,Nalgonda,17.050441,79.266924
8,Adilabad,Adilabad,19.675945,78.53399
9,Siddipet,Siddipet,18.101774,78.852013


#### Now let's plot the cities in a map of Telangana

In [11]:
telangana_lat=18.1124
telangana_lng=79.0193

map_telangana = folium.Map(location=[telangana_lat, telangana_lng], zoom_start=8)

for lat, lng, city, dis in zip(df_cities['Latitude'], df_cities['Longitude'], df_cities['City'],df_cities['District']):
    label='{}, {}'.format(city, dis)
    folium.Marker(
    location=[lat,lng],
    color="blue",
    popup=label
    ).add_to(map_telangana)

map_telangana

In [12]:
CLIENT_ID = 'XXXX'
CLIENT_SECRET = 'XXXX'
VERSION = '20180605'
LIMIT=100
RADIUS=100

import json

In [13]:
url='https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&ll={},{}&v={}&limit={}&radius={}'.format(CLIENT_ID,CLIENT_SECRET,df_cities.loc[0,"Latitude"],
                                                                                            df_cities.loc[0,"Longitude"],VERSION,LIMIT,RADIUS)

In [14]:
result=requests.get(url).json()
result

{'meta': {'code': 200, 'requestId': '5f27b62a8f44ac7ed56d107f'},
 'response': {'headerLocation': 'Hyderabad',
  'headerFullLocation': 'Hyderabad',
  'headerLocationGranularity': 'city',
  'totalResults': 5,
  'suggestedBounds': {'ne': {'lat': 17.3625079009, 'lng': 78.47556980115688},
   'sw': {'lat': 17.3607078991, 'lng': 78.47368739884313}},
  'groups': [{'type': 'Recommended Places',
    'name': 'recommended',
    'items': [{'reasons': {'count': 0,
       'items': [{'summary': 'This spot is popular',
         'type': 'general',
         'reasonName': 'globalInteractionReason'}]},
      'venue': {'id': '4e1d712415207c4cbec540bd',
       'name': 'Charminar',
       'location': {'address': 'Old City',
        'lat': 17.361554582179913,
        'lng': 78.4746766090393,
        'labeledLatLngs': [{'label': 'display',
          'lat': 17.361554582179913,
          'lng': 78.4746766090393}],
        'distance': 7,
        'postalCode': '500002',
        'cc': 'IN',
        'city': 'Hyderaba

In [131]:
LIMIT=300
def getNearbyVenues(names, latitudes, longitudes, radius=1000):
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
        try:
            results=requests.get(url).json()['response']['groups'][0]['items']
        except:
            continue;
        venues_list.append([(
        name, 
        lat, 
        lng,
        v['venue']['name'],
        v['venue']['location']['lat'],
        v['venue']['location']['lng'],
        v['venue']['categories'][0]['name']) for v in results])
        
    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

#### Let's get all the venues in the cities of Telangana

In [54]:
telangana_venues = getNearbyVenues(names=df_cities['City'],
                                   latitudes=df_cities['Latitude'],
                                   longitudes=df_cities['Longitude']
                                  )

Hyderabad
Warangal
Nizamabad
Khammam
Karimnagar
Ramagundam
Mahabubnagar
Nalgonda
Adilabad
Siddipet
Suryapet
Miryalaguda


In [18]:
telangana_venues

Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Hyderabad,17.361608,78.474629,Chowmahala Palace,17.3593,78.471617,History Museum
1,Hyderabad,17.361608,78.474629,Charminar,17.361555,78.474677,Monument / Landmark
2,Hyderabad,17.361608,78.474629,Govind Ki Idly,17.364132,78.473771,South Indian Restaurant
3,Hyderabad,17.361608,78.474629,Nimrah Hotel,17.361136,78.474373,Café
4,Hyderabad,17.361608,78.474629,Cafe coffee day,17.361987,78.474908,Coffee Shop
5,Hyderabad,17.361608,78.474629,Choodi bazar,17.361951,78.474642,Clothing Store
6,Hyderabad,17.361608,78.474629,Nimrah cafe & bakery,17.361721,78.474626,Bakery
7,Warangal,17.980609,79.598212,Univercell World,17.980902,79.598823,Electronics Store
8,Warangal,17.980609,79.598212,Sri Lakshmi Chat Bandar,17.979325,79.596505,Indian Restaurant
9,Warangal,17.980609,79.598212,Radhika Theatre,17.979121,79.600078,Movie Theater


### As the number of Venues are small, let's just concentrate our analysis for Hyderabad region only

In [118]:
url='https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&ll={},{}&v={}&limit={}&radius={}'.format(CLIENT_ID,CLIENT_SECRET,df_cities.loc[0,"Latitude"],
                                                                                            df_cities.loc[0,"Longitude"],VERSION,LIMIT,RADIUS)

In [108]:
areas=pd.read_csv("https://raw.githubusercontent.com/saisreeshma/Telangana_analysis/master/datasets/Hyderabad_areas.csv")

In [109]:
areas

Unnamed: 0,Areas
0,Ameerpet
1,Sanathnagar
2,Khairatabad
3,Musheerabad
4,Amberpet
...,...
233,Kismatpur
234,Narsingi
235,Puppalguda
236,Rajiv Gandhi International Airport


#### Let's find the geographical location of every neighborhood in Hyderabad

In [97]:
city_latitude=[]
city_longitude=[]

for i in areas.index:
    try:
        address = areas.loc[i,"Areas"]+" Hyderabad, Telangana, India"
        geolocator = Nominatim(user_agent="hyderabad_explorer")
        location = geolocator.geocode(address)
        city_latitude.append(location.latitude)
        city_longitude.append(location.longitude)
    except:
        city_latitude.append("NA")
        city_longitude.append("NA")
print(city_latitude)
print(city_longitude)

[17.4375012, 17.4569654, 17.4117706, 17.4191423, 17.390263150000003, 17.3918524, 17.4337246, 'NA', 'NA', 17.4490055, 17.4308362, 17.4436222, 'NA', 17.4930841, 17.5286092, 17.4767464, 'NA', 17.5447029, 17.5022292, 17.4483438, 17.4846356, 'NA', 17.4025091, 'NA', 17.3684307, 17.3501617, 17.36116545, 17.3281147, 17.3942627, 17.3349277, 17.23092405, 17.4440199, 17.4452312, 17.4425252, 17.4268849, 17.4469233, 17.3950455, 17.4487999, 17.45167, 17.4508069, 17.4255053, 17.4286268, 17.4105409, 17.4047878, 17.4028042, 17.4029622, 17.4096785, 17.400939, 17.4040987, 17.405826, 17.4078455, 17.4012791, 17.3616079, 17.4071166, 17.4095495, 17.3975005, 17.4007635, 17.4070321, 17.3974359, 17.4019354, 17.3941335, 17.3901064, 17.3926121, 17.3367947, 17.4102641, 17.39004015, 17.3883755, 17.3894783, 17.389178, 17.3847309, 17.3857171, 'NA', 17.4315325, 17.4227022, 'NA', 17.3585578, 'NA', 17.4222908, 17.422247, 17.4256007, 17.4343736, 17.4155855, 17.4397016, 'NA', 17.4414311, 17.4412194, 17.4297326, 17.4285479

In [110]:
areas['Latitude']=city_latitude
areas['Longitude']=city_longitude
areas

Unnamed: 0,Areas,Latitude,Longitude
0,Ameerpet,17.4375,78.4483
1,Sanathnagar,17.457,78.4435
2,Khairatabad,17.4118,78.4622
3,Musheerabad,17.4191,78.4986
4,Amberpet,17.3903,78.5165
...,...,...,...
233,Kismatpur,17.3315,78.4036
234,Narsingi,,
235,Puppalguda,,
236,Rajiv Gandhi International Airport,17.2309,78.4318


In [111]:
areas=areas[~(areas["Latitude"]=="NA")]
print(areas.shape)

(191, 3)


In [128]:
areas['Areas']

0                                Ameerpet
1                             Sanathnagar
2                             Khairatabad
3                             Musheerabad
4                                Amberpet
                      ...                
230                               Attapur
231                            Bandlaguda
232                              Gandipet
233                             Kismatpur
236    Rajiv Gandhi International Airport
Name: Areas, Length: 191, dtype: object

In [120]:
hyderabad_venues = getNearbyVenues(names=areas.loc['Areas'],
                                   latitudes=areas['Latitude'],
                                   longitudes=areas['Longitude']
                                  )

Ameerpet
Sanathnagar
Khairatabad
Musheerabad
Amberpet
Nampally
Secunderabad
HITEC City
Jubilee Hills
Gachibowli
Kukatpally
Patancheru
Balanagar
Kompally
Alwal
Malkajgiri
Kapra
Uppal
Dilsukhnagar
LB Nagar
Saroornagar
Hayathnagar
Mehdipatnam
Rajendranagar
Shamshabad
Begumpet
SR Nagar
Prakash Nagar
Punjagutta
Balkampet
Bharat Nagar
Erragadda
Borabanda
Moti Nagar
Somajiguda
Raj Bhavan Road
Lakdikapool
Saifabad
A.C. Guards
Masab Tank
Chintal Basti
Chikkadpally
Himayatnagar
Ashok Nagar
Domalguda
Hyderguda
Ramnagar
Azamabad
Adikmet
Nallakunta
Shanker Mutt
RTC X Roads
Bagh Lingampally
Vidyanagar
Tilaknagar
Golnaka
Barkatpura
Shivam Road
Jamia Osmania
Kachiguda
Badichowdi
Abids
Aghapura
Koti
Bank Street
Chilkalguda
Kavadiguda
Madannapet
Mylargadda
Namalagundu
Padmarao Nagar
Pan bazar
Parsigutta
Patny
RP Road
Sindhi Colony
Sitaphalmandi
Tarnaka
Bowenpally
Karkhana
Marredpally
Sikh Village
Trimulgherry
Vikrampuri
Aliabad
Alijah Kotla
Asif Nagar
Azampura
Barkas
Bazarghat
Begum Bazaar
Chaderghat
Ch

ConnectionError: HTTPSConnectionPool(host='api.foursquare.com', port=443): Max retries exceeded with url: /v2/venues/explore?&client_id=5B3DP3MB2VUYBXM1U5C0FRPFNKVNL1O1TAHNONMREM5RWOBP&client_secret=WCEG545ZKN1NCDU2VTEWMPS023YC2LLJDUHQAPOMNFLRZAIN&v=20180605&ll=17.4939602,78.4008412&radius=1000&limit=300 (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x00000250D556C4F0>: Failed to establish a new connection: [WinError 10060] A connection attempt failed because the connected party did not properly respond after a period of time, or established connection failed because connected host has failed to respond'))

In [133]:
hyderabad_venues_expand = getNearbyVenues(names=areas.loc[152:,'Areas'],
                                   latitudes=areas.loc[152:,'Latitude'],
                                   longitudes=areas.loc[152:,'Longitude']
                                  )

KPHB
Nizampet
Pragathi Nagar
Moosapet
BHEL Township
RC Puram
Fateh Nagar
Ferozguda
Old Bowenpally
Hasmathpet
Jeedimetla
Jagadgirigutta
Suraram
Old Alwal
Ammuguda
Gautham Nagar
Kakatiya Nagar
Vinayak Nagar
Moula-Ali
Neredmet
Old Neredmet
Safilguda
Sainikpuri
Yapral
A. S. Rao Nagar
Kamala Nagar
Kushaiguda
Cherlapally
Nagaram
Dammaiguda
Jawaharnagar
Rampally
Habsiguda
Ramanthapur
Boduppal
Nagole
Nacharam
Mallapur
Chengicherla
Pocharam
Kothapet
Gaddiannaram
Moosarambagh
Bairamalguda
Vanasthalipuram
Hastinapuram
Badangpet
Balapur
Champapet
Karmanghat
Lingojiguda
Meerpet
Ibrahim patnam
Gudimalkapur
Asif Nagar
Langar Houz
Mallepally
Padmanabha Nagar Colony
Red Hills
Attapur
Bandlaguda
Gandipet
Kismatpur
Rajiv Gandhi International Airport


In [158]:
venues=hyderabad_venues.append(hyderabad_venues_expand,ignore_index=True).reset_index()
venues.drop("index",axis=1,inplace=True)

In [159]:
venues.groupby("Neighborhood").count()

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
A. S. Rao Nagar,12,12,12,12,12,12
Alwal,5,5,5,5,5,5
Amberpet,6,6,6,6,6,6
Ameerpet,40,40,40,40,40,40
Ammuguda,4,4,4,4,4,4
...,...,...,...,...,...,...
Suraram,3,3,3,3,3,3
Uppal,5,5,5,5,5,5
Vanasthalipuram,7,7,7,7,7,7
Vinayak Nagar,4,4,4,4,4,4


In [160]:
venues.groupby("Venue Category").count()

Unnamed: 0_level_0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude
Venue Category,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
ATM,29,29,29,29,29,29
Accessories Store,4,4,4,4,4,4
Afghan Restaurant,2,2,2,2,2,2
Airport,2,2,2,2,2,2
Airport Food Court,2,2,2,2,2,2
...,...,...,...,...,...,...
Tourist Information Center,1,1,1,1,1,1
Trail,1,1,1,1,1,1
Train Station,11,11,11,11,11,11
Vegetarian / Vegan Restaurant,12,12,12,12,12,12


In [161]:
hyderabad_onehot=pd.get_dummies(venues[['Venue Category']], prefix="", prefix_sep="")
hyderabad_onehot.drop("Neighborhood", axis=1, inplace=True)
hyderabad_onehot['Neighborhood']=venues['Neighborhood']

fixed_columns=[hyderabad_onehot.columns[-1]]+list(hyderabad_onehot.columns[:-1])
hyderabad_onehot=hyderabad_onehot[fixed_columns]
hyderabad_onehot

Unnamed: 0,Neighborhood,ATM,Accessories Store,Afghan Restaurant,Airport,Airport Food Court,Airport Gate,Airport Service,Airport Terminal,Arcade,...,Stadium,Supermarket,Taxi Stand,Tea Room,Tennis Court,Tourist Information Center,Trail,Train Station,Vegetarian / Vegan Restaurant,Women's Store
0,Ameerpet,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,Ameerpet,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,Ameerpet,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,Ameerpet,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,Ameerpet,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
967,Rajiv Gandhi International Airport,0,0,0,0,0,0,1,0,0,...,0,0,0,0,0,0,0,0,0,0
968,Rajiv Gandhi International Airport,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
969,Rajiv Gandhi International Airport,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
970,Rajiv Gandhi International Airport,0,0,0,0,0,0,0,0,0,...,0,0,1,0,0,0,0,0,0,0


In [162]:
hyderabad_grouped=hyderabad_onehot.groupby("Neighborhood").mean().reset_index()
hyderabad_grouped

Unnamed: 0,Neighborhood,ATM,Accessories Store,Afghan Restaurant,Airport,Airport Food Court,Airport Gate,Airport Service,Airport Terminal,Arcade,...,Stadium,Supermarket,Taxi Stand,Tea Room,Tennis Court,Tourist Information Center,Trail,Train Station,Vegetarian / Vegan Restaurant,Women's Store
0,A. S. Rao Nagar,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000,...,0.0,0.0,0.0,0.0,0.0,0.000,0.0,0.0,0.000,0.0
1,Alwal,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000,...,0.0,0.0,0.0,0.0,0.0,0.000,0.0,0.0,0.000,0.0
2,Amberpet,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000,...,0.0,0.0,0.0,0.0,0.0,0.000,0.0,0.0,0.000,0.0
3,Ameerpet,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.025,...,0.0,0.0,0.0,0.0,0.0,0.025,0.0,0.0,0.025,0.0
4,Ammuguda,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000,...,0.0,0.0,0.0,0.0,0.0,0.000,0.0,0.0,0.000,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
84,Suraram,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000,...,0.0,0.0,0.0,0.0,0.0,0.000,0.0,0.0,0.000,0.0
85,Uppal,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000,...,0.0,0.0,0.0,0.0,0.0,0.000,0.0,0.0,0.000,0.0
86,Vanasthalipuram,0.142857,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000,...,0.0,0.0,0.0,0.0,0.0,0.000,0.0,0.0,0.000,0.0
87,Vinayak Nagar,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000,...,0.0,0.0,0.0,0.0,0.0,0.000,0.0,0.0,0.000,0.0


In [163]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

#### Let's find the top 10 venues in every neighborhood

In [164]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']


columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))


neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = hyderabad_grouped['Neighborhood']

for ind in np.arange(hyderabad_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(hyderabad_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,A. S. Rao Nagar,Café,Fast Food Restaurant,Indian Restaurant,Coffee Shop,Bank,Electronics Store,Department Store,Pizza Place,Hotel,Deli / Bodega
1,Alwal,Gym,Grocery Store,Bakery,Breakfast Spot,Indian Restaurant,Women's Store,Duty-free Shop,Farm,Falafel Restaurant,Ethiopian Restaurant
2,Amberpet,Indian Restaurant,Bakery,Movie Theater,Pizza Place,Women's Store,Diner,Farm,Falafel Restaurant,Ethiopian Restaurant,Electronics Store
3,Ameerpet,Indian Restaurant,Café,Clothing Store,Hotel,Sandwich Place,Bakery,Breakfast Spot,Chinese Restaurant,Pizza Place,Deli / Bodega
4,Ammuguda,Café,Fast Food Restaurant,Falafel Restaurant,Diner,Farmers Market,Farm,Ethiopian Restaurant,Electronics Store,Duty-free Shop,Women's Store
...,...,...,...,...,...,...,...,...,...,...,...
84,Suraram,Bus Station,Multiplex,Café,Dessert Shop,Farm,Falafel Restaurant,Ethiopian Restaurant,Electronics Store,Duty-free Shop,Diner
85,Uppal,Lounge,Chinese Restaurant,Light Rail Station,Flea Market,Bus Station,Dance Studio,Deli / Bodega,Department Store,Dessert Shop,Dairy Store
86,Vanasthalipuram,ATM,Burger Joint,Hotel,Indian Restaurant,Movie Theater,Park,Coffee Shop,Airport Service,Flower Shop,Airport
87,Vinayak Nagar,Indian Restaurant,Asian Restaurant,Movie Theater,Women's Store,Farm,Falafel Restaurant,Ethiopian Restaurant,Electronics Store,Duty-free Shop,Diner


#### Let's cluster the areas

In [165]:
from sklearn.cluster import KMeans
kclusters = 3

hyderabad_grouped_clustering = hyderabad_grouped.drop('Neighborhood', 1)


kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(hyderabad_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10].dtype 

dtype('int32')

In [166]:
areas.rename(columns={"Areas":"Neighborhood"}, inplace=True)
areas

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().rename(


Unnamed: 0,Neighborhood,Latitude,Longitude
0,Ameerpet,17.4375,78.4483
1,Sanathnagar,17.457,78.4435
2,Khairatabad,17.4118,78.4622
3,Musheerabad,17.4191,78.4986
4,Amberpet,17.3903,78.5165
...,...,...,...
230,Attapur,17.3672,78.4307
231,Bandlaguda,17.3715,78.5727
232,Gandipet,17.4121,78.3962
233,Kismatpur,17.3315,78.4036


In [167]:
neighborhoods_venues_sorted.drop('Cluster Labels', axis=1, inplace=True)

KeyError: "['Cluster Labels'] not found in axis"

#### Let's merge the main dataset with the most common venues

In [168]:
neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_.astype('int32'))
neighborhoods_venues_sorted["Cluster Labels"].dtype
hyderabad_merged = areas

hyderabad_merged = hyderabad_merged.join(neighborhoods_venues_sorted.set_index('Neighborhood'), on='Neighborhood')
print(hyderabad_merged.shape)




(191, 14)


In [174]:
hyderabad_merged.reset_index(inplace=True)
hyderabad_merged.drop("index",inplace=True, axis=1)
hyderabad_merged

Unnamed: 0,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Ameerpet,17.4375,78.4483,0.0,Indian Restaurant,Café,Clothing Store,Hotel,Sandwich Place,Bakery,Breakfast Spot,Chinese Restaurant,Pizza Place,Deli / Bodega
1,Sanathnagar,17.457,78.4435,0.0,Bus Station,Train Station,Motorcycle Shop,Department Store,Dessert Shop,Farm,Falafel Restaurant,Ethiopian Restaurant,Electronics Store,Duty-free Shop
2,Khairatabad,17.4118,78.4622,0.0,Multiplex,Indian Restaurant,Hotel,Coffee Shop,Vegetarian / Vegan Restaurant,Scenic Lookout,Middle Eastern Restaurant,Men's Store,Dessert Shop,Pizza Place
3,Musheerabad,17.4191,78.4986,0.0,Coffee Shop,Convenience Store,Ice Cream Shop,Diner,Farmers Market,Farm,Falafel Restaurant,Ethiopian Restaurant,Electronics Store,Duty-free Shop
4,Amberpet,17.3903,78.5165,0.0,Indian Restaurant,Bakery,Movie Theater,Pizza Place,Women's Store,Diner,Farm,Falafel Restaurant,Ethiopian Restaurant,Electronics Store
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
186,Attapur,17.3672,78.4307,0.0,Shopping Mall,Multiplex,Bakery,Movie Theater,Department Store,Diner,Women's Store,Duty-free Shop,Farm,Falafel Restaurant
187,Bandlaguda,17.3715,78.5727,0.0,ATM,Garden,Breakfast Spot,Coffee Shop,Trail,Grocery Store,Department Store,Ethiopian Restaurant,Electronics Store,Gym
188,Gandipet,17.4121,78.3962,0.0,Indian Restaurant,Bakery,Restaurant,Auditorium,Fast Food Restaurant,Women's Store,Duty-free Shop,Farm,Falafel Restaurant,Ethiopian Restaurant
189,Kismatpur,17.3315,78.4036,1.0,ATM,Hardware Store,Farmers Market,Farm,Falafel Restaurant,Ethiopian Restaurant,Electronics Store,Duty-free Shop,Diner,Dessert Shop


In [175]:
hyderabad_merged.dropna()

Unnamed: 0,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Ameerpet,17.4375,78.4483,0.0,Indian Restaurant,Café,Clothing Store,Hotel,Sandwich Place,Bakery,Breakfast Spot,Chinese Restaurant,Pizza Place,Deli / Bodega
1,Sanathnagar,17.457,78.4435,0.0,Bus Station,Train Station,Motorcycle Shop,Department Store,Dessert Shop,Farm,Falafel Restaurant,Ethiopian Restaurant,Electronics Store,Duty-free Shop
2,Khairatabad,17.4118,78.4622,0.0,Multiplex,Indian Restaurant,Hotel,Coffee Shop,Vegetarian / Vegan Restaurant,Scenic Lookout,Middle Eastern Restaurant,Men's Store,Dessert Shop,Pizza Place
3,Musheerabad,17.4191,78.4986,0.0,Coffee Shop,Convenience Store,Ice Cream Shop,Diner,Farmers Market,Farm,Falafel Restaurant,Ethiopian Restaurant,Electronics Store,Duty-free Shop
4,Amberpet,17.3903,78.5165,0.0,Indian Restaurant,Bakery,Movie Theater,Pizza Place,Women's Store,Diner,Farm,Falafel Restaurant,Ethiopian Restaurant,Electronics Store
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
186,Attapur,17.3672,78.4307,0.0,Shopping Mall,Multiplex,Bakery,Movie Theater,Department Store,Diner,Women's Store,Duty-free Shop,Farm,Falafel Restaurant
187,Bandlaguda,17.3715,78.5727,0.0,ATM,Garden,Breakfast Spot,Coffee Shop,Trail,Grocery Store,Department Store,Ethiopian Restaurant,Electronics Store,Gym
188,Gandipet,17.4121,78.3962,0.0,Indian Restaurant,Bakery,Restaurant,Auditorium,Fast Food Restaurant,Women's Store,Duty-free Shop,Farm,Falafel Restaurant,Ethiopian Restaurant
189,Kismatpur,17.3315,78.4036,1.0,ATM,Hardware Store,Farmers Market,Farm,Falafel Restaurant,Ethiopian Restaurant,Electronics Store,Duty-free Shop,Diner,Dessert Shop


In [176]:
hyderabad_merged = hyderabad_merged[~hyderabad_merged['Cluster Labels'].isnull()]
hyderabad_merged.shape

(89, 14)

#### Now let's plot the areas in map with a colour assigned to each cluster

In [194]:
latitude=17.3850
longitude=78.4867

map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(hyderabad_merged['Latitude'], hyderabad_merged['Longitude'], hyderabad_merged['Neighborhood'], hyderabad_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        fill=True,
        color=rainbow[int(cluster)-1],
        fill_color=rainbow[int(cluster)-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

In [195]:
cluster_1=hyderabad_merged[hyderabad_merged.loc[:,"Cluster Labels"]==0.0].reset_index(drop=True)
cluster_1

Unnamed: 0,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Ameerpet,17.4375,78.4483,0.0,Indian Restaurant,Café,Clothing Store,Hotel,Sandwich Place,Bakery,Breakfast Spot,Chinese Restaurant,Pizza Place,Deli / Bodega
1,Sanathnagar,17.457,78.4435,0.0,Bus Station,Train Station,Motorcycle Shop,Department Store,Dessert Shop,Farm,Falafel Restaurant,Ethiopian Restaurant,Electronics Store,Duty-free Shop
2,Khairatabad,17.4118,78.4622,0.0,Multiplex,Indian Restaurant,Hotel,Coffee Shop,Vegetarian / Vegan Restaurant,Scenic Lookout,Middle Eastern Restaurant,Men's Store,Dessert Shop,Pizza Place
3,Musheerabad,17.4191,78.4986,0.0,Coffee Shop,Convenience Store,Ice Cream Shop,Diner,Farmers Market,Farm,Falafel Restaurant,Ethiopian Restaurant,Electronics Store,Duty-free Shop
4,Amberpet,17.3903,78.5165,0.0,Indian Restaurant,Bakery,Movie Theater,Pizza Place,Women's Store,Diner,Farm,Falafel Restaurant,Ethiopian Restaurant,Electronics Store
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
69,Red Hills,17.3983,78.4642,0.0,Indian Restaurant,Hotel,Hyderabadi Restaurant,Bakery,Nightlife Spot,Science Museum,Café,Performing Arts Venue,Snack Place,Ice Cream Shop
70,Attapur,17.3672,78.4307,0.0,Shopping Mall,Multiplex,Bakery,Movie Theater,Department Store,Diner,Women's Store,Duty-free Shop,Farm,Falafel Restaurant
71,Bandlaguda,17.3715,78.5727,0.0,ATM,Garden,Breakfast Spot,Coffee Shop,Trail,Grocery Store,Department Store,Ethiopian Restaurant,Electronics Store,Gym
72,Gandipet,17.4121,78.3962,0.0,Indian Restaurant,Bakery,Restaurant,Auditorium,Fast Food Restaurant,Women's Store,Duty-free Shop,Farm,Falafel Restaurant,Ethiopian Restaurant


In [186]:
cluster_2=hyderabad_merged[hyderabad_merged.loc[:,"Cluster Labels"]==1.0].reset_index(drop=True)
cluster_2

Unnamed: 0,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Patancheru,17.5286,78.2674,1.0,ATM,Music Venue,Diner,Farmers Market,Farm,Falafel Restaurant,Ethiopian Restaurant,Electronics Store,Duty-free Shop,Dessert Shop
1,Hayathnagar,17.3281,78.6045,1.0,Pharmacy,ATM,Dessert Shop,Farm,Falafel Restaurant,Ethiopian Restaurant,Electronics Store,Duty-free Shop,Diner,Department Store
2,Rajendranagar,17.3349,78.4096,1.0,ATM,IT Services,Diner,Farmers Market,Farm,Falafel Restaurant,Ethiopian Restaurant,Electronics Store,Duty-free Shop,Dessert Shop
3,Moula-Ali,17.4668,78.5592,1.0,ATM,Café,Park,Coffee Shop,Greek Restaurant,Deli / Bodega,Ethiopian Restaurant,Electronics Store,Duty-free Shop,Diner
4,Mallapur,17.4405,78.5789,1.0,ATM,Scenic Lookout,Big Box Store,Diner,Dairy Store,Dance Studio,Deli / Bodega,Department Store,Dessert Shop,Fast Food Restaurant
5,Chengicherla,17.4588,78.5662,1.0,ATM,Pharmacy,Café,Dessert Shop,Farm,Falafel Restaurant,Ethiopian Restaurant,Electronics Store,Duty-free Shop,Diner
6,Pocharam,17.5278,78.2674,1.0,ATM,Music Venue,Diner,Farmers Market,Farm,Falafel Restaurant,Ethiopian Restaurant,Electronics Store,Duty-free Shop,Dessert Shop
7,Badangpet,17.3224,78.528,1.0,ATM,Pharmacy,Mattress Store,Convenience Store,Cupcake Shop,Concert Hall,Dairy Store,Dance Studio,Deli / Bodega,Farmers Market
8,Balapur,17.3159,78.5251,1.0,ATM,Pharmacy,Dessert Shop,Farm,Falafel Restaurant,Ethiopian Restaurant,Electronics Store,Duty-free Shop,Diner,Department Store
9,Karmanghat,17.341,78.533,1.0,ATM,Clothing Store,Indian Restaurant,Diner,Farmers Market,Farm,Falafel Restaurant,Ethiopian Restaurant,Electronics Store,Duty-free Shop


### Let's analyze the two clusters

In [198]:
cluster_1.groupby("1st Most Common Venue").count().sort_values(by="Neighborhood",ascending=False)

Unnamed: 0_level_0,Neighborhood,Latitude,Longitude,Cluster Labels,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
1st Most Common Venue,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
Indian Restaurant,23,23,23,23,23,23,23,23,23,23,23,23,23
Café,11,11,11,11,11,11,11,11,11,11,11,11,11
ATM,7,7,7,7,7,7,7,7,7,7,7,7,7
Lake,4,4,4,4,4,4,4,4,4,4,4,4,4
Train Station,3,3,3,3,3,3,3,3,3,3,3,3,3
Bus Station,2,2,2,2,2,2,2,2,2,2,2,2,2
Shopping Mall,2,2,2,2,2,2,2,2,2,2,2,2,2
Fast Food Restaurant,2,2,2,2,2,2,2,2,2,2,2,2,2
Pizza Place,2,2,2,2,2,2,2,2,2,2,2,2,2
History Museum,2,2,2,2,2,2,2,2,2,2,2,2,2


#### The most frequent "1st most common venue in firsts cluster is Indian Restaurant" followed by "Cafe". This is also the same sequence in 2nd most common venues too

In [199]:
cluster_1.groupby("3rd Most Common Venue").count().sort_values(by="Neighborhood",ascending=False)

Unnamed: 0_level_0,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
3rd Most Common Venue,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
Movie Theater,5,5,5,5,5,5,5,5,5,5,5,5,5
Café,5,5,5,5,5,5,5,5,5,5,5,5,5
Shopping Mall,4,4,4,4,4,4,4,4,4,4,4,4,4
Bakery,4,4,4,4,4,4,4,4,4,4,4,4,4
Indian Restaurant,4,4,4,4,4,4,4,4,4,4,4,4,4
Hotel,4,4,4,4,4,4,4,4,4,4,4,4,4
Women's Store,3,3,3,3,3,3,3,3,3,3,3,3,3
Park,3,3,3,3,3,3,3,3,3,3,3,3,3
Diner,3,3,3,3,3,3,3,3,3,3,3,3,3
Falafel Restaurant,3,3,3,3,3,3,3,3,3,3,3,3,3


#### Movie theatres and shopping malls are also popular in cluster 1

In [202]:
cluster_1.groupby("6th Most Common Venue").count().sort_values(by="Neighborhood",ascending=False)

Unnamed: 0_level_0,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
6th Most Common Venue,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
Falafel Restaurant,11,11,11,11,11,11,11,11,11,11,11,11,11
Farm,8,8,8,8,8,8,8,8,8,8,8,8,8
Diner,8,8,8,8,8,8,8,8,8,8,8,8,8
Women's Store,3,3,3,3,3,3,3,3,3,3,3,3,3
Fast Food Restaurant,3,3,3,3,3,3,3,3,3,3,3,3,3
Bakery,3,3,3,3,3,3,3,3,3,3,3,3,3
Café,3,3,3,3,3,3,3,3,3,3,3,3,3
Pizza Place,3,3,3,3,3,3,3,3,3,3,3,3,3
Dessert Shop,3,3,3,3,3,3,3,3,3,3,3,3,3
Stadium,2,2,2,2,2,2,2,2,2,2,2,2,2


#### Falafel Restaurant and Diners seems to have taken 5th and 6th most frequent available places in Hyderabad

### Now let's see the most frequent top venues in the second cluster

In [203]:
cluster_2.groupby("1st Most Common Venue").count().sort_values(by="Neighborhood",ascending=False)

Unnamed: 0_level_0,Neighborhood,Latitude,Longitude,Cluster Labels,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
1st Most Common Venue,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
ATM,11,11,11,11,11,11,11,11,11,11,11,11,11
Pharmacy,1,1,1,1,1,1,1,1,1,1,1,1,1


#### ATMs are the most available venue in cluster 2

In [206]:
cluster_2.groupby("4th Most Common Venue").count().sort_values(by="Neighborhood",ascending=False)

Unnamed: 0_level_0,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
4th Most Common Venue,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
Farm,3,3,3,3,3,3,3,3,3,3,3,3,3
Farmers Market,3,3,3,3,3,3,3,3,3,3,3,3,3
Diner,2,2,2,2,2,2,2,2,2,2,2,2,2
Coffee Shop,1,1,1,1,1,1,1,1,1,1,1,1,1
Convenience Store,1,1,1,1,1,1,1,1,1,1,1,1,1
Dessert Shop,1,1,1,1,1,1,1,1,1,1,1,1,1
Restaurant,1,1,1,1,1,1,1,1,1,1,1,1,1


#### Music Venues and Farms are the next most available venues in cluster 2

## Final Suggestion: Any investment other than Indian Restaurant and Cafes is better, considering there's a market for it. Movie theatres, Shopping Malls and music venues are lagging behind in the frequency. So investing in them would make a better sense as there's a market for it but the availability is lesser.