# IBM Data Science capstone course 

## _Opening a new cricket stadium in India_
***
### 1. Import libraries

In [1]:
import numpy as np 
import pandas as pd 
pd.set_option("display.max_columns", None)
pd.set_option("display.max_rows", None)
import json 
from geopy.geocoders import Nominatim 
import geocoder
import requests 
from bs4 import BeautifulSoup 
from sklearn.cluster import KMeans
from sklearn.preprocessing import OneHotEncoder
import folium
import matplotlib.cm as cm
import matplotlib.colors as colors
from sklearn import metrics
from scipy.spatial.distance import cdist
import matplotlib.pyplot as plt
print("Libraries imported.")

Libraries imported.


In [2]:
# define a function to get coordinates of cities
def get_latlng(city):
    co_ords = None
    while(co_ords is None):
        g = geocoder.arcgis('{}'.format(city))
        co_ords = g.latlng
    return co_ords

### 2. Obtain top 100 cities in India

In [3]:
data = requests.get("https://www.nriol.com/india-statistics/biggest-cities-india.asp").text

In [4]:
soup = BeautifulSoup(data, 'html.parser')
cities_list = []

In [5]:
table = soup.find("table", class_="table table-bordered back-white")
table = table.tbody.find_all("tr")
temp1 = []
temp2 = []
for i in range(0,7):
    temp1.append(table[i].find_all("u")[0].text.strip())

for j in range(7,len(table)):
    temp2.append(table[j].find_all("td")[1].text.strip())
    
cities_list = temp1 + temp2

len(cities_list)

100

In [6]:
cities_list[:5]

['Mumbai', 'Delhi', 'Bangalore', 'Hyderabad', 'Ahmedabad']

In [7]:
cities_df = pd.DataFrame({"City": cities_list})
print(cities_df.shape)
cities_df

(100, 1)


Unnamed: 0,City
0,Mumbai
1,Delhi
2,Bangalore
3,Hyderabad
4,Ahmedabad
5,Chennai
6,Kolkata
7,Surat
8,Pune
9,Jaipur


In [8]:
coords_cities = [ get_latlng(city) for city in cities_df["City"].tolist() ]
for i in coords_cities:
    i[0] = '{0:.8f}'.format(i[0])
    i[1] = '{0:.8f}'.format(i[1])

df_coords = pd.DataFrame(coords_cities, columns=['Latitude', 'Longitude'])

In [9]:
cities_df['Latitude'] = df_coords['Latitude']
cities_df['Longitude'] = df_coords['Longitude']

print(cities_df.shape)
cities_df.head()

(100, 3)


Unnamed: 0,City,Latitude,Longitude
0,Mumbai,18.94017,72.83483
1,Delhi,28.6341,77.21689
2,Bangalore,12.96618,77.5869
3,Hyderabad,17.39487,78.47076
4,Ahmedabad,23.02776,72.60027


In [10]:
cities_map = folium.Map(location=[20.5937, 78.9629], zoom_start=4)

for lat, long, city in zip(cities_df['Latitude'], cities_df['Longitude'], cities_df['City']):
    label = '{}'.format(cities_df)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, long],
        radius=2,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7).add_to(cities_map)  
    
cities_map

### 3. Get data of cities with existing cricket stadiums

In [11]:
data = requests.get("https://en.wikipedia.org/wiki/List_of_international_cricket_grounds_in_India").text
soup = BeautifulSoup(data, 'html.parser')
stadium_cities_list = []

In [12]:
info = soup.find("table",{"class":"wikitable sortable"})
info = info.find_all("a")
temp = []
for link in info:
    temp.append(link.get('title'))
for i in range(1,len(temp),3):
    stadium_cities_list.append(temp[i])
stadium_cities_list

['Kolkata',
 'Chennai',
 'New Delhi',
 'Mumbai',
 'Kanpur',
 'Bengaluru',
 'Mumbai',
 'Cuttack',
 'Ahmedabad',
 'Mohali',
 'Visakhapatnam',
 'Hyderabad',
 'Indore',
 'Nagpur',
 'Pune',
 'Rajkot',
 'Ranchi',
 'Dharamshala',
 'Greater Noida',
 'Guwahati',
 'Thiruvananthapuram',
 'Dehradun',
 'Lucknow']

In [13]:
print(len(stadium_cities_list))
stadium_cities_df = pd.DataFrame({"City with stadium": stadium_cities_list})
stadium_cities_df

23


Unnamed: 0,City with stadium
0,Kolkata
1,Chennai
2,New Delhi
3,Mumbai
4,Kanpur
5,Bengaluru
6,Mumbai
7,Cuttack
8,Ahmedabad
9,Mohali


In [14]:
coords_stadium_cities = [ get_latlng(city) for city in stadium_cities_df["City with stadium"].tolist() ]
for i in coords_stadium_cities:
    i[0] = '{0:.8f}'.format(i[0])
    i[1] = '{0:.8f}'.format(i[1])
    
df_coords = pd.DataFrame(coords_stadium_cities, columns=['Latitude', 'Longitude'])
stadium_cities_df['Latitude'] = df_coords['Latitude']
stadium_cities_df['Longitude'] = df_coords['Longitude']
print(stadium_cities_df.shape)
stadium_cities_df.head()

(23, 3)


Unnamed: 0,City with stadium,Latitude,Longitude
0,Kolkata,22.57053,88.37124
1,Chennai,13.08362,80.28252
2,New Delhi,28.63095,77.21721
3,Mumbai,18.94017,72.83483
4,Kanpur,26.43562,80.32986


In [15]:
for lat, long, city in zip(stadium_cities_df['Latitude'], stadium_cities_df['Longitude'], stadium_cities_df['City with stadium']):
    label = '{}'.format(stadium_cities_df)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, long],
        radius=1,
        popup=label,
        color='red',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.2).add_to(cities_map)  
    
cities_map

### 4. Get data of cities with airports in India

In [16]:
data = requests.get("https://www.mapsofindia.com/air-network/international-airport-map.htm").text
soup = BeautifulSoup(data, 'html.parser')
intl_cities = []

In [17]:
intl_airports = soup.find_all("table")[3]
intl_airports = intl_airports.find_all("td")
for i in range(1,len(intl_airports),3):
    intl_cities.append(intl_airports[i].text)

In [18]:
data = requests.get("https://www.mapsofindia.com/air-network/domestic-airport-map.htm").text
soup = BeautifulSoup(data, 'html.parser')
dom_cities = []

In [19]:
dom_airports = soup.find_all("table")[9]
dom_airports = dom_airports.find_all("td")
for i in range(1,len(dom_airports)-5,5):
    dom_cities.append(dom_airports[i].text)

In [20]:
airport_cities_list = dom_cities + intl_cities
airport_cities_list = list(set(airport_cities_list))
len(list(set(airport_cities_list)))

114

In [21]:
airport_cities_df = pd.DataFrame({"City with airports": airport_cities_list})
airport_cities_df.head()

Unnamed: 0,City with airports
0,Raxaul
1,Tiruchirapalli
2,Jodhpur
3,Keshod
4,Salem


In [22]:
coords_airport_cities = [ get_latlng(city) for city in airport_cities_df["City with airports"].tolist() ]
for i in coords_airport_cities:
    i[0] = '{0:.8f}'.format(i[0])
    i[1] = '{0:.8f}'.format(i[1])
df_coords = pd.DataFrame(coords_airport_cities, columns=['Latitude', 'Longitude'])
airport_cities_df['Latitude'] = df_coords['Latitude']
airport_cities_df['Longitude'] = df_coords['Longitude']
print(airport_cities_df.shape)
airport_cities_df.head()

(114, 3)


Unnamed: 0,City with airports,Latitude,Longitude
0,Raxaul,26.98258,84.85205
1,Tiruchirapalli,10.80575,78.69473
2,Jodhpur,26.26691,73.03052
3,Keshod,21.30126,70.25074
4,Salem,11.66552,78.15164


In [23]:
for lat, long, city in zip(airport_cities_df['Latitude'], airport_cities_df['Longitude'], airport_cities_df['City with airports']):
    label = '{}'.format(airport_cities_df)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, long],
        radius=1,
        popup=label,
        color='green',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.2).add_to(cities_map)  
    
cities_map

### 5. Removing cities without airports and cities which already have cricket stadiums

In [24]:
lat_check_1 = cities_df['Latitude'].isin(airport_cities_df['Latitude'])
long_check_1 = cities_df['Longitude'].isin(airport_cities_df['Longitude'])

for i in range(0,len(lat_check_1)):
    if lat_check_1[i]==False:
        if long_check_1[i]==False:
            cities_df.drop(index=i,inplace=True)
cities_df = cities_df.reset_index(drop=True)

print(cities_df.shape)
cities_df

(56, 3)


Unnamed: 0,City,Latitude,Longitude
0,Mumbai,18.94017,72.83483
1,Bangalore,12.96618,77.5869
2,Hyderabad,17.39487,78.47076
3,Ahmedabad,23.02776,72.60027
4,Chennai,13.08362,80.28252
5,Kolkata,22.57053,88.37124
6,Surat,21.18578,72.83679
7,Pune,18.50422,73.85302
8,Jaipur,26.92573,75.80659
9,Lucknow,26.85471,80.92135


In [25]:
lat_check_2 = cities_df['Latitude'].isin(stadium_cities_df['Latitude'])
long_check_2 = cities_df['Longitude'].isin(stadium_cities_df['Longitude'])

for i in range(0,len(lat_check_2)):
    if lat_check_2[i]==True:
        if long_check_2[i]==True:
            cities_df.drop(index=i,inplace=True)

cities_df = cities_df.reset_index(drop=True)
print(cities_df.shape)
cities_df

(39, 3)


Unnamed: 0,City,Latitude,Longitude
0,Surat,21.18578,72.83679
1,Jaipur,26.92573,75.80659
2,Bhopal,23.26466,77.40518
3,Vadodara,22.30946,73.17993
4,Ludhiana,30.90725,75.84919
5,Agra,27.19217,78.00007
6,Varanasi,25.33289,82.99654
7,Srinagar,34.08443,74.79906
8,Aurangabad,19.8701,75.34602
9,Amritsar,31.63347,74.87507


In [26]:
final_cities_map = folium.Map(location=[20.5937, 78.9629], zoom_start=4)

for lat, long, city in zip(cities_df['Latitude'], cities_df['Longitude'], cities_df['City']):
    label = '{}'.format(cities_df)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, long],
        radius=1,
        popup=label,
        color='black',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.2).add_to(final_cities_map)  
    
final_cities_map

### 6. Obtaining venues via Foursquare API

In [27]:
CLIENT_ID = 'your client id' 
CLIENT_SECRET = 'your client secret'
VERSION = '20180605' 

In [28]:
radius = 15000
LIMIT = 100

venues = []

for lat, long, city in zip(cities_df['Latitude'], cities_df['Longitude'], cities_df['City']):
    url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
        CLIENT_ID, CLIENT_SECRET, VERSION, lat, long, radius, LIMIT)
    results = requests.get(url).json()['response']['groups'][0]['items'] 
    for venue in results:
        venues.append((
            city,
            lat, 
            long, 
            venue['venue']['name'], 
            venue['venue']['location']['lat'], 
            venue['venue']['location']['lng'],  
            venue['venue']['categories'][0]['name']))

In [29]:
venues_df = pd.DataFrame(venues)

venues_df.columns = ['City', 'Latitude', 'Longitude', 'VenueName', 'VenueLatitude', 'VenueLongitude', 'VenueCategory']
print(venues_df.shape)
venues_df.head()

(1480, 7)


Unnamed: 0,City,Latitude,Longitude,VenueName,VenueLatitude,VenueLongitude,VenueCategory
0,Surat,21.18578,72.83679,A-One Coco,21.197061,72.821175,Ice Cream Shop
1,Surat,21.18578,72.83679,Subway,21.175533,72.809047,Sandwich Place
2,Surat,21.18578,72.83679,Gokulam Dairy,21.178771,72.810985,Dairy Store
3,Surat,21.18578,72.83679,Golden Dragon,21.175125,72.804577,Chinese Restaurant
4,Surat,21.18578,72.83679,Mysore Cafe,21.184957,72.808547,Indian Restaurant


In [30]:
print('There are {} unique categories'.format(len(venues_df['VenueCategory'].unique())))
venues_df['VenueCategory'].unique()

There are 162 unique categories


array(['Ice Cream Shop', 'Sandwich Place', 'Dairy Store',
       'Chinese Restaurant', 'Indian Restaurant', 'Tea Room',
       'Restaurant', 'Coffee Shop', 'Pizza Place', 'Juice Bar',
       'Burger Joint', 'Park', 'Multiplex', 'Shopping Mall',
       'Vegetarian / Vegan Restaurant', 'Asian Restaurant', 'Hotel',
       'Food Court', 'Bakery', 'Supermarket', 'Frozen Yogurt Shop',
       'Dessert Shop', 'Fast Food Restaurant', 'Café',
       'Italian Restaurant', 'Breakfast Spot', 'Diner',
       'Department Store', 'Arcade', 'Airport', 'Bus Station', 'Hostel',
       'Temple', 'Indie Movie Theater', 'Golf Course', 'Cricket Ground',
       'Historic Site', 'Arts & Crafts Store', 'BBQ Joint', 'Bar',
       'Palace', 'Lounge', 'Snack Place', 'Art Gallery', 'Garden',
       'Deli / Bodega', 'Airport Terminal', 'Pub', 'Market',
       'Clothing Store', 'Resort', 'Plaza', 'Train Station',
       'National Park', 'Bookstore', 'Gym', 'Food Truck',
       'Mexican Restaurant', 'History Museum', 

In [31]:
print(venues_df.shape)
venues_df.groupby(["City"]).count()

(1480, 7)


Unnamed: 0_level_0,Latitude,Longitude,VenueName,VenueLatitude,VenueLongitude,VenueCategory
City,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Agra,47,47,47,47,47,47
Ajmer,26,26,26,26,26,26
Allahabad,19,19,19,19,19,19
Amritsar,46,46,46,46,46,46
Aurangabad,24,24,24,24,24,24
Belgaum,23,23,23,23,23,23
Bhavnagar,9,9,9,9,9,9
Bhopal,48,48,48,48,48,48
Bhubaneswar,59,59,59,59,59,59
Chandigarh,75,75,75,75,75,75


### 7. One hot encoding

In [32]:
onehot_df = pd.get_dummies(venues_df[['VenueCategory']], prefix="", prefix_sep="")

onehot_df['City'] = venues_df['City'] 

fixed_columns = [onehot_df.columns[-1]] + list(onehot_df.columns[:-1])
onehot_df = onehot_df[fixed_columns]
print(onehot_df.shape)
onehot_df.head()

(1480, 162)


Unnamed: 0,Zoo,Accessories Store,Airport,Airport Terminal,American Restaurant,Andhra Restaurant,Arcade,Art Gallery,Art Museum,Arts & Crafts Store,Asian Restaurant,Athletics & Sports,BBQ Joint,Bakery,Bar,Beach,Bed & Breakfast,Big Box Store,Bistro,Boarding House,Boat or Ferry,Bookstore,Bowling Alley,Breakfast Spot,Brewery,Burger Joint,Burrito Place,Bus Station,Business Service,Cafeteria,Café,Cajun / Creole Restaurant,Cave,Chinese Restaurant,Chocolate Shop,City,Clothing Store,Coffee Shop,Convenience Store,Cricket Ground,Cupcake Shop,Dairy Store,Deli / Bodega,Department Store,Dessert Shop,Diner,Donut Shop,Electronics Store,Event Space,Exhibit,Factory,Farm,Farmers Market,Fast Food Restaurant,Fish & Chips Shop,Flea Market,Food,Food & Drink Shop,Food Court,Food Stand,Food Truck,French Restaurant,Fried Chicken Joint,Frozen Yogurt Shop,Gaming Cafe,Garden,Gastropub,General Entertainment,General Travel,Golf Course,Gym,Gym / Fitness Center,Heliport,Historic Site,History Museum,Hookah Bar,Hostel,Hotel,Hotel Bar,Hyderabadi Restaurant,Ice Cream Shop,Indian Restaurant,Indian Sweet Shop,Indie Movie Theater,Irish Pub,Italian Restaurant,Juice Bar,Karnataka Restaurant,Korean Restaurant,Lake,Lighthouse,Lounge,Market,Mattress Store,Mediterranean Restaurant,Mexican Restaurant,Middle Eastern Restaurant,Miscellaneous Shop,Monument / Landmark,Motel,Motorcycle Shop,Movie Theater,Mughlai Restaurant,Multicuisine Indian Restaurant,Multiplex,Museum,National Park,Nature Preserve,Nightclub,North Indian Restaurant,Optical Shop,Organic Grocery,Outdoors & Recreation,Palace,Park,Performing Arts Venue,Pizza Place,Platform,Playground,Plaza,Pool,Pub,Punjabi Restaurant,Racetrack,Rajasthani Restaurant,Resort,Restaurant,River,Roof Deck,Sake Bar,Salad Place,Salon / Barbershop,Sandwich Place,Scenic Lookout,Sculpture Garden,Seafood Restaurant,Shopping Mall,Shopping Plaza,Smoke Shop,Snack Place,South Indian Restaurant,Southern / Soul Food Restaurant,Spa,Spanish Restaurant,Spiritual Center,Sports Bar,Stadium,Steakhouse,Street Food Gathering,Supermarket,Tea Room,Temple,Thai Restaurant,Theater,Theme Park,Toll Booth,Town,Train Station,Udupi Restaurant,Vegetarian / Vegan Restaurant,Water Park,Women's Store
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,Surat,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,Surat,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,Surat,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,Surat,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,Surat,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [33]:
city_grouped = onehot_df.groupby(["City"]).mean().reset_index()

print(city_grouped.shape)
city_grouped.head()

(39, 162)


Unnamed: 0,City,Zoo,Accessories Store,Airport,Airport Terminal,American Restaurant,Andhra Restaurant,Arcade,Art Gallery,Art Museum,Arts & Crafts Store,Asian Restaurant,Athletics & Sports,BBQ Joint,Bakery,Bar,Beach,Bed & Breakfast,Big Box Store,Bistro,Boarding House,Boat or Ferry,Bookstore,Bowling Alley,Breakfast Spot,Brewery,Burger Joint,Burrito Place,Bus Station,Business Service,Cafeteria,Café,Cajun / Creole Restaurant,Cave,Chinese Restaurant,Chocolate Shop,Clothing Store,Coffee Shop,Convenience Store,Cricket Ground,Cupcake Shop,Dairy Store,Deli / Bodega,Department Store,Dessert Shop,Diner,Donut Shop,Electronics Store,Event Space,Exhibit,Factory,Farm,Farmers Market,Fast Food Restaurant,Fish & Chips Shop,Flea Market,Food,Food & Drink Shop,Food Court,Food Stand,Food Truck,French Restaurant,Fried Chicken Joint,Frozen Yogurt Shop,Gaming Cafe,Garden,Gastropub,General Entertainment,General Travel,Golf Course,Gym,Gym / Fitness Center,Heliport,Historic Site,History Museum,Hookah Bar,Hostel,Hotel,Hotel Bar,Hyderabadi Restaurant,Ice Cream Shop,Indian Restaurant,Indian Sweet Shop,Indie Movie Theater,Irish Pub,Italian Restaurant,Juice Bar,Karnataka Restaurant,Korean Restaurant,Lake,Lighthouse,Lounge,Market,Mattress Store,Mediterranean Restaurant,Mexican Restaurant,Middle Eastern Restaurant,Miscellaneous Shop,Monument / Landmark,Motel,Motorcycle Shop,Movie Theater,Mughlai Restaurant,Multicuisine Indian Restaurant,Multiplex,Museum,National Park,Nature Preserve,Nightclub,North Indian Restaurant,Optical Shop,Organic Grocery,Outdoors & Recreation,Palace,Park,Performing Arts Venue,Pizza Place,Platform,Playground,Plaza,Pool,Pub,Punjabi Restaurant,Racetrack,Rajasthani Restaurant,Resort,Restaurant,River,Roof Deck,Sake Bar,Salad Place,Salon / Barbershop,Sandwich Place,Scenic Lookout,Sculpture Garden,Seafood Restaurant,Shopping Mall,Shopping Plaza,Smoke Shop,Snack Place,South Indian Restaurant,Southern / Soul Food Restaurant,Spa,Spanish Restaurant,Spiritual Center,Sports Bar,Stadium,Steakhouse,Street Food Gathering,Supermarket,Tea Room,Temple,Thai Restaurant,Theater,Theme Park,Toll Booth,Town,Train Station,Udupi Restaurant,Vegetarian / Vegan Restaurant,Water Park,Women's Store
0,Agra,0.0,0.0,0.021277,0.0,0.0,0.0,0.0,0.0,0.0,0.021277,0.0,0.0,0.0,0.0,0.0,0.0,0.021277,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.021277,0.0,0.0,0.0,0.0,0.0,0.021277,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.06383,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.021277,0.0,0.0,0.021277,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.106383,0.0,0.0,0.0,0.297872,0.0,0.0,0.0,0.12766,0.0,0.0,0.0,0.021277,0.0,0.0,0.0,0.0,0.0,0.0,0.021277,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.021277,0.06383,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.042553,0.0,0.0,0.0,0.021277,0.0,0.0,0.0,0.0,0.042553,0.021277,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,Ajmer,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.038462,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.038462,0.0,0.0,0.0,0.0,0.076923,0.0,0.0,0.0,0.0,0.0,0.038462,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.038462,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.230769,0.0,0.0,0.0,0.192308,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.076923,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.076923,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.038462,0.038462,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.076923,0.0,0.0
2,Allahabad,0.0,0.0,0.052632,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.052632,0.0,0.0,0.0,0.0,0.052632,0.052632,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.157895,0.0,0.052632,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.052632,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.052632,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.157895,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.052632,0.052632,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.052632,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.157895,0.0,0.0,0.0,0.0
3,Amritsar,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.021739,0.0,0.0,0.0,0.0,0.0,0.0,0.086957,0.0,0.0,0.0,0.0,0.0,0.043478,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.065217,0.0,0.0,0.0,0.0,0.0,0.021739,0.0,0.086957,0.021739,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.021739,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.021739,0.0,0.0,0.0,0.086957,0.0,0.0,0.0,0.108696,0.0,0.0,0.0,0.021739,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.021739,0.0,0.0,0.0,0.0,0.0,0.021739,0.043478,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.108696,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.043478,0.0,0.0,0.0,0.0,0.0,0.0,0.021739,0.0,0.0,0.0,0.065217,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.043478,0.0,0.0
4,Aurangabad,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.041667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.083333,0.0,0.041667,0.0,0.0,0.0,0.041667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.041667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.083333,0.0,0.0,0.0,0.166667,0.0,0.0,0.041667,0.083333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.083333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.083333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.083333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.041667,0.0,0.041667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


### 8. Obtain top 5 common venues

In [34]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

In [35]:
num_top_venues = 5

indicators = ['st', 'nd', 'rd']

columns = ['City']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

city_venues_sorted = pd.DataFrame(columns=columns)
city_venues_sorted['City'] = city_grouped['City']

for ind in np.arange(city_grouped.shape[0]):
    city_venues_sorted.iloc[ind, 1:] = return_most_common_venues(city_grouped.iloc[ind, :], num_top_venues)

city_venues_sorted.head()

Unnamed: 0,City,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
0,Agra,Hotel,Indian Restaurant,Historic Site,Multicuisine Indian Restaurant,Fast Food Restaurant
1,Ajmer,Hotel,Indian Restaurant,Vegetarian / Vegan Restaurant,Lake,Café
2,Allahabad,Pizza Place,Train Station,Fast Food Restaurant,Flea Market,Hotel
3,Amritsar,Indian Restaurant,Pizza Place,Café,Fast Food Restaurant,Hotel
4,Aurangabad,Hotel,Indian Restaurant,Multiplex,Restaurant,Café


In [36]:
def check(i):
    count = 0
    flag = True 
    must = ['Hotel']
    for j in must:
        if j in city_venues_sorted_copy.values[i][:].tolist(): count = count + 1
            
    if count == len(must): flag = False
    return flag

copy = city_grouped.copy()
city_venues_sorted_copy = city_venues_sorted.copy()
indexes = []
for i in range(0,39):
    if check(i): 
        indexes.append(i)
        copy.drop(index = i, inplace = True)
for i in indexes:     
    city_venues_sorted_copy.drop(index = i, inplace = True)

copy = copy.reset_index(drop=True)
city_venues_sorted_copy = city_venues_sorted_copy.reset_index(drop=True)

In [37]:
print(len(copy),len(city_venues_sorted_copy))
copy.head()

24 24


Unnamed: 0,City,Zoo,Accessories Store,Airport,Airport Terminal,American Restaurant,Andhra Restaurant,Arcade,Art Gallery,Art Museum,Arts & Crafts Store,Asian Restaurant,Athletics & Sports,BBQ Joint,Bakery,Bar,Beach,Bed & Breakfast,Big Box Store,Bistro,Boarding House,Boat or Ferry,Bookstore,Bowling Alley,Breakfast Spot,Brewery,Burger Joint,Burrito Place,Bus Station,Business Service,Cafeteria,Café,Cajun / Creole Restaurant,Cave,Chinese Restaurant,Chocolate Shop,Clothing Store,Coffee Shop,Convenience Store,Cricket Ground,Cupcake Shop,Dairy Store,Deli / Bodega,Department Store,Dessert Shop,Diner,Donut Shop,Electronics Store,Event Space,Exhibit,Factory,Farm,Farmers Market,Fast Food Restaurant,Fish & Chips Shop,Flea Market,Food,Food & Drink Shop,Food Court,Food Stand,Food Truck,French Restaurant,Fried Chicken Joint,Frozen Yogurt Shop,Gaming Cafe,Garden,Gastropub,General Entertainment,General Travel,Golf Course,Gym,Gym / Fitness Center,Heliport,Historic Site,History Museum,Hookah Bar,Hostel,Hotel,Hotel Bar,Hyderabadi Restaurant,Ice Cream Shop,Indian Restaurant,Indian Sweet Shop,Indie Movie Theater,Irish Pub,Italian Restaurant,Juice Bar,Karnataka Restaurant,Korean Restaurant,Lake,Lighthouse,Lounge,Market,Mattress Store,Mediterranean Restaurant,Mexican Restaurant,Middle Eastern Restaurant,Miscellaneous Shop,Monument / Landmark,Motel,Motorcycle Shop,Movie Theater,Mughlai Restaurant,Multicuisine Indian Restaurant,Multiplex,Museum,National Park,Nature Preserve,Nightclub,North Indian Restaurant,Optical Shop,Organic Grocery,Outdoors & Recreation,Palace,Park,Performing Arts Venue,Pizza Place,Platform,Playground,Plaza,Pool,Pub,Punjabi Restaurant,Racetrack,Rajasthani Restaurant,Resort,Restaurant,River,Roof Deck,Sake Bar,Salad Place,Salon / Barbershop,Sandwich Place,Scenic Lookout,Sculpture Garden,Seafood Restaurant,Shopping Mall,Shopping Plaza,Smoke Shop,Snack Place,South Indian Restaurant,Southern / Soul Food Restaurant,Spa,Spanish Restaurant,Spiritual Center,Sports Bar,Stadium,Steakhouse,Street Food Gathering,Supermarket,Tea Room,Temple,Thai Restaurant,Theater,Theme Park,Toll Booth,Town,Train Station,Udupi Restaurant,Vegetarian / Vegan Restaurant,Water Park,Women's Store
0,Agra,0.0,0.0,0.021277,0.0,0.0,0.0,0.0,0.0,0.0,0.021277,0.0,0.0,0.0,0.0,0.0,0.0,0.021277,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.021277,0.0,0.0,0.0,0.0,0.0,0.021277,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.06383,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.021277,0.0,0.0,0.021277,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.106383,0.0,0.0,0.0,0.297872,0.0,0.0,0.0,0.12766,0.0,0.0,0.0,0.021277,0.0,0.0,0.0,0.0,0.0,0.0,0.021277,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.021277,0.06383,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.042553,0.0,0.0,0.0,0.021277,0.0,0.0,0.0,0.0,0.042553,0.021277,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,Ajmer,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.038462,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.038462,0.0,0.0,0.0,0.0,0.076923,0.0,0.0,0.0,0.0,0.0,0.038462,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.038462,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.230769,0.0,0.0,0.0,0.192308,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.076923,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.076923,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.038462,0.038462,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.076923,0.0,0.0
2,Allahabad,0.0,0.0,0.052632,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.052632,0.0,0.0,0.0,0.0,0.052632,0.052632,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.157895,0.0,0.052632,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.052632,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.052632,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.157895,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.052632,0.052632,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.052632,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.157895,0.0,0.0,0.0,0.0
3,Amritsar,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.021739,0.0,0.0,0.0,0.0,0.0,0.0,0.086957,0.0,0.0,0.0,0.0,0.0,0.043478,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.065217,0.0,0.0,0.0,0.0,0.0,0.021739,0.0,0.086957,0.021739,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.021739,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.021739,0.0,0.0,0.0,0.086957,0.0,0.0,0.0,0.108696,0.0,0.0,0.0,0.021739,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.021739,0.0,0.0,0.0,0.0,0.0,0.021739,0.043478,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.108696,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.043478,0.0,0.0,0.0,0.0,0.0,0.0,0.021739,0.0,0.0,0.0,0.065217,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.043478,0.0,0.0
4,Aurangabad,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.041667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.083333,0.0,0.041667,0.0,0.0,0.0,0.041667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.041667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.083333,0.0,0.0,0.0,0.166667,0.0,0.0,0.041667,0.083333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.083333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.083333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.083333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.041667,0.0,0.041667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


### 9. Cluster the cities

In [38]:
clusters = 8
df = copy.drop(["City"], 1)

kmeans = KMeans(n_clusters=clusters, random_state=0).fit(df)

kmeans.labels_

array([2, 2, 5, 1, 6, 1, 1, 7, 2, 0, 3, 2, 1, 0, 7, 1, 1, 0, 4, 1, 2, 2,
       1, 6])

In [39]:
city_venues_sorted_copy.insert(0, 'Cluster Labels', kmeans.labels_)

cities_merged_df = cities_df

cities_merged_df = cities_merged_df.join(city_venues_sorted_copy.set_index('City'), on='City')

cities_merged_df['Cluster Labels'] = cities_merged_df['Cluster Labels'] + 1
cities_merged_df = cities_merged_df.dropna()
cities_merged_df.head()

Unnamed: 0,City,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
1,Jaipur,26.92573,75.80659,3.0,Hotel,Historic Site,Indian Restaurant,Café,Hostel
5,Agra,27.19217,78.00007,3.0,Hotel,Indian Restaurant,Historic Site,Multicuisine Indian Restaurant,Fast Food Restaurant
6,Varanasi,25.33289,82.99654,3.0,Hotel,Indian Restaurant,Pizza Place,Café,Hostel
7,Srinagar,34.08443,74.79906,5.0,Garden,Café,Hotel,Shopping Mall,Bakery
8,Aurangabad,19.8701,75.34602,7.0,Hotel,Indian Restaurant,Multiplex,Restaurant,Café


### 10. Finally, let's visualize the resulting clusters

In [40]:
map_clusters = folium.Map(location=[20.5937, 78.9629], zoom_start=4)

x = np.arange(clusters)
ys = [i+x+(i*x)**2 for i in range(clusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

markers_colors = []
for lat, lon, city, cluster in zip(cities_merged_df['Latitude'], cities_merged_df['Longitude'], cities_merged_df['City'], cities_merged_df['Cluster Labels']):
    label = folium.Popup(str(city) + ' - Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[int(cluster)-1],
        fill=True,
        fill_color=rainbow[int(cluster)-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

### 11. Examine the clusters

#### Cluster 1

This cluster has 3 cities and have mostly multiplexes and cafe. 

In [41]:
cities_merged_df.loc[cities_merged_df['Cluster Labels'] == 1,cities_merged_df.columns[[0] + list(range(4,cities_merged_df.shape[1]))]]

Unnamed: 0,City,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
17,Raipur,Shopping Mall,Café,Multiplex,Hotel,Fast Food Restaurant
18,Kota,Multiplex,Hotel,Café,Pizza Place,Fast Food Restaurant
30,Jamnagar,Hotel,Multiplex,Café,General Travel,Pizza Place


#### Cluster 2

This cluster has 8 cities and have lots of Indian restaurants and hotels

In [42]:
cities_merged_df.loc[cities_merged_df['Cluster Labels'] == 2,cities_merged_df.columns[[0] + list(range(4,cities_merged_df.shape[1]))]]

Unnamed: 0,City,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
9,Amritsar,Indian Restaurant,Pizza Place,Café,Fast Food Restaurant,Hotel
11,Coimbatore,Indian Restaurant,Café,Hotel,Ice Cream Shop,Shopping Mall
14,Vijayawada,Indian Restaurant,Multiplex,Coffee Shop,Hotel,Café
21,Mysore,Indian Restaurant,Café,Hotel,Pizza Place,Shopping Mall
22,Tiruchirappalli,Indian Restaurant,Train Station,Ice Cream Shop,Multiplex,Hotel
23,Bhubaneswar,Coffee Shop,Hotel,Pizza Place,Indian Restaurant,Fast Food Restaurant
27,Kochi,Café,Hotel,Indian Restaurant,Seafood Restaurant,Ice Cream Shop
35,Mangalore,Indian Restaurant,Hotel,Ice Cream Shop,Seafood Restaurant,Snack Place


#### Cluster 3

This cluster has 6 cities and predominantly consists of hotels and indian restaurants with a variety of other eateries.

In [43]:
cities_merged_df.loc[cities_merged_df['Cluster Labels'] == 3,cities_merged_df.columns[[0] + list(range(4,cities_merged_df.shape[1]))]]

Unnamed: 0,City,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
1,Jaipur,Hotel,Historic Site,Indian Restaurant,Café,Hostel
5,Agra,Hotel,Indian Restaurant,Historic Site,Multicuisine Indian Restaurant,Fast Food Restaurant
6,Varanasi,Hotel,Indian Restaurant,Pizza Place,Café,Hostel
15,Jodhpur,Hotel,Indian Restaurant,Café,Historic Site,Restaurant
29,Ajmer,Hotel,Indian Restaurant,Vegetarian / Vegan Restaurant,Lake,Café
38,Udaipur,Hotel,Resort,Indian Restaurant,Restaurant,Café


#### Cluster 4

This cluster has only 1 city and has hotels but not too many eateries.

In [44]:
cities_merged_df.loc[cities_merged_df['Cluster Labels'] == 4,cities_merged_df.columns[[0] + list(range(4,cities_merged_df.shape[1]))]]

Unnamed: 0,City,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
32,Jhansi,Hotel,Historic Site,Indian Restaurant,Pizza Place,Train Station


#### Cluster 5

This cluster has only 1 city and has many gardens but less hotels.

In [45]:
cities_merged_df.loc[cities_merged_df['Cluster Labels'] == 5,cities_merged_df.columns[[0] + list(range(4,cities_merged_df.shape[1]))]]

Unnamed: 0,City,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
7,Srinagar,Garden,Café,Hotel,Shopping Mall,Bakery


#### Cluster 6

This cluster has only 1 city and pizza place is very common but has very less hotels

In [46]:
cities_merged_df.loc[cities_merged_df['Cluster Labels'] == 6,cities_merged_df.columns[[0] + list(range(4,cities_merged_df.shape[1]))]]

Unnamed: 0,City,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
10,Allahabad,Pizza Place,Train Station,Fast Food Restaurant,Flea Market,Hotel


#### Cluster 7

This cluster has 2 cities and have lots of hotels and historic sites.

In [47]:
cities_merged_df.loc[cities_merged_df['Cluster Labels'] == 7,cities_merged_df.columns[[0] + list(range(4,cities_merged_df.shape[1]))]]

Unnamed: 0,City,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
8,Aurangabad,Hotel,Indian Restaurant,Multiplex,Restaurant,Café
26,Warangal,Hotel,Historic Site,Multiplex,Indian Restaurant,Temple


#### Cluster 8

This cluster has 2 cities and has lots of Indian restaurants and hotels along with many shopping malls.

In [48]:
cities_merged_df.loc[cities_merged_df['Cluster Labels'] == 8,cities_merged_df.columns[[0] + list(range(4,cities_merged_df.shape[1]))]]

Unnamed: 0,City,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
16,Madurai,Indian Restaurant,Hotel,Movie Theater,Shopping Mall,Airport
20,Hubli and Dharwad,Indian Restaurant,Hotel,Café,Shopping Mall,Food


### Conclusion:
_**Cities in the clusters 2, 3, 7, 8 are ideal for constructing a new cricket stadium as it satisfies the minimum neccessary conditions.**_