# <b> Relocation Project </b> 

## <b> Load Most populous Cities in India using Wikipedia link </b>

In [1]:
from bs4 import BeautifulSoup
import pandas as pd
import requests
url = "https://en.wikipedia.org/wiki/List_of_cities_in_India_by_population"
response = requests.get(url)
html_doc=response.text
soup = BeautifulSoup(html_doc, 'html.parser')

## <b>Create Dataframe from the HTML content of Wikipedia</b>

In [8]:
import numpy as np
colNames = []
allRows=[]
table = soup.find('table', attrs={'class':'wikitable sortable'})
table_body = table.find('tbody')
rowHeader = table.find_all('th')
for rHeader in rowHeader:
    colNames.append(rHeader.text.strip())
    
rows = table_body.find_all('tr')[1:]

for row in rows:
    cols = row.find_all('td')       
    rowData = [ele.text.strip() for ele in cols]  
    allRows.append(rowData)

df = pd.DataFrame(columns=colNames)
for tr in rows:
    tds = tr.find_all('td')
    df = df.append({colNames[0]: tds[0].get_text().strip(),colNames[1]: tds[1].get_text().strip(),colNames[2]: tds[2].get_text().strip()},ignore_index=True)
    
df
cols = [3,4,5]
df.drop(df.columns[cols],axis=1,inplace=True)
#df.drop('Population(2001)','State or union territory','Ref', axis=1)

df=df.head(50)
df

Unnamed: 0,Rank,City,Population(2011)[3]
0,1,Mumbai,12442373
1,2,Delhi,11034555
2,3,Bangalore,8443675
3,4,Hyderabad,6993262
4,5,Ahmedabad,5577940
5,6,Chennai,4646732
6,7,Kolkata,4496694
7,8,Surat,4467797
8,9,Pune,3124458
9,10,Jaipur,3046163


## <b>Use Geolocator to fetch Latitude and Longitudes of Cities fetched from Wikipedia</b>

In [7]:
!conda install -c conda-forge geopy --yes # uncomment this line if you haven't completed the Foursquare API lab
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values
!conda install -c conda-forge folium=0.5.0 --yes # uncomment this line if you haven't completed the Foursquare API lab
import folium # map rendering library
from pandas.io.json import json_normalize

Fetching package metadata .............
Solving package specifications: .

# All requested packages already installed.
# packages in environment at /opt/conda/envs/DSX-Python35:
#
geopy                     1.18.1                     py_0    conda-forge
Fetching package metadata .............
Solving package specifications: .

# All requested packages already installed.
# packages in environment at /opt/conda/envs/DSX-Python35:
#
folium                    0.5.0                      py_0    conda-forge


In [11]:
lat_list=[]
lon_list=[]
for city,population in zip(df['City'], 
                                            df['Population(2011)[3]'],
                                            ):
    
    geolocator = Nominatim()
    if city is not None:
        location = geolocator.geocode(city)
        if location is not None:
            latitude = location.latitude
            longitude = location.longitude
            lat_list.append(latitude)
            lon_list.append(longitude)
        else:
            lat_list.append(-1)
            lon_list.append(-1)
    else:
        lat_list.append(-1)
        lon_list.append(-1)
    #print('The geograpical coordinates of City {} are {}, {}.'.format(city,latitude, longitude))
    

df['latitude']=lat_list
df['longitude']=lon_list
df = df[df.latitude != -1]
df

list_latLong=[]
dict_CityLatLong = dict()
for city,lat,long in zip(df['City'],df['latitude'],df['longitude']):
    list_latLong=[]
    list_latLong=[lat,long]
    
    dict_CityLatLong[city]=list_latLong

dict_CityLatLong





{'Agra': [27.1752554, 78.0098161],
 'Ahmedabad': [23.0216238, 72.5797068],
 'Bangalore': [12.9791198, 77.5912997],
 'Bhopal': [23.2530923, 77.3962718],
 'Chennai': [13.0801721, 80.2838331],
 'Delhi': [28.6517178, 77.2219388],
 'Ghaziabad': [28.666667, 77.666667],
 'Hyderabad': [17.3616079, 78.4746286],
 'Indore': [22.7203851, 75.8682103],
 'Jaipur': [26.916194, 75.820349],
 'Kanpur': [26.4609135, 80.3217588],
 'Kolkata': [22.5677459, 88.3476023],
 'Lucknow': [26.8381, 80.9346001],
 'Ludhiana': [30.9090157, 75.851601],
 'Mumbai': [18.9387711, 72.8353355],
 'Nagpur': [21.1500964, 79.0127048991187],
 'Patna': [25.6093239, 85.1235252],
 'Pimpri-Chinchwad': [18.6279288, 73.8009829],
 'Pune': [18.5203062, 73.8543185],
 'Surat': [21.1864607, 72.8081281],
 'Thane': [19.1943294, 72.9701779],
 'Vadodara': [22.297076, 73.1957373]}

In [12]:
# The code was removed by Watson Studio for sharing.

## <b> Print the Dataframe with City,Population,Latitude,Longitude </b>

In [17]:
location = geolocator.geocode("India")
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinates of City  are {}, {}.'.format(latitude, longitude))
df

The geograpical coordinates of City  are 22.3511148, 78.6677428.


Unnamed: 0,Rank,City,Population(2011)[3],latitude,longitude
0,1,Mumbai,12442373,18.938771,72.835335
1,2,Delhi,11034555,28.651718,77.221939
2,3,Bangalore,8443675,12.97912,77.5913
3,4,Hyderabad,6993262,17.361608,78.474629
4,5,Ahmedabad,5577940,23.021624,72.579707
5,6,Chennai,4646732,13.080172,80.283833
6,7,Kolkata,4496694,22.567746,88.347602
7,8,Surat,4467797,21.186461,72.808128
8,9,Pune,3124458,18.520306,73.854319
9,10,Jaipur,3046163,26.916194,75.820349


## <b> Display Map highlighting the Cities </b>

In [18]:
import folium

map = folium.Map(location=[latitude, longitude], zoom_start=10)

for city,population,lat,long in zip(df['City'], 
                                            df['Population(2011)[3]'],
                                            df['latitude'],df['longitude']):
     
    label = "{}, {}".format(city, population)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, long],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.5,
        parse_html=False).add_to(map) 
    
map

## Use Foursquare API to fetch venue details for each city

In [19]:
# function that extracts the category of the venue
def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return None
    else:
        return categories_list[0]['name']

def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [21]:
#Retriveing all the nearby venues across all the neighborhoods in Toronto
india_venues = getNearbyVenues(names=df['City'],
                                   latitudes=df['latitude'],
                                   longitudes=df['longitude']
                                  )


Mumbai
Delhi
Bangalore
Hyderabad
Ahmedabad
Chennai
Kolkata
Surat
Pune
Jaipur
Lucknow
Kanpur
Nagpur
Indore
Thane
Bhopal
Pimpri-Chinchwad
Patna
Vadodara
Ghaziabad
Ludhiana
Agra


In [22]:
india_venues.groupby('Neighborhood').count()

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Agra,5,5,5,5,5,5
Ahmedabad,5,5,5,5,5,5
Bangalore,4,4,4,4,4,4
Bhopal,4,4,4,4,4,4
Chennai,4,4,4,4,4,4
Hyderabad,6,6,6,6,6,6
Indore,4,4,4,4,4,4
Jaipur,6,6,6,6,6,6
Kanpur,2,2,2,2,2,2
Kolkata,4,4,4,4,4,4


In [23]:
print('There are {} uniques categories.'.format(len(india_venues['Venue Category'].unique())))

There are 63 uniques categories.


## One hot encoding of venue data

In [25]:
# one hot encoding
india_onehot = pd.get_dummies(india_venues[['Venue Category']] , prefix="", prefix_sep="")

# add neighborhood column back to dataframe
india_onehot['Neighbourhood'] = india_venues['Neighborhood']
# move neighborhood column to the first column
fixed_columns = [india_onehot.columns[-1]] + list(india_onehot.columns[:-1])
india_onehot = india_onehot[fixed_columns]

india_onehot.head()

Unnamed: 0,Neighbourhood,Art Gallery,Arts & Crafts Store,Asian Restaurant,Athletics & Sports,Bakery,Bar,Boarding House,Burger Joint,Bus Station,...,Shoe Store,Shopping Mall,South Indian Restaurant,Spa,Stadium,Tea Room,Theater,Toll Plaza,Train Station,Vegetarian / Vegan Restaurant
0,Mumbai,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,Mumbai,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,1,0
2,Mumbai,0,0,0,0,0,1,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,Mumbai,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,Mumbai,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [26]:
india_onehot.shape

(127, 64)

In [28]:
india_grouped = india_onehot.groupby('Neighbourhood').mean().reset_index()
india_grouped

Unnamed: 0,Neighbourhood,Art Gallery,Arts & Crafts Store,Asian Restaurant,Athletics & Sports,Bakery,Bar,Boarding House,Burger Joint,Bus Station,...,Shoe Store,Shopping Mall,South Indian Restaurant,Spa,Stadium,Tea Room,Theater,Toll Plaza,Train Station,Vegetarian / Vegan Restaurant
0,Agra,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,Ahmedabad,0.2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,Bangalore,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,Bhopal,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,Chennai,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0
5,Hyderabad,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.166667,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,Indore,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25
7,Jaipur,0.0,0.166667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.166667,0.0,0.0,0.0,0.0
8,Kanpur,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,Kolkata,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [29]:
india_grouped.shape

(20, 64)

## Top venues in each City

In [29]:
num_top_venues = 5

for hood in india_grouped['Neighbourhood']:
    print("----"+hood+"----")
    temp = india_grouped[india_grouped['Neighbourhood'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

----Agra----
               venue  freq
0     Clothing Store  0.25
1              Hotel  0.25
2       Camera Store  0.25
3         Shoe Store  0.25
4  Afghan Restaurant  0.00


----Ahmedabad----
                venue  freq
0         Art Gallery  0.25
1               River  0.25
2              Castle  0.25
3                Park  0.25
4  Mughlai Restaurant  0.00


----Bangalore----
              venue  freq
0          Vineyard  0.25
1     Metro Station  0.25
2  Capitol Building  0.25
3              Park  0.25
4         Multiplex  0.00


----Bhopal----
                  venue  freq
0             Racetrack  0.17
1         Historic Site  0.17
2                Bakery  0.17
3  Fast Food Restaurant  0.17
4        Boarding House  0.17


----Chennai----
             venue  freq
0  Harbor / Marina  0.25
1         Pharmacy  0.25
2    Train Station  0.25
3           Museum  0.25
4    Movie Theater  0.00


----Hyderabad----
                     venue  freq
0  South Indian Restaurant  0.14
1      Mon

In [30]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

In [31]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighbourhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighbourhood'] = india_grouped['Neighbourhood']

for ind in np.arange(india_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(india_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted

Unnamed: 0,Neighbourhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Agra,Indian Restaurant,Shoe Store,Clothing Store,Camera Store,Hotel,Bakery,Dance Studio,History Museum,Historic Site,Harbor / Marina
1,Ahmedabad,Indian Restaurant,River,Castle,Park,Art Gallery,Stadium,Chinese Restaurant,Harbor / Marina,Food Truck,Food Court
2,Bangalore,Park,Metro Station,Capitol Building,Hotel,Food Truck,Food Court,Fast Food Restaurant,Electronics Store,Clothing Store,Diner
3,Bhopal,Fast Food Restaurant,Historic Site,Racetrack,Boarding House,Donut Shop,Dance Studio,Department Store,Dessert Shop,Diner,Vegetarian / Vegan Restaurant
4,Chennai,Train Station,Museum,Harbor / Marina,Business Service,History Museum,Historic Site,Food Truck,Food Court,Fast Food Restaurant,Electronics Store
5,Hyderabad,History Museum,South Indian Restaurant,Monument / Landmark,Café,Clothing Store,Coffee Shop,Fast Food Restaurant,Food Court,Food Truck,Donut Shop
6,Indore,Vegetarian / Vegan Restaurant,Multiplex,Ice Cream Shop,Indian Restaurant,Tea Room,Chinese Restaurant,Harbor / Marina,Food Truck,Food Court,Fast Food Restaurant
7,Jaipur,Coffee Shop,Arts & Crafts Store,Historic Site,Tea Room,Dessert Shop,Market,Donut Shop,Dance Studio,Department Store,Diner
8,Kanpur,Electronics Store,Coffee Shop,Vegetarian / Vegan Restaurant,Clothing Store,History Museum,Historic Site,Harbor / Marina,Food Truck,Food Court,Fast Food Restaurant
9,Kolkata,Indian Restaurant,Hotel,Indian Sweet Shop,Mughlai Restaurant,Historic Site,Harbor / Marina,Food Truck,Food Court,Fast Food Restaurant,Electronics Store


## Use KMeans algorithm to cluster cities based on Venue data

In [32]:
from sklearn.cluster import KMeans
# set number of clusters
kclusters = 5

india_grouped_clustering = india_grouped.drop('Neighbourhood', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(india_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10]

kmeans.labels_



array([0, 1, 0, 0, 1, 1, 1, 1, 3, 0, 0, 0, 1, 2, 4, 1, 1, 1, 1, 1], dtype=int32)

In [33]:
india_merged = df

# add clustering labels
#india_merged['Cluster Labels'] = kmeans.labels_

# merge toronto_grouped with toronto_data to add latitude/longitude for each neighborhood


india_merged = india_merged.join(neighborhoods_venues_sorted.set_index('Neighbourhood'), on='City',how="right")

india_merged['Cluster Labels'] = kmeans.labels_


india_merged

Unnamed: 0,Rank,City,Population(2011)[3],latitude,longitude,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue,Cluster Labels
0,1,Mumbai,12442373,18.938771,72.835335,Indian Restaurant,Irani Cafe,Bar,Sandwich Place,Hotel,Food Truck,Train Station,Fast Food Restaurant,Lounge,Multiplex,0
2,3,Bangalore,8443675,12.97912,77.5913,Park,Metro Station,Capitol Building,Hotel,Food Truck,Food Court,Fast Food Restaurant,Electronics Store,Clothing Store,Diner,1
3,4,Hyderabad,6993262,17.361608,78.474629,History Museum,South Indian Restaurant,Monument / Landmark,Café,Clothing Store,Coffee Shop,Fast Food Restaurant,Food Court,Food Truck,Donut Shop,0
4,5,Ahmedabad,5577940,23.021624,72.579707,Indian Restaurant,River,Castle,Park,Art Gallery,Stadium,Chinese Restaurant,Harbor / Marina,Food Truck,Food Court,0
5,6,Chennai,4646732,13.080172,80.283833,Train Station,Museum,Harbor / Marina,Business Service,History Museum,Historic Site,Food Truck,Food Court,Fast Food Restaurant,Electronics Store,1
6,7,Kolkata,4496694,22.567746,88.347602,Indian Restaurant,Hotel,Indian Sweet Shop,Mughlai Restaurant,Historic Site,Harbor / Marina,Food Truck,Food Court,Fast Food Restaurant,Electronics Store,1
7,8,Surat,4467797,21.186461,72.808128,Indian Restaurant,Department Store,Restaurant,Dessert Shop,Juice Bar,Café,Donut Shop,Dance Studio,Diner,Electronics Store,1
8,9,Pune,3124458,18.520306,73.854319,Historic Site,Bakery,Motorcycle Shop,Multiplex,Vegetarian / Vegan Restaurant,Coffee Shop,Harbor / Marina,Food Truck,Food Court,Fast Food Restaurant,1
9,10,Jaipur,3046163,26.916194,75.820349,Coffee Shop,Arts & Crafts Store,Historic Site,Tea Room,Dessert Shop,Market,Donut Shop,Dance Studio,Department Store,Diner,3
10,11,Lucknow,2817105,26.8381,80.9346,Indian Restaurant,Fast Food Restaurant,Hotel,Asian Restaurant,Athletics & Sports,History Museum,Historic Site,Harbor / Marina,Food Truck,Food Court,0


## <b> Visualize clusters of cities using Map </b>

In [34]:
from matplotlib import cm
import matplotlib.colors as colors

# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i+x+(i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(india_merged['latitude'], india_merged['longitude'], india_merged['City'],
                                  india_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

<b>Cluster 1</b>

In [45]:
india_merged.loc[india_merged['Cluster Labels'] == 0, india_merged.columns[[0, 1] + list(range(5, india_merged.shape[1]))]]

Unnamed: 0,Rank,City,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue,Cluster Labels
0,1,Mumbai,Indian Restaurant,Irani Cafe,Bar,Sandwich Place,Hotel,Food Truck,Train Station,Fast Food Restaurant,Lounge,Multiplex,0
3,4,Hyderabad,History Museum,South Indian Restaurant,Monument / Landmark,Café,Clothing Store,Coffee Shop,Fast Food Restaurant,Food Court,Food Truck,Donut Shop,0
4,5,Ahmedabad,Indian Restaurant,River,Castle,Park,Art Gallery,Stadium,Chinese Restaurant,Harbor / Marina,Food Truck,Food Court,0
10,11,Lucknow,Indian Restaurant,Fast Food Restaurant,Hotel,Asian Restaurant,Athletics & Sports,History Museum,Historic Site,Harbor / Marina,Food Truck,Food Court,0
11,12,Kanpur,Electronics Store,Coffee Shop,Vegetarian / Vegan Restaurant,Clothing Store,History Museum,Historic Site,Harbor / Marina,Food Truck,Food Court,Fast Food Restaurant,0
12,13,Nagpur,Toll Plaza,Vegetarian / Vegan Restaurant,Clothing Store,History Museum,Historic Site,Harbor / Marina,Food Truck,Food Court,Fast Food Restaurant,Electronics Store,0


<b>Cluster 2</b>

In [46]:
india_merged.loc[india_merged['Cluster Labels'] == 2, india_merged.columns[[0, 1] + list(range(5, india_merged.shape[1]))]]

Unnamed: 0,Rank,City,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue,Cluster Labels
15,16,Thane,Ice Cream Shop,Indian Restaurant,Vegetarian / Vegan Restaurant,Theater,Donut Shop,Dessert Shop,Diner,Café,Italian Restaurant,Pizza Place,2


<b>Cluster 3</b>

In [47]:
india_merged.loc[india_merged['Cluster Labels'] == 1, india_merged.columns[[0, 1] + list(range(5, india_merged.shape[1]))]]

Unnamed: 0,Rank,City,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue,Cluster Labels
2,3,Bangalore,Park,Metro Station,Capitol Building,Hotel,Food Truck,Food Court,Fast Food Restaurant,Electronics Store,Clothing Store,Diner,1
5,6,Chennai,Train Station,Museum,Harbor / Marina,Business Service,History Museum,Historic Site,Food Truck,Food Court,Fast Food Restaurant,Electronics Store,1
6,7,Kolkata,Indian Restaurant,Hotel,Indian Sweet Shop,Mughlai Restaurant,Historic Site,Harbor / Marina,Food Truck,Food Court,Fast Food Restaurant,Electronics Store,1
7,8,Surat,Indian Restaurant,Department Store,Restaurant,Dessert Shop,Juice Bar,Café,Donut Shop,Dance Studio,Diner,Electronics Store,1
8,9,Pune,Historic Site,Bakery,Motorcycle Shop,Multiplex,Vegetarian / Vegan Restaurant,Coffee Shop,Harbor / Marina,Food Truck,Food Court,Fast Food Restaurant,1
14,15,Indore,Vegetarian / Vegan Restaurant,Multiplex,Ice Cream Shop,Indian Restaurant,Tea Room,Chinese Restaurant,Harbor / Marina,Food Truck,Food Court,Fast Food Restaurant,1
17,18,Pimpri-Chinchwad,Indian Restaurant,Jewelry Store,Hotel,Pizza Place,Dance Studio,Shopping Mall,Electronics Store,Coffee Shop,Fast Food Restaurant,Food Court,1
18,19,Patna,Jewelry Store,Vegetarian / Vegan Restaurant,Ice Cream Shop,History Museum,Historic Site,Harbor / Marina,Food Truck,Food Court,Fast Food Restaurant,Electronics Store,1
19,20,Vadodara,Indian Restaurant,History Museum,Historic Site,Stadium,Clothing Store,Harbor / Marina,Food Truck,Food Court,Fast Food Restaurant,Electronics Store,1
21,22,Ludhiana,Fast Food Restaurant,Burger Joint,Hotel,Food Court,Restaurant,Shopping Mall,Kids Store,Donut Shop,Department Store,Dessert Shop,1


## From the cluster data above we can predict  the possible options as below:

### If a person from city Bangalore wants to relocate to a similar city in India, he will have the following options


### Chennai,Kolkata,Pune,etc


[As these belong to same cluster]