# The Battle Of Neighborhoods - Week 5

#### Installing needed packages...

In [1]:
!pip install folium
!pip install geopy
!pip install html5lib
!pip install bs4

  from cryptography.utils import int_from_bytes
  from cryptography.utils import int_from_bytes
Collecting folium
  Downloading folium-0.12.1-py2.py3-none-any.whl (94 kB)
[K     |████████████████████████████████| 94 kB 5.6 MB/s  eta 0:00:01
Collecting branca>=0.3.0
  Downloading branca-0.4.2-py3-none-any.whl (24 kB)
Installing collected packages: branca, folium
Successfully installed branca-0.4.2 folium-0.12.1
  from cryptography.utils import int_from_bytes
  from cryptography.utils import int_from_bytes
  from cryptography.utils import int_from_bytes
  from cryptography.utils import int_from_bytes
  from cryptography.utils import int_from_bytes
  from cryptography.utils import int_from_bytes
Collecting bs4
  Downloading bs4-0.0.1.tar.gz (1.1 kB)
Building wheels for collected packages: bs4
  Building wheel for bs4 (setup.py) ... [?25ldone
[?25h  Created wheel for bs4: filename=bs4-0.0.1-py3-none-any.whl size=1272 sha256=848982a6dad0663f826bd83db0b06d49161a2abd99e189078726be52af01355

In [2]:
import numpy as np 
import pandas as pd
import folium
from geopy.geocoders import Nominatim 
import requests
from bs4 import BeautifulSoup
from sklearn.cluster import KMeans
print("Libraries imported.")

Libraries imported.


#### Data is scrapped from "https://news.abplive.com/pincode/tamil-nadu/coimbatore.html" that includes Post offics, Taluks, Districts, State and Pincode. 

In [3]:
cbe = requests.get('https://news.abplive.com/pincode/tamil-nadu/coimbatore.html').text
soup = BeautifulSoup(cbe, 'html.parser')

In [4]:
table = soup.find_all('table')[0]
rows = table.find_all('tr')
len(rows)

614

In [5]:
df = pd.read_html(cbe)[0]
df = pd.DataFrame(df)

#### This is the preview of the table, here Offices are similar to neighborhoods, while Taluks are similar to Boroughs

In [6]:
df.head()

Unnamed: 0,Office,Taluk,District,State,Pincode
0,15 Velampalayam,Tiruppur,Coimbatore,TAMIL NADU,641652
1,63 Velampalayam,Palladam,Coimbatore,TAMIL NADU,641663
2,A Nagore,Udamalpet,Coimbatore,TAMIL NADU,642205
3,Achipatti,Pollachi,Coimbatore,TAMIL NADU,642002
4,Agrahara Kannadiputhur,Udumalaipettai,Coimbatore,TAMIL NADU,642111


#### States and districts are not necessary. So, they are removed from the dataframe.

In [7]:
df.drop(['State', 'District'], axis = 1, inplace = True)

In [8]:
df.head()

Unnamed: 0,Office,Taluk,Pincode
0,15 Velampalayam,Tiruppur,641652
1,63 Velampalayam,Palladam,641663
2,A Nagore,Udamalpet,642205
3,Achipatti,Pollachi,642002
4,Agrahara Kannadiputhur,Udumalaipettai,642111


In [9]:
print("There are {} unique post offices and {} unique taluks".format(len(df['Office'].unique()), len(df['Taluk'].unique())))

There are 587 unique post offices and 20 unique taluks


In [10]:
df.columns = ['PostOffice', 'Taluk', 'Pincode']
df.head()

Unnamed: 0,PostOffice,Taluk,Pincode
0,15 Velampalayam,Tiruppur,641652
1,63 Velampalayam,Palladam,641663
2,A Nagore,Udamalpet,642205
3,Achipatti,Pollachi,642002
4,Agrahara Kannadiputhur,Udumalaipettai,642111


In [11]:
df['Taluk'].unique()

array(['Tiruppur', 'Palladam', 'Udamalpet', 'Pollachi', 'Udumalaipettai',
       'Coimbatore North', 'Valparai', 'Mettupalayam', nan, 'Avanashi',
       'Coimbatore South', 'Udamalpet Ho', 'Udaamalpet', 'Coimbatore',
       'Tirupur', 'Madhavapuram', 'Mettupalyaam', 'Dindigul', 'Erode',
       'Coimabtore North'], dtype=object)

#### One Taluk is misspelled as Coimabtore North instead of Coimbatore North, this issue is solved

In [12]:
df['Taluk'] = df['Taluk'].replace(['Coimabtore North'], 'Coimbatore North')
df['Taluk'].unique()

array(['Tiruppur', 'Palladam', 'Udamalpet', 'Pollachi', 'Udumalaipettai',
       'Coimbatore North', 'Valparai', 'Mettupalayam', nan, 'Avanashi',
       'Coimbatore South', 'Udamalpet Ho', 'Udaamalpet', 'Coimbatore',
       'Tirupur', 'Madhavapuram', 'Mettupalyaam', 'Dindigul', 'Erode'],
      dtype=object)

#### This project is concentrated on the main Coimbatore city, and hence Coimbatore central (here, Coimbatore), Coimbatore North and South are chosen.

In [13]:
dfc = pd.DataFrame(columns = ['PostOffice', 'Taluk', 'Pincode'])
cbe_list = ['Coimbatore North', 'Coimbatore South', 'Coimbatore']
for item in cbe_list:
    dfc = dfc.append(df[df['Taluk'] == item])
dfc.head()

Unnamed: 0,PostOffice,Taluk,Pincode
6,Agraharasamakulam,Coimbatore North,641110
13,Alandurai,Coimbatore North,641101
22,Anaikatti,Coimbatore North,641108
38,Athipalayam,Coimbatore North,641110
45,Bharathiyar University,Coimbatore North,641046


In [14]:
dfc.shape

(183, 3)

In [15]:
dfc['Latitude'] = ''
dfc['Longitude'] = ''
dfc.head()

Unnamed: 0,PostOffice,Taluk,Pincode,Latitude,Longitude
6,Agraharasamakulam,Coimbatore North,641110,,
13,Alandurai,Coimbatore North,641101,,
22,Anaikatti,Coimbatore North,641108,,
38,Athipalayam,Coimbatore North,641110,,
45,Bharathiyar University,Coimbatore North,641046,,


In [16]:
dfc.reset_index(inplace = True)
dfc.head()

Unnamed: 0,index,PostOffice,Taluk,Pincode,Latitude,Longitude
0,6,Agraharasamakulam,Coimbatore North,641110,,
1,13,Alandurai,Coimbatore North,641101,,
2,22,Anaikatti,Coimbatore North,641108,,
3,38,Athipalayam,Coimbatore North,641110,,
4,45,Bharathiyar University,Coimbatore North,641046,,


In [17]:
dfc.drop('index', axis = 1, inplace = True)
dfc.head()

Unnamed: 0,PostOffice,Taluk,Pincode,Latitude,Longitude
0,Agraharasamakulam,Coimbatore North,641110,,
1,Alandurai,Coimbatore North,641101,,
2,Anaikatti,Coimbatore North,641108,,
3,Athipalayam,Coimbatore North,641110,,
4,Bharathiyar University,Coimbatore North,641046,,


#### Geospace data is used to fetch the coordinates of the PostOffice. Initially 30 data are fetched, if everything works correctly others are fetched.

In [18]:
df1 = dfc.loc[0:30]

In [19]:
to_drop_unknown = []
geolocator = Nominatim(user_agent="cbe_explorer")
for index, row in df1.iterrows():
    address = str(row['Pincode']) + ', Coimbatore, Tamil Nadu'
    try:
        location = geolocator.geocode(address)
        latitude = location.latitude
        longitude = location.longitude
        print('The geograpical coordinate of {} are {}, {}.'.format(address, latitude, longitude))
        df1.loc[index, 'Latitude'] = latitude
        df1.loc[index, 'Longitude'] = longitude
    except AttributeError:
        print('Cannot do: {}, will drop index: {}'.format(address, index))
        to_drop_unknown.append(index)

The geograpical coordinate of 641110, Coimbatore, Tamil Nadu are 11.077602333333333, 76.92534593333333.


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_with_indexer(indexer, value)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


The geograpical coordinate of 641101, Coimbatore, Tamil Nadu are 10.947238299999999, 76.83048615.
The geograpical coordinate of 641108, Coimbatore, Tamil Nadu are 11.082076592955834, 76.85656344641997.
The geograpical coordinate of 641110, Coimbatore, Tamil Nadu are 11.077602333333333, 76.92534593333333.
The geograpical coordinate of 641046, Coimbatore, Tamil Nadu are 11.03896438000566, 76.8764186672722.
The geograpical coordinate of 641019, Coimbatore, Tamil Nadu are 11.202898159865692, 76.99241313161.
The geograpical coordinate of 641029, Coimbatore, Tamil Nadu are 11.06278109092305, 76.9407714358169.
The geograpical coordinate of 641049, Coimbatore, Tamil Nadu are 11.066194967375152, 76.99028373033897.
The geograpical coordinate of 641019, Coimbatore, Tamil Nadu are 11.202898159865692, 76.99241313161.
The geograpical coordinate of 641109, Coimbatore, Tamil Nadu are 10.9890325, 76.816030275.
The geograpical coordinate of 641109, Coimbatore, Tamil Nadu are 10.9890325, 76.816030275.
Th

In [20]:
df1.head()

Unnamed: 0,PostOffice,Taluk,Pincode,Latitude,Longitude
0,Agraharasamakulam,Coimbatore North,641110,11.0776,76.9253
1,Alandurai,Coimbatore North,641101,10.9472,76.8305
2,Anaikatti,Coimbatore North,641108,11.0821,76.8566
3,Athipalayam,Coimbatore North,641110,11.0776,76.9253
4,Bharathiyar University,Coimbatore North,641046,11.039,76.8764


#### Seems like there are no issues faced, and hence the coordinates of other Post offices are fetched and both the dataframes are merged to one.

In [21]:
df2 = dfc.loc[31:182]
df2.head()

Unnamed: 0,PostOffice,Taluk,Pincode,Latitude,Longitude
31,Mathipalayam,Coimbatore North,641101,,
32,N G G O Colony,Coimbatore North,641022,,
33,Naickenpalayam,Coimbatore North,641020,,
34,Nanjundapuram tadagam,Coimbatore North,641108,,
35,Narasimhanaickenpalayam,Coimbatore North,641031,,


In [22]:
geolocator = Nominatim(user_agent="cbe_explorer2")
print("Finding...")
for index, row in df2.iterrows():
    address = str(row['Pincode']) + ', Coimbatore, Tamil Nadu'
    try:
        location = geolocator.geocode(address)
        latitude = location.latitude
        longitude = location.longitude
        #print('The geograpical coordinate of {} are {}, {}.'.format(address, latitude, longitude))
        df2.loc[index, 'Latitude'] = latitude
        df2.loc[index, 'Longitude'] = longitude
    except AttributeError:
        print('Cannot do: {}, will drop index: {}'.format(address, index))
        to_drop_unknown.append(index)
print("Completed!")

Finding...


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_with_indexer(indexer, value)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


Cannot do: 641003, Coimbatore, Tamil Nadu, will drop index: 113
Completed!


In [23]:
to_drop_unknown

[113]

#### The coordinates of one pincode 641003 could not be fetched and hence, it is dropped from the dataframe.

In [24]:
dfg = df1.append(df2, ignore_index = True)
processed_df = dfg.drop(to_drop_unknown)
processed_df

Unnamed: 0,PostOffice,Taluk,Pincode,Latitude,Longitude
0,Agraharasamakulam,Coimbatore North,641110,11.0776,76.9253
1,Alandurai,Coimbatore North,641101,10.9472,76.8305
2,Anaikatti,Coimbatore North,641108,11.0821,76.8566
3,Athipalayam,Coimbatore North,641110,11.0776,76.9253
4,Bharathiyar University,Coimbatore North,641046,11.039,76.8764
...,...,...,...,...,...
178,Kuttagam,Coimbatore,638462,11.0081,76.9795
179,Malumichampatti,Coimbatore,641050,10.9196,76.9985
180,Merkupathi,Coimbatore,638103,11.0081,76.9795
181,Vadavalli,Coimbatore,641041,11.0273,76.9116


In [25]:
processed_df.reset_index(inplace = True)
processed_df.head()

Unnamed: 0,index,PostOffice,Taluk,Pincode,Latitude,Longitude
0,0,Agraharasamakulam,Coimbatore North,641110,11.0776,76.9253
1,1,Alandurai,Coimbatore North,641101,10.9472,76.8305
2,2,Anaikatti,Coimbatore North,641108,11.0821,76.8566
3,3,Athipalayam,Coimbatore North,641110,11.0776,76.9253
4,4,Bharathiyar University,Coimbatore North,641046,11.039,76.8764


In [26]:
processed_df.drop('index', axis = 1, inplace = True)
processed_df

Unnamed: 0,PostOffice,Taluk,Pincode,Latitude,Longitude
0,Agraharasamakulam,Coimbatore North,641110,11.0776,76.9253
1,Alandurai,Coimbatore North,641101,10.9472,76.8305
2,Anaikatti,Coimbatore North,641108,11.0821,76.8566
3,Athipalayam,Coimbatore North,641110,11.0776,76.9253
4,Bharathiyar University,Coimbatore North,641046,11.039,76.8764
...,...,...,...,...,...
177,Kuttagam,Coimbatore,638462,11.0081,76.9795
178,Malumichampatti,Coimbatore,641050,10.9196,76.9985
179,Merkupathi,Coimbatore,638103,11.0081,76.9795
180,Vadavalli,Coimbatore,641041,11.0273,76.9116


In [27]:
processed_df.to_csv('cbe_data.csv')

#### To visualize the localities, folium is employed.

In [28]:
my_map = folium.Map(location=[latitude, longitude], zoom_start=11)
# add markers to map
for lat, lng, label in zip(processed_df['Latitude'], processed_df['Longitude'], processed_df['PostOffice']):
    label = folium.Popup( label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(my_map)  
my_map

#### Displaying map as image, as folium will not be displayed on github.

In [57]:
from IPython.display import Image
from IPython.core.display import HTML 
Image(url= "https://github.com/nithin-gangadharan-rangaraj/Coursera_Capstone/blob/main/w51.PNG?raw=true")

#### Foursquare is used to fetch the venues.

In [29]:
CLIENT_ID = '12QDFM5UPUQHCVNOXEIBZ3INBH0ODCP0GTC4JUYZAX4XXOYW' 
CLIENT_SECRET = '1HOJDAL3AAVJKTDCWVJGWPUWLXFN2SQADVT0ZIHF2AIDRR3G' 
VERSION = '20180605'
LIMIT = 100

In [30]:
def getNearbyVenues(names, taluks, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, taluk, lat, lng in zip(names, taluks, latitudes, longitudes):
        #print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name,
            taluk,
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['PostOffice',
                  'Taluk',
                  'Latitude', 
                  'Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [31]:
print("Fetching venues...")
venues = getNearbyVenues(names=processed_df['PostOffice'],
                         taluks = processed_df['Taluk'],
                         latitudes=processed_df['Latitude'],
                         longitudes=processed_df['Longitude']
                         )
print("----------Fetched----------")

Fetching venues...
----------Fetched----------


In [32]:
venues.head()

Unnamed: 0,PostOffice,Taluk,Latitude,Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Cherannagar,Coimbatore North,11.062781,76.940771,Kada Peru Theriyala,11.059936,76.940182,Food Truck
1,Cherannagar,Coimbatore North,11.062781,76.940771,Nandhini Bakery,11.060019,76.939815,Bakery
2,Cherannagar,Coimbatore North,11.062781,76.940771,Shree Kulfi,11.05967,76.94099,Ice Cream Shop
3,Cherannagar,Coimbatore North,11.062781,76.940771,Linda,11.063611,76.94413,Fast Food Restaurant
4,Edayarpalayam,Coimbatore North,11.038393,76.928186,Edayarpalayam,11.038498,76.925066,Bus Station


#### Venues named "Kada Peru Theriyala" which means "Name Unknown" are removed from the dataframe.

In [33]:
venues.drop([0,12,103], inplace = True)

In [34]:
venues.reset_index(inplace = True)
venues.drop('index', axis = 1, inplace = True)
venues

Unnamed: 0,PostOffice,Taluk,Latitude,Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Cherannagar,Coimbatore North,11.062781,76.940771,Nandhini Bakery,11.060019,76.939815,Bakery
1,Cherannagar,Coimbatore North,11.062781,76.940771,Shree Kulfi,11.059670,76.940990,Ice Cream Shop
2,Cherannagar,Coimbatore North,11.062781,76.940771,Linda,11.063611,76.944130,Fast Food Restaurant
3,Edayarpalayam,Coimbatore North,11.038393,76.928186,Edayarpalayam,11.038498,76.925066,Bus Station
4,Edayarpalayam,Coimbatore North,11.038393,76.928186,Express Super Market,11.041253,76.929969,Department Store
...,...,...,...,...,...,...,...,...
503,Vallipuram,Coimbatore,11.008114,76.979455,Hotel Hari Bhavan,11.008444,76.974953,Indian Restaurant
504,Vallipuram,Coimbatore,11.008114,76.979455,Bikes And Barells,11.010257,76.980719,Nightclub
505,Vallipuram,Coimbatore,11.008114,76.979455,Bird On Tree,11.008887,76.983805,Restaurant
506,Vallipuram,Coimbatore,11.008114,76.979455,Afghan Grill,11.010257,76.980719,Middle Eastern Restaurant


In [35]:
print("There are a total of {} venues with {} unique categories". format(venues.shape[0], len(venues['Venue Category'].unique())))

There are a total of 508 venues with 61 unique categories


In [36]:
venues["Venue Category"].unique()

array(['Bakery', 'Ice Cream Shop', 'Fast Food Restaurant', 'Bus Station',
       'Department Store', 'Indie Movie Theater', 'Grocery Store', 'Gym',
       'Food Truck', 'Hotel', 'Clothing Store', 'Wings Joint',
       'Indian Restaurant', 'Mobile Phone Shop', 'Pizza Place',
       'Dessert Shop', 'Pool Hall', 'Aquarium', 'Chinese Restaurant',
       'Tea Room', 'Voting Booth', 'Snack Place', 'Outdoors & Recreation',
       'ATM', 'Asian Restaurant', 'Electronics Store', 'Multiplex',
       'Market', 'Café', 'Health Food Store', 'Bank', 'Train Station',
       'Fried Chicken Joint', 'Noodle House', 'South Indian Restaurant',
       'Nightclub', 'Medical Center', 'Playground', 'Pharmacy',
       'Shopping Mall', 'Movie Theater', 'Furniture / Home Store',
       'Coffee Shop', "Women's Store", 'Jewelry Store',
       'Vegetarian / Vegan Restaurant', 'Park', 'Diner', 'Bus Stop',
       'Convenience Store', 'Italian Restaurant', 'Lounge', 'Hostel',
       'Gift Shop', 'Middle Eastern Restau

#### This projects aims in finding the best location to open any kind of food shops. To know the areas with high demand, locations with high number of restuarants are determined. Hence, all venues other than food shops are not used.

In [37]:
rest = ['Bakery', 'Café', 'South Indian Restaurant', 'Cupcake Shop',
       'Fast Food Restaurant', 'Hotel', 'Indian Restaurant',
       'Fish & Chips Shop', 'Dessert Shop', 'Food & Drink Shop', 'Food Stand',
       'Chinese Restaurant', 'Tea Room','Snack Place', 'Restaurant', 
       'Shawarma Place', 'Food Truck', 'Asian Restaurant','Pizza Place',
       'Fried Chicken Joint', 'Noodle House','Coffee Shop','Vegetarian / Vegan Restaurant', 
       'Ice Cream Shop',
       'Italian Restaurant',
       'Middle Eastern Restaurant',
       'Diner', 'Burger Joint', 'French Restaurant',
       'Mediterranean Restaurant']

In [38]:
rest_df = pd.DataFrame(columns = ['PostOffice',
                  'Taluk',
                  'Latitude', 
                  'Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category'] )
for item in rest:
    rest_df = rest_df.append(venues[venues['Venue Category'] == item])
rest_df

Unnamed: 0,PostOffice,Taluk,Latitude,Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Cherannagar,Coimbatore North,11.062781,76.940771,Nandhini Bakery,11.060019,76.939815,Bakery
21,Keeranatham,Coimbatore North,11.072893,77.001949,Ragam Bakery,11.075109,77.002251,Bakery
38,Narasimhanaickenpalayam,Coimbatore North,11.026434,76.942312,V's Café Boulangerie et Pâtisserie,11.025184,76.939369,Bakery
41,Narasimhanaickenpalayam,Coimbatore North,11.026434,76.942312,Chef Bakers,11.024284,76.939119,Bakery
44,P&t Staff Quarters,Coimbatore North,11.024744,76.944425,The Donuts,11.026499,76.944498,Bakery
...,...,...,...,...,...,...,...,...
313,R S Puram East,Coimbatore South,11.010613,76.951487,The french door,11.014044,76.948859,French Restaurant
333,R S Puram South,Coimbatore South,11.010613,76.951487,The french door,11.014044,76.948859,French Restaurant
353,R S Puram West,Coimbatore South,11.010613,76.951487,The french door,11.014044,76.948859,French Restaurant
387,Rathinasabapathy Puram,Coimbatore South,11.010613,76.951487,The french door,11.014044,76.948859,French Restaurant


In [39]:
# one hot encoding
rest_onehot = pd.get_dummies(rest_df[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
rest_onehot['PostOffice'] = rest_df['PostOffice']

first_column = rest_onehot.pop('PostOffice')
rest_onehot.insert(0, 'PostOffice', first_column)
rest_onehot.insert(1, 'Latitude', rest_df['Latitude'])
rest_onehot.insert(2, 'Longitude', rest_df['Longitude'])
rest_onehot

Unnamed: 0,PostOffice,Latitude,Longitude,Asian Restaurant,Bakery,Café,Chinese Restaurant,Coffee Shop,Dessert Shop,Diner,...,Italian Restaurant,Mediterranean Restaurant,Middle Eastern Restaurant,Noodle House,Pizza Place,Restaurant,Snack Place,South Indian Restaurant,Tea Room,Vegetarian / Vegan Restaurant
0,Cherannagar,11.062781,76.940771,0,1,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
21,Keeranatham,11.072893,77.001949,0,1,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
38,Narasimhanaickenpalayam,11.026434,76.942312,0,1,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
41,Narasimhanaickenpalayam,11.026434,76.942312,0,1,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
44,P&t Staff Quarters,11.024744,76.944425,0,1,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
313,R S Puram East,11.010613,76.951487,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
333,R S Puram South,11.010613,76.951487,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
353,R S Puram West,11.010613,76.951487,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
387,Rathinasabapathy Puram,11.010613,76.951487,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [40]:
rest_onehot['Total Eating Places'] = rest_onehot.iloc[:,3:].sum(axis = 1)
second_column = rest_onehot.pop('Total Eating Places')
rest_onehot.insert(1, 'Total Eating Places', second_column)
rest = rest_onehot.iloc[:,:4]
rest.head()

Unnamed: 0,PostOffice,Total Eating Places,Latitude,Longitude
0,Cherannagar,1,11.062781,76.940771
21,Keeranatham,1,11.072893,77.001949
38,Narasimhanaickenpalayam,1,11.026434,76.942312
41,Narasimhanaickenpalayam,1,11.026434,76.942312
44,P&t Staff Quarters,1,11.024744,76.944425


In [41]:
rest

Unnamed: 0,PostOffice,Total Eating Places,Latitude,Longitude
0,Cherannagar,1,11.062781,76.940771
21,Keeranatham,1,11.072893,77.001949
38,Narasimhanaickenpalayam,1,11.026434,76.942312
41,Narasimhanaickenpalayam,1,11.026434,76.942312
44,P&t Staff Quarters,1,11.024744,76.944425
...,...,...,...,...
313,R S Puram East,1,11.010613,76.951487
333,R S Puram South,1,11.010613,76.951487
353,R S Puram West,1,11.010613,76.951487
387,Rathinasabapathy Puram,1,11.010613,76.951487


In [42]:
rest_final = rest.iloc[:,:2].groupby('PostOffice').count().reset_index()

In [43]:
rest_final

Unnamed: 0,PostOffice,Total Eating Places
0,Amritanagar,3
1,CBE Mpl Central Busstand,3
2,Cherannagar,3
3,Chettipalayam,3
4,Coimbatore Aerodrome,2
...,...,...
78,Vellakinar,4
79,Vellalapalayam Podanur,1
80,Vellalore,1
81,Venkitapuram,6


In [44]:
merged = pd.merge(rest_final, rest.drop('Total Eating Places', axis = 1), on = 'PostOffice')

In [45]:
merged = merged.drop_duplicates()

#### This is the final dataframe with total number of eating places in each locality along with their coordinates.

In [46]:
merged

Unnamed: 0,PostOffice,Total Eating Places,Latitude,Longitude
0,Amritanagar,3,11.001812,76.962842
3,CBE Mpl Central Busstand,3,11.015528,76.989695
6,Cherannagar,3,11.062781,76.940771
9,Chettipalayam,3,11.001812,76.962842
12,Coimbatore Aerodrome,2,11.030835,77.023088
...,...,...,...,...
307,Vellakinar,4,11.062781,76.940771
311,Vellalapalayam Podanur,1,10.979933,77.029073
312,Vellalore,1,10.979933,77.029073
313,Venkitapuram,6,11.056904,77.073897


#### Clustering is performed with the aim of splitting the data into 3 clusters.

In [47]:
# set number of clusters
kclusters = 3

rest_clustering = merged[["Total Eating Places"]]

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(rest_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10]

array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1], dtype=int32)

In [48]:
rest_clustering = merged.copy()
rest_clustering["Cluster Labels"] = kmeans.labels_
print(rest_clustering.shape)
rest_clustering.head() 

(83, 5)


Unnamed: 0,PostOffice,Total Eating Places,Latitude,Longitude,Cluster Labels
0,Amritanagar,3,11.001812,76.962842,1
3,CBE Mpl Central Busstand,3,11.015528,76.989695,1
6,Cherannagar,3,11.062781,76.940771,1
9,Chettipalayam,3,11.001812,76.962842,1
12,Coimbatore Aerodrome,2,11.030835,77.023088,1


In [49]:
import matplotlib.cm as cm
import matplotlib.colors as colors

In [50]:
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(rest_clustering['Latitude'], rest_clustering['Longitude'], rest_clustering['PostOffice'], rest_clustering['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

In [58]:
Image(url= "https://github.com/nithin-gangadharan-rangaraj/Coursera_Capstone/blob/main/w52.PNG?raw=true")

#### 0 class localities are the ones with high number eating places (red).

In [51]:
rest_clustering.loc[rest_clustering['Cluster Labels'] == 0]

Unnamed: 0,PostOffice,Total Eating Places,Latitude,Longitude,Cluster Labels
141,R S Puram East,17,11.010613,76.951487,0
158,R S Puram South,17,11.010613,76.951487,0
175,R S Puram West,17,11.010613,76.951487,0
210,Rathinasabapathy Puram,17,11.010613,76.951487,0


#### 1 class localities are those with least number of eating places (violet)

In [52]:
rest_clustering.loc[rest_clustering['Cluster Labels'] == 1]

Unnamed: 0,PostOffice,Total Eating Places,Latitude,Longitude,Cluster Labels
0,Amritanagar,3,11.001812,76.962842,1
3,CBE Mpl Central Busstand,3,11.015528,76.989695,1
6,Cherannagar,3,11.062781,76.940771,1
9,Chettipalayam,3,11.001812,76.962842,1
12,Coimbatore Aerodrome,2,11.030835,77.023088,1
...,...,...,...,...,...
287,Vadamadurai Kurudampalayam,2,11.088758,76.939076,1
307,Vellakinar,4,11.062781,76.940771,1
311,Vellalapalayam Podanur,1,10.979933,77.029073,1
312,Vellalore,1,10.979933,77.029073,1


#### 2 class localities are those with moderate number of eating places (light blue).

In [53]:
rest_clustering.loc[rest_clustering['Cluster Labels'] == 2]

Unnamed: 0,PostOffice,Total Eating Places,Latitude,Longitude,Cluster Labels
71,Kuttagam,6,11.008114,76.979455,2
78,Merkupathi,6,11.008114,76.979455,2
84,Mylampatti,6,11.056904,77.073897,2
93,Narasimhanaickenpalayam,6,11.026434,76.942312,2
102,Nilambur,6,11.056904,77.073897,2
192,Rakkipalayam,6,11.026434,76.942312,2
201,Ramnagar Coimbatore,7,11.013197,76.963196,2
230,S B Institute,6,11.005102,76.955115,2
248,Seeranaickenpalayam,6,11.005102,76.955115,2
256,Sinniampalayam,6,11.056904,77.073897,2


## Thus, depending on the number of restaurants/food places the localities are clustered.