# GOA RESTAURANT BUSINESS

### importing  all the required libraries

In [1]:
import requests #for downloading the html file of the webpage
from bs4 import BeautifulSoup #Easily handling to html files
import pandas as pd #for data analysis
# !pip install geocoder
from geopy.geocoders import Nominatim #location to Lat & Long
import geocoder 
import folium #Visulaization on Map
from sklearn.cluster import KMeans # K - Nearest Neighbor Algorithm
import numpy as np 
import matplotlib.cm as cm
import matplotlib.colors as colors

In [2]:
#url for list of cities in goa
url = 'https://en.wikipedia.org/wiki/List_of_cities_and_towns_in_Goa'
html = requests.get(url, 'html.parser').text

In [3]:
soup = BeautifulSoup(html)
# soup

table = soup.find('table', attrs = {'class' : 'wikitable plainrowheaders sortable'})

city_list = [row.text.strip() for row in table.find_all('th')][8:]

city_list

['Bicholim',
 'Canacona',
 'Cuncolim',
 'Curchorem',
 'Mapusa',
 'Margao',
 'Mormugao',
 'Panaji',
 'Pernem',
 'Ponda',
 'Quepem',
 'Sanguem',
 'Sanquelim',
 'Valpoi']

### cov_2_lat_long function takes in name of city and return its latitude and longitude

In [4]:
def cov_2_lat_long(city):
    lat_long = None
    while(lat_long is None):
        res = geocoder.arcgis('{}, Goa, India'.format(city))
#         print(res)
        lat_long = res.latlng
    return lat_long

### Creating the combined data frame of city, lat and long

In [5]:
data = pd.DataFrame(columns = ['City', 'Latitude', 'Longitude'])
for city in city_list:
    coords = cov_2_lat_long(city)
    data = data.append({'City' : city, 'Latitude' : coords[0], 'Longitude' : coords[1]}, ignore_index = True)

In [6]:
data

Unnamed: 0,City,Latitude,Longitude
0,Bicholim,15.58618,73.95539
1,Canacona,15.00824,74.04328
2,Cuncolim,15.18058,74.00016
3,Curchorem,15.25071,74.10544
4,Mapusa,15.5985,73.80955
5,Margao,15.2747,73.98014
6,Mormugao,15.39585,73.81568
7,Panaji,15.45844,73.80566
8,Pernem,15.72012,73.79802
9,Ponda,15.40612,74.00651


### Geting the Lat and Long of Goa for zooming into the map

In [7]:
geolocator = Nominatim(user_agent="Goa-India")
location = geolocator.geocode('Goa, India')
latitude = location.latitude
longitude = location.longitude
print('Goa @ {}, {}.'.format(latitude, longitude))

Goa @ 15.3004543, 74.0855134.


### Visualizing all cities on Map using Folium

In [8]:
map_goa = folium.Map(location=[latitude, longitude], zoom_start=9)


for lat, lng, neighborhood in zip(data['Latitude'], data['Longitude'], data['City']):
    label = '{}'.format(neighborhood)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='cyan',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7).add_to(map_goa)  
    
map_goa

In [26]:
CLIENT_ID = 'Your client ID' # your Foursquare ID
CLIENT_SECRET = 'Your client secret' # your Foursquare Secret
VERSION = '20180605' # Foursquare API version
LIMIT = 100 # A default Foursquare API limit value

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: Your client ID
CLIENT_SECRET:Your client secret


### Collecting venues near each cities with the radius of 2 kms

In [10]:
radius = 2000
LIMIT = 100

venues = []

for lat, long, neighborhood in zip(data['Latitude'], data['Longitude'], data['City']):
    
    # create the API request URL
    url = "https://api.foursquare.com/v2/venues/explore?client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}".format(
        CLIENT_ID,
        CLIENT_SECRET,
        VERSION,
        lat,
        long,
        radius, 
        LIMIT)
    
    # make the GET request
    results = requests.get(url).json()["response"]['groups'][0]['items']
    
    # return only relevant information for each nearby venue
    for venue in results:
        venues.append((
            neighborhood,
            lat, 
            long, 
            venue['venue']['name'], 
            venue['venue']['location']['lat'], 
            venue['venue']['location']['lng'],  
            venue['venue']['categories'][0]['name']))

In [11]:
venues_data = pd.DataFrame(venues)

In [12]:
venues_data.columns = ['Neighborhood', 'Latitude', 'Longitude', 'VenueName', 'VenueLatitude', 'VenueLongitude', 'VenueCategory']
venues_data

Unnamed: 0,Neighborhood,Latitude,Longitude,VenueName,VenueLatitude,VenueLongitude,VenueCategory
0,Bicholim,15.58618,73.95539,Government Gym,15.581934,73.954689,Gym
1,Bicholim,15.58618,73.95539,KTC Bus Terminus Bicholim,15.589038,73.949389,Bus Station
2,Bicholim,15.58618,73.95539,Hotel Shanbhag,15.589748,73.946312,Indian Restaurant
3,Canacona,15.00824,74.04328,Patnem Beach,14.997461,74.033991,Beach
4,Canacona,15.00824,74.04328,The Space Goa,15.016893,74.029459,Deli / Bodega
...,...,...,...,...,...,...,...
132,Sanquelim,15.56414,74.01004,Harvalem Waterfall,15.568929,74.015234,Trail
133,Sanquelim,15.56414,74.01004,Aravelam caves,15.557494,74.018224,Sculpture Garden
134,Sanquelim,15.56414,74.01004,On The Ways,15.558270,74.019844,Indian Restaurant
135,Sanquelim,15.56414,74.01004,Sharvraj Ecofarm,15.573146,73.998413,Water Park


In [13]:
print('Total of {} unique venues in the Neighborhood'.format(len(venues_data['VenueCategory'].unique())))

Total of 61 unique venues in the Neighborhood


### One hot encoding venues

In [14]:
goa_onehot = pd.get_dummies(venues_data[['VenueCategory']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
goa_onehot['Neighborhoods'] = venues_data['Neighborhood'] 

# move neighborhood column to the first column
fixed_columns = [goa_onehot.columns[-1]] + list(goa_onehot.columns[:-1])
goa_onehot = goa_onehot[fixed_columns]

print(goa_onehot.shape)
goa_onehot.head()

(137, 62)


Unnamed: 0,Neighborhoods,ATM,Athletics & Sports,Bakery,Bar,Beach,Beach Bar,Boat or Ferry,Boutique,Breakfast Spot,...,Skate Park,Soccer Field,Spa,Stadium,Swiss Restaurant,Tapas Restaurant,Trail,Train Station,Vegetarian / Vegan Restaurant,Water Park
0,Bicholim,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,Bicholim,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,Bicholim,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,Canacona,0,0,0,0,1,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,Canacona,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


### Mean of venues grouped by Neighborhood

In [15]:
goa_grouped = goa_onehot.groupby(["Neighborhoods"]).mean().reset_index()

print(goa_grouped.shape)
goa_grouped

(14, 62)


Unnamed: 0,Neighborhoods,ATM,Athletics & Sports,Bakery,Bar,Beach,Beach Bar,Boat or Ferry,Boutique,Breakfast Spot,...,Skate Park,Soccer Field,Spa,Stadium,Swiss Restaurant,Tapas Restaurant,Trail,Train Station,Vegetarian / Vegan Restaurant,Water Park
0,Bicholim,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,Canacona,0.0,0.025,0.025,0.025,0.1,0.025,0.0,0.0,0.0,...,0.0,0.0,0.025,0.0,0.025,0.025,0.0,0.0,0.0,0.0
2,Cuncolim,0.0,0.0,0.125,0.125,0.0,0.0,0.0,0.0,0.0,...,0.125,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,Curchorem,0.0,0.0,0.0,0.0,0.0,0.0,0.166667,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.166667,0.0,0.0
4,Mapusa,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.055556,0.0,...,0.0,0.055556,0.0,0.055556,0.0,0.0,0.0,0.0,0.0,0.0
5,Margao,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,Mormugao,0.0,0.0,0.111111,0.0,0.111111,0.0,0.0,0.0,0.0,...,0.0,0.111111,0.0,0.0,0.0,0.0,0.0,0.0,0.111111,0.0
7,Panaji,0.0,0.0,0.1,0.0,0.1,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
8,Pernem,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.5,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,Ponda,0.0,0.0,0.125,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [16]:
goa_restaurant = goa_grouped[['Neighborhoods']+[x for x in goa_grouped.columns  if 'Restaurant' in x]]

In [17]:
goa_restaurant

Unnamed: 0,Neighborhoods,Chinese Restaurant,Fast Food Restaurant,Goan Restaurant,Indian Restaurant,Restaurant,Seafood Restaurant,Swiss Restaurant,Tapas Restaurant,Vegetarian / Vegan Restaurant
0,Bicholim,0.0,0.0,0.0,0.333333,0.0,0.0,0.0,0.0,0.0
1,Canacona,0.0,0.0,0.025,0.1,0.1,0.075,0.025,0.025,0.0
2,Cuncolim,0.0,0.0,0.0,0.125,0.0,0.0,0.0,0.0,0.0
3,Curchorem,0.0,0.166667,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,Mapusa,0.055556,0.055556,0.0,0.166667,0.0,0.0,0.0,0.0,0.0
5,Margao,0.0,0.0,0.0,0.1,0.1,0.0,0.0,0.0,0.0
6,Mormugao,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.111111
7,Panaji,0.0,0.0,0.0,0.1,0.05,0.0,0.0,0.0,0.0
8,Pernem,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,Ponda,0.0,0.0,0.0,0.375,0.125,0.0,0.0,0.0,0.0


### Clustering cities using KNN algorithm

In [18]:
# set number of clusters
kclusters = 3

goa_clustering = goa_restaurant.drop(["Neighborhoods"], 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(goa_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10]


array([2, 0, 0, 0, 0, 0, 0, 0, 0, 2], dtype=int32)

### adding cluster labels to the dataframe

In [19]:
goa_restaurant_cluster = goa_restaurant.copy()
goa_restaurant['Cluster'] = kmeans.labels_

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  goa_restaurant['Cluster'] = kmeans.labels_


In [20]:
columns = goa_restaurant.columns.tolist()[-1:] + goa_restaurant.columns.tolist()[:-1]

In [21]:
goa_restaurant = goa_restaurant[columns]
goa_restaurant.rename(columns = {'Neighborhoods' : 'Neighborhood'}, inplace = True)

In [22]:
goa_merged = goa_restaurant.join(venues_data.set_index("Neighborhood"), on="Neighborhood")

print(goa_merged.shape)
goa_merged.head() # check the last columns!

(137, 17)


Unnamed: 0,Cluster,Neighborhood,Chinese Restaurant,Fast Food Restaurant,Goan Restaurant,Indian Restaurant,Restaurant,Seafood Restaurant,Swiss Restaurant,Tapas Restaurant,Vegetarian / Vegan Restaurant,Latitude,Longitude,VenueName,VenueLatitude,VenueLongitude,VenueCategory
0,2,Bicholim,0.0,0.0,0.0,0.333333,0.0,0.0,0.0,0.0,0.0,15.58618,73.95539,Government Gym,15.581934,73.954689,Gym
0,2,Bicholim,0.0,0.0,0.0,0.333333,0.0,0.0,0.0,0.0,0.0,15.58618,73.95539,KTC Bus Terminus Bicholim,15.589038,73.949389,Bus Station
0,2,Bicholim,0.0,0.0,0.0,0.333333,0.0,0.0,0.0,0.0,0.0,15.58618,73.95539,Hotel Shanbhag,15.589748,73.946312,Indian Restaurant
1,0,Canacona,0.0,0.0,0.025,0.1,0.1,0.075,0.025,0.025,0.0,15.00824,74.04328,Patnem Beach,14.997461,74.033991,Beach
1,0,Canacona,0.0,0.0,0.025,0.1,0.1,0.075,0.025,0.025,0.0,15.00824,74.04328,The Space Goa,15.016893,74.029459,Deli / Bodega


### Adding cities based on cluster to the map

In [23]:
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=9)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i+x+(i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(goa_merged['Latitude'], goa_merged['Longitude'], goa_merged['Neighborhood'], goa_merged['Cluster']):
    label = folium.Popup(str(poi) + ' - Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

In [24]:
# goa_merged[goa_merged['Cluster'] == 0]
goa_grouped = goa_onehot.groupby(['Neighborhoods']).mean().reset_index()
goa_grouped.rename(columns = {'Neighborhoods' : 'Neighborhood'}, inplace = True)

goa_grouped = goa_grouped[['Neighborhood']+[x for x in goa_grouped.columns  if 'Restaurant' in x]]

### Printing top two restaurants from each city

In [25]:
num_top_restaurants = 2

for hood in goa_grouped['Neighborhood']:
    print("----"+hood+"----")
    temp = goa_grouped[goa_grouped['Neighborhood'] == hood].T.reset_index()
#     print(temp)
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_restaurants))
    print('\n')
    

----Bicholim----
                venue  freq
0   Indian Restaurant  0.33
1  Chinese Restaurant  0.00


----Canacona----
               venue  freq
0  Indian Restaurant   0.1
1         Restaurant   0.1


----Cuncolim----
                venue  freq
0   Indian Restaurant  0.12
1  Chinese Restaurant  0.00


----Curchorem----
                  venue  freq
0  Fast Food Restaurant  0.17
1    Chinese Restaurant  0.00


----Mapusa----
                venue  freq
0   Indian Restaurant  0.17
1  Chinese Restaurant  0.06


----Margao----
               venue  freq
0  Indian Restaurant   0.1
1         Restaurant   0.1


----Mormugao----
                           venue  freq
0  Vegetarian / Vegan Restaurant  0.11
1             Chinese Restaurant  0.00


----Panaji----
               venue  freq
0  Indian Restaurant  0.10
1         Restaurant  0.05


----Pernem----
                  venue  freq
0    Chinese Restaurant   0.0
1  Fast Food Restaurant   0.0


----Ponda----
               venue  freq
0  

# Analysis

Goa is a small tourist place, but we do have a lot of headroom when it comes to restaurant business,

In Sanguem city, There are no prominent Restaurant

In Valpoi and Sanquelim city, only Indian Restaurants are there, coming to the massive number of tourists who 
visit Goa, There is a great scope of investment in this area

In pernem, quepem and sanguem city, only prominent resturants are there, This also brings huge oppurtunity
