# Location Selection

Now, we need to use the data we obtained to help make a decision on where to locate the business. For this example, we will explore two possible scenarios:

- Casual dining Thai restaurant targeting upper-middle class families and working professionals
- Pub targeting college/university students and young working professionals

A metric needs to be created so each potential venue can be graded on multiple factors - level of competition, size of potential market, and investment required.

In [1]:
import folium
import geopandas as gpd
import h3
import numpy as np
import pandas as pd
import plotly.express as px
from sklearn.preprocessing import MaxAbsScaler

In [2]:
# Load the clustered Bangalore grid from its feather file.
df = gpd.read_feather('../data/bangalore_clustered.feather')

# Truncate long text columns (such as addresses) so the preview stays compact.
pd.set_option('display.max_colwidth', 20)
df.head()

Unnamed: 0,id,cluster,address,geometry,pop_total,cost_sqft,ATM,Arts & Entertainment,Asian Restaurant,Athletics & Sports,...,Quick Bites,Residence,Restaurant,Salon,School,Shop & Service,Shopping Mall,Spiritual Center,Travel & Transport,Vegetarian / Vegan Restaurant
0,8861892db3fffff,1,"Yelahanka, Kempe...",POLYGON ((77.613...,1413.220043,4896.586595,0,1,0,2,...,0,4,1,0,0,2,0,2,0,0
1,886016975dfffff,1,"Kempegowda, Yela...",POLYGON ((77.613...,1762.379434,4970.4214,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,8860169759fffff,1,"Kempegowda, Yela...",POLYGON ((77.609...,1786.718829,4877.673619,0,0,0,0,...,0,1,2,0,1,0,0,0,0,0
3,8860169645fffff,2,"Bellary Road, Am...",POLYGON ((77.600...,2635.117082,5134.151371,0,2,3,1,...,0,0,1,0,0,9,1,0,0,0
4,886016962dfffff,1,Chowdeswari Ward...,POLYGON ((77.592...,1853.947643,4827.231325,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [3]:
# Columns that identify or describe a cell; everything else is a feature.
non_feature_cols = ['id', 'cluster', 'geometry', 'address']
feature_data = df.drop(columns = non_feature_cols)

# MaxAbsScaler divides each column by its largest absolute value, so every
# feature ends up with a maximum magnitude of 1 and zeros stay zero.
scaler = MaxAbsScaler()
scaled_features = scaler.fit_transform(feature_data)

# Re-wrap the scaled array as a frame keyed by hexagon id.
df_features = pd.DataFrame(
    scaled_features,
    index = df['id'],
    columns = feature_data.columns,
)

df_features.describe()

Unnamed: 0,pop_total,cost_sqft,ATM,Arts & Entertainment,Asian Restaurant,Athletics & Sports,Automotive Shop,Bakery & Dessert,Bank,Cafeteria,...,Quick Bites,Residence,Restaurant,Salon,School,Shop & Service,Shopping Mall,Spiritual Center,Travel & Transport,Vegetarian / Vegan Restaurant
count,877.0,877.0,877.0,877.0,877.0,877.0,877.0,877.0,877.0,877.0,...,877.0,877.0,877.0,877.0,877.0,877.0,877.0,877.0,877.0,877.0
mean,0.122115,0.439715,0.032497,0.084607,0.085804,0.103274,0.065881,0.115328,0.130217,0.020362,...,0.068415,0.133817,0.144685,0.109464,0.080958,0.121919,0.02309,0.081121,0.015393,0.036773
std,0.151771,0.107068,0.132261,0.155868,0.162408,0.145896,0.126624,0.166674,0.196224,0.073389,...,0.130154,0.165503,0.181013,0.187712,0.176379,0.159005,0.091568,0.136964,0.092789,0.108179
min,0.005117,0.281397,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,0.028074,0.367463,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
50%,0.049522,0.407094,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.071429,0.111111,0.0,0.0,0.076923,0.0,0.0,0.0,0.0
75%,0.165546,0.487066,0.0,0.2,0.25,0.142857,0.111111,0.142857,0.2,0.0,...,0.166667,0.214286,0.222222,0.25,0.0,0.153846,0.0,0.142857,0.0,0.0
max,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0


We will use a weighted sum of selected features to create a score for each location. Complementary venues should increase the score and competition or other detractors should reduce the score.

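A function is defined to calculate a location's score. Weights can optionally be supplied as a dictionary mapping feature names to their relative importance; if no weights are given, every feature is weighted equally.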

In [4]:
def scoreLocation(location, weights = None):
    """Return the weighted sum of a location's feature values.

    Parameters
    ----------
    location
        Feature values for one location, looked up by feature name
        (``location[name]``). When ``weights`` is omitted it must also
        expose ``.index`` listing its feature names, as a pandas Series does.
    weights : dict, optional
        Maps feature name to weight. Only the features named here contribute
        to the score. If omitted (or empty), every feature in
        ``location.index`` receives the same weight ``1 / n``.

    Returns
    -------
    The sum of ``location[name] * weight`` over all entries in ``weights``.
    """
    if not weights:
        # Fall back to equal weighting across all features of the location.
        features = location.index
        n_features = len(features)
        weights = {feature: 1 / n_features for feature in features}

    return sum(location[feature] * weight for feature, weight in weights.items())

## Scenario 1: Casual Dining

For this type of restaurant, our primary competition is other similar restaurants, plus fast food or snack joints to some extent. Complementary venues would include residential areas, office locations, shopping malls and movie theaters - these should generally be good indicators of high footfall.

In [5]:
# Relative importance of each feature: positive weights raise a location's
# score, negative weights lower it.

# Competing food venues count against a location.
competitor_weights = {
    'Asian Restaurant': -10,
    'Indian Restaurant': -7,
    'Restaurant': -7,
    'Vegetarian / Vegan Restaurant': -5,
    'Quick Bites': -3,
    'Fast Food': -3,
}

# Complementary venues count in a location's favour.
complement_weights = {
    'Residence': 15,
    'Office': 12,
    'Shopping Mall': 10,
    'Movie Theater': 10,
}

weights = {
    'pop_total': 10,
    'cost_sqft': -15,
    **competitor_weights,
    **complement_weights,
}

In [6]:
# Fraction of each neighbouring cell's score that is added to a cell's own score.
NEIGHBOUR_SHARE = 0.2

# h3-py 4.x renamed `k_ring` to `grid_disk`; use whichever this install provides.
neighbourhood = getattr(h3, 'grid_disk', None) or h3.k_ring

# Score every cell exactly once up front, so a neighbour is not re-scored for
# each of the rings it belongs to.
own_scores = {
    cell: scoreLocation(row, weights) for cell, row in df_features.iterrows()
}

# For each cell, add the cell's own score plus 20% of the score of each
# neighbouring cell.
combined_scores = {}
counts = [] # Number of scored neighbours per cell (check)
for cell, own_score in own_scores.items():
    # The ring contains the root cell itself - skip it so it is not counted
    # twice - and may contain cells that are not present in df_features.
    neighbours = [
        n for n in neighbourhood(cell, 1)
        if n != cell and n in own_scores
    ]
    combined_scores[cell] = sum(
        (NEIGHBOUR_SHARE * own_scores[n] for n in neighbours),
        own_score,
    )
    counts.append(len(neighbours))

scores = pd.Series(combined_scores, dtype = 'float')

# Attach the scores (aligned on hexagon id) and rank the cells, best first.
df_scores_1 = df.copy().set_index('id', drop = False)
df_scores_1.insert(2, 'score', scores)
df_scores_1 = df_scores_1.sort_values('score', ascending = False)

# Display top 10 locations
pd.options.display.max_colwidth = 110
df_scores_1[['id', 'cluster', 'score', 'address']].head(10)

Unnamed: 0_level_0,id,cluster,score,address
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
8861892461fffff,8861892461fffff,0,7.278042,"Mangammanapalya, Bommanahalli Zone, Bengaluru, Bangalore South, Bangalore Urban, Karnataka, 560068, India"
8861892eb1fffff,8861892eb1fffff,0,2.562121,"Richards Town, Sagayarapuram Ward, East Zone, Bengaluru, Bangalore North, Bangalore Urban, Karnataka, 5600..."
8861892463fffff,8861892463fffff,1,2.278311,"Mangammanapalya, Bommanahalli Zone, Bengaluru, Bangalore South, Bangalore Urban, Karnataka, 560068, India"
8861892eb7fffff,8861892eb7fffff,1,2.06735,"Muneshwara Nagar, East Zone, Bengaluru, Bangalore North, Bangalore Urban, Karnataka, 560084, India"
8860145a2dfffff,8860145a2dfffff,1,1.614485,"K H Ranganath Colony, Rayapuram Ward, West Zone, Bengaluru, Bangalore North, Bangalore Urban, Karnataka, 5..."
8861892e03fffff,8861892e03fffff,0,1.427976,"Mapple Heights Apartments, Vijnana Nagar, Mahadevapura Zone, Bengaluru, Bangalore East, Bangalore Urban, K..."
88618921d3fffff,88618921d3fffff,0,1.193497,"R. Narayanapura, Hagadur, Mahadevapura Zone, Bengaluru, Bangalore East, Bangalore Urban, Karnataka, 560066..."
8861892e1bfffff,8861892e1bfffff,3,1.021457,"Volvo, Bagmane Tech Park Backgate, Bagmane Tech Park, Hosa Tippasandra, East Zone, Bengaluru, Bangalore Ea..."
88618924b3fffff,88618924b3fffff,0,0.9229,"AGS Layout, Uttarahalli, Bommanahalli Zone, Bengaluru, Bangalore South, Bangalore Urban, Karnataka, 560061..."
8861892439fffff,8861892439fffff,0,0.870111,"Bommanahalli Ward, Bommanahalli Zone, Bengaluru, Bangalore South, Bangalore Urban, Karnataka, 76, India"


In [7]:
# (latitude, longitude) on which the map is centred.
map_centre = (12.9792, 77.5916)

map1 = folium.Map(location = map_centre, zoom_start = 11)

# Colour breaks: minimum, median, 85th/95th/99th percentiles and maximum of
# the score, so the upper tail of the distribution gets its own colour bands.
score_col = df_scores_1['score']
bins = [
    score_col.min(),
    *(score_col.quantile(q) for q in (0.50, 0.85, 0.95, 0.99)),
    score_col.max(),
]

# Shade each hexagon by its score; cells are matched to scores via 'id'.
choropleth = folium.Choropleth(
    geo_data = df_scores_1,
    data = score_col,
    key_on = 'id',
    bins = bins,
    fill_color = 'YlGnBu',
    fill_opacity = 0.8,
    nan_fill_opacity = 0.0,
    line_opacity = 0.9,
    highlight = True,
    legend_name = 'Score (higher is better)',
)

# Clicking a hexagon shows its id, address and score.
popup = folium.GeoJsonPopup(
    fields = ['id', 'address', 'score'],
    aliases = ['Hex ID', 'Address', 'Score'],
)
popup.add_to(choropleth.geojson)

choropleth.add_to(map1)

map1 # Display map

## Scenario 2: College Bar

Here, our primary targets are college students and young working professionals, so we look for areas near colleges or offices that do not already have a lot of competition.

In [8]:
# Relative importance of each feature: positive weights raise a location's
# score, negative weights lower it.

# Existing nightlife venues count against a location.
competitor_weights = {
    'Nightlife Spot': -15.0,
}

# Complementary venues count in a location's favour.
complement_weights = {
    'Residence': 5,
    'Office': 10,
    'Shopping Mall': 10,
    'Movie Theater': 10,
    'College & University': 15,
    'Arts & Entertainment': 7,
}

weights = {
    'pop_total': 10,
    'cost_sqft': -15,
    **competitor_weights,
    **complement_weights,
}

In [9]:
# Fraction of each neighbouring cell's score that is added to a cell's own score.
NEIGHBOUR_SHARE = 0.2

# h3-py 4.x renamed `k_ring` to `grid_disk`; use whichever this install provides.
neighbourhood = getattr(h3, 'grid_disk', None) or h3.k_ring

# Score every cell exactly once up front, so a neighbour is not re-scored for
# each of the rings it belongs to.
own_scores = {
    cell: scoreLocation(row, weights) for cell, row in df_features.iterrows()
}

# For each cell, add the cell's own score plus 20% of the score of each
# neighbouring cell.
combined_scores = {}
counts = [] # Number of scored neighbours per cell (check)
for cell, own_score in own_scores.items():
    # The ring contains the root cell itself - skip it so it is not counted
    # twice - and may contain cells that are not present in df_features.
    neighbours = [
        n for n in neighbourhood(cell, 1)
        if n != cell and n in own_scores
    ]
    combined_scores[cell] = sum(
        (NEIGHBOUR_SHARE * own_scores[n] for n in neighbours),
        own_score,
    )
    counts.append(len(neighbours))

scores = pd.Series(combined_scores, dtype = 'float')

# Attach the scores (aligned on hexagon id) and rank the cells, best first.
df_scores_2 = df.copy().set_index('id', drop = False)
df_scores_2.insert(2, 'score', scores)
df_scores_2 = df_scores_2.sort_values('score', ascending = False)

# Display top 10 locations
pd.options.display.max_colwidth = 110
df_scores_2[['id', 'cluster', 'score', 'address']].head(10)

Unnamed: 0_level_0,id,cluster,score,address
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
8861892185fffff,8861892185fffff,3,12.474769,"Kadugodi, Mahadevapura Zone, Sheegehalli, Bangalore East, Bangalore Urban, Karnataka, 56066, India"
8860145b1dfffff,8860145b1dfffff,3,11.676285,"Hospital-ayurvedic Homeo Clinic, 2nd Main Road, Prakash Nagar Ward, West Zone, Bengaluru, Bangalore North,..."
88618925d9fffff,88618925d9fffff,2,8.668393,"MICO Layout, BTM Layout Ward, South Zone, Bengaluru, Bangalore South, Bangalore Urban, Karnataka, 560069, ..."
88618925d5fffff,88618925d5fffff,3,8.447275,"Bismillah Nagar, Gurappanapalya Ward, South Zone, Bengaluru, Bangalore South, Bangalore Urban, Karnataka, ..."
8860145b0bfffff,8860145b0bfffff,3,8.420726,"Ramachandra Pura, Okalipuram Ward, West Zone, Bengaluru, Bangalore North, Bangalore Urban, Karnataka, 560 ..."
88618925d1fffff,88618925d1fffff,3,7.645867,"Sahakari Vidyakendra AHPS Jayanagar, East End B Main Road, NAL Layout, Jayanagar East Ward, South Zone, Be..."
886189219dfffff,886189219dfffff,3,7.364474,"MLCP 3, MLCP Road, ITPB (formely ITPL), Hudi, Mahadevapura Zone, Bengaluru, Bangalore East, Bangalore Urba..."
8861892589fffff,8861892589fffff,3,7.14199,"Christ University, Dr. M H Marigowda Road, Suddagunte Palya Ward, South Zone, Bengaluru, Bangalore South, ..."
8860145b03fffff,8860145b03fffff,2,7.136441,"7th Main Road, Sriramapura, Dayananda Nagar Ward, West Zone, Bengaluru, Bangalore North, Bangalore Urban, ..."
8861892cd7fffff,8861892cd7fffff,0,7.028429,"4th Cross Road, Anandanagar, Hebbala Ward, East Zone, Bengaluru, Bangalore North, Bangalore Urban, Karnata..."


In [10]:
# (latitude, longitude) on which the map is centred.
map_centre = (12.9792, 77.5916)

map2 = folium.Map(location = map_centre, zoom_start = 11)

# Colour breaks: minimum, median, 85th/95th/99th percentiles and maximum of
# the score, so the upper tail of the distribution gets its own colour bands.
score_col = df_scores_2['score']
bins = [
    score_col.min(),
    *(score_col.quantile(q) for q in (0.50, 0.85, 0.95, 0.99)),
    score_col.max(),
]

# Shade each hexagon by its score; cells are matched to scores via 'id'.
choropleth = folium.Choropleth(
    geo_data = df_scores_2,
    data = score_col,
    key_on = 'id',
    bins = bins,
    fill_color = 'YlGnBu',
    fill_opacity = 0.8,
    nan_fill_opacity = 0.0,
    line_opacity = 0.9,
    highlight = True,
    legend_name = 'Score (higher is better)',
)

# Clicking a hexagon shows its id, address and score.
popup = folium.GeoJsonPopup(
    fields = ['id', 'address', 'score'],
    aliases = ['Hex ID', 'Address', 'Score'],
)
popup.add_to(choropleth.geojson)

choropleth.add_to(map2)

map2 # Display map