# Finding location for new Indian Restaurant in London

### Neighborhood Candidates

latitude & longitude coordinates creation for centroids of candidate neighborhoods. Create a grid of cells covering area of interest which is aprox. 12x12 killometers centered around London city center.

In [1]:
import requests

from geopy.geocoders import Nominatim


address = 'City of London, London, United Kingdom'
geolocator = Nominatim(user_agent="london_explorer")
location = geolocator.geocode(address)
lat = location.latitude
lng = location.longitude
london_center = [lat, lng]
print('Coordinate of {}: {}'.format(address, london_center), ' location : ', location)

Coordinate of City of London, London, United Kingdom: [51.5156177, -0.0919983]  location :  City of London, London, Greater London, England, EC2V 5AE, UK


create a grid of area candidates, equaly spaced, centered around city center and within ~6km from London. calculate distances we need to create our grid of locations in Cartesian 2D coordinate system which allows us to calculate distances in meters. Then we'll project those coordinates back to latitude/longitude degrees to be shown on Folium map.

In [2]:
#!pip install shapely
import shapely.geometry

#!pip install pyproj
import pyproj

import math

def lonlat_to_xy(lon, lat):
    proj_latlon = pyproj.Proj(proj='latlong',datum='WGS84')
    proj_xy = pyproj.Proj(proj="utm", zone=33, datum='WGS84')
    xy = pyproj.transform(proj_latlon, proj_xy, lon, lat)
    return xy[0], xy[1]

def xy_to_lonlat(x, y):
    proj_latlon = pyproj.Proj(proj='latlong',datum='WGS84')
    proj_xy = pyproj.Proj(proj="utm", zone=33, datum='WGS84')
    lonlat = pyproj.transform(proj_xy, proj_latlon, x, y)
    return lonlat[0], lonlat[1]

def calc_xy_distance(x1, y1, x2, y2):
    dx = x2 - x1
    dy = y2 - y1
    return math.sqrt(dx*dx + dy*dy)

print('Coordinate transformation check')
print('-------------------------------')
print('London center longitude={}, latitude={}'.format(london_center[1], london_center[0]))
x, y = lonlat_to_xy(london_center[1], london_center[0])
print('London center UTM X={}, Y={}'.format(x, y))
lo, la = xy_to_lonlat(x, y)
print('London center longitude={}, latitude={}'.format(lo, la))

Coordinate transformation check
-------------------------------
London center longitude=-0.0919983, latitude=51.5156177
London center UTM X=-544366.1348823695, Y=5815953.23418613
London center longitude=-0.09199829999999391, latitude=51.51561769999999


Let's create a **hexagonal grid of cells**: we offset every other row, and adjust vertical row spacing so that **every cell center is equally distant from all it's neighbors**.

In [3]:
london_center_x, london_center_y = lonlat_to_xy(london_center[1], london_center[0]) # City center in Cartesian coordinates

k = math.sqrt(3) / 2 # Vertical offset for hexagonal grid cells
x_min = london_center_x - 6000
x_step = 600
y_min = london_center_y - 6000 - (int(21/k)*k*600 - 12000)/2
y_step = 600 * k 

latitudes = []
longitudes = []
distances_from_center = []
xs = []
ys = []
for i in range(0, int(21/k)):
    y = y_min + i * y_step
    x_offset = 300 if i%2==0 else 0
    for j in range(0, 21):
        x = x_min + j * x_step + x_offset
        distance_from_center = calc_xy_distance(london_center_x, london_center_y, x, y)
        if (distance_from_center <= 6001):
            lon, lat = xy_to_lonlat(x, y)
            latitudes.append(lat)
            longitudes.append(lon)
            distances_from_center.append(distance_from_center)
            xs.append(x)
            ys.append(y)

print(len(latitudes), 'candidate neighborhood centers generated.')

364 candidate neighborhood centers generated.


Let's visualize the data we have so far: city center location and candidate neighborhood centers:

In [4]:
import folium

In [5]:
map_london = folium.Map(location=london_center, zoom_start=13)
folium.Marker(london_center, popup='City of Londonz').add_to(map_london)
for lat, lon in zip(latitudes, longitudes):
    #folium.CircleMarker([lat, lon], radius=2, color='blue', fill=True, fill_color='blue', fill_opacity=1).add_to(map_berlin) 
    folium.Circle([lat, lon], radius=300, color='blue', fill=False).add_to(map_london)
    #folium.Marker([lat, lon]).add_to(map_berlin)
map_london

OK, we now have the coordinates of centers of neighborhoods/areas to be evaluated, equally spaced (distance from every point to it's neighbors is exactly the same) and within ~6km from London. 

In [8]:
def get_address(lat, lng):
    #print('entering get address')
    try:
        #address = '{},{}'.format(lat, lng)
        address = [lat, lng]
        geolocator = Nominatim(user_agent="london_explorer")
        location = geolocator.geocode(address)
        #print(location[0])
        return location[0]
    except:
        return 'nothing found'


addr = get_address(london_center[0], london_center[1])
print('Reverse geocoding check')
print('-----------------------')
print('Address of [{}, {}] is: {}'.format(london_center[0], london_center[1], addr)) 
print(type(location[0]))

Reverse geocoding check
-----------------------
Address of [51.5156177, -0.0919983] is: Roman Amphitheatre Site, Guildhall Yard, Temple, London, Greater London, England, EC2V 5AA, UK
<class 'str'>


In [9]:
print('Obtaining location addresses: ', end='')
addresses = []
for lat, lon in zip(latitudes, longitudes):
    address = get_address(lat, lon)
    if address is None:
        address = 'NO ADDRESS'
    address = address.replace(', England', '') # We don't need country part of address
    addresses.append(address)
    print(' .', end='')
print(' done.')

Obtaining location addresses:  . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . done.


In [11]:
import pandas as pd

df_locations = pd.DataFrame({'Address': addresses,
                             'Latitude': latitudes,
                             'Longitude': longitudes,
                             'X': xs,
                             'Y': ys,
                             'Distance from center': distances_from_center})

df_locations.head()

Unnamed: 0,Address,Latitude,Longitude,X,Y,Distance from center
0,"Poplar Road, Herne Hill, London Borough of Lam...",51.4627,-0.100251,-546166.134882,5810237.0,5992.495307
1,"Acland Crescent, Herne Hill, London Borough of...",51.463799,-0.091912,-545566.134882,5810237.0,5840.3767
2,"Dog Kennel Hill School, Grove Hill Road, East ...",51.464897,-0.083573,-544966.134882,5810237.0,5747.173218
3,"46, Danby Street, Bellenden, London, Greater L...",51.465995,-0.075233,-544366.134882,5810237.0,5715.767665
4,"188, Rye Lane, Ledbury Estate, Peckham, London...",51.467093,-0.066893,-543766.134882,5810237.0,5747.173218


In [12]:
df_locations.shape

(364, 6)

In [13]:
df_locations.to_pickle('./Dataset/locations.pkl')    

## Foursquare

In [14]:
client_id = 'XXXXXXXXX'
client_secret = 'XXXXXXXX'
VERSION = 'XXXXXXX'

We're interested in venues in 'food' category, but only those that are proper restaurants - coffe shops, pizza places, bakeries etc. are not direct competitors so we don't care about those. So we will include in out list only venues that have 'restaurant' in category name, and we'll make sure to detect and include all the subcategories of specific 'Indian restaurant' category, as we need info on Indian restaurants in the neighborhood.


In [15]:
food_category = '4d4b7105d754a06374d81259' # 'Root' category for all food-related venues

indian_restaurant_categories = ['4bf58dd8d48988d10f941735', '54135bf5e4b08f3d2429dfe5', '54135bf5e4b08f3d2429dff3',
                                 '54135bf5e4b08f3d2429dff5', '54135bf5e4b08f3d2429dfe2', '54135bf5e4b08f3d2429dff2',
                                 '54135bf5e4b08f3d2429dfe1', '54135bf5e4b08f3d2429dfe3', '54135bf5e4b08f3d2429dfe8',
                                 '54135bf5e4b08f3d2429dfe9', '54135bf5e4b08f3d2429dfe6', '54135bf5e4b08f3d2429dfdf',
                                 '54135bf5e4b08f3d2429dfe4', '54135bf5e4b08f3d2429dfe7', '54135bf5e4b08f3d2429dfea',
                                 '54135bf5e4b08f3d2429dfeb', '54135bf5e4b08f3d2429dfed', '54135bf5e4b08f3d2429dfee',
                                 '54135bf5e4b08f3d2429dff4', '54135bf5e4b08f3d2429dfe0', '54135bf5e4b08f3d2429dfdd',
                                 '54135bf5e4b08f3d2429dff6', '54135bf5e4b08f3d2429dfef', '54135bf5e4b08f3d2429dff0',
                                 '54135bf5e4b08f3d2429dff1', '54135bf5e4b08f3d2429dfde', '54135bf5e4b08f3d2429dfec']



In [16]:
def is_restaurant(categories, specific_filter=None):
    restaurant_words = ['restaurant', 'diner', 'indian', 'kebab']
    restaurant = False
    specific = False
    for c in categories:
        category_name = c[0].lower()
        category_id = c[1]
        for r in restaurant_words:
            if r in category_name:
                restaurant = True
        if 'fast food' in category_name:
            restaurant = False
        if not(specific_filter is None) and (category_id in specific_filter):
            specific = True
            restaurant = True
    return restaurant, specific

def get_categories(categories):
    return [(cat['name'], cat['id']) for cat in categories]

def format_address(location):
    address = ', '.join(location['formattedAddress'])
    address = address.replace(', England', '')
    address = address.replace(', United Kingdom', '')
    return address

def get_venues_near_location(lat, lon, category, client_id, client_secret, radius=500, limit=100):
    version = '20180724'
    url = 'https://api.foursquare.com/v2/venues/explore?client_id={}&client_secret={}&v={}&ll={},{}&categoryId={}&radius={}&limit={}'.format(
        client_id, client_secret, version, lat, lon, category, radius, limit)
    try:
        results = requests.get(url).json()['response']['groups'][0]['items']
        venues = [(item['venue']['id'],
                   item['venue']['name'],
                   get_categories(item['venue']['categories']),
                   (item['venue']['location']['lat'], item['venue']['location']['lng']),
                   format_address(item['venue']['location']),
                   item['venue']['location']['distance']) for item in results]        
    except:
        venues = []
    return venues

In [17]:
# Let's now go over our neighborhood locations and get nearby restaurants; we'll also maintain a dictionary of all found restaurants and all found indian restaurants

import pickle

def get_restaurants(lats, lons):
    restaurants = {}
    indian_restaurants = {}
    location_restaurants = []

    print('Obtaining venues around candidate locations:', end='')
    for lat, lon in zip(lats, lons):
        # Using radius=350 to meke sure we have overlaps/full coverage so we don't miss any restaurant (we're using dictionaries to remove any duplicates resulting from area overlaps)
        venues = get_venues_near_location(lat, lon, food_category, client_id, client_secret, radius=350, limit=100)
        area_restaurants = []
        for venue in venues:
            venue_id = venue[0]
            venue_name = venue[1]
            venue_categories = venue[2]
            venue_latlon = venue[3]
            venue_address = venue[4]
            venue_distance = venue[5]
            is_res, is_indian = is_restaurant(venue_categories, specific_filter=indian_restaurant_categories)
            if is_res:
                x, y = lonlat_to_xy(venue_latlon[1], venue_latlon[0])
                restaurant = (venue_id, venue_name, venue_latlon[0], venue_latlon[1], venue_address, venue_distance, is_indian, x, y)
                if venue_distance<=300:
                    area_restaurants.append(restaurant)
                restaurants[venue_id] = restaurant
                if is_indian:
                    indian_restaurants[venue_id] = restaurant
        location_restaurants.append(area_restaurants)
        print(' .', end='')
    print(' done.')
    return restaurants, indian_restaurants, location_restaurants

# Try to load from local file system in case we did this before
restaurants = {}
indian_restaurants = {}
location_restaurants = []
loaded = False
try:
    with open('/Dataset/restaurants_350.pkl', 'rb') as f:
        restaurants = pickle.load(f)
        print('Restaurant data loaded.')
    with open('/Dataset/indian_restaurants_350.pkl', 'rb') as f:
        indian_restaurants = pickle.load(f)
        print('Indian Restaurant data loaded.')
    with open('/Dataset/location_restaurants_350.pkl', 'rb') as f:
        location_restaurants = pickle.load(f)
        print('location Restaurant data loaded.')
    loaded = True
except:
    print('Restaurant data not loaded.')
    pass

# If load failed use the Foursquare API to get the data
if not loaded:
    restaurants, indian_restaurants, location_restaurants = get_restaurants(latitudes, longitudes)
    

Restaurant data not loaded.
Obtaining venues around candidate locations: . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . done.


In [18]:
import numpy as np

print('Total number of restaurants:', len(restaurants))
print('Total number of Indian restaurants:', len(indian_restaurants))
print('Percentage of Indian restaurants: {:.2f}%'.format(len(indian_restaurants) / len(restaurants) * 100))
print('Average number of restaurants in neighborhood:', np.array([len(r) for r in location_restaurants]).mean())

Total number of restaurants: 2624
Total number of Indian restaurants: 194
Percentage of Indian restaurants: 7.39%
Average number of restaurants in neighborhood: 6.428571428571429


In [19]:
print('List of all restaurants')
print('-----------------------')
for r in list(restaurants.values())[:10]:
    print(r)
print('...')
print('Total:', len(restaurants))

List of all restaurants
-----------------------
('4f9189cee4b02081d15891d6', 'Kung Fu Panda', 51.46421255038107, -0.08145689037808392, 'United Kingdom', 165, False, -544836.2983153368, 5810131.201179047)
('53369f55498ec0b8cac28896', 'Artusi', 51.46672969666967, -0.07284656623155056, '161 Bellenden Rd, London, Greater London, SE15 4DH', 184, False, -544184.6752071743, 5810283.805052945)
('4fddd6b97beb69b52ad8a0c1', 'The Begging Bowl', 51.467395706158484, -0.07247517930704497, '168 Bellenden Rd (at Choumert Rd), Peckham, Greater London, SE15 4BW', 246, False, -544143.6184572631, 5810351.865550189)
('52f2a2e4498ed94ca73e6a1e', 'Ganapati Takeaway Kitchen', 51.466247, -0.072811, '4 Maxted Road, London, Greater London', 170, True, -544193.4406975273, 5810230.055158369)
('4e3d2b52483b04e17a91da46', 'Lovely House', 51.46789189338897, -0.07241128097607828, '119 Bellenden Road, Camberwell, Greater London', 287, False, -544127.6882852758, 5810405.659747191)
('56994b03498e09226e6d2bd1', 'Mr Bao', 

In [20]:
print('List of Indian restaurants')
print('---------------------------')
for r in list(indian_restaurants.values())[:10]:
    print(r)
print('...')
print('Total:', len(indian_restaurants))

List of Indian restaurants
---------------------------
('52f2a2e4498ed94ca73e6a1e', 'Ganapati Takeaway Kitchen', 51.466247, -0.072811, '4 Maxted Road, London, Greater London', 170, True, -544193.4406975273, 5810230.055158369)
('4cb9f096035d236aa82ed74e', 'Spices Indian Takeaway', 51.46736801892181, -0.05416712337854114, 'United Kingdom', 347, True, -542883.1481729683, 5810083.289799659)
('556b057d498e61c8e74bd149', "Baba G's", 51.46347481755735, -0.11235810045205079, 'Pop Brixton', 267, True, -546982.1215036643, 5810499.047809861)
('4ae2be59f964a520658f21e3', 'Ganapati', 51.469633991195614, -0.07292398157248463, '38 Holly Grove, Peckham, Greater London, SE15 5DF', 159, True, -544122.5248518178, 5810605.225287648)
('4e35b28c6284ea7e11033a7e', 'Red Spice', 51.465985688684114, -0.12519615638778278, '91 Landor Rd, London, Greater London, SW9 9RT', 346, True, -547807.9180537098, 5810962.843073968)
('57b9d422498e6b1fc15351c0', 'Booma', 51.471407, -0.112719, 'Ingleton Street (Brixton Rd), Lon

In [21]:
print('Restaurants around location')
print('---------------------------')
for i in range(100, 110):
    rs = location_restaurants[i][:8]
    names = ', '.join([r[1] for r in rs])
    print('Restaurants around location {}: {}'.format(i+1, names))

Restaurants around location
---------------------------
Restaurants around location 101: Chino Latino, City Cafe, Sirena's, Park Plaza Riverbank Restaurant
Restaurants around location 102: The Corner Cafe, Pharmacy 2, Elements Restaurant
Restaurants around location 103: La Parma
Restaurants around location 104: Leños y Carbón, Nando's, Tai Tip Mein, Chatica, Jenny's Restaurant, Tupi
Restaurants around location 105: Bunyadi
Restaurants around location 106: Pizarro, Flour & Grape, Casse-Crôute, Locanda del Melo, José, Caphe House, Lokma Turkish Restaurant, Tower Tandoori
Restaurants around location 107: Bar Tozino, Bone Daddies Bermondsey, St. John Maltby
Restaurants around location 108: Japindo, Poppy Hana
Restaurants around location 109: Simplicity
Restaurants around location 110: Chai Thai Garden, breakfast @yha, FOOD by Ryan Jude


All the collected restaurants in our area of interest on map, and Indian color are displayed in red.

In [22]:
map_london = folium.Map(location=london_center, zoom_start=13)
folium.Marker(london_center, popup='City of London').add_to(map_london)
for res in restaurants.values():
    lat = res[2]; lon = res[3]
    is_indian = res[6]
    color = 'red' if is_indian else 'blue'
    folium.CircleMarker([lat, lon], radius=3, color=color, fill=True, fill_color=color, fill_opacity=1).add_to(map_london)
map_london

## Analysis

Explanatory data analysis and some additional info from raw data. Count the **number of restaurants in every area candidate**:

In [23]:
location_restaurants_count = [len(res) for res in location_restaurants]

df_locations['Restaurants in area'] = location_restaurants_count

print('Average number of restaurants in every area with radius=300m:', np.array(location_restaurants_count).mean())

df_locations.head(10)

Average number of restaurants in every area with radius=300m: 6.428571428571429


Unnamed: 0,Address,Latitude,Longitude,X,Y,Distance from center,Restaurants in area
0,"Poplar Road, Herne Hill, London Borough of Lam...",51.4627,-0.100251,-546166.134882,5810237.0,5992.495307,0
1,"Acland Crescent, Herne Hill, London Borough of...",51.463799,-0.091912,-545566.134882,5810237.0,5840.3767,0
2,"Dog Kennel Hill School, Grove Hill Road, East ...",51.464897,-0.083573,-544966.134882,5810237.0,5747.173218,1
3,"46, Danby Street, Bellenden, London, Greater L...",51.465995,-0.075233,-544366.134882,5810237.0,5715.767665,4
4,"188, Rye Lane, Ledbury Estate, Peckham, London...",51.467093,-0.066893,-543766.134882,5810237.0,5747.173218,6
5,"82, Kirkwood Road, Ledbury Estate, Nunhead, Lo...",51.46819,-0.058552,-543166.134882,5810237.0,5840.3767,1
6,"Arbuthnot Road, Telegraph Hill, New Cross Gate...",51.469286,-0.050211,-542566.134882,5810237.0,5992.495307,3
7,"Cream's, 392-394, Brixton Road, Stockwell, Lon...",51.465561,-0.114283,-547066.134882,5810757.0,5855.766389,13
8,"94-102 Angell Road, Angell Road, Stockwell, Lo...",51.466661,-0.105945,-546466.134882,5810757.0,5604.462508,0
9,"Northlands Street, London Borough of Lambeth, ...",51.46776,-0.097605,-545866.134882,5810757.0,5408.326913,0


OK, now let's calculate the **distance to nearest Indian restaurant from every area candidate center** (not only those within 300m - we want distance to closest one, regardless of how distant it is).

In [24]:
distances_to_indian_restaurant = []

for area_x, area_y in zip(xs, ys):
    min_distance = 10000
    for res in indian_restaurants.values():
        res_x = res[7]
        res_y = res[8]
        d = calc_xy_distance(area_x, area_y, res_x, res_y)
        if d<min_distance:
            min_distance = d
    distances_to_indian_restaurant.append(min_distance)

df_locations['Distance to Indian restaurant'] = distances_to_indian_restaurant

In [25]:
df_locations.head(10)

Unnamed: 0,Address,Latitude,Longitude,X,Y,Distance from center,Restaurants in area,Distance to Indian restaurant
0,"Poplar Road, Herne Hill, London Borough of Lam...",51.4627,-0.100251,-546166.134882,5810237.0,5992.495307,0,856.889104
1,"Acland Crescent, Herne Hill, London Borough of...",51.463799,-0.091912,-545566.134882,5810237.0,5840.3767,0,882.553768
2,"Dog Kennel Hill School, Grove Hill Road, East ...",51.464897,-0.083573,-544966.134882,5810237.0,5747.173218,1,772.729727
3,"46, Danby Street, Bellenden, London, Greater L...",51.465995,-0.075233,-544366.134882,5810237.0,5715.767665,4,172.853145
4,"188, Rye Lane, Ledbury Estate, Peckham, London...",51.467093,-0.066893,-543766.134882,5810237.0,5747.173218,6,427.370083
5,"82, Kirkwood Road, Ledbury Estate, Nunhead, Lo...",51.46819,-0.058552,-543166.134882,5810237.0,5840.3767,1,322.26067
6,"Arbuthnot Road, Telegraph Hill, New Cross Gate...",51.469286,-0.050211,-542566.134882,5810237.0,5992.495307,3,352.516507
7,"Cream's, 392-394, Brixton Road, Stockwell, Lon...",51.465561,-0.114283,-547066.134882,5810757.0,5855.766389,13,271.366485
8,"94-102 Angell Road, Angell Road, Stockwell, Lo...",51.466661,-0.105945,-546466.134882,5810757.0,5604.462508,0,576.908758
9,"Northlands Street, London Borough of Lambeth, ...",51.46776,-0.097605,-545866.134882,5810757.0,5408.326913,0,537.82666


In [26]:
print('Average distance to closest Indian restaurant from each area center:', df_locations['Distance to Indian restaurant'].mean())


Average distance to closest Indian restaurant from each area center: 574.7025638998874


Let's crete a map showing **heatmap / density of restaurants** and try to extract insights from that. 

In [27]:
london_boroughs_url = 'https://raw.githubusercontent.com/blackmad/neighborhoods/master/london.geojson'
london_boroughs = requests.get(london_boroughs_url).json()

def boroughs_style(feature):
    return { 'color': 'blue', 'fill': False }

In [28]:
restaurant_latlons = [[res[2], res[3]] for res in restaurants.values()]

indian_latlons = [[res[2], res[3]] for res in indian_restaurants.values()]

In [30]:
from folium import plugins
from folium.plugins import HeatMap

map_london = folium.Map(location=london_center, zoom_start=13)
folium.TileLayer('cartodbpositron').add_to(map_london) #cartodbpositron cartodbdark_matter
HeatMap(restaurant_latlons).add_to(map_london)
folium.Marker(london_center).add_to(map_london)
folium.Circle(london_center, radius=1000, fill=False, color='white').add_to(map_london)
folium.Circle(london_center, radius=2000, fill=False, color='white').add_to(map_london)
folium.Circle(london_center, radius=3000, fill=False, color='white').add_to(map_london)
map_london

Let's create another heatmap map showing **heatmap/density of Indian restaurants** only.

In [32]:
map_london = folium.Map(location=london_center, zoom_start=13)
folium.TileLayer('cartodbpositron').add_to(map_london) #cartodbpositron cartodbdark_matter
HeatMap(indian_latlons).add_to(map_london)
folium.Marker(london_center).add_to(map_london)
folium.Circle(london_center, radius=1000, fill=False, color='white').add_to(map_london)
folium.Circle(london_center, radius=2000, fill=False, color='white').add_to(map_london)
folium.Circle(london_center, radius=3000, fill=False, color='white').add_to(map_london)
map_london

Let's define new, more narrow region of interest, which will include borough of Southwark

In [44]:
roi_x_min = london_center_x - 2000
roi_y_max = london_center_y + 1000
roi_width = 5000
roi_height = 5000
roi_center_x = roi_x_min + 2500
roi_center_y = roi_y_max - 2500
roi_center_lon, roi_center_lat = xy_to_lonlat(roi_center_x, roi_center_y)
roi_center = [roi_center_lat, roi_center_lon]

map_london = folium.Map(location=roi_center, zoom_start=14)
HeatMap(restaurant_latlons).add_to(map_london)
folium.Marker(london_center).add_to(map_london)
folium.Circle(roi_center, radius=2500, color='white', fill=True, fill_opacity=0.4).add_to(map_london)
map_london

Let's also create new, more dense grid of location candidates restricted to our new region of interest 

In [34]:
k = math.sqrt(3) / 2 # Vertical offset for hexagonal grid cells
x_step = 100
y_step = 100 * k 
roi_y_min = roi_center_y - 2500

roi_latitudes = []
roi_longitudes = []
roi_xs = []
roi_ys = []
for i in range(0, int(51/k)):
    y = roi_y_min + i * y_step
    x_offset = 50 if i%2==0 else 0
    for j in range(0, 51):
        x = roi_x_min + j * x_step + x_offset
        d = calc_xy_distance(roi_center_x, roi_center_y, x, y)
        if (d <= 2501):
            lon, lat = xy_to_lonlat(x, y)
            roi_latitudes.append(lat)
            roi_longitudes.append(lon)
            roi_xs.append(x)
            roi_ys.append(y)

print(len(roi_latitudes), 'candidate neighborhood centers generated.')

2261 candidate neighborhood centers generated.


Calculate two most important things for each location candidate: **number of restaurants in vicinity** (we'll use radius of **250 meters**) and **distance to closest Indian restaurant**.

In [35]:
def count_restaurants_nearby(x, y, restaurants, radius=250):    
    count = 0
    for res in restaurants.values():
        res_x = res[7]; res_y = res[8]
        d = calc_xy_distance(x, y, res_x, res_y)
        if d<=radius:
            count += 1
    return count

def find_nearest_restaurant(x, y, restaurants):
    d_min = 100000
    for res in restaurants.values():
        res_x = res[7]; res_y = res[8]
        d = calc_xy_distance(x, y, res_x, res_y)
        if d<=d_min:
            d_min = d
    return d_min

roi_restaurant_counts = []
roi_indian_distances = []

print('Generating data on location candidates... ', end='')
for x, y in zip(roi_xs, roi_ys):
    count = count_restaurants_nearby(x, y, restaurants, radius=250)
    roi_restaurant_counts.append(count)
    distance = find_nearest_restaurant(x, y, indian_restaurants)
    roi_indian_distances.append(distance)
print('done.')


Generating data on location candidates... done.


In [36]:
# Let's put this into dataframe
df_roi_locations = pd.DataFrame({'Latitude':roi_latitudes,
                                 'Longitude':roi_longitudes,
                                 'X':roi_xs,
                                 'Y':roi_ys,
                                 'Restaurants nearby':roi_restaurant_counts,
                                 'Distance to Indian restaurant':roi_indian_distances})
#df.sort_values(by=['Brand'], inplace=True)

df_roi_locations.sort_values(by=['Restaurants nearby'], ascending=False, inplace=True)

df_roi_locations.head(5)

Unnamed: 0,Latitude,Longitude,X,Y,Restaurants nearby,Distance to Indian restaurant
1976,51.518243,-0.076898,-543266.134882,5816024.0,37,90.274296
2260,51.525579,-0.080832,-543366.134882,5816890.0,36,103.646409
2259,51.525396,-0.082224,-543466.134882,5816890.0,36,126.343227
1279,51.504493,-0.091868,-544616.134882,5814725.0,36,76.193617
1937,51.517583,-0.075949,-543216.134882,5815937.0,36,128.705479


In [37]:
df_roi_locations.shape

(2261, 6)

OK. Let us now **filter** those locations: we're interested only in **locations with no more than two restaurants in radius of 250 meters**, and **no Indian restaurants in radius of 400 meters**.

In [38]:
good_res_count = np.array((df_roi_locations['Restaurants nearby']<=2))
print('Locations with no more than two restaurants nearby:', good_res_count.sum())

good_ind_distance = np.array(df_roi_locations['Distance to Indian restaurant']>=400)
print('Locations with no Indian restaurants within 400m:', good_ind_distance.sum())

good_locations = np.logical_and(good_res_count, good_ind_distance)
print('Locations with both conditions met:', good_locations.sum())

df_good_locations = df_roi_locations[good_locations]


Locations with no more than two restaurants nearby: 1042
Locations with no Indian restaurants within 400m: 1019
Locations with both conditions met: 740


In [39]:
good_latitudes = df_good_locations['Latitude'].values
good_longitudes = df_good_locations['Longitude'].values

good_locations = [[lat, lon] for lat, lon in zip(good_latitudes, good_longitudes)]

map_london = folium.Map(location=roi_center, zoom_start=14)
folium.TileLayer('cartodbpositron').add_to(map_london)
HeatMap(restaurant_latlons).add_to(map_london)
folium.Circle(roi_center, radius=2500, color='white', fill=True, fill_opacity=0.6).add_to(map_london)
folium.Marker(london_center).add_to(map_london)
for lat, lon in zip(good_latitudes, good_longitudes):
    folium.CircleMarker([lat, lon], radius=2, color='blue', fill=True, fill_color='blue', fill_opacity=1).add_to(map_london) 
map_london

In [43]:
map_london = folium.Map(location=roi_center, zoom_start=14)
HeatMap(good_locations, radius=25).add_to(map_london)
folium.Marker(london_center).add_to(map_london)
for lat, lon in zip(good_latitudes, good_longitudes):
    folium.CircleMarker([lat, lon], radius=2, color='blue', fill=True, fill_color='blue', fill_opacity=1).add_to(map_london)
map_london

Let us now **cluster** those locations to create **centers of zones containing good locations**. Those zones, their centers and addresses will be the final result of our analysis. 

In [45]:
from sklearn.cluster import KMeans

number_of_clusters = 15

good_xys = df_good_locations[['X', 'Y']].values
kmeans = KMeans(n_clusters=number_of_clusters, random_state=0).fit(good_xys)

cluster_centers = [xy_to_lonlat(cc[0], cc[1]) for cc in kmeans.cluster_centers_]

map_london = folium.Map(location=roi_center, zoom_start=14)
folium.TileLayer('cartodbpositron').add_to(map_london)
HeatMap(restaurant_latlons).add_to(map_london)
folium.Circle(roi_center, radius=2500, color='white', fill=True, fill_opacity=0.4).add_to(map_london)
folium.Marker(london_center).add_to(map_london)
for lon, lat in cluster_centers:
    folium.Circle([lat, lon], radius=500, color='green', fill=True, fill_opacity=0.25).add_to(map_london) 
for lat, lon in zip(good_latitudes, good_longitudes):
    folium.CircleMarker([lat, lon], radius=2, color='blue', fill=True, fill_color='blue', fill_opacity=1).add_to(map_london)
map_london

Let's see those zones on a city map without heatmap, using shaded areas to indicate our clusters:

In [46]:
map_london = folium.Map(location=roi_center, zoom_start=14)
folium.Marker(london_center).add_to(map_london)
for lat, lon in zip(good_latitudes, good_longitudes):
    folium.Circle([lat, lon], radius=250, color='#00000000', fill=True, fill_color='#0066ff', fill_opacity=0.07).add_to(map_london)
for lat, lon in zip(good_latitudes, good_longitudes):
    folium.CircleMarker([lat, lon], radius=2, color='blue', fill=True, fill_color='blue', fill_opacity=1).add_to(map_london)
for lon, lat in cluster_centers:
    folium.Circle([lat, lon], radius=500, color='green', fill=False).add_to(map_london) 
map_london

Let's zoom in on candidate areas in Borugh of Southwark:

In [47]:
map_london = folium.Map(location=[51.4899, -0.0929], zoom_start=15)
folium.Marker(london_center).add_to(map_london)
for lon, lat in cluster_centers:
    folium.Circle([lat, lon], radius=500, color='green', fill=False).add_to(map_london) 
for lat, lon in zip(good_latitudes, good_longitudes):
    folium.Circle([lat, lon], radius=250, color='#0000ff00', fill=True, fill_color='#0066ff', fill_opacity=0.07).add_to(map_london)
for lat, lon in zip(good_latitudes, good_longitudes):
    folium.CircleMarker([lat, lon], radius=2, color='blue', fill=True, fill_color='blue', fill_opacity=1).add_to(map_london)
map_london

In [48]:
map_london = folium.Map(location=[51.4970125, -0.063268], zoom_start=15)
folium.Marker(london_center).add_to(map_london)
for lon, lat in cluster_centers:
    folium.Circle([lat, lon], radius=500, color='green', fill=False).add_to(map_london) 
for lat, lon in zip(good_latitudes, good_longitudes):
    folium.Circle([lat, lon], radius=250, color='#0000ff00', fill=True, fill_color='#0066ff', fill_opacity=0.07).add_to(map_london)
for lat, lon in zip(good_latitudes, good_longitudes):
    folium.CircleMarker([lat, lon], radius=2, color='blue', fill=True, fill_color='blue', fill_opacity=1).add_to(map_london)
map_london

In [52]:
candidate_area_addresses = []
print('==============================================================')
print('Addresses of centers of areas recommended')
print('==============================================================\n')
for lon, lat in cluster_centers:
    addr = get_address(lat, lon)
    addr = addr.replace(', England', '')
    addr = addr.replace(', London', '')
    addr = addr.replace(', UK', '')
    addr = addr.replace(', Greater London', '')
    addr = addr.replace("'", '')
    candidate_area_addresses.append(addr)    
    x, y = lonlat_to_xy(lon, lat)
    d = calc_xy_distance(x, y, london_center_x, london_center_y)
    print('{}{} => {:.1f}km from City of London'.format(addr, ' '*(50-len(addr)), d/1000))
    

Addresses of centers of areas recommended

Stevenson Crescent, Ledbury Estate, South Bermondsey Borough of Southwark, SE16 3EN => 3.7km from City of London
Penry Street, Ledbury Estate, Walworth Borough of Southwark, SE17 => 3.0km from City of London
Granary Road, Globe Town, Whitechapel Borough of Tower Hamlets, E1 5DF => 2.2km from City of London
Bermondsey Wall East, Bermondsey Borough of Southwark, SE16 4UA => 2.5km from City of London
The Queens Walk Borough of Southwark, SE1 9AE      => 1.2km from City of London
Michael Faraday Primary School, Hopwood Road, Walworth Borough of Southwark, SE17 => 3.5km from City of London
Polperro Mews Borough of Lambeth, SE11 4TY         => 2.7km from City of London
Albion Primary School, Clack Street, Canada Water Borough of Southwark, SE16 7JA => 3.4km from City of London
Waite Street, Trafalgar Avenue, Ledbury Estate, Peckham Borough of Southwark, SE15 => 3.7km from City of London
4, Henshaw Street, Walworth Borough of Southwark, SE17 => 2.6km

## Result

Visualising the 15 zones obtained by the analysis

In [51]:
map_london = folium.Map(location=roi_center, zoom_start=14)
folium.Circle(london_center, radius=50, color='red', fill=True, fill_color='red', fill_opacity=1).add_to(map_london)
for lonlat, addr in zip(cluster_centers, candidate_area_addresses):
    folium.Marker([lonlat[1], lonlat[0]], popup=addr).add_to(map_london)     
for lat, lon in zip(good_latitudes, good_longitudes):
    folium.Circle([lat, lon], radius=250, color='#0000ff00', fill=True, fill_color='#0066ff', fill_opacity=0.05).add_to(map_london)
map_london