In [None]:
%load_ext autoreload
%autoreload 2

# Market share analysis

*Based on fast food restaurants in Prague, Czechia.*

The example will show how to analyse the local market share, defined by the nearest restaurant to a particular Prague resident.

The analysis uses demographic data from the Czech Statistical Office, with residential buildings and fast food restaurant positions downloaded from OpenStreetMap.

In [None]:
from srai.regionalizers import geocode_to_region_gdf

# Geocode the city into a region GeoDataFrame; it is reused later as the area
# that gets segmented into Voronoi regions.
prague_area = geocode_to_region_gdf("Praha, CZ")

# Interactive preview of the geocoded area.
prague_area.explore(height=600)

## Load demographic data

In [None]:
import geopandas as gpd

# Demographic data shipped with the example; the `population` column is what
# gets distributed over the buildings later on.
cadastral_data = gpd.read_file("data/cadastral_data.geojson")

# Colour each unit by its population on a light basemap.
cadastral_data.explore(
    column="population",
    tiles="CartoDB positron",
    style_kwds={"opacity": 0.25},
    height=600,
)

## Load residential buildings from OpenStreetMap

Data that we need is defined by [`building=residential`](https://wiki.openstreetmap.org/wiki/Tag:building%3Dresidential) tag in OSM.
We will be using `osmnx` library directly to keep all the additional tags (we want to use `building:flats` information).

In [None]:
import osmnx as ox

# Download every OSM feature tagged `building=residential` for Prague,
# keeping all of the additional tag columns osmnx returns.
residential_buildings = ox.features_from_place(
    query="Praha, CZ",
    tags={"building": "residential"},
)
residential_buildings.head()

Parse the number of flats per building into an integer (OSM tag values are strings).

In [None]:
def map_flats(flats_value: str) -> int:
    """Convert a raw `building:flats` tag value into an integer flats count.

    Args:
        flats_value: Raw tag value. Normally a string; values that are
            missing or not parseable as an integer are tolerated
            (presumably missing tags arrive as NaN - verify against the data).

    Returns:
        The parsed number of flats, or 1 when the value cannot be converted,
        so that every building still counts as a single dwelling.
    """
    try:
        return int(flats_value)
    except (TypeError, ValueError, OverflowError):
        # TypeError: None / unsupported type; ValueError: non-numeric text or NaN;
        # OverflowError: infinity. Anything else (e.g. KeyboardInterrupt) must
        # propagate instead of being silently swallowed.
        return 1

# Turn the raw `building:flats` tag into an integer `flats` column.
residential_buildings['flats'] = residential_buildings['building:flats'].apply(map_flats)

# Flatten the index into columns and keep only what the analysis needs.
kept_columns = ['osmid', 'geometry', 'flats']
residential_buildings = residential_buildings.reset_index()[kept_columns]

# Replace every geometry with its centroid so each building becomes a point.
residential_buildings.geometry = residential_buildings.geometry.apply(
    lambda shape: shape.centroid
)

residential_buildings.head()

## Population interpolation

Using cadastral information and exact building positions, we will interpolate the population over each building, using the number of flats as a weight.

In [None]:
from tqdm import tqdm

# bsu - basic settlement unit
# Start from an explicit NaN column: re-running the cell never keeps stale
# values, and the column exists even if no building matches any BSU.
residential_buildings['population'] = float('nan')

for bsu in tqdm(cadastral_data.to_dict(orient='records')):
    # Boolean mask of the buildings whose point lies in this BSU.
    in_bsu = residential_buildings.intersects(bsu['geometry'])
    flats_in_bsu = residential_buildings.loc[in_bsu, 'flats']
    total_flats = flats_in_bsu.sum()
    if total_flats == 0:
        # No buildings here (or only zero-flat ones): nothing to distribute,
        # and dividing by zero would only yield NaN/inf.
        continue
    # Split the BSU population proportionally to each building's flats count,
    # in one vectorised assignment instead of a per-row Python loop.
    residential_buildings.loc[in_bsu, 'population'] = bsu['population'] * (flats_in_bsu / total_flats)

residential_buildings.head()

Plotting buildings with population

In [None]:
import matplotlib.pyplot as plt

# Left panel: all buildings; right panel: a bounding-box detail selected via `.cx`.
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(20,8), dpi=300)
residential_buildings.plot('population', markersize=2, cmap='Spectral', alpha=0.1, ax=ax1)
residential_buildings.cx[14.42:14.47, 50.06:50.085].plot('population', cmap='Spectral', markersize=4, ax=ax2)

# Hide the axes frames; the points alone carry the information.
for ax in (ax1, ax2):
    ax.axis('off')

# `plt.show()` works with notebook (non-GUI) backends, whereas `fig.show()`
# emits a "cannot show the figure" warning there.
plt.show()

## Loading data about fast food restaurants

Those features are defined in OSM with [`amenity=fast_food`](https://wiki.openstreetmap.org/wiki/Tag:amenity%3Dfast_food) tag.

In [None]:
# Download every OSM feature tagged `amenity=fast_food` for Prague.
pois = ox.features_from_place(
    query="Praha, CZ",
    tags={"amenity": "fast_food"},
)
pois.head()

From those, we will keep only `KFC` and `McDonald's` restaurants to simplify the analysis.

In [None]:
# Brands kept for the comparison; values must match the OSM `brand` tag exactly.
brands = [
    "KFC",
    "McDonald's",
]

Cafes

In [None]:
# brands = ["Starbucks", "Costa"]
# pois  = ox.features_from_place(query='Praha, CZ', tags={"amenity":"cafe"})
# pois.head()

Shops

In [None]:
# brands = ["Albert", "Billa", "Lidl", "PENNY", "Kaufland", "Tesco"]
# pois  = ox.features_from_place(query='Praha, CZ', tags={"shop":"supermarket"})
# pois.head()

In [None]:
# Flatten the index into columns and keep only the fields used below.
poi_columns = ['osmid', 'geometry', 'brand']
pois = pois.reset_index()[poi_columns]

# Keep only the restaurants that belong to the selected brands.
pois = pois[pois['brand'].isin(brands)]

# Replace every geometry with its centroid so each restaurant becomes a point.
pois.geometry = pois.geometry.apply(lambda shape: shape.centroid)

# Index by OSM id so the restaurants can later be joined by index.
pois = pois.set_index('osmid')
pois.head()

In [None]:
# Number of restaurant locations per brand.
pois["brand"].value_counts()

## Segmenting the area

Using `VoronoiRegionalizer` from `srai` library, we can divide the geospatial space into regions using Voronoi diagram.

Here we will use the restaurants as seeds to segment Prague.

In [None]:
from srai.regionalizers import VoronoiRegionalizer

# Use the restaurants as seeds and split the Prague area into one region per seed.
voronoi_regionalizer = VoronoiRegionalizer(seeds=pois)
voronoi_regions = voronoi_regionalizer.transform(gdf=prague_area)
voronoi_regions.head()

Now we can join buildings with population into those generated regions. This way, we can assign the closest restaurant to each building.

In [None]:
# Spatially join the buildings to the regions, then total the interpolated
# population per region.
buildings_in_regions = voronoi_regions.sjoin(residential_buildings)
population_in_regions = buildings_in_regions.groupby('region_id')['population'].sum()

# Attach the brand and the population total to every region;
# missing values are filled with 0.
regions_with_population = (
    voronoi_regions
    .join(pois[['brand']])
    .join(population_in_regions)
    .fillna(0)
)
regions_with_population.head()

Using a simple grouping operation, we can see how Prague's market is split between those two brands.

In [None]:
# Per brand: total population of its regions and the number of regions (locations).
brand_closest_population = (
    regions_with_population
    .groupby('brand')
    .agg({'population': 'sum', 'geometry': 'count'})
    .reset_index()
    .rename(columns={"geometry": "locations"})
)

# Share of the summed population that falls to each brand, in percent.
total_population = brand_closest_population['population'].sum()
brand_closest_population['percentage'] = 100 * brand_closest_population['population'] / total_population
brand_closest_population

## Map plotting

To analyse this market further, we will plot the regions in two distinct gradients based on each brand's colours.

In [None]:
# Brand -> pair of hex colours passed to the colormap builder; the second
# colour is also used as the region outline colour.
brand_color_mapping = {
    "KFC": ("#fa9ea0", "#a3080c"),
    "McDonald's": ("#ffeec0", "#ffc72c"),
}

In [None]:
# brand_color_mapping = {
#     "KFC": ("#fa9ea0", "#a3080c"),
#     "McDonald's": ("#ffeec0", "#ffc72c"),

#     "Starbucks": ("#88ffd6", "#00704A"),
#     "Costa": ("#fe638a", "#74011e"),

#     "Albert": ("#66c2a5", "#1b9e77"),
#     "Billa": ("#fc8d62", "#d95f02"),
#     "Kaufland": ("#8da0cb", "#7570b3"),
#     "Lidl": ("#e78ac3", "#e7298a"),
#     "PENNY": ("#a6d854", "#66a61e"),
#     "Tesco": ("#ffd92f", "#e6ab02"),
# }

In [None]:
from srai.plotting.folium_wrapper import _generate_linear_colormap

# The map object is threaded through `m=` on every iteration so that all
# brands end up as layers of the same map.
prague_map = None

for brand, colors in brand_color_mapping.items():
    is_brand = regions_with_population["brand"] == brand
    regions_subset = regions_with_population[is_brand]
    if regions_subset.empty:
        # Brand has no regions in the data - nothing to draw.
        continue

    # Colour scale from 0 up to the most populated region of this brand.
    max_population = regions_subset["population"].max()
    colormap = _generate_linear_colormap(colors, min_value=0, max_value=max_population)
    colormap.caption = brand

    prague_map = regions_subset.explore(
        m=prague_map,
        column="population",
        cmap=colormap,
        tiles="CartoDB positron",
        style_kwds={"opacity": 0.25, "color": colors[1]},
    )

In [None]:
# Draw the restaurant locations as small markers on top of the regions map.
poi_marker_style = {
    "color": "#444",
    "opacity": 1,
    "fillColor": "#f2f2f2",
    "fillOpacity": 1,
}
prague_map = pois.explore(
    m=prague_map,
    marker_kwds={"radius": 3},
    style_kwds=poi_marker_style,
)

In [None]:
# Render the assembled map (brand regions plus restaurant markers) as the cell output.
prague_map