In [13]:
from enum import Enum

import geopandas as gpd
import osmnx as ox
import pandas as pd
from keplergl import KeplerGl


class District(str, Enum):
    """Saint Petersburg districts."""

    admiralteysky = "admiralteysky"
    vasileostrovsky = "vasileostrovsky"
    vyborg = "vyborg"
    kalininsky = "kalininsky"
    kirovsky = "kirovsky"
    kolpinsky = "kolpinsky"
    krasnogvardeisky = "krasnogvardeisky"
    krasnoselsky = "krasnoselsky"
    kronstadt = "kronstadt"
    kurortny = "kurortny"
    moscow = "moscow"
    nevsky = "nevsky"
    petrogradsky = "petrogradsky"
    petrodvortsovy = "petrodvortsovy"
    primorsky = "primorsky"
    pushkinsky = "pushkinsky"
    frunzensky = "frunzensky"
    central = "central"

In [14]:
def get_spb_boundaries():
    spb = ox.geocode_to_gdf("R337422", by_osmid=True)
    return spb.geometry.iloc[0]
spb_boundaries = get_spb_boundaries()

In [15]:
def get_spb_district_from_osm(spb_boundaries, district: District) -> gpd.GeoDataFrame:
    districts_to_names = {
        District.primorsky:"Приморский район", 
        District.moscow:"Московский район",
        District.kurortny:"Курортный район",
        District.central:"Центральный район",
        District.pushkinsky:"Пушкинский район",
        District.frunzensky:"Фрунзенский район",
        District.vasileostrovsky:"Василеостровский район",
        District.kolpinsky:"Колпинский район",
        District.petrogradsky:"Петроградский район",
        District.krasnoselsky:"Красносельский район",
        District.kirovsky:"Кировский район",
        District.petrodvortsovy:"Петродворцовый район",
        District.admiralteysky: "Адмиралтейский район",
        District.nevsky:"Невский район",
        District.kalininsky:"Калининский район",
        District.krasnogvardeisky:"Красногвардейский район",
        District.vyborg:"Выборгский район"
    }
    spb_district = ox.features_from_polygon(spb_boundaries, tags={"name": districts_to_names[district]})
    spb_district = spb_district.dropna(subset=['addr:region'])
    spb_district = spb_district[spb_district['addr:region'] != 'Ленинградская область']
    spb_district = spb_district.reset_index()[['geometry', "name"]]
    return spb_district
spb_district = get_spb_district_from_osm(spb_boundaries, District.admiralteysky)
spb_district

Unnamed: 0,geometry,name
0,"POLYGON ((30.25023 59.90129, 30.25028 59.90132...",Адмиралтейский район


In [16]:
def get_spb_food_places_by_district(spb_district) -> gpd.GeoDataFrame:
    tags = {
        "amenity": ["cafe", "fast_food", "food_court", "restaurant"]
    }
    data = ox.features_from_polygon(spb_district.geometry.iloc[0], tags=tags).reset_index()
    return data
spb_food_places = get_spb_food_places_by_district(spb_district)
spb_food_places

Unnamed: 0,element_type,osmid,amenity,check_date,contact:instagram,cuisine,name,opening_hours,outdoor_seating,geometry,...,branch,nodes,building:levels,name:signed,roof:shape,indoor,ways,building:part,type,source:addr
0,node,311101157,cafe,2023-08-26,https://www.instagram.com/wonderful.taste.spb/,lebanese,Wonderful Taste,09:00-22:00,yes,POINT (30.28913 59.92477),...,,,,,,,,,,
1,node,463252316,cafe,2022-12-18,,,NowaDays,"Mo-Th,Su 12:00-21:00; Fr-Sa 12:00-22:00",,POINT (30.29158 59.93156),...,,,,,,,,,,
2,node,463252327,restaurant,2023-03-07,,chinese,Тайвань,12:00-22:30,,POINT (30.29681 59.91807),...,,,,,,,,,,
3,node,469739704,fast_food,,,burger,Вкусно — и точка,07:00-23:30,,POINT (30.33930 59.91947),...,,,,,,,,,,
4,node,469751000,fast_food,,,sandwich,Subway,,,POINT (30.32097 59.92451),...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
774,way,1160742355,cafe,,,,Перекрёсток,24/7,,"POLYGON ((30.32052 59.92646, 30.32069 59.92636...",...,,"[10795752510, 10776378559, 10776378575, 107957...",,,,area,,,,
775,way,1162595692,restaurant,,,international,Евразия,"Fr-Sa 11:00-05:00; Su, Mo-Th 11:00-23:00",,"POLYGON ((30.32038 59.92615, 30.32045 59.92618...",...,,"[10812281972, 10812281950, 10812281946, 108122...",,,,,,,,
776,way,1174379066,fast_food,2022-12-18,,crepe;russian,Теремок,Mo-Fr 09:00-22:00; Sa-Su 10:00-21:00,no,"POLYGON ((30.29335 59.93139, 30.29337 59.93138...",...,,"[1401261870, 10912081570, 10912081582, 1091208...",1,,,,,,,
777,relation,3310305,cafe,,,,,,,"POLYGON ((30.32138 59.91629, 30.32149 59.91631...",...,,"[[[1207178492, 2246768096], [779308103, 958345...",1,,,,"[244845263, 244845271, 215271131, 244845270, 1...",yes,multipolygon,


In [17]:
map = KeplerGl(data={"food_places": spb_food_places, 'districts': spb_district})
map

User Guide: https://docs.kepler.gl/docs/keplergl-jupyter


Out of range float values are not JSON compliant
Supporting this message is deprecated in jupyter-client 7, please make sure your message is JSON-compliant
  content = self.pack(content)


KeplerGl(data={'food_places':     element_type       osmid     amenity  check_date  \
0           node   31110…

In [18]:
def spatial_join_food_places_and_district(food_places: gpd.GeoDataFrame, district: gpd.GeoDataFrame) -> gpd.GeoDataFrame:
    return gpd.sjoin(food_places, district).rename(columns={'name_left': 'name', 'name_right': 'district'}).drop(columns=['index_right'])
food_places_and_district = spatial_join_food_places_and_district(spb_food_places, spb_district)
food_places_and_district

Unnamed: 0,element_type,osmid,amenity,check_date,contact:instagram,cuisine,name,opening_hours,outdoor_seating,geometry,...,nodes,building:levels,name:signed,roof:shape,indoor,ways,building:part,type,source:addr,district
0,node,311101157,cafe,2023-08-26,https://www.instagram.com/wonderful.taste.spb/,lebanese,Wonderful Taste,09:00-22:00,yes,POINT (30.28913 59.92477),...,,,,,,,,,,Адмиралтейский район
1,node,463252316,cafe,2022-12-18,,,NowaDays,"Mo-Th,Su 12:00-21:00; Fr-Sa 12:00-22:00",,POINT (30.29158 59.93156),...,,,,,,,,,,Адмиралтейский район
2,node,463252327,restaurant,2023-03-07,,chinese,Тайвань,12:00-22:30,,POINT (30.29681 59.91807),...,,,,,,,,,,Адмиралтейский район
3,node,469739704,fast_food,,,burger,Вкусно — и точка,07:00-23:30,,POINT (30.33930 59.91947),...,,,,,,,,,,Адмиралтейский район
4,node,469751000,fast_food,,,sandwich,Subway,,,POINT (30.32097 59.92451),...,,,,,,,,,,Адмиралтейский район
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
774,way,1160742355,cafe,,,,Перекрёсток,24/7,,"POLYGON ((30.32052 59.92646, 30.32069 59.92636...",...,"[10795752510, 10776378559, 10776378575, 107957...",,,,area,,,,,Адмиралтейский район
775,way,1162595692,restaurant,,,international,Евразия,"Fr-Sa 11:00-05:00; Su, Mo-Th 11:00-23:00",,"POLYGON ((30.32038 59.92615, 30.32045 59.92618...",...,"[10812281972, 10812281950, 10812281946, 108122...",,,,,,,,,Адмиралтейский район
776,way,1174379066,fast_food,2022-12-18,,crepe;russian,Теремок,Mo-Fr 09:00-22:00; Sa-Su 10:00-21:00,no,"POLYGON ((30.29335 59.93139, 30.29337 59.93138...",...,"[1401261870, 10912081570, 10912081582, 1091208...",1,,,,,,,,Адмиралтейский район
777,relation,3310305,cafe,,,,,,,"POLYGON ((30.32138 59.91629, 30.32149 59.91631...",...,"[[[1207178492, 2246768096], [779308103, 958345...",1,,,,"[244845263, 244845271, 215271131, 244845270, 1...",yes,multipolygon,,Адмиралтейский район


In [19]:
def preprocess_raw_data(food_places_and_districts):
    needed_columns = ["geometry", "opening_hours", "cuisine", "amenity", "delivery", "name", "district"]
    food_places_and_districts: gpd.GeoDataFrame = food_places_and_districts.loc[:, needed_columns]
    food_places_and_districts: gpd.GeoDataFrame = food_places_and_districts[food_places_and_districts['cuisine'] != 'coffee_shop']
    map = {
        "delivery": {
            "yes" : True,
            "only": True,
            "no": False,
            "limited": True,
            "Mo-Su 08:00-22:00": True,
            "12:00-23:00": True,
            "11:00-21:00": True,
            "Mo-Su 11:00-22:00": True,
            "10:00-23:00": True,
            "10:00-21:00": True,
            "10:00-20:00": True,
            "12:30-21:00": True,
            "12:00-21:00": True,
            "11:30-17:00": True
        },
        "amenity": {
            "fast_food": "canteen",
            "food_court": "canteen"
        }
    }
    food_places_and_districts.replace(map, inplace=True)
    food_places_and_districts['delivery'].bfill(inplace=True)
    food_places_and_districts['delivery'].ffill(inplace=True)

    time_pattern = r'(\d{2}:\d{2}-\d{2}:\d{2})'
    food_places_and_districts['opening_hours'] = food_places_and_districts['opening_hours'].str.extract(time_pattern)
    food_places_and_districts['opening_hours'].bfill(inplace=True)
    food_places_and_districts['opening_hours'].ffill(inplace=True)
    open_close_time_pattern = r'(\d{2}):(\d{2})-(\d{2}):(\d{2})'
    food_places_and_districts['open_time'] = food_places_and_districts['opening_hours'].str.extract(open_close_time_pattern)[0].astype(int)
    food_places_and_districts['close_time'] = food_places_and_districts['opening_hours'].str.extract(open_close_time_pattern)[2].astype(int)
    cuisine_map = {
        'kebab': 'uzbekistan',
        'burger': 'mexico',
        'georgian': 'georgia',
        'pizza': 'italy',
        'shawarma': 'uzbekistan',
        'sushi': 'japan',
        'chinese': 'china',
        'italian': 'italy',
        'russian': 'russia',
        'japanese': 'japan',
        'korean': 'korea',
        'mexican': 'mexico',
        'local': 'russia',
        'doner': 'uzbekistan',
        'ramen': 'japan',
        'ukrainian': 'russia'
    }
    food_places_and_districts['cuisine'] = food_places_and_districts['cuisine'].map(cuisine_map)
    food_places_and_districts['cuisine'].bfill(inplace=True)
    food_places_and_districts['cuisine'].ffill(inplace=True)
    return food_places_and_districts
food_places_and_districts_prep = preprocess_raw_data(food_places_and_district)

1. Улица - находим Polygon в указанном районе, где нету заведений с указанным типом кухни и типом заведения. В этом полигоне берем несколько объявлений циан 
2. Точный адрес помещения - берем из объявления циан в пункте 1
3. Площадь помещения (м2) - берем из объявления циан в пункте 1
4. Цена за помещение (руб/мес) - берем из объявления циан в пункте 1
5. Выгодное время работы (временной промежуток) - берем все заведения в районе (неважно какой тип, кухня) и смотрим самое раннее открытие N и самое позднее закрытие M. Возьмем для простоты выгодное время работы как [N-1ч, M+2ч].
6. Возможность доставки (по возможности доставки соседних заведений) - берем все заведения в районе (неважно какой тип, кухня). Если больше, чем 50% имеют доставку, то не будем рекомендовать ее. Если меньше или равно, то будем рекомендовать.



In [20]:
food_places_and_districts_prep

Unnamed: 0,geometry,opening_hours,cuisine,amenity,delivery,name,district,open_time,close_time
0,POINT (30.28913 59.92477),09:00-22:00,china,cafe,True,Wonderful Taste,Адмиралтейский район,9,22
1,POINT (30.29158 59.93156),12:00-21:00,china,cafe,True,NowaDays,Адмиралтейский район,12,21
2,POINT (30.29681 59.91807),12:00-22:30,china,restaurant,True,Тайвань,Адмиралтейский район,12,22
3,POINT (30.33930 59.91947),07:00-23:30,mexico,canteen,True,Вкусно — и точка,Адмиралтейский район,7,23
4,POINT (30.32097 59.92451),08:00-24:00,russia,canteen,True,Subway,Адмиралтейский район,8,24
...,...,...,...,...,...,...,...,...,...
774,"POLYGON ((30.32052 59.92646, 30.32069 59.92636...",11:00-05:00,mexico,cafe,True,Перекрёсток,Адмиралтейский район,11,5
775,"POLYGON ((30.32038 59.92615, 30.32045 59.92618...",11:00-05:00,mexico,restaurant,True,Евразия,Адмиралтейский район,11,5
776,"POLYGON ((30.29335 59.93139, 30.29337 59.93138...",09:00-22:00,mexico,canteen,True,Теремок,Адмиралтейский район,9,22
777,"POLYGON ((30.32138 59.91629, 30.32149 59.91631...",06:00-23:30,mexico,cafe,True,,Адмиралтейский район,6,23
