In [1]:
import os
import pickle
import osmnx as ox
import pandas as pd
import geopandas as gpd
from blocksnet.blocks.cutting import preprocess_urban_objects, cut_urban_blocks # type: ignore
from blocksnet.blocks.assignment import assign_land_use                         # type: ignore
from blocksnet.enums import LandUse                                             # type: ignore

In [6]:
df = pd.read_csv('data/graphs_data/RU.txt', sep='\t', header=None)[[1, 14, 18]]
df.columns = ['name', 'population', 'date']
names = df[(df['population'] > 800_000) & (df['population'] < 200_000_000)].sort_values(by='population', ascending=True).name.values
print(f'Количество городов: {len(names)}')

  df = pd.read_csv('data/graphs_data/RU.txt', sep='\t', header=None)[[1, 14, 18]]


Количество городов: 91


In [7]:
RULES = {
    'commercial': LandUse.BUSINESS,
    'industrial': LandUse.INDUSTRIAL,
    'cemetery': LandUse.SPECIAL,
    'garages': LandUse.INDUSTRIAL,
    'residential': LandUse.RESIDENTIAL,
    'retail': LandUse.BUSINESS,
    'grass': LandUse.RECREATION,
    'farmland': LandUse.AGRICULTURE,
    'construction': LandUse.SPECIAL,
    'brownfield': LandUse.INDUSTRIAL,
    'forest': LandUse.RECREATION,
    'recreation_ground': LandUse.RECREATION,
    'religious': LandUse.SPECIAL,
    'flowerbed': LandUse.RECREATION,
    'military': LandUse.SPECIAL,
    'landfill': LandUse.TRANSPORT
}

BC_TAGS = {
    'roads': {
        "highway": [
            "construction", "crossing", "living_street", "motorway", "motorway_link", "motorway_junction",
            "pedestrian", "primary", "primary_link", "raceway", "residential", "road", "secondary",
            "secondary_link", "services", "tertiary", "tertiary_link", "track", "trunk", "trunk_link",
            "turning_circle", "turning_loop", "unclassified"
        ],
        "service": ["living_street", "emergency_access"]
    },
    'railways': {
        "railway": "rail"
    },
    'water': {
        'riverbank': True,
        'reservoir': True,
        'basin': True,
        'dock': True,
        'canal': True,
        'pond': True,
        'natural': ['water', 'bay'],
        'waterway': ['river', 'canal', 'ditch'],
        'landuse': 'basin',
        'water': 'lake'
    }
}

def process_and_save_city(city_name, folder="cities", crs='EPSG:3857'):
    """Загружает данные города, формирует блоки, сохраняет в {city}.pkl"""
    os.makedirs(folder, exist_ok=True)
    boundaries = ox.geocode_to_gdf(city_name)

    # Получаем water
    try:
        water = ox.features_from_polygon(boundaries.union_all(), BC_TAGS['water']).reset_index(drop=True)
        water = water[water.geom_type.isin(['Polygon', 'MultiPolygon', 'LineString', 'MultiLineString'])].copy()
        if water.empty:
            water = None
        else:
            water.to_crs(crs, inplace=True)
    except Exception:
        water = None

    # Получаем roads
    try:
        roads = ox.features_from_polygon(boundaries.union_all(), BC_TAGS['roads']).reset_index(drop=True)
        roads = roads[roads.geom_type.isin(['LineString', 'MultiLineString'])].copy()
        if roads.empty:
            roads = None
        else:
            roads.to_crs(crs, inplace=True)
    except Exception:
        roads = None

    # Получаем railways
    try:
        railways = ox.features_from_polygon(boundaries.union_all(), BC_TAGS['railways']).reset_index(drop=True)
        railways = railways[railways.geom_type.isin(['LineString', 'MultiLineString'])].copy()
        if railways.empty:
            railways = None
        else:
            railways.to_crs(crs, inplace=True)
    except Exception:
        railways = None

    boundaries.to_crs(crs, inplace=True)

    lines, polygons = preprocess_urban_objects(
        roads_gdf=roads if roads is not None else None,
        railways_gdf=railways if railways is not None else None,
        water_gdf=water if water is not None else None
    )
    blocks = cut_urban_blocks(boundaries, lines, polygons)

    # Получаем функциональные зоны
    try:
        functional_zones = ox.features_from_polygon(boundaries.to_crs(4326).union_all(), tags={'landuse': True})
        functional_zones = functional_zones[functional_zones.geom_type.isin(['Polygon', 'MultiPolygon'])].copy()
        functional_zones = functional_zones.reset_index(drop=True)[['geometry', 'landuse']].rename(columns={'landuse': 'functional_zone'})
        if functional_zones.empty:
            functional_zones = None
        else:
            functional_zones = functional_zones.to_crs(crs)
    except Exception:
        functional_zones = None

    if functional_zones is not None:
        blocks = assign_land_use(
            blocks,
            functional_zones.reset_index(drop=True),
            RULES
        )
        blocks.dropna(subset=['land_use'], inplace=True)
        blocks = blocks[['geometry', 'land_use', 'share']][blocks.share > 0.8]
        blocks.reset_index(drop=True, inplace=True)
    else:
        blocks['land_use'] = None
        blocks['share'] = None

    # Сохраняем
    filename = os.path.join(folder, f"{city_name}.pkl")
    with open(filename, "wb") as f:
        pickle.dump(blocks, f)
    return blocks

def load_and_merge_cities(folder="cities"):
    """Загружает все .pkl из папки и объединяет в один GeoDataFrame"""
    gdfs = []
    for fname in os.listdir(folder):
        if fname.endswith(".pkl"):
            with open(os.path.join(folder, fname), "rb") as f:
                gdf = pickle.load(f)
                gdf["city"] = os.path.splitext(fname)[0]
                gdfs.append(gdf)
    if gdfs:
        return gpd.GeoDataFrame(pd.concat(gdfs, ignore_index=True), crs=gdfs[0].crs)
    else:
        return gpd.GeoDataFrame()

In [None]:
# Пример использования:
for name in names:
    try:
        process_and_save_city(f"{name}")
    except:
        print(f"Ошибка при обработке города {name}. Возможно, нет данных или ошибка в запросе.")
        continue

Ошибка при обработке города Respublika Mordoviya. Возможно, нет данных или ошибка в запросе.


  multi_poly_proj = utils_geo._consolidate_subdivide_geometry(poly_proj)


In [4]:
all_blocks = load_and_merge_cities()
all_blocks['land_use'] = all_blocks['land_use'].astype(str)
all_blocks = all_blocks[~all_blocks['land_use'].isin(['None'])]
all_blocks.groupby('land_use').size().sort_values(ascending=False)

  return gpd.GeoDataFrame(pd.concat(gdfs, ignore_index=True), crs=gdfs[0].crs)


land_use
LandUse.RESIDENTIAL    86308
LandUse.AGRICULTURE     5513
LandUse.INDUSTRIAL      4666
LandUse.SPECIAL          972
LandUse.RECREATION       468
LandUse.BUSINESS         408
LandUse.TRANSPORT          7
dtype: int64