In [1]:
import geopandas as gpd
import pyarrow as pa
import pyarrow.parquet as pq

from src import Config
from src.application.common import BuildingHandler
from src.application.services import OpenStreetMapService

In [2]:
def export_buildings_to_parquet(
        open_street_map_service: OpenStreetMapService,
        parquet_file: str,
        batch_size: int = 5000
) -> None:
    """Stream building features into a single Parquet file, chunk by chunk.

    Each chunk yielded by ``open_street_map_service.yield_building_chunks`` is
    turned into a GeoDataFrame, converted to an Arrow table and written as one
    or more row groups of ``parquet_file``. Only one chunk is held in memory
    at a time.

    Args:
        open_street_map_service: Service whose ``yield_building_chunks`` yields
            iterables of GeoJSON-like features.
        parquet_file: Destination path; an existing file is overwritten.
        batch_size: Number of features requested per chunk.

    Notes:
        * ``pq.write_table`` has no append mode, so a single ``ParquetWriter``
          is kept open for the whole export instead.
        * All chunks must produce the same Arrow schema as the first non-empty
          chunk; ``ParquetWriter.write_table`` raises otherwise.
        * If no non-empty chunk is yielded, no file is created.
        * NOTE(review): confirm that the geometry column converts to Arrow with
          the installed geopandas/pyarrow versions; if it does not, encode it
          first (e.g. ``gdf.to_wkb()``).
    """
    writer = None
    try:
        for chunk in open_street_map_service.yield_building_chunks(batch_size=batch_size):
            gdf = gpd.GeoDataFrame.from_features(chunk)

            # An empty frame would pin an empty schema on the writer (or write a
            # useless row group), so skip it.
            if gdf.empty:
                continue

            table = pa.Table.from_pandas(df=gdf, preserve_index=False)

            # Create the writer lazily: the schema is only known once the first
            # table has been built.
            if writer is None:
                writer = pq.ParquetWriter(parquet_file, table.schema)

            writer.write_table(table)
    finally:
        # Closing writes the Parquet footer; without it the file is unreadable.
        if writer is not None:
            writer.close()


In [3]:
# Trigger the PBF download only when Config.OSM_FILE_PATH is not an existing
# file, so re-running the notebook does not fetch it again.
# NOTE(review): presumably download_pbf() writes to Config.OSM_FILE_PATH — confirm.
if not Config.OSM_FILE_PATH.is_file():
    OpenStreetMapService.download_pbf()

In [4]:
# Build the OSM service, injecting the handler it uses for building features.
building_handler = BuildingHandler()
osm_service = OpenStreetMapService(building_handler=building_handler)

In [None]:
# Export all building chunks to "buildings.parquet" (a relative path, so it is
# resolved against the kernel's working directory), 5000 features per chunk.
export_buildings_to_parquet(osm_service, "buildings.parquet", batch_size=5000)