# Export Geodata from OpenStreetMap - OSM Only

In [None]:
import geopandas as gpd
import osm2geojson
import requests
import pandas as pd
from pathlib import Path
import numpy as np

## Defines

In [None]:
# Public Overpass API endpoint. HTTPS is used so that queries and results are
# not sent in clear text.
overpass_url = "https://overpass-api.de/api/interpreter"

# Root folder for everything this notebook writes
export_folder = Path('../export')
# Per-settlement layers (buildings, streets, railway, boundaries)
export_folder_shapes = export_folder / 'shapes'
# NOTE: despite its name this is a file (GeoPackage with all settlements), not a folder
export_folder_db = export_folder / 'settlements.gpkg'

## Get Data from OSM

In [None]:
# Create export folders.
# `parents=True` also creates missing parent directories and `exist_ok=True`
# makes re-running this cell a no-op instead of an error.
export_folder.mkdir(parents=True, exist_ok=True)
export_folder_shapes.mkdir(parents=True, exist_ok=True)

In [None]:
# Overpass QL query for the settlement list.
# - Binds the areas named "Донецька область" and "Луганська область" to
#   `.area_0` / `.area_1`.
# - Selects nodes and relations tagged place=city or place=town inside each area.
# - `(._;>;);` adds everything the selected elements reference (recurse down),
#   presumably so that relation geometries can be assembled downstream.
# - Output format is JSON (`[out:json]`).
query = """
    [out:json];
    area["name"="Донецька область"]->.area_0;
    area["name"="Луганська область"]->.area_1;
    (
        node["place"="city"](area.area_0);
        node["place"="town"](area.area_0);
        relation["place"="city"](area.area_0);
        relation["place"="town"](area.area_0);
        node["place"="city"](area.area_1);
        node["place"="town"](area.area_1);
        relation["place"="city"](area.area_1);
        relation["place"="town"](area.area_1);
    );
    (._;>;);
    out body;
"""

In [None]:
# Download all settlements from Overpass.
# Fail loudly on HTTP errors: silently skipping the cell would leave `gdf_raw`
# undefined (or stale from an earlier run) for the following cells.
response = requests.get(overpass_url, params={'data': query}, timeout=300)
response.raise_for_status()

# Convert result to GeoDataFrame
data = response.json()
geojson = osm2geojson.json2geojson(data)
gdf = gpd.GeoDataFrame.from_features(geojson)

# 'type' holds the OSM element type (node / way / relation)
gdf.rename(columns={'type': 'overpasstype'}, inplace=True)

# Tags to columns
tags_df = gdf['tags'].apply(pd.Series)

# Remove localized names (too many to handle), but keep the English ones
is_localized_name = tags_df.columns.str.startswith('name:')
is_english_name = tags_df.columns.str.startswith('name:en')
tags_df = tags_df.loc[:, ~is_localized_name | is_english_name]
for prefix in ('old_name:', 'alt_old_name:', 'alt_name:', 'old_alt_name:'):
    tags_df = tags_df.loc[:, ~tags_df.columns.str.startswith(prefix)]

gdf_raw = pd.concat([gdf.drop(columns=['tags']), tags_df], axis=1)

gdf_raw.crs = 'EPSG:4326'

# Some cleaning: keep only relations. `.copy()` so the column assignment
# below does not write into a view of the unfiltered frame.
gdf_raw = gdf_raw[gdf_raw['overpasstype'] == 'relation'].copy()
# OSM population values are free text; anything non-numeric becomes NaN
# instead of aborting the whole export.
gdf_raw['population'] = pd.to_numeric(gdf_raw['population'], errors='coerce').astype(float)
gdf_raw = gdf_raw.sort_values('population', ascending=False)

# If you only want settlements with a population of at least 10k, enable this line
# gdf_raw = gdf_raw[gdf_raw.population >= 10000]

gdf_raw.to_file(export_folder_db)

## Add column: `occupied_since`
This determines, for each settlement area, since when it has been occupied. We need this to color the settlements accordingly. The occupation areas come from liveuamap and are loaded below as GeoJSON files. If you do not have these files, you can skip this step.

Values:
* `2014` Occupied since the 2014 occupation of Crimea and Donetsk
* `2022` Occupied during the 2022 invasion
* `not-captured` Not occupied

In [47]:
advances_folder = Path('../../ukraine_mapping/export/areas_history/')
advances_day = '2025-03-05'

# Load 2014 shapes
gdf_2014 = gpd.read_file(Path('../data/separatist-area.geojson'))
# NOTE(review): only the first feature of the file is used - confirm the
# file contains a single (multi)polygon.
geom_separatist = gdf_2014.iloc[0].geometry

# Load current advances
gdf_2022 = gpd.read_file(advances_folder / f'{advances_day}.geojson')
gdf_2022 = gdf_2022[gdf_2022['fill'] == '#ce4631']  # Only russian advances
if gdf_2022.empty:
    raise ValueError(
        f"No feature with fill '#ce4631' found in {advances_day}.geojson"
    )
# NOTE(review): only the first matching feature is used - confirm there is
# exactly one feature with this fill color per day.
geom_advances = gdf_2022.iloc[0].geometry

# Default for every settlement; overwritten below where applicable
gdf_raw['occupied_since'] = 'not-captured'

# A settlement only counts as occupied when it lies completely within the
# area. `within` is evaluated for the whole column at once; rows without a
# geometry yield False and keep their current value.

# Add 2022
gdf_raw.loc[gdf_raw.geometry.within(geom_advances), 'occupied_since'] = '2022'

# Add 2014 (applied last, so it takes precedence over 2022)
gdf_raw.loc[gdf_raw.geometry.within(geom_separatist), 'occupied_since'] = '2014'

gdf_raw.to_file(export_folder_db)

## Get all necessary shapes for each city

In [None]:
def request_overpass(query, type, clipping_geometry):
    """Run an Overpass query and return the result clipped to a geometry.

    Args:
        query: Complete Overpass QL query string (JSON output expected).
        type: Label written to the `_type` column of every returned row.
            (The name shadows the builtin; kept so existing callers work.)
        clipping_geometry: Geometry passed to `gpd.clip` to cut the result.

    Returns:
        GeoDataFrame in EPSG:4326 with one column per OSM tag (localized
        name variants other than `name:en*` removed), the OSM element type
        in `overpasstype` and the label in `_type`.

    Raises:
        requests.HTTPError: If Overpass answers with an error status.
        requests.Timeout: If the server does not answer in time.
    """
    response = requests.get(overpass_url, params={'data': query}, timeout=300)
    # Raise instead of implicitly returning None, which only surfaced later
    # as an unrelated-looking error in the caller.
    response.raise_for_status()

    # Convert result to GeoDataFrame
    data = response.json()
    geojson = osm2geojson.json2geojson(data)
    gdf = gpd.GeoDataFrame.from_features(geojson)

    gdf.rename(columns={'type': 'overpasstype'}, inplace=True)

    # Tags to columns
    tags_df = gdf['tags'].apply(pd.Series)

    # Remove localized names (too many to handle), but keep the English ones
    is_localized_name = tags_df.columns.str.startswith('name:')
    is_english_name = tags_df.columns.str.startswith('name:en')
    tags_df = tags_df.loc[:, ~is_localized_name | is_english_name]
    for prefix in ('old_name:', 'alt_old_name:', 'alt_name:', 'old_alt_name:'):
        tags_df = tags_df.loc[:, ~tags_df.columns.str.startswith(prefix)]

    gdf = pd.concat([gdf.drop(columns=['tags']), tags_df], axis=1)

    gdf['_type'] = type

    # Clip
    gdf = gpd.clip(gdf, clipping_geometry)
    gdf.crs = 'EPSG:4326'

    return gdf
    
# def get_bounds(gdf):
#     bbox = gdf.bounds.values[0]
#     return ( bbox[1], bbox[0], bbox[3], bbox[2])

In [None]:
# Overpass QL templates for the per-settlement layers. Each one is filled
# with `str.format` and restricted to a bounding box.
# NOTE: the placeholder names are misleading. The callers unpack
# `geometry.bounds` as `y1, x1, y2, x2`, so `x1`/`x2` receive the min/max
# y-values (latitudes for lon/lat data) and `y1`/`y2` the min/max x-values
# (longitudes). This yields the (south, west, north, east) order that an
# Overpass bounding-box filter expects.

# Ways and relations carrying a "building" tag
query_buildings = """
[out:json];
(
    way["building"]({x1},{y1},{x2},{y2});
    relation["building"]({x1},{y1},{x2},{y2});
);
(._;>;);
out body;
"""

# Ways carrying a "highway" tag
query_streets = """
[out:json];
(
    way["highway"]({x1},{y1},{x2},{y2});
);
(._;>;);
out body;
"""

# Ways carrying a "railway" tag
query_railway = """
[out:json];
(
    way["railway"]({x1},{y1},{x2},{y2});
);
(._;>;);
out body;
"""

In [None]:
# Get Railway
def get_railway(name, geometry):
    """Download the railway lines of a settlement and store them as GeoPackage.

    Writes `<name>_railway.gpkg` into `export_folder_shapes`. Nothing is done
    when that file already exists, so an interrupted export can be resumed.
    Failures are reported with `print` and do not abort the export.

    Args:
        name: File name prefix of the settlement.
        geometry: Settlement geometry; its bounds define the query bounding
            box and the result is clipped to it.
    """
    out_file = export_folder_shapes / f"{name}_railway.gpkg"

    if out_file.exists():
        return

    try:
        # bounds is (minx, miny, maxx, maxy); the query template expects
        # x1/x2 = min/max y-value and y1/y2 = min/max x-value.
        west, south, east, north = geometry.bounds

        gdf_railway = request_overpass(
            query_railway.format(x1=south, y1=west, x2=north, y2=east),
            'railway',
            geometry,
        )

        # Only lines
        gdf_railway = gdf_railway[gdf_railway.geometry.type.isin(['LineString', 'MultiLineString'])]

        # Only take these railway types
        railway = ['rail']
        gdf_railway = gdf_railway[gdf_railway['railway'].isin(railway)]

        # Store
        gdf_railway.to_file(out_file)

    # `Exception` instead of a bare `except`, so that interrupting the
    # kernel (KeyboardInterrupt) actually stops the export.
    except Exception as exc:
        print(f"Error downloading Railway for {name}: {exc!r}")

In [None]:
# Get Buildings
def get_buildings(name, geometry):
    """Download the building footprints of a settlement and store them as GeoPackage.

    Writes `<name>_buildings.gpkg` into `export_folder_shapes`. Nothing is
    done when that file already exists, so an interrupted export can be
    resumed. Failures are reported with `print` and do not abort the export.

    Args:
        name: File name prefix of the settlement.
        geometry: Settlement geometry; its bounds define the query bounding
            box and the result is clipped to it.
    """
    out_file = export_folder_shapes / f"{name}_buildings.gpkg"

    if out_file.exists():
        return

    try:
        # bounds is (minx, miny, maxx, maxy); the query template expects
        # x1/x2 = min/max y-value and y1/y2 = min/max x-value.
        west, south, east, north = geometry.bounds

        gdf_buildings = request_overpass(
            query_buildings.format(x1=south, y1=west, x2=north, y2=east),
            'buildings',
            geometry,
        )

        # Remove FIXME, causes error
        if 'FIXME' in gdf_buildings.columns:
            gdf_buildings = gdf_buildings.drop(columns=['FIXME'])

        # Only Polygons
        gdf_buildings = gdf_buildings[gdf_buildings.geometry.type.isin(['Polygon', 'MultiPolygon'])]

        # Store
        gdf_buildings.to_file(out_file)

    # `Exception` instead of a bare `except`, so that interrupting the
    # kernel (KeyboardInterrupt) actually stops the export.
    except Exception as exc:
        print(f"Error downloading Buildings for {name}: {exc!r}")

In [None]:
# Get Streets
def get_streets(name, geometry):
    """Download the streets of a settlement and store them as GeoPackage.

    Writes `<name>_streets.gpkg` into `export_folder_shapes`. Nothing is done
    when that file already exists, so an interrupted export can be resumed.
    Failures are reported with `print` and do not abort the export.

    Args:
        name: File name prefix of the settlement.
        geometry: Settlement geometry; its bounds define the query bounding
            box and the result is clipped to it.
    """
    out_file = export_folder_shapes / f"{name}_streets.gpkg"

    if out_file.exists():
        return

    try:
        # bounds is (minx, miny, maxx, maxy); the query template expects
        # x1/x2 = min/max y-value and y1/y2 = min/max x-value.
        west, south, east, north = geometry.bounds

        gdf_streets = request_overpass(
            query_streets.format(x1=south, y1=west, x2=north, y2=east),
            'streets',
            geometry,
        )

        # Only lines
        gdf_streets = gdf_streets[gdf_streets.geometry.type.isin(['LineString', 'MultiLineString'])]

        # Store
        gdf_streets.to_file(out_file)

    # `Exception` instead of a bare `except`, so that interrupting the
    # kernel (KeyboardInterrupt) actually stops the export.
    except Exception as exc:
        print(f"Error downloading streets for {name}: {exc!r}")

## Start export
This is fail-safe: if you need to stop, no worries. You can run it again and it will continue where it left off.

In [None]:
def get_name(row):
    """Return the lower-cased name used as file name prefix for a settlement.

    The English name (`name:en`) is preferred; the local `name` is used when
    the English one is absent, missing (None / NaN) or empty.

    Args:
        row: Mapping-like settlement record (e.g. a pandas Series or a dict)
            providing `.get` and item access.

    Returns:
        The chosen name in lower case.
    """
    # `isinstance(..., str)` rejects None and every NaN object, not only the
    # `np.nan` singleton, and `.get` tolerates a missing `name:en` column.
    english_name = row.get('name:en')
    if isinstance(english_name, str) and english_name:
        return english_name.lower()
    return row['name'].lower()

# Export every settlement: first the three downloaded layers, then its boundary.
for i, row in gdf_raw.iterrows():

    name = get_name(row)

    # print(f"Download data for {name}")

    # Download data (each downloader skips layers that already exist on disk)
    for download_layer in (get_buildings, get_streets, get_railway):
        download_layer(name, row.geometry)

    # Store Boundaries as a one-row GeoDataFrame in the CRS of the settlement table
    boundaries_file = export_folder_shapes / f"{name}_boundaries.gpkg"
    gdf_city = gpd.GeoDataFrame([row], crs=gdf_raw.crs)
    gdf_city.to_file(boundaries_file)