# Export Geodata from OpenStreetMap - OSM Only

In [1]:
import geopandas as gpd
import osm2geojson
import requests
import pandas as pd
from pathlib import Path
import numpy as np

## Defines

In [14]:
# Overpass API endpoint that every query in this notebook is sent to
overpass_url = "http://overpass-api.de/api/interpreter"

# Output locations: the export root, the per-city shapes folder inside it,
# and the GeoPackage that holds all downloaded settlements
export_folder = Path('../export')
export_folder_shapes = export_folder.joinpath('shapes')
export_folder_db = export_folder.joinpath('settlements-all.gpkg')

In [7]:
# Create the export folder and the per-city shapes folder inside it.
# `parents=True` also creates missing intermediate folders and
# `exist_ok=True` makes the cell safe to re-run.
export_folder.mkdir(parents=True, exist_ok=True)
export_folder_shapes.mkdir(parents=True, exist_ok=True)

## Get Data from OSM

In [3]:
# Overpass QL query for the settlement download:
#   * binds the areas named "Донецька область" and "Луганська область"
#     to .area_0 and .area_1
#   * selects nodes and relations tagged place=city or place=town inside
#     either area
#   * `(._;>;);` adds the members the selected elements refer to (Overpass
#     "recurse down"), which is needed to build relation geometries
#   * `out body;` returns the elements including their tags, as JSON
query = """
    [out:json];
    area["name"="Донецька область"]->.area_0;
    area["name"="Луганська область"]->.area_1;
    (
        node["place"="city"](area.area_0);
        node["place"="town"](area.area_0);
        relation["place"="city"](area.area_0);
        relation["place"="town"](area.area_0);
        node["place"="city"](area.area_1);
        node["place"="town"](area.area_1);
        relation["place"="city"](area.area_1);
        relation["place"="town"](area.area_1);
    );
    (._;>;);
    out body;
"""

In [15]:
# Download all cities/towns and store them as `gdf_raw` / settlements-all.gpkg.
# The timeout (seconds) keeps the cell from hanging forever on a stalled
# connection.
response = requests.get(overpass_url, params={'data': query}, timeout=600)

# Fail loudly on an HTTP error: later cells depend on `gdf_raw`, so silently
# skipping the conversion would only surface as a confusing NameError later.
response.raise_for_status()

# convert result to GeoDataFrame
data = response.json()
geojson = osm2geojson.json2geojson(data)
gdf = gpd.GeoDataFrame.from_features(geojson)

# 'type' holds the OSM element type (compared against 'node'/'relation' later)
gdf.rename(columns={'type': 'overpasstype'}, inplace=True)

# Tags to columns
tags_df = gdf['tags'].apply(pd.Series)

# Remove names, too much to handle...
# Localised `name:*` columns are dropped, except those starting with `name:en`.
tags_df = tags_df.loc[:, ~tags_df.columns.str.startswith('name:') | tags_df.columns.str.startswith('name:en')]
for prefix in ('old_name:', 'alt_old_name:', 'alt_name:', 'old_alt_name:'):
    tags_df = tags_df.loc[:, ~tags_df.columns.str.startswith(prefix)]

# Replace the nested 'tags' column by the flattened tag columns
gdf_raw = pd.concat([gdf.drop(columns=['tags']), tags_df], axis=1)

gdf_raw.crs = 'EPSG:4326'

gdf_raw.to_file(export_folder_db)

## Add column: `occupied_since`
This determines, for each settlement area, when it was occupied, so that it can be colored accordingly. It uses area shapes (GeoJSON files) derived from liveuamap. If you do not have these files, you can skip this step.

Values:
* `2014` Occupied during the 2014 occupation of Crimea and Donetsk
* `2022` Occupied during the 2022 invasion
* `not_occupied` Not occupied

In [None]:
advances_folder = Path('../../ukraine_mapping/export/areas_history/')
advances_day = '2025-03-03'

# Load 2014 shapes
gdf_2014 = gpd.read_file(Path('../data/separatist-area.geojson'))
geom_separatist = gdf_2014.iloc[0].geometry

# Load current advances
gdf_2022 = gpd.read_file(advances_folder / f'{advances_day}.geojson')
gdf_2022 = gdf_2022[gdf_2022['fill'] == '#ce4631']  # Only russian advances
if gdf_2022.empty:
    raise ValueError(
        f"No feature with fill '#ce4631' found in {advances_day}.geojson"
    )
# NOTE(review): only the first matching feature is used; confirm the file
# holds a single (multi)polygon for the advances.
geom_advances = gdf_2022.iloc[0].geometry

# Default for every settlement
gdf_raw['occupied_since'] = 'not_occupied'

# Add 2022 (vectorised test of every geometry against the advances shape)
gdf_raw.loc[gdf_raw.geometry.within(geom_advances), 'occupied_since'] = '2022'

# Add 2014 (applied last, so it overrides '2022' where both shapes match)
gdf_raw.loc[gdf_raw.geometry.within(geom_separatist), 'occupied_since'] = '2014'

gdf_raw.to_file(export_folder_db)

## Only > 10k
We only take settlement areas with at least 10,000 inhabitants.

In [13]:
# Place nodes carry the tags (name, population, ...) of each settlement
gdf_points = gdf_raw[gdf_raw["overpasstype"] == 'node'].copy()

# The population tag is free text; values that are not plain numbers become
# NaN instead of aborting the whole cell with a ValueError.
raw_population = gdf_points['population']
gdf_points['population'] = pd.to_numeric(raw_population, errors='coerce').astype(float)

# Report values that were present but could not be parsed, so that dropped
# settlements do not go unnoticed.
unparsable = gdf_points['population'].isna() & raw_population.notna()
if unparsable.any():
    print(f"Ignoring {unparsable.sum()} settlement(s) with a non-numeric population tag")

gdf_points = gdf_points.sort_values('population', ascending=False)

# Keep settlements with at least 10000 inhabitants (NaN compares as False)
gdf_points = gdf_points[gdf_points.population >= 10000]

gdf_points.to_file(export_folder / 'settlements-reduced.gpkg')
len(gdf_points)

84

## Get all necessary shapes for each city

In [11]:
def request_overpass(query, type, clipping_geometry):
    """Run an Overpass query and return the clipped result as a GeoDataFrame.

    Args:
        query: Overpass QL query string, sent to ``overpass_url``.
        type: Label stored in the ``_type`` column of every returned row.
            (The name shadows the builtin; it is kept for compatibility.)
        clipping_geometry: Geometry passed to ``gpd.clip`` to cut the result.

    Returns:
        GeoDataFrame in EPSG:4326 with one column per OSM tag, the element
        type in ``overpasstype`` and the label in ``_type``.

    Raises:
        requests.HTTPError: If the server answers with an error status.
            (Previously ``None`` was returned silently, which made callers
            fail later with an unrelated ``TypeError``.)
    """
    # The timeout (seconds) keeps the call from hanging on a stalled connection
    response = requests.get(overpass_url, params={'data': query}, timeout=600)
    response.raise_for_status()

    # convert result to GeoDataFrame
    data = response.json()
    geojson = osm2geojson.json2geojson(data)
    gdf = gpd.GeoDataFrame.from_features(geojson)

    gdf.rename(columns={'type': 'overpasstype'}, inplace=True)

    # Tags to columns
    tags_df = gdf['tags'].apply(pd.Series)

    # Remove names, too much to handle...
    # Localised `name:*` columns are dropped, except those starting with `name:en`.
    tags_df = tags_df.loc[:, ~tags_df.columns.str.startswith('name:') | tags_df.columns.str.startswith('name:en')]
    for prefix in ('old_name:', 'alt_old_name:', 'alt_name:', 'old_alt_name:'):
        tags_df = tags_df.loc[:, ~tags_df.columns.str.startswith(prefix)]

    # Replace the nested 'tags' column by the flattened tag columns
    gdf = pd.concat([gdf.drop(columns=['tags']), tags_df], axis=1)

    gdf['_type'] = type

    # Clip
    gdf = gpd.clip(gdf, clipping_geometry)
    gdf.crs = 'EPSG:4326'

    return gdf
    
def get_bounds(gdf):
    """Return the bounds of the first row of *gdf* with each coordinate pair swapped.

    The first row of ``gdf.bounds`` is reordered from positions (0, 1, 2, 3)
    to (1, 0, 3, 2). With geopandas' column order (minx, miny, maxx, maxy)
    this yields (miny, minx, maxy, maxx).
    """
    first_row = gdf.bounds.values[0]
    return tuple(first_row[position] for position in (1, 0, 3, 2))

In [12]:
# Overpass QL templates for the per-city downloads. The four placeholders
# form the bounding-box filter, which Overpass expects in the order
# (south, west, north, east); they are filled from get_bounds() as
# x1, y1, x2, y2.
# Fix: the last placeholder used to be {x2} again, so the east bound was
# replaced by the north latitude and y2 was never used.

# Ways and relations tagged as buildings
query_buildings = """
[out:json];
(
    way["building"]({x1},{y1},{x2},{y2});
    relation["building"]({x1},{y1},{x2},{y2});
);
(._;>;);
out body;
"""

# Ways tagged as highways (streets)
query_streets = """
[out:json];
(
    way["highway"]({x1},{y1},{x2},{y2});
);
(._;>;);
out body;
"""

# Ways tagged as railways
query_railway = """
[out:json];
(
    way["railway"]({x1},{y1},{x2},{y2});
);
(._;>;);
out body;
"""

In [13]:
# Get Streets

def get_streets(name, gdf_bounds):
    """Download the streets of one city and store them as ``{name}_streets.gpkg``.

    Does nothing when the output file already exists. The query area is
    the bounding box of ``gdf_bounds``; the result is clipped to the
    geometry of its first row.
    """
    target = export_folder_shapes / f"{name}_streets.gpkg"

    # Already downloaded, nothing to do
    if target.exists():
        return

    min_y, min_x, max_y, max_x = get_bounds(gdf_bounds)
    overpass_query = query_streets.format(x1=min_y, y1=min_x, x2=max_y, y2=max_x)
    clip_shape = gdf_bounds.iloc[0].geometry

    streets = request_overpass(overpass_query, 'streets', clip_shape)

    # Remove points
    not_a_node = streets['overpasstype'] != 'node'

    # Store
    streets[not_a_node].to_file(target)

In [14]:
# Get Railway
def get_railway(name, gdf_bounds):
    """Download the railway lines of one city and store them as ``{name}_railway.gpkg``.

    Does nothing when the output file already exists. The query area is
    the bounding box of ``gdf_bounds``; the result is clipped to the
    geometry of its first row. Only elements whose ``railway`` value is
    'rail' are kept.
    """
    target = export_folder_shapes / f"{name}_railway.gpkg"

    # Already downloaded, nothing to do
    if target.exists():
        return

    min_y, min_x, max_y, max_x = get_bounds(gdf_bounds)
    overpass_query = query_railway.format(x1=min_y, y1=min_x, x2=max_y, y2=max_x)
    clip_shape = gdf_bounds.iloc[0].geometry

    tracks = request_overpass(overpass_query, 'railway', clip_shape)

    # Remove points
    tracks = tracks[tracks['overpasstype'] != 'node']

    # Only take these types
    wanted_types = ['rail']
    is_wanted = tracks['railway'].isin(wanted_types)

    # Store
    tracks[is_wanted].to_file(target)

In [15]:
# Get Buildings
def get_buildings(name, gdf_bounds):
    """Download the buildings of one city and store them as ``{name}_buildings.gpkg``.

    Does nothing when the output file already exists. The query area is
    the bounding box of ``gdf_bounds``; the result is clipped to the
    geometry of its first row.
    """
    target = export_folder_shapes / f"{name}_buildings.gpkg"

    # Already downloaded, nothing to do
    if target.exists():
        return

    min_y, min_x, max_y, max_x = get_bounds(gdf_bounds)
    overpass_query = query_buildings.format(x1=min_y, y1=min_x, x2=max_y, y2=max_x)
    clip_shape = gdf_bounds.iloc[0].geometry

    footprints = request_overpass(overpass_query, 'buildings', clip_shape)

    # Remove FIXME, causes error (column is dropped only when present)
    footprints = footprints.drop(columns=['FIXME'], errors='ignore')

    # Remove points
    not_a_node = footprints['overpasstype'] != 'node'

    # Store
    footprints[not_a_node].to_file(target)

In [None]:
def get_name(row):
    """Return the lower-cased name of a settlement row.

    The value of ``name:en`` is preferred; ``name`` is used when
    ``name:en`` is absent, ``None`` or NaN.

    Args:
        row: Mapping-like row (e.g. a pandas Series or dict) with a
            ``name`` entry and optionally a ``name:en`` entry.

    Returns:
        The chosen name converted to lower case.
    """
    english_name = row.get('name:en')

    # pd.notna covers None and every NaN object; the former identity test
    # against np.nan missed NaN values that are not the np.nan singleton.
    if pd.notna(english_name):
        return english_name.lower()
    return row['name'].lower()

# Download buildings, streets, railway and the boundary for every settlement
# in gdf_points.

# All files of this loop are written into the shapes folder; make sure it
# exists before anything is stored.
export_folder_shapes.mkdir(parents=True, exist_ok=True)

for _, row in gdf_points.iterrows():

    name = get_name(row)

    # Boundary: relation(s) whose geometry contains the settlement's place node
    gdf_city = gdf_raw[gdf_raw.geometry.contains(row.geometry) & (gdf_raw['overpasstype'] == 'relation')]

    # Without a boundary there is neither a bounding box nor a clipping
    # geometry; skip with a message instead of failing with an IndexError.
    if gdf_city.empty:
        print(f"Skipping {name}: no boundary relation contains its place node")
        continue

    # Download data
    get_buildings(name, gdf_city)
    get_streets(name, gdf_city)
    get_railway(name, gdf_city)

    # Get Boundaries
    gdf_city.to_file(export_folder_shapes / f"{name}_boundaries.gpkg")