In [None]:
import geopandas as gpd
import os
import glob
import pandas as pd

In [None]:
def merge_geojsons(directory):
    """
    Reads all GeoJSON files in the specified directory and concatenates them into a single GeoDataFrame.

    Files are read in sorted path order so the row order of the result is
    reproducible between runs. Files that cannot be read are reported and
    skipped. The CRS of the first readable file that declares one is used for
    the result; frames that declare a different CRS are reprojected to it
    before concatenation.

    Parameters:
    - directory (str): Path to the directory containing GeoJSON files.

    Returns:
    - gpd.GeoDataFrame: Merged GeoDataFrame containing all records, or an
      empty GeoDataFrame when no file could be found or read.
    """
    # glob yields paths in arbitrary, filesystem-dependent order; sort so the
    # merged row order (and the choice of reference CRS) is deterministic.
    geojson_files = sorted(glob.glob(os.path.join(directory, '*.geojson')))
    if not geojson_files:
        print('No GeoJSON files found.')
        return gpd.GeoDataFrame()

    gdfs = []
    for file in geojson_files:
        try:
            gdfs.append(gpd.read_file(file))
        except Exception as e:
            # Best effort: report the unreadable file and keep going.
            print(f"Error reading {file}: {e}")

    if not gdfs:
        # Files were present, but every one of them failed to load.
        print(f"No readable GeoJSON files found in {directory}.")
        return gpd.GeoDataFrame()

    # getattr with a default also covers frames without an active geometry
    # column, whose `crs` attribute access raises AttributeError.
    target_crs = next(
        (crs for crs in (getattr(gdf, 'crs', None) for gdf in gdfs) if crs is not None),
        None,
    )

    # Bring every frame that declares a different CRS into the reference CRS;
    # concatenating mixed CRSs would either raise or mislabel coordinates.
    aligned = []
    for gdf in gdfs:
        crs = getattr(gdf, 'crs', None)
        if crs is not None and crs != target_crs:
            gdf = gdf.to_crs(target_crs)
        aligned.append(gdf)

    merged_gdf = gpd.GeoDataFrame(pd.concat(aligned, ignore_index=True))

    # Only label the result when a CRS is known and concat did not carry it
    # over; calling set_crs with None or over an existing CRS can raise.
    if target_crs is not None and getattr(merged_gdf, 'crs', None) is None:
        merged_gdf = merged_gdf.set_crs(target_crs)
    return merged_gdf

In [None]:
def process_segmented_geojsons(input_path, osm_data_path, output_path):
    """
    Merges the GeoJSON files in a directory and writes out only the features
    that do not intersect any feature of the OSM buildings layer.

    Parameters:
    - input_path (str): Directory containing the GeoJSON files to merge.
    - osm_data_path (str): Path to the OSM buildings file used as the filter layer.
    - output_path (str): Path of the GeoJSON file to write.

    Returns:
    - None. The filtered features are written to output_path. Nothing is
      written when the input directory yields no features.
    """
    gdf = merge_geojsons(input_path)
    if gdf.empty:
        # An empty merge result has no geometry column, so the spatial join
        # below would fail; report and skip instead.
        print(f"No features found in {input_path}; skipping {output_path}.")
        return

    osm_buildings = gpd.read_file(osm_data_path)

    # sjoin compares raw coordinates, so both layers must share a CRS for the
    # intersection test to be meaningful.
    if (
        gdf.crs is not None
        and osm_buildings.crs is not None
        and osm_buildings.crs != gdf.crs
    ):
        osm_buildings = osm_buildings.to_crs(gdf.crs)

    # Join against the geometry only: the OSM attributes are not needed for
    # the filter, and carrying them along would add all-null columns to the
    # output and rename clashing input columns with _left/_right suffixes.
    osm_geometry = osm_buildings[[osm_buildings.geometry.name]]
    joined = gpd.sjoin(gdf, osm_geometry, how='left', predicate='intersects')

    # With a left join, rows that matched no building have a missing
    # 'index_right'; those are the features to keep.
    unmatched = joined[joined['index_right'].isna()].drop(columns='index_right')
    unmatched.to_file(output_path, driver='GeoJSON')

In [None]:
# One entry per run: the directory of GeoJSON files to merge, the OSM
# buildings file to filter against, and the path of the combined output.
data = [
    {
        'input_path': '../data/sliced/EE/2020_jsons',
        'osm_data_path': '../data/EE/buildings/osm_buildings_ee.geojson',
        'output_path': '../data/sliced/EE/2020_combined.geojson',
    },
    {
        'input_path': '../data/sliced/EE/2024_jsons',
        'osm_data_path': '../data/EE/buildings/osm_buildings_ee.geojson',
        'output_path': '../data/sliced/EE/2024_combined.geojson',
    },
    {
        'input_path': '../data/sliced/LT/2020_jsons',
        'osm_data_path': '../data/LT/buildings/osm_buildings_lt.geojson',
        'output_path': '../data/sliced/LT/2020_combined.geojson',
    },
    {
        'input_path': '../data/sliced/LT/2024_jsons',
        'osm_data_path': '../data/LT/buildings/osm_buildings_lt.geojson',
        'output_path': '../data/sliced/LT/2024_combined.geojson',
    },
]

# Process the configured datasets in order; the dict keys match the
# keyword parameters of process_segmented_geojsons.
for dataset in data:
    process_segmented_geojsons(**dataset)