In [1]:
import geopandas as gpd
import math
import os

def split_geojson_gpd(input_file, output_path, num_files):
    """Split a GeoJSON file into several smaller GeoJSON files.

    The file is read with GeoPandas, six bounding-box columns are appended to
    every feature ('Max Longitude', 'Max Latitude', 'Min Longitude',
    'Min Latitude', 'Center longitude', 'Center latitude'), and the rows are
    written out in consecutive chunks named ``Split_GeoJson<k>.geojson``
    (k starting at 1) inside ``output_path``.

    The bounding box is computed once per feature, so the extra columns always
    line up with the feature they describe. A MultiPolygon gets a single box
    covering all of its parts.

    Parameters
    ----------
    input_file : str or path-like
        GeoJSON file to read.
    output_path : str or path-like
        Directory that receives the split files; created if it is missing.
    num_files : int
        Maximum number of output files. Each file holds
        ``ceil(total_rows / num_files)`` rows (the last one may hold fewer).
        Fewer files are written when there are not enough rows to fill all
        of them, so no empty file is produced.

    Raises
    ------
    ValueError
        If ``num_files`` is smaller than 1.
    """
    if num_files < 1:
        raise ValueError(f"num_files must be at least 1, got {num_files}")

    # Read the GeoJSON file using GeoPandas
    gdf = gpd.read_file(input_file)

    # Per-feature bounding box (columns minx, miny, maxx, maxy), indexed like
    # gdf. Using it instead of walking polygon parts keeps exactly one row per
    # feature, which is what the column-wise merge below relies on.
    bounds = gdf.geometry.bounds

    # x is treated as longitude and y as latitude.
    # NOTE(review): this assumes the data uses a geographic CRS -- confirm.
    combined_df = gdf.copy()
    combined_df['Max Longitude'] = bounds['maxx']
    combined_df['Max Latitude'] = bounds['maxy']
    combined_df['Min Longitude'] = bounds['minx']
    combined_df['Min Latitude'] = bounds['miny']
    combined_df['Center longitude'] = (bounds['maxx'] + bounds['minx']) / 2
    combined_df['Center latitude'] = (bounds['maxy'] + bounds['miny']) / 2

    # Make sure the destination exists before the first write
    os.makedirs(output_path, exist_ok=True)

    # Calculate the number of rows per file
    total_rows = len(combined_df)
    rows_per_file = math.ceil(total_rows / num_files)

    for i in range(num_files):
        # Calculate start and end indices for the rows of the current split
        start_index = i * rows_per_file
        if start_index >= total_rows:
            # Rounding up rows_per_file can exhaust the rows early; the
            # remaining splits would be empty, so stop here.
            break
        end_index = min(start_index + rows_per_file, total_rows)

        # Slice the GeoDataFrame to get the subset for the current split
        split_gdf = combined_df.iloc[start_index:end_index]

        # Define the output filename
        output_filename = os.path.join(output_path, f"Split_GeoJson{i+1}.geojson")

        # Save the subset GeoDataFrame to a new GeoJSON file
        split_gdf.to_file(output_filename, driver='GeoJSON')

        print(f"Created {output_filename}")



In [2]:
# Example usage: split one GeoJSON file into a fixed number of smaller files.
# Source file to split.
input_geojson_file = '/Users/gehaojie/Documents/GitHub/QTM550/Tax_Parcels_2023.geojson'
# Directory that receives the split files; update this path to your desired output directory.
output_directory = '/Users/gehaojie/Documents/GitHub/QTM550/Split'
# How many output files to produce.
number_of_output_files = 200

split_geojson_gpd(
    input_file=input_geojson_file,
    output_path=output_directory,
    num_files=number_of_output_files,
)

Created /Users/gehaojie/Documents/GitHub/QTM550/Split/Split_GeoJson1.geojson
Created /Users/gehaojie/Documents/GitHub/QTM550/Split/Split_GeoJson2.geojson
Created /Users/gehaojie/Documents/GitHub/QTM550/Split/Split_GeoJson3.geojson
Created /Users/gehaojie/Documents/GitHub/QTM550/Split/Split_GeoJson4.geojson
Created /Users/gehaojie/Documents/GitHub/QTM550/Split/Split_GeoJson5.geojson
Created /Users/gehaojie/Documents/GitHub/QTM550/Split/Split_GeoJson6.geojson
Created /Users/gehaojie/Documents/GitHub/QTM550/Split/Split_GeoJson7.geojson
Created /Users/gehaojie/Documents/GitHub/QTM550/Split/Split_GeoJson8.geojson
Created /Users/gehaojie/Documents/GitHub/QTM550/Split/Split_GeoJson9.geojson
Created /Users/gehaojie/Documents/GitHub/QTM550/Split/Split_GeoJson10.geojson
Created /Users/gehaojie/Documents/GitHub/QTM550/Split/Split_GeoJson11.geojson
Created /Users/gehaojie/Documents/GitHub/QTM550/Split/Split_GeoJson12.geojson
Created /Users/gehaojie/Documents/GitHub/QTM550/Split/Split_GeoJson13.geo