Install the dependencies (run these commands once in a Colab/Jupyter cell before running the script).

!pip install geovoronoi[plotting]
!pip install geovoronoi descartes
!pip install geovoronoi descartes requests

This script processes geographic data to generate Voronoi regions from a set of locations and a user-supplied map file (a ZIP archive containing a shapefile, or a GeoJSON/TopoJSON file). It handles the file uploads, cleans and simplifies the boundary geometry, computes Voronoi polygons clipped to that boundary, and tags each region with the business unit of its location for visualization. The result is displayed as a map and exported as a single GeoJSON file for further analysis. The computation is done in a projected CRS (EPSG:3035); the exported data is converted back to WGS84 (EPSG:4326) and given styling properties so that it can be opened directly in mapping tools such as geojson.io.

import logging
from pprint import pprint
import matplotlib.pyplot as plt
import numpy as np
import geopandas as gpd
from shapely.geometry import Polygon, Point, MultiPolygon
import requests
import tempfile
import os
import pandas as pd
import zipfile
import json
import matplotlib.colors as mcolors
from geovoronoi import coords_to_points, points_to_coords, voronoi_regions_from_coords, calculate_polygon_areas
from google.colab import files

# Setup logging: show INFO-level messages, including those emitted by geovoronoi
logging.basicConfig(level=logging.INFO)
geovoronoi_log = logging.getLogger('geovoronoi')
geovoronoi_log.setLevel(logging.INFO)
geovoronoi_log.propagate = True

# Create temporary directory for our files (removed again at the end of the script)
temp_dir = tempfile.mkdtemp()

# Step 1: Upload the CSV file
print('Please upload your Locations.csv file:')
uploaded_csv = files.upload()
if not uploaded_csv:
    # A cancelled upload yields an empty dict; fail with a clear message
    # instead of an IndexError on the lookup below.
    raise ValueError("No CSV file was uploaded; cannot continue without location data.")
csv_filename = list(uploaded_csv.keys())[0]
print(f'Using uploaded CSV file: {csv_filename}')

# Load location data from the uploaded CSV file
locations_df = pd.read_csv(csv_filename)

# Fail early (before the user is asked for the map file) if a column that the
# rest of the script reads is missing.
required_columns = ('Longitude', 'Latitude', 'City', 'Business_Unit')
missing_columns = [col for col in required_columns if col not in locations_df.columns]
if missing_columns:
    raise ValueError(
        f"The uploaded CSV is missing required column(s): {', '.join(missing_columns)}. "
        f"Found columns: {locations_df.columns.tolist()}"
    )

print(f'Loaded {len(locations_df)} locations')
print(locations_df.head())

# Function to handle different map file formats
def load_map_shape(file_path):
    """
    Load a map shape from either a shapefile (ZIP) or a GeoJSON/TopoJSON file.

    Only the geometry of the first feature in the file is returned. If the
    file carries no CRS information, WGS84 (EPSG:4326) is assumed.

    Args:
        file_path: Path to the file. The extension (case-insensitive) selects
            the loader: ``.zip`` for a zipped shapefile, ``.json`` or
            ``.geojson`` for GeoJSON/TopoJSON.

    Returns:
        Tuple of (area_shape, original_crs)

    Raises:
        ValueError: If the extension is not supported, the ZIP contains no
            ``.shp`` file, the file contains no features, or the first
            feature has no usable geometry.
    """
    file_extension = os.path.splitext(file_path)[1].lower()

    if file_extension == '.zip':
        # Handle shapefile in ZIP
        source_label = 'Shapefile'
        print('Detected ZIP file. Attempting to load shapefile...')

        # Extract the ZIP file into the script's temporary directory
        shapefile_dir = os.path.join(temp_dir, 'shapefile')
        os.makedirs(shapefile_dir, exist_ok=True)
        with zipfile.ZipFile(file_path, 'r') as zip_ref:
            zip_ref.extractall(shapefile_dir)

        # Find the .shp file anywhere in the extracted tree. Archives often
        # wrap the files in a folder and may use an upper-case extension.
        # macOS resource-fork entries (__MACOSX/, ._name.shp) are skipped
        # because they are not real shapefiles.
        shp_files = []
        for root, dirs, names in os.walk(shapefile_dir):
            dirs[:] = [d for d in dirs if d != '__MACOSX']
            shp_files.extend(
                os.path.join(root, name)
                for name in names
                if name.lower().endswith('.shp') and not name.startswith('.')
            )
        if not shp_files:
            raise ValueError("No .shp file found in the uploaded ZIP file")

        # Sort so the choice is deterministic when several .shp files exist
        shp_files.sort()
        shapefile_path = shp_files[0]
        print(f'Found shapefile: {os.path.relpath(shapefile_path, shapefile_dir)}')

        # Read the shapefile using GeoPandas
        shape_gdf = gpd.read_file(shapefile_path)

    elif file_extension in ('.json', '.geojson'):
        # Handle GeoJSON file
        source_label = 'GeoJSON'
        print('Detected GeoJSON file. Loading...')

        # Peek at the JSON only to tell the user which flavour was detected;
        # GeoPandas reads both flavours through the same call.
        with open(file_path, 'r', encoding='utf-8') as f:
            geojson_data = json.load(f)

        # TopoJSON files have "transform" and "objects" properties
        if "transform" in geojson_data and "objects" in geojson_data:
            print('Detected TopoJSON format. Converting to GeoJSON...')

        shape_gdf = gpd.read_file(file_path)

    else:
        raise ValueError(f"Unsupported file format: {file_extension}. Please upload either a ZIP file containing a shapefile or a GeoJSON file.")

    # If no CRS is found (e.g. TopoJSON, or a shapefile without .prj),
    # assign WGS84 as the default so later reprojection does not fail.
    if shape_gdf.crs is None:
        print('No CRS detected. Assuming WGS84 (EPSG:4326).')
        shape_gdf = shape_gdf.set_crs(epsg=4326)

    print(f'{source_label} CRS: {shape_gdf.crs}')
    print(f'{source_label} contains {len(shape_gdf)} features')

    if file_extension == '.zip':
        # Display information about the shapefile
        print('\nShapefile attributes:')
        print(shape_gdf.columns.tolist())

    if len(shape_gdf) == 0:
        raise ValueError(f"The uploaded {source_label} file contains no features")
    if len(shape_gdf) > 1:
        print(f'Note: only the first of {len(shape_gdf)} features is used as the area shape.')

    # Get the first feature's geometry
    original_shape = shape_gdf.iloc[0].geometry
    if original_shape is None or original_shape.is_empty:
        raise ValueError(f"The first feature of the uploaded {source_label} file has no geometry")
    print(f'Using geometry: {original_shape.geom_type}')

    return original_shape, shape_gdf.crs

# Step 2: Upload the map file (either zip or geojson)
print('\nPlease upload your map file (ZIP containing shapefile OR GeoJSON/TopoJSON file):')
uploaded_map = files.upload()
if not uploaded_map:
    # A cancelled upload yields an empty dict; fail with a clear message
    # instead of an IndexError on the lookup below.
    raise ValueError("No map file was uploaded; cannot continue without a map shape.")
map_filename = list(uploaded_map.keys())[0]
print(f'Using uploaded map file: {map_filename}')

# Save the uploaded file into the temporary directory. Only the base name is
# used so the file cannot end up outside temp_dir.
map_path = os.path.join(temp_dir, os.path.basename(map_filename))
with open(map_path, 'wb') as f:
    f.write(uploaded_map[map_filename])

# Load the map shape based on file type
original_shape, original_crs = load_map_shape(map_path)

# Fix invalid geometries - use a small negative buffer first, then positive to smooth edges
# This helps eliminate small artifacts that might cause problems
print('Fixing any invalid geometry issues...')

# Resolve the source CRS into a CRS object so we can ask whether its units are
# degrees. A missing CRS is treated as WGS84, otherwise the reprojection below
# would fail on a geometry without CRS.
if original_crs is None:
    print('No CRS detected. Assuming WGS84 (EPSG:4326).')
source_crs = gpd.GeoSeries(
    [original_shape],
    crs=original_crs if original_crs is not None else "EPSG:4326",
).crs

# Apply a small negative buffer followed by positive buffer to clean the geometry
buffer_amount = 0.001  # Very small buffer appropriate for degrees
if source_crs.is_geographic:
    # Any geographic CRS (not only EPSG:4326) uses degrees, so the tiny buffer
    # applies; a 10-unit buffer here would mean 10 degrees.
    cleaned_shape = original_shape.buffer(-buffer_amount).buffer(buffer_amount)
else:
    # For projected CRS, use a larger buffer value
    cleaned_shape = original_shape.buffer(-10).buffer(10)  # 10 CRS units (metres for most projected CRSs)

# Handle empty geometry results (the negative buffer can erase small shapes)
if cleaned_shape.is_empty:
    print("Warning: Buffer resulted in empty geometry. Using original with buffer(0) instead.")
    area_shape = original_shape.buffer(0)
else:
    area_shape = cleaned_shape

print(f'Is geometry valid after fix: {area_shape.is_valid}')

# Create a new GeoDataFrame with the fixed geometry
fixed_gdf = gpd.GeoDataFrame(geometry=[area_shape], crs=source_crs)

# If the shapefile has a different CRS than EPSG:3035, convert it
if source_crs != "EPSG:3035":
    print(f'Converting from {source_crs} to EPSG:3035')
    fixed_gdf = fixed_gdf.to_crs(epsg=3035)
    area_shape = fixed_gdf.iloc[0].geometry

# Turn the Longitude/Latitude columns into point geometries (WGS84) and attach
# them to the location table
location_points = gpd.points_from_xy(locations_df["Longitude"], locations_df["Latitude"])
gdf = gpd.GeoDataFrame(locations_df, geometry=location_points, crs="EPSG:4326")

# Reproject the points to EPSG:3035, the CRS used for the area shape
gdf = gdf.to_crs(epsg=3035)

# Collect the projected (x, y) pairs as an array for the Voronoi calculation
coords = np.array([(pt.x, pt.y) for pt in gdf.geometry])

# Simplify the shape before Voronoi calculation to reduce complexity
# This can help avoid issues with the Voronoi algorithm
# The tolerance is expressed in the units of area_shape's CRS (EPSG:3035 after
# the reprojection above, i.e. metres).
simplify_tolerance = 100  # 100 meters (adjust based on your data)
simplified_shape = area_shape.simplify(simplify_tolerance)
# The length of each geometry's string form serves as a rough indicator of how
# much detail the simplification removed.
print(f"Simplified shape from {len(str(area_shape))} to {len(str(simplified_shape))} characters")

# Calculate the Voronoi regions, cut them with the simplified geographic area shape
# region_polys and region_pts are both keyed by region ID; region_pts holds the
# indices into coords of the points belonging to each region.
print('\nCalculating Voronoi regions...')
try:
    region_polys, region_pts = voronoi_regions_from_coords(coords, simplified_shape)
except Exception as e:
    # Any failure of the first attempt triggers exactly one retry with a coarser
    # shape. An error raised by the retry is not caught and stops the script.
    print(f"Error with simplified shape: {e}")
    print("Trying with further simplification...")
    # Try with more aggressive simplification (twice the tolerance, applied to
    # the unsimplified area_shape)
    simplified_shape = area_shape.simplify(simplify_tolerance * 2)
    region_polys, region_pts = voronoi_regions_from_coords(coords, simplified_shape)

# Calculate area in km²
poly_areas = calculate_polygon_areas(region_polys, m2_to_km2=True)  # converts m² to km²
print('Areas in km²:')
pprint(poly_areas)
print('Sum:')
print(sum(poly_areas.values()))

# Match region IDs with city names for labeling.
# When a region holds several points, the FIRST point supplies the label. This
# matches how create_plot_using_geopandas picks the region's business unit, so
# city and business unit always come from the same row. Regions without any
# point get no entry.
city_regions = {}
for region_id, point_indices in region_pts.items():
    first_idx = next(iter(point_indices), None)
    if first_idx is not None:
        city_regions[region_id] = locations_df.iloc[first_idx]['City']

# Create color mapping for business units
def get_business_unit_colors(voronoi_gdf):
    """
    Assign a hex fill color to every row based on its business unit.

    A 'fill' column is added to (or overwritten in) ``voronoi_gdf`` in place.
    Rows whose business unit is missing get the neutral color '#CCCCCC'.

    Args:
        voronoi_gdf: (Geo)DataFrame with a 'business_unit' column.

    Returns:
        Tuple of (voronoi_gdf, bu_color_map), where bu_color_map maps each
        distinct non-missing business unit to its hex color string.
    """
    default_color = '#CCCCCC'

    # Get unique business units (missing values excluded), in order of appearance
    business_units = voronoi_gdf['business_unit'].dropna().unique()
    unit_count = len(business_units)

    # Create a dictionary mapping business units to colors
    bu_color_map = {}
    if unit_count:
        # 'tab10' resampled to one entry per business unit.
        # pyplot.get_cmap is used because matplotlib.cm.get_cmap is deprecated
        # and no longer available in recent Matplotlib releases.
        cmap = plt.get_cmap('tab10', unit_count)
        bu_color_map = {
            bu: mcolors.to_hex(cmap(i))  # RGBA -> hex
            for i, bu in enumerate(business_units)
        }

    # Add the color column to the GeoDataFrame; anything without a mapped
    # business unit (None/NaN) falls back to the default color. Assigning the
    # finished column avoids an in-place fillna on a column selection, which
    # is chained assignment and may not update the frame.
    voronoi_gdf['fill'] = [
        bu_color_map.get(bu, default_color) for bu in voronoi_gdf['business_unit']
    ]

    return voronoi_gdf, bu_color_map

# Create plot using GeoDataFrames
def create_plot_using_geopandas(area_shape, region_polys, gdf, region_pts, poly_areas, city_regions):
    """
    Draw the area outline, the Voronoi regions and the labelled locations.

    Args:
        area_shape: Geometry of the overall area (coordinates in EPSG:3035).
        region_polys: Dict mapping region ID to the region's polygon.
        gdf: GeoDataFrame of the locations; needs 'City' and 'Business_Unit'
            columns and point geometries.
        region_pts: Dict mapping region ID to positional indices into gdf.
        poly_areas: Dict mapping region ID to area in km².
        city_regions: Dict mapping region ID to a city name.

    Returns:
        Tuple of (fig, voronoi_gdf, area_gdf, color_map).
    """
    target_crs = "EPSG:3035"
    label_offset = 20000  # vertical shift of the city label above its marker
    location_count = len(gdf)

    def first_business_unit(region_id):
        # Business unit of the first point of the region that is a valid row
        # position in gdf; None when the region has no such point.
        for point_idx in region_pts.get(region_id, []):
            if point_idx < location_count:
                return gdf.iloc[point_idx]['Business_Unit']
        return None

    # Area outline as a one-row GeoDataFrame
    area_gdf = gpd.GeoDataFrame(geometry=[area_shape], crs=target_crs)

    # One record per Voronoi region
    voronoi_data = [
        {
            'region_id': region_id,
            'city': city_regions.get(region_id, f"Region {region_id}"),
            'business_unit': first_business_unit(region_id),
            'area_km2': poly_areas.get(region_id, 0),
            'geometry': region_poly,
        }
        for region_id, region_poly in region_polys.items()
    ]
    voronoi_gdf = gpd.GeoDataFrame(voronoi_data, geometry='geometry', crs=target_crs)

    # Attach the per-business-unit fill color
    voronoi_gdf, color_map = get_business_unit_colors(voronoi_gdf)

    fig, ax = plt.subplots(figsize=(12, 12))

    # Base layer: the area itself
    area_gdf.plot(ax=ax, color='white', edgecolor='black')

    # Regions: colored by business unit when at least one is known,
    # otherwise colored by region ID. Both variants are 30% opaque.
    has_business_units = (
        'business_unit' in voronoi_gdf.columns
        and voronoi_gdf['business_unit'].notna().any()
    )
    if has_business_units:
        region_style = {
            'column': 'business_unit',
            'categorical': True,
            'cmap': 'tab10',
            'alpha': 0.3,
            'edgecolor': 'black',
            'legend': True,
            'legend_kwds': {'title': 'Business Unit'},
        }
    else:
        region_style = {
            'column': 'region_id',
            'cmap': 'Pastel1',
            'alpha': 0.3,
            'edgecolor': 'black',
        }
    voronoi_gdf.plot(ax=ax, **region_style)

    # Location markers with the city name printed above each one
    for _, location in gdf.iterrows():
        x = location.geometry.x
        y = location.geometry.y
        ax.plot(x, y, 'ko', markersize=5)
        ax.text(x, y + label_offset, location['City'], fontsize=8, ha='center', fontweight='bold')

    # Region areas (km²) are intentionally not drawn on the map

    ax.set_title(f'Voronoi regions for {len(gdf)} locations\nBased on custom map', fontsize=14)
    ax.set_aspect('equal')
    plt.tight_layout()

    return fig, voronoi_gdf, area_gdf, color_map

# Function to export the data to a single combined GeoJSON file for geojson.io
def export_combined_geojson(country_shape_gdf, voronoi_regions_gdf, points_gdf, color_map):
    """
    Export the country boundary, Voronoi regions, and points to a single GeoJSON file
    with geojson.io styling properties, then trigger a browser download of it.

    The inputs are not modified: each is reprojected into a new GeoDataFrame
    before any column is added.

    Args:
        country_shape_gdf: GeoDataFrame of the country boundary
        voronoi_regions_gdf: GeoDataFrame of Voronoi regions
        points_gdf: GeoDataFrame of points
        color_map: Dictionary mapping business units to colors; only used
            when voronoi_regions_gdf has no 'fill' column yet

    Returns:
        Name of the written GeoJSON file.
    """
    # Convert all to EPSG:4326 (WGS84) for better compatibility with GeoJSON
    country_wgs84 = country_shape_gdf.to_crs(epsg=4326)
    voronoi_wgs84 = voronoi_regions_gdf.to_crs(epsg=4326)
    points_wgs84 = points_gdf.to_crs(epsg=4326)

    # Add lat/lon columns to points for clarity
    points_wgs84['latitude'] = points_wgs84.geometry.y
    points_wgs84['longitude'] = points_wgs84.geometry.x

    # Add a type field to distinguish features
    country_wgs84['feature_type'] = 'country'
    voronoi_wgs84['feature_type'] = 'voronoi'
    points_wgs84['feature_type'] = 'point'

    # Ensure color information is preserved - use 'fill' property for geojson.io.
    # The filled column is assigned back as a whole; an in-place fillna on a
    # column selection is chained assignment and may not update the frame.
    if 'fill' not in voronoi_wgs84.columns:
        voronoi_wgs84['fill'] = (
            voronoi_wgs84['business_unit'].map(color_map).fillna('#CCCCCC')
        )

    # Add geojson.io styling properties
    voronoi_wgs84['fill-opacity'] = 0.3
    voronoi_wgs84['stroke'] = '#000000'  # Black outline
    voronoi_wgs84['stroke-width'] = 1
    voronoi_wgs84['stroke-opacity'] = 1

    # Add styling for the country outline. Colors are written as hex values
    # because the simplestyle convention does not define named colors.
    country_wgs84['fill'] = '#FFFFFF'
    country_wgs84['fill-opacity'] = 0.1
    country_wgs84['stroke'] = '#000000'
    country_wgs84['stroke-width'] = 2
    country_wgs84['stroke-opacity'] = 1

    # Add styling for points. Point markers are colored via 'marker-color';
    # the fill/stroke properties are kept for tools that read them.
    points_wgs84['marker-color'] = '#000000'
    points_wgs84['fill'] = '#000000'
    points_wgs84['fill-opacity'] = 1
    points_wgs84['stroke'] = '#FFFFFF'
    points_wgs84['stroke-width'] = 1
    points_wgs84['stroke-opacity'] = 0.8

    # Combine all GeoDataFrames (columns missing from a layer become null)
    combined_gdf = pd.concat([country_wgs84, voronoi_wgs84, points_wgs84])

    # Export to a single GeoJSON file
    output_filename = "voronoi_analysis.geojson"
    combined_gdf.to_file(output_filename, driver="GeoJSON")

    print(f"Exported combined GeoJSON file: {output_filename}")
    print("This file can be visualized directly in geojson.io")
    print("The file includes appropriate styling properties:")
    print(" - fill: Color for each feature")
    print(" - fill-opacity: Set to 0.3 for Voronoi regions")
    print(" - stroke: Border color")
    print(" - stroke-width: Border thickness")
    print(" - stroke-opacity: Border opacity")
    print(" - marker-color: Color of the point markers")

    # Download file
    files.download(output_filename)

    return output_filename

# Build the figure and the GeoDataFrames that back it, then write it to a PNG
print('\nGenerating visualization...')
fig, voronoi_gdf, area_gdf, color_map = create_plot_using_geopandas(
    area_shape,
    region_polys,
    gdf,
    region_pts,
    poly_areas,
    city_regions,
)
plot_filename = 'voronoi_regions_custom.png'
plt.savefig(plot_filename, dpi=300)
plt.show()

# Offer the rendered map as a browser download
files.download(plot_filename)

# Write outline, regions and points into one GeoJSON file
print('\nExporting data to a single GeoJSON file for geojson.io...')
export_combined_geojson(area_gdf, voronoi_gdf, gdf, color_map)

# Remove the temporary directory created at the start of the script
import shutil
shutil.rmtree(temp_dir)

# Summary table: one line per region, ordered by region ID
print("\nRegion ID to City mapping:")
for region_id in sorted(city_regions):
    city = city_regions[region_id]
    area_km2 = int(poly_areas.get(region_id, 0))
    print(f"Region {region_id}: {city} - {area_km2} km²")