In [None]:
from shapely.geometry import MultiPolygon
import json
from shapely.geometry import shape, mapping
from shapely.ops import unary_union
from shapely.ops import transform
from pyproj import Transformer
import os
import csv

In [2]:
file = "..\\rawData\\countries.geoJson"

with open(file, 'r') as f:
    data = json.load(f)

In [2]:

file = "..\\rawData\\countries.geoJson"

# EPSG:4326 to EPSG:3857 transformer
transformer = Transformer.from_crs("EPSG:4326", "EPSG:3857", always_xy=True)

with open(file, 'r') as f:
    data = json.load(f)

minx_global, miny_global = float('inf'), float('inf')
maxx_global, maxy_global = float('-inf'), float('-inf')

for feature in data['features']:
    geom = shape(feature['geometry'])

    # Project to EPSG:3857 if needed
    geom = transform(transformer.transform, geom)

    minx, miny, maxx, maxy = geom.bounds

    minx_global = min(minx_global, minx)
    miny_global = min(miny_global, miny)
    maxx_global = max(maxx_global, maxx)
    maxy_global = max(maxy_global, maxy)

print("Global extent (EPSG:3857):")
print(f"  minx: {minx_global:.2f}")
print(f"  miny: {miny_global:.2f}")
print(f"  maxx: {maxx_global:.2f}")
print(f"  maxy: {maxy_global:.2f}")

# Compute global center and extent
center_x = (minx_global + maxx_global) / 2
center_y = (miny_global + maxy_global) / 2
extent = max(maxx_global - minx_global, maxy_global - miny_global)
print(f"  center_x: {center_x:.2f}, center_y: {center_y:.2f}")
print(f"  extent: {extent:.2f} meters")

# If you want to normalize to fit in [-1, 1] for example
desired_output_size = 2.0
scale_factor = extent / desired_output_size
print(f"Suggested scale factor: {scale_factor:.2f}")


Global extent (EPSG:3857):
  minx: -20037508.34
  miny: -242528680.94
  maxx: 20037508.34
  maxy: 18428920.36
  center_x: 0.00, center_y: -112049880.29
  extent: 260957601.31 meters
Suggested scale factor: 130478800.65


## Correct missing ISO

In [10]:

# list all features name where 'ISO3166-1-Alpha-3' == -99
missingIso = [
    feature['properties']['name'] for feature in data['features']
    if feature['properties']['ISO3166-1-Alpha-3'] == '-99'
]
missingIso

missingIso = {
    "France": "FRA",
    "Somaliland": "SOM",
    "Kosovo": "KOS",
}

# replace -99 with corresponding name
for feature in data['features']:
    if feature['properties']['name'] in missingIso:
        feature['properties']['ISO3166-1-Alpha-3'] = missingIso[feature['properties']['name']]


In [11]:
from shapely.geometry import shape, MultiPolygon, Polygon, mapping
from shapely.ops import transform
from pyproj import Transformer
import os, csv, json

def createCoordsFile(format="csv", idx=0, tolerance=0.02, output_dir='../processedData', crs="EPSG:4326"):

    if crs == "EPSG:4326":
        crs_path = "epsg4326"
    elif crs == "EPSG:3857":
        crs_path = "epsg3857"

    feature = data['features'][idx]
    iso3 = feature['properties']['ISO3166-1-Alpha-3']
    geometry = shape(feature['geometry'])

    # Reproject geometry if needed
    if crs == "EPSG:3857":
        transformer = Transformer.from_crs("EPSG:4326", "EPSG:3857", always_xy=True)
        geometry = transform(transformer.transform, geometry)

    simplified_geom = geometry.simplify(tolerance, preserve_topology=True)

    # Ensure geometry is iterable
    if isinstance(simplified_geom, Polygon):
        polygons = [simplified_geom]
    elif isinstance(simplified_geom, MultiPolygon):
        polygons = list(simplified_geom.geoms)
    else:
        raise TypeError(f"Unsupported geometry type: {simplified_geom.geom_type}")

    # Create output folder
    tol_str = str(tolerance).replace('.', '_')
    output_dir = f'../processedData/{format}/{crs_path}/tol{tol_str}'
    os.makedirs(output_dir, exist_ok=True)

    if format == "csv":
        csv_file = os.path.join(output_dir, f'{iso3}.csv')
        with open(csv_file, 'w', newline='') as csvfile:
            writer = csv.writer(csvfile)
            writer.writerow(['poly_idx', 'line_idx', 'lat', 'lon'] if crs == "EPSG:4326" else ['poly_idx', 'line_idx', 'y', 'x'])

            for poly_idx, polygon in enumerate(polygons):
                outline = polygon.boundary

                if outline.geom_type == 'MultiLineString':
                    lines = [list(line.coords) for line in outline.geoms]
                else:
                    lines = [list(outline.coords)]

                for line_idx, line in enumerate(lines):
                    for coord in line:
                        if isinstance(coord, (list, tuple)) and len(coord) == 2:
                            x, y = coord
                            writer.writerow([poly_idx, line_idx, y, x])
                        else:
                            print(f"Warning: Invalid coordinate format: {coord}")

    elif format == "json":
        outline_coords = []
        for polygon in polygons:
            outline = polygon.boundary
            if outline.geom_type == 'MultiLineString':
                outline_coords.extend([list(line.coords) for line in outline.geoms])
            else:
                outline_coords.append(list(outline.coords))

        with open(os.path.join(output_dir, f'{iso3}.json'), 'w') as f:
            json.dump({"coordinates": outline_coords}, f, indent=2)

    print(f"Created {iso3} at {os.path.join(output_dir, f'{iso3}.{format}')}")


In [None]:
from shapely.geometry import shape, MultiPolygon, Polygon
from shapely.ops import transform
from pyproj import Transformer
import os, json

def createCoordsFile(idx=0, tolerance=0.02, output_dir='../processedData', crs="EPSG:4326", format='json', normalize=False, scale_factor=50000):

    # Set CRS subfolder
    crs_path = "epsg4326" if crs == "EPSG:4326" else "epsg3857"

    feature = data['features'][idx]
    iso3 = feature['properties']['ISO3166-1-Alpha-3']
    geometry = shape(feature['geometry'])

    # Reproject geometry if needed
    if crs == "EPSG:3857":
        transformer = Transformer.from_crs("EPSG:4326", "EPSG:3857", always_xy=True)
        geometry = transform(transformer.transform, geometry)

    simplified_geom = geometry.simplify(tolerance, preserve_topology=True)

    # Ensure geometry is iterable
    if isinstance(simplified_geom, Polygon):
        polygons = [simplified_geom]
    elif isinstance(simplified_geom, MultiPolygon):
        polygons = list(simplified_geom.geoms)
    else:
        raise TypeError(f"Unsupported geometry type: {simplified_geom.geom_type}")

    # Prepare output path
    tol_str = str(tolerance).replace('.', '_')
    output_dir = f'../processedData/{format}/{crs_path}/tol{tol_str}'

    if normalize:
        output_dir += f'_normalized_sf{scale_factor}'
    os.makedirs(output_dir, exist_ok=True)

    # Compute normalization parameters
    if normalize:
        bounds = simplified_geom.bounds  # (minx, miny, maxx, maxy)
        center_x = (bounds[0] + bounds[2]) / 2
        center_y = (bounds[1] + bounds[3]) / 2
        scale = 1 / scale_factor

    outline_coords = []

    for polygon in polygons:
        outline = polygon.boundary
        lines = outline.geoms if outline.geom_type == 'MultiLineString' else [outline]
        for line in lines:
            coords = list(line.coords)
            if normalize:
                coords = [((x - center_x) * scale, (y - center_y) * scale) for x, y in coords]
            outline_coords.append(coords)

    # Export to file
    output_path = os.path.join(output_dir, f'{iso3}.json')
    with open(output_path, 'w') as f:
        json.dump({"coordinates": outline_coords}, f, indent=2)

    print(f"Created {iso3} at {output_path}")


In [None]:
for i in range(len(data['features'])):
    createCoordsFile(format='json', idx=i, tolerance=0, output_dir='../processedData', normalize=True, scale_factor=scale_factor)
    createCoordsFile(format='json', crs="EPSG:3857", idx=i, tolerance=0, output_dir='../processedData', normalize=True, scale_factor=scale_factor)

Created IDN at ../processedData/json/epsg4326/tol0_normalized_sf130478800.65253803\IDN.json
Created IDN at ../processedData/json/epsg3857/tol0_normalized_sf130478800.65253803\IDN.json
Created MYS at ../processedData/json/epsg4326/tol0_normalized_sf130478800.65253803\MYS.json
Created MYS at ../processedData/json/epsg3857/tol0_normalized_sf130478800.65253803\MYS.json
Created CHL at ../processedData/json/epsg4326/tol0_normalized_sf130478800.65253803\CHL.json
Created CHL at ../processedData/json/epsg3857/tol0_normalized_sf130478800.65253803\CHL.json
Created BOL at ../processedData/json/epsg4326/tol0_normalized_sf130478800.65253803\BOL.json
Created BOL at ../processedData/json/epsg3857/tol0_normalized_sf130478800.65253803\BOL.json
Created PER at ../processedData/json/epsg4326/tol0_normalized_sf130478800.65253803\PER.json
Created PER at ../processedData/json/epsg3857/tol0_normalized_sf130478800.65253803\PER.json
Created ARG at ../processedData/json/epsg4326/tol0_normalized_sf130478800.652538

In [None]:
# createCoordsFile(format='csv', idx=90, tolerance=0, output_dir='../processedData')