In [15]:
import os
import json
import shutil
from geojson_validator import validate_structure
from shapely.geometry import shape, mapping
from pyproj import CRS, Transformer

# `validate_geojson_features` processes GeoJSON files by validating their features:

1. **Input Directory**: The function takes the path to a single directory containing `.geojson` files.
2. **Validation**: We'll validate each feature in the GeoJSON files.
3. **Output Directories**: We will output:
   - Invalid features in a new file named `{filename}_invalid_features.geojson`.
   - Valid features in a new file named `{filename}_validated.geojson`.
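4. **Reprojection**: All outputs are expected to be in WGS84 (EPSG:4326). Note that the function does not reproject anything itself: it assumes the inputs are already in WGS84 and copies features through unchanged. Both output files are well-formed GeoJSON `FeatureCollection`s, even when they contain no features.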


5. **Directory Creation**:
   - We ensure the `invalid_features_path` and `valid_features_path` directories exist or create them.

6. **WGS84 Transformation**:
   - No coordinate transformation is actually applied: all input geometries are assumed to already be in WGS84 (EPSG:4326).

7. **Processing GeoJSON Files**:
   - The function iterates through all `.geojson` files in the specified directory.
   - For each file, it reads the GeoJSON data and checks each feature for validity.
   - Valid features are added to a list of valid features; invalid ones are added to a list of invalid features.

8. **Writing Output Files**:
   - The function writes two separate files for each input GeoJSON:
     - A `{filename}_validated.geojson` file containing all valid features.
     - A `{filename}_invalid_features.geojson` file containing all invalid features.
   - Both outputs are valid GeoJSON files, even if empty.

9. **Error Handling**:
   - A feature whose geometry is missing, fails Shapely's validity check, or raises an error while being parsed is written to the invalid-features file instead of stopping the run.


In [17]:


def _is_valid_feature(feature):
    """Return True when `feature` carries a geometry that Shapely reports as valid."""
    # A feature that is not a JSON object (e.g. null or a string) cannot hold
    # a geometry; classify it as invalid instead of crashing on `.get`.
    if not isinstance(feature, dict):
        return False

    geom = feature.get('geometry')
    if not geom:
        # Missing, null, or empty-object geometries are considered invalid.
        return False

    try:
        return bool(shape(geom).is_valid)
    except Exception:
        # Deliberately broad: whatever the reason Shapely cannot build the
        # geometry, the feature belongs in the invalid output and must not
        # abort the rest of the batch.
        return False


def _write_feature_collection(path, features):
    """Write `features` to `path` as an indented GeoJSON FeatureCollection."""
    with open(path, 'w', encoding='utf-8') as out_file:
        json.dump({
            'type': 'FeatureCollection',
            'features': features
        }, out_file, indent=2)


def validate_geojson_features(unvalidated_geojson_path, invalid_features_path, valid_features_path):
    """Split every `.geojson` file in a directory into valid and invalid features.

    For each regular file in `unvalidated_geojson_path` whose name ends with
    `.geojson`, two FeatureCollection files are written (even when empty):

    * `{stem}_validated.geojson` in `valid_features_path`
    * `{stem}_invalid_features.geojson` in `invalid_features_path`

    A feature is valid when it is a JSON object with a non-empty geometry that
    Shapely can build and reports as valid; every other feature is written to
    the invalid file. Features are copied through unchanged: no reprojection
    is performed, so inputs are assumed to already be in WGS84 (EPSG:4326).
    Only `features` is carried over; other top-level members of the input
    file are not copied to the outputs.

    Args:
        unvalidated_geojson_path: Directory containing the input `.geojson` files.
        invalid_features_path: Output directory for invalid features (created if missing).
        valid_features_path: Output directory for valid features (created if missing).

    Returns:
        None. Results are written to disk.

    Raises:
        json.JSONDecodeError: If an input file is not parseable JSON.
    """
    # Create output directories if they don't exist
    os.makedirs(invalid_features_path, exist_ok=True)
    os.makedirs(valid_features_path, exist_ok=True)

    # Sorted so files are processed in a reproducible order across runs.
    for filename in sorted(os.listdir(unvalidated_geojson_path)):
        file_path = os.path.join(unvalidated_geojson_path, filename)

        # Skip non-geojson files (and directories that happen to share the suffix)
        if not filename.endswith('.geojson') or not os.path.isfile(file_path):
            continue

        # GeoJSON is UTF-8; do not depend on the platform's default encoding.
        with open(file_path, 'r', encoding='utf-8') as geojson_file:
            geojson_data = json.load(geojson_file)

        # `or []` also covers an explicit `"features": null`.
        features = geojson_data.get('features') or []

        valid_features = []
        invalid_features = []
        for feature in features:
            if _is_valid_feature(feature):
                valid_features.append(feature)
            else:
                invalid_features.append(feature)

        stem = os.path.splitext(filename)[0]
        _write_feature_collection(
            os.path.join(valid_features_path, f'{stem}_validated.geojson'),
            valid_features,
        )
        _write_feature_collection(
            os.path.join(invalid_features_path, f'{stem}_invalid_features.geojson'),
            invalid_features,
        )

# Example usage:
# validate_geojson_features('/path/to/unvalidated_geojson', '/path/to/invalid_features', '/path/to/valid_features')


In [18]:
# Validate the local WGS84 test subset. Keyword arguments make it explicit
# which directory is the input and which receive the invalid / valid outputs.
validate_geojson_features(
    unvalidated_geojson_path='/Users/maples/Scratch/MRM/data/testing_subset/wgs84',
    invalid_features_path='/Users/maples/Scratch/MRM/data/testing_subset/validated_features/invalid',
    valid_features_path='/Users/maples/Scratch/MRM/data/testing_subset/validated_features/valid',
)
