In [6]:
import json
import os

import folium
import lxml.etree as etree
import pandas as pd

In [26]:
output_dir = "Geographical_Clusters"
if not os.path.exists(output_dir):
    os.makedirs(output_dir)

kml_file = '1DrainageRegions_SouthAfrica__doc.kml'
geojson_path = os.path.join(output_dir, "africawaterbody.geojson")

def safe_extract_clusters(input_path, output_path):
    with open(input_path, 'rb') as f:
        tree = etree.parse(f)

    root = tree.getroot()
    # Using a wildcard namespace to avoid 'NoneType' or 'Namespace' errors
    ns = {"ns": root.tag.split('}')[0].strip('{')} if '}' in root.tag else {}
    
    features = []
    
    # Use //Placemark to find all entries regardless of folder depth
    query = ".//ns:Placemark" if ns else ".//Placemark"
    
    for placemark in root.xpath(query, namespaces=ns):
        # Find name and coordinates
        name_node = placemark.find(".//ns:name", namespaces=ns) if ns else placemark.find(".//name")
        coord_node = placemark.find(".//ns:coordinates", namespaces=ns) if ns else placemark.find(".//coordinates")
        
        if coord_node is not None and coord_node.text:
            # Clean coordinate strings
            raw_coords = coord_node.text.strip().split()
            polygon_coords = []
            
            for c in raw_coords:
                parts = c.split(',')
                if len(parts) >= 2:
                    polygon_coords.append([float(parts[0]), float(parts[1])])
            
            # Create GeoJSON Feature
            feature = {
                "type": "Feature",
                "properties": {
                    "cluster_name": name_node.text.strip() if name_node is not None else "Unknown"
                },
                "geometry": {
                    "type": "Polygon",
                    "coordinates": [polygon_coords]
                }
            }
            features.append(feature)
            
    geojson_data = {
        "type": "FeatureCollection",
        "features": features
    }
    
    with open(output_path, 'w') as f:
        json.dump(geojson_data, f, indent=2)
    
    return len(features)

In [27]:
try:
    count = safe_extract_clusters(kml_file, geojson_path)
    print(f"✅ Success! {count} clusters saved to {geojson_path}")
except Exception as e:
    print(f"❌ Error: {e}")

✅ Success! 24 clusters saved to Geographical_Clusters/africawaterbody.geojson


In [28]:
if os.path.exists(geojson_path):
    with open(geojson_path, 'r') as f:
        preview = json.load(f)
        print(f"Preview of first cluster: {preview['features'][0]['properties']}")

Preview of first cluster: {'cluster_name': 'Region A'}


In [29]:
import folium
import json

# --- STEP 1: LOAD YOUR NEW CLUSTER FILE ---
geojson_path = "Geographical_Clusters/africawaterbody.geojson"

with open(geojson_path, 'r') as f:
    cluster_data = json.load(f)

# --- STEP 2: CREATE THE BASE MAP ---
# Centered on South Africa
m = folium.Map(location=[-28.479, 24.672], zoom_start=5, tiles='cartodbpositron')

# --- STEP 3: ADD THE CLUSTER POLYGONS ---
folium.GeoJson(
    cluster_data,
    name='Geographical Clusters',
    style_function=lambda x: {
        'fillColor': '#3186cc',
        'color': 'black',
        'weight': 1,
        'fillOpacity': 0.4,
    },
    tooltip=folium.GeoJsonTooltip(
        fields=['cluster_name'],
        aliases=['Cluster Region:'],
        labels=True
    )
).add_to(m)

# --- STEP 4: DISPLAY ---
m

In [30]:
# This line specifically visualizes your Geographical Clusters
geojson_path = "Geographical_Clusters/africawaterbody.geojson"

# Create the visualization line
folium.GeoJson(
    geojson_path,
    name='Geographical Clusters',
    tooltip=folium.GeoJsonTooltip(fields=['cluster_name'], aliases=['Region:'])
).add_to(m)

<folium.features.GeoJson at 0x10fead030>