# Download the data

In [None]:
# Download the "latest" OpenStreetMap extract (.osm.pbf) of seven continental
# regions from download.geofabrik.de. The following cells refer to these files
# by their bare file names.
!wget https://download.geofabrik.de/africa-latest.osm.pbf
!wget https://download.geofabrik.de/asia-latest.osm.pbf
!wget https://download.geofabrik.de/australia-oceania-latest.osm.pbf
!wget https://download.geofabrik.de/central-america-latest.osm.pbf
!wget https://download.geofabrik.de/europe-latest.osm.pbf
!wget https://download.geofabrik.de/north-america-latest.osm.pbf
!wget https://download.geofabrik.de/south-america-latest.osm.pbf

# Data preprocessing

In [None]:
# Reduce each continental extract to the objects matched by the `barrier=fence`
# tag filter, writing one smaller `fences_<region>.osm.pbf` file per region.
!osmium tags-filter africa-latest.osm.pbf barrier=fence -o fences_africa.osm.pbf
!osmium tags-filter asia-latest.osm.pbf barrier=fence -o fences_asia.osm.pbf
!osmium tags-filter australia-oceania-latest.osm.pbf barrier=fence -o fences_australia_oceania.osm.pbf
!osmium tags-filter central-america-latest.osm.pbf barrier=fence -o fences_central_america.osm.pbf
!osmium tags-filter europe-latest.osm.pbf barrier=fence -o fences_europe.osm.pbf
!osmium tags-filter north-america-latest.osm.pbf barrier=fence -o fences_north_america.osm.pbf
!osmium tags-filter south-america-latest.osm.pbf barrier=fence -o fences_south_america.osm.pbf

In [None]:
# Convert the `lines` layer of each filtered extract to a GeoPackage, keeping
# only the `other_tags` and `barrier` fields and the rows where barrier='fence'.
# `other_tags` is kept because the fence type is parsed out of it further below.
!ogr2ogr -f GPKG fences_africa.gpkg fences_africa.osm.pbf -select other_tags,barrier -where "barrier='fence'" lines
!ogr2ogr -f GPKG fences_asia.gpkg fences_asia.osm.pbf -select other_tags,barrier -where "barrier='fence'" lines
!ogr2ogr -f GPKG fences_australia_oceania.gpkg fences_australia_oceania.osm.pbf -select other_tags,barrier -where "barrier='fence'" lines
!ogr2ogr -f GPKG fences_central_america.gpkg fences_central_america.osm.pbf -select other_tags,barrier -where "barrier='fence'" lines
!ogr2ogr -f GPKG fences_europe.gpkg fences_europe.osm.pbf -select other_tags,barrier -where "barrier='fence'" lines
!ogr2ogr -f GPKG fences_north_america.gpkg fences_north_america.osm.pbf -select other_tags,barrier -where "barrier='fence'" lines
!ogr2ogr -f GPKG fences_south_america.gpkg fences_south_america.osm.pbf -select other_tags,barrier -where "barrier='fence'" lines

# Extract and clip fences by region or continent

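This script loads global administrative boundaries and a dataset of fences, then, for each country in a **specific region or continent**, selects the fences that intersect that country and saves them as a separate GeoPackage file. Fences are kept whole (they are not cut at the border), so a fence that crosses a border is written to every country it touches.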

In [None]:
import geopandas as gpd
import os
from tqdm import tqdm

def clip_fences_by_region(admin_boundaries_path, fences_path, output_folder, filter_column, filter_value):
    """
    Selects, per country of a region or continent, the fences intersecting that country.

    Fences are selected whole with an `intersects` test; they are not cut at the
    country border, so a fence crossing a border is saved for every country it touches.

    Parameters:
    - admin_boundaries_path: str -> Path to the administrative boundaries shapefile
      (must provide a 'name' column, which is used to name the output folders/files)
    - fences_path: str -> Path to the fences dataset (GeoPackage)
    - output_folder: str -> Directory where results will be saved
    - filter_column: str -> Name of the column in the boundaries dataset used for filtering (e.g., 'region' or 'continent')
    - filter_value: str -> Value of the region/continent to filter (e.g., 'Caribbean', 'Europe')

    Output:
    - `<output_folder>/<country>/<country>.gpkg` (layer 'fences'), one per country
      within the selected region/continent.
    """

    # Load datasets
    print("📂 Loading files...")
    boundaries = gpd.read_file(admin_boundaries_path)
    fences = gpd.read_file(fences_path)

    # Filter boundaries for the specified region or continent
    selected_boundaries = boundaries[boundaries[filter_column] == filter_value]

    # Create output directory
    os.makedirs(output_folder, exist_ok=True)

    # A typo in filter_column/filter_value would otherwise end in a misleading "completed" message
    if selected_boundaries.empty:
        print(f"⚠ No boundaries found where '{filter_column}' equals '{filter_value}', nothing to clip.")
        return

    # The intersects test compares raw coordinates, so both layers must share one CRS;
    # reproject the (small) boundary selection rather than the (large) fence layer.
    if boundaries.crs is not None and fences.crs is not None and boundaries.crs != fences.crs:
        selected_boundaries = selected_boundaries.to_crs(fences.crs)

    # Select fences by country and save results
    print(f"✂ Clipping fences for {filter_value} ({len(selected_boundaries)} countries)...")
    for _, country in tqdm(selected_boundaries.iterrows(), total=len(selected_boundaries), desc="Processing", unit="country"):
        # Clean country name: spaces become underscores, and path separators are
        # neutralised so the name always maps to exactly one folder level.
        country_name = country["name"].replace(" ", "_")
        for separator in ("/", "\\"):
            country_name = country_name.replace(separator, "_")
        country_geom = country.geometry

        # Keep the fences that intersect the country's boundary
        fences_in_country = fences[fences.intersects(country_geom)]

        # Create a folder for the country and save as GeoPackage
        country_folder = os.path.join(output_folder, country_name)
        os.makedirs(country_folder, exist_ok=True)
        fences_in_country.to_file(os.path.join(country_folder, f"{country_name}.gpkg"), layer="fences", driver="GPKG")

    print(f"🎉 Clipping completed! All files are stored in '{output_folder}'")

In [None]:
# Example usage: select the fences of every country whose `region` is "Caribbean"
# and write them below the `Caribbean` folder.
# NOTE(review): both input paths are absolute and machine-specific, and
# `fences_filtered.gpkg` is not one of the `fences_<region>.gpkg` files produced
# by the ogr2ogr cell above — confirm where this file comes from.
clip_fences_by_region(
    admin_boundaries_path="/home/user/world-administrative-boundaries.shp",
    fences_path="/home/user/fences_filtered.gpkg",
    output_folder="Caribbean",
    filter_column="region",
    filter_value="Caribbean"
)

# Extract and filter fence types

This script processes **GeoPackage files** containing fence data for different countries within a **specified region or continent**.  
It extracts the `fence_type` attribute from the `other_tags` field, filters out fences without a valid `fence_type`, and saves the cleaned dataset.

In [None]:
import geopandas as gpd
import os
import re
import pandas as pd
from tqdm import tqdm

def extract_fence_type(tags):
    """
    Pulls the 'fence_type' value out of an `other_tags` string.

    Parameters:
    - tags (str): The string containing all OSM tags, written as `"key"=>"value"` pairs

    Returns:
    - (str or None): The text between the quotes following `"fence_type"=>`,
      or None when `tags` is empty/missing or has no such key
    """
    has_tags = bool(tags) and not pd.isna(tags)  # rules out "", None and NaN
    if has_tags:
        found = re.search(r'"fence_type"=>"(.*?)"', tags)  # non-greedy: stop at the first closing quote
        if found:
            return found.group(1)
    return None


def process_fence_data(region_folder):
    """
    Processes and filters fences for a specific region or continent.

    For every country subfolder, reads `<country>.gpkg` (layer 'fences'), derives a
    `fence_type` column from `other_tags`, keeps only the rows where a `fence_type`
    was found and drops the `other_tags` column.

    Parameters:
    - region_folder (str): Path to the folder containing country-level subfolders.

    Output:
    - Writes `<country>_classes.gpkg` (layer 'fences_classes') into each country folder
      that has at least one fence with a `fence_type`. Countries without an input file,
      without an `other_tags` column or without any `fence_type` are reported and skipped.
    """

    # 📂 Iterate through each country folder
    for country_name in tqdm(os.listdir(region_folder), desc=f"Processing {region_folder}"):
        country_path = os.path.join(region_folder, country_name)

        # Ensure it's a directory
        if not os.path.isdir(country_path):
            continue

        # 📌 Path to the original GeoPackage file
        input_gpkg = os.path.join(country_path, f"{country_name}.gpkg")
        if not os.path.exists(input_gpkg):
            print(f"⚠ No file found for {country_name}, skipping...")
            continue

        # 📂 Load the GeoPackage file
        gdf = gpd.read_file(input_gpkg, layer="fences")

        # One malformed country file must not abort the whole region with a KeyError
        if "other_tags" not in gdf.columns:
            print(f"⚠ No 'other_tags' column found in {input_gpkg}, skipping...")
            continue

        # 📌 Derive `fence_type`, keep only rows where it is set, then drop the raw tags
        gdf_filtered = (
            gdf.assign(fence_type=gdf["other_tags"].apply(extract_fence_type))
            .dropna(subset=["fence_type"])
            .drop(columns=["other_tags"])
        )

        # 📌 Save only if valid fences exist
        if not gdf_filtered.empty:
            output_gpkg = os.path.join(country_path, f"{country_name}_classes.gpkg")
            gdf_filtered.to_file(output_gpkg, layer="fences_classes", driver="GPKG")
            print(f"✅ {output_gpkg} created with {len(gdf_filtered)} fences containing `fence_type`.")
        else:
            print(f"⚠ No `fence_type` found for {country_name}, no file generated.")

    print(f"🎉 Processing completed! All files are stored in '{region_folder}'")

In [None]:
# Example usage: Process fences for the Europe region
# NOTE(review): this expects a folder named `Europe` holding one subfolder per
# country; the clipping example above writes to `Caribbean`, so the clipping step
# has to be run for Europe first.
process_fence_data("Europe")

# Extract unique fence types

This script processes the **GeoPackage files** containing categorized fence data (`<country>_classes.gpkg`, layer `fences_classes`) for the countries in a **specified region or continent**.  
It extracts **unique `fence_type` values** and saves them in a text file for each country.


In [None]:
import geopandas as gpd
import os
from tqdm import tqdm

def extract_unique_fence_types(region_folder):
    """
    Lists the distinct 'fence_type' values found in each country's classified fence file.

    Parameters:
    - region_folder (str): Path to the folder containing country-level subfolders.

    Output:
    - Writes `<country>_classes_unique.txt` into each country folder that has a
      `<country>_classes.gpkg` file: one sorted, non-null 'fence_type' value per line.
    """
    entries = os.listdir(region_folder)

    for entry in tqdm(entries, desc=f"Processing {region_folder}"):
        folder = os.path.join(region_folder, entry)

        # Plain files inside the region folder are not countries
        if os.path.isdir(folder):
            classes_gpkg = os.path.join(folder, f"{entry}_classes.gpkg")

            if os.path.exists(classes_gpkg):
                # Read the classified layer and collect its distinct fence types
                layer = gpd.read_file(classes_gpkg, layer="fences_classes")
                distinct = layer["fence_type"].dropna().unique()

                # One value per line, sorted for readability
                report_path = os.path.join(folder, f"{entry}_classes_unique.txt")
                with open(report_path, "w", encoding="utf-8") as out:
                    out.writelines(name + "\n" for name in sorted(distinct))

                print(f"✅ {report_path} created with {len(distinct)} unique values.")
            else:
                print(f"⚠ No file found for {entry}, skipping...")

    print(f"🎉 Extraction completed! All text files are stored in '{region_folder}'.")


In [None]:
# Example usage: Extract unique fence types for the Europe region
# (reads the `<country>_classes.gpkg` files written by `process_fence_data`)
extract_unique_fence_types("Europe")

# Merge unique fence types from all countries in a region

This script scans through country-specific text files containing **unique `fence_type` values** and merges them into a single **global output file** for a given **region or continent**.

In [None]:
import os

def merge_fence_types(region_folder, output_file):
    """
    Merges all unique 'fence_type' values from individual country text files into a single output file.

    Parameters:
    - region_folder (str): Path to the folder containing country-level subfolders.
    - output_file (str): Path to the global text file where unique fence types will be stored.

    Output:
    - A single text file containing all unique 'fence_type' values from the region,
      sorted, one per line. Surrounding whitespace is stripped from every value and
      blank lines in the country files are ignored.
    """

    unique_fence_types = set()

    # 📂 Iterate through each country folder
    for country_name in os.listdir(region_folder):
        country_path = os.path.join(region_folder, country_name)

        # Ensure it's a directory
        if not os.path.isdir(country_path):
            continue

        # 📌 Path to the country-specific fence type file
        input_txt = os.path.join(country_path, f"{country_name}_classes_unique.txt")
        if not os.path.exists(input_txt):
            print(f"⚠ No file found for {country_name}, skipping...")
            continue

        # 📂 Read unique 'fence_type' values and add to the set;
        # blank lines would otherwise end up as an empty entry in the merged list
        with open(input_txt, "r", encoding="utf-8") as f:
            unique_fence_types.update(value for value in map(str.strip, f) if value)

    # 📌 Write the sorted unique values to the global output file
    with open(output_file, "w", encoding="utf-8") as f:
        f.writelines(value + "\n" for value in sorted(unique_fence_types))  # Sort for readability

    print(f"✅ Merge completed! All unique values are stored in '{output_file}'")

In [None]:
# Example usage: Merge all fence types for the Europe region
# (the merged list is written to `all_fence_types.txt` in the current working directory)
merge_fence_types("Europe", "all_fence_types.txt")

# Correct fence type labels

This script applies **manual corrections** to `fence_type` values in **GeoPackage files** for different countries within a given **region or continent**.  
The corrections are based on a **CSV mapping file**, manually created from a previously extracted list of unique fence types.


In [None]:
import os
import pandas as pd
import geopandas as gpd

def load_mapping(mapping_file):
    """
    Loads the manually created mapping from a CSV file.

    Each line is split at its LAST ";": the text before it is the original value
    (which may itself contain ";"), the text after it is the corrected value.
    Both sides are stripped of surrounding whitespace. Lines without ";" are ignored.
    A corrected value of "NULL" is stored as None.

    Parameters:
    - mapping_file (str): Path to the CSV file containing original and corrected fence types.

    Returns:
    - dict: A dictionary mapping original values to corrected values.
    """
    mapping_dict = {}

    # "utf-8-sig" reads plain UTF-8 and also drops a leading byte-order mark, which
    # spreadsheet tools often add and which would otherwise corrupt the first key.
    with open(mapping_file, "r", encoding="utf-8-sig") as f:
        for line in f:
            original_value, separator, corrected_value = line.strip().rpartition(";")
            if not separator:  # no ";" on this line
                continue

            original_value = original_value.strip()
            corrected_value = corrected_value.strip()

            # Convert "NULL" into Python None
            mapping_dict[original_value] = None if corrected_value == "NULL" else corrected_value

    return mapping_dict

def correct_fence_types(region_folder, mapping_file):
    """
    Applies the manual mapping to correct 'fence_type' values in country-specific GeoPackage files.

    A value is looked up in the mapping first as a whole (stripped of surrounding
    whitespace), so mapping keys that contain ";" can match. If the whole value has
    no entry, the text after its last ";" is looked up instead. Values with no
    matching entry are kept unchanged; missing values become None.

    Parameters:
    - region_folder (str): Path to the folder containing country-level subfolders.
    - mapping_file (str): Path to the CSV file with manual corrections.

    Output:
    - Reads `<country>_classes.gpkg` from each country folder and writes the corrected
      data to `<country>_cleaned.gpkg` (layer 'fences') in the same folder.
    """

    # Load the manual mapping
    mapping_dict = load_mapping(mapping_file)

    def correct_fence_type(value):
        """Returns the corrected value for one 'fence_type' entry."""
        if pd.isna(value):
            return None
        text = str(value).strip()
        # Exact match first: keys produced by load_mapping may themselves contain ";"
        if text in mapping_dict:
            return mapping_dict[text]
        last_part = text.split(";")[-1].strip()  # Extract last value after last ";"
        return mapping_dict.get(last_part, value)  # Apply correction if found

    # 📂 Iterate through each country folder
    for country_name in os.listdir(region_folder):
        country_path = os.path.join(region_folder, country_name)

        if not os.path.isdir(country_path):  # Ensure it's a directory
            continue

        gpkg_file = os.path.join(country_path, f"{country_name}_classes.gpkg")

        if not os.path.exists(gpkg_file):  # Check if the file exists
            print(f"❌ File not found: {gpkg_file}")
            continue

        print(f"📌 Processing file: {gpkg_file}")

        # Load the GeoPackage file
        gdf = gpd.read_file(gpkg_file)

        if "fence_type" not in gdf.columns:
            print(f"⚠️ No 'fence_type' column found in {gpkg_file}")
            continue

        # Apply mapping to correct values
        gdf["fence_type"] = gdf["fence_type"].apply(correct_fence_type)

        # 📌 Save the cleaned file
        cleaned_gpkg = os.path.join(country_path, f"{country_name}_cleaned.gpkg")
        gdf.to_file(cleaned_gpkg, layer="fences", driver="GPKG")

        print(f"✅ Corrected file saved: {cleaned_gpkg}")

    print("🎯 Processing completed for all countries!")


In [None]:
# Example usage: Apply corrections to the Europe region
# NOTE(review): the mapping path is absolute and machine-specific; per the section
# text, this CSV is created by hand from the extracted list of unique fence types.
correct_fence_types("Europe", "/home/user/all_fence_types.csv")

# Merge cleaned fence data

This script merges all **cleaned** fence datasets (`*_cleaned.gpkg`) from different countries in a **specified region or continent** into a single **GeoPackage file** for further analysis.


In [None]:
import os
import geopandas as gpd
import pandas as pd  # Required for concatenation

def merge_cleaned_fences(region_folder, output_gpkg):
    """
    Combines every country's cleaned fence file into one GeoPackage.

    Parameters:
    - region_folder (str): Path to the folder containing country-level subfolders.
    - output_gpkg (str): Path to the final merged GeoPackage file.

    Output:
    - A single GeoPackage (layer 'fences') holding the rows of all `<country>_cleaned.gpkg`
      files, with an added `country` column naming the folder each row came from.
      Nothing is written when no cleaned file is found.
    """
    frames = []  # one GeoDataFrame per country that has a cleaned file

    for entry in os.listdir(region_folder):
        folder = os.path.join(region_folder, entry)

        # Only subfolders represent countries
        if not os.path.isdir(folder):
            continue

        source = os.path.join(folder, f"{entry}_cleaned.gpkg")
        if not os.path.exists(source):
            print(f"⚠️ File not found for {entry}: {source}")
            continue

        print(f"📌 Loading: {source}")
        frame = gpd.read_file(source, layer="fences")
        frame["country"] = entry  # remember the country of origin
        frames.append(frame)

    if not frames:
        print("❌ No `_cleaned.gpkg` files found, nothing to merge.")
        return

    # Stack all countries and write them out as one layer
    combined = pd.concat(frames, ignore_index=True)
    merged_gdf = gpd.GeoDataFrame(combined, crs="EPSG:4326")
    merged_gdf.to_file(output_gpkg, layer="fences", driver="GPKG")

    print(f"✅ Merge completed! File saved as: {output_gpkg}")
In [None]:
# Example usage: Merge cleaned fences for the Europe region
# (reads the `<country>_cleaned.gpkg` files written by `correct_fence_types`)
merge_cleaned_fences("Europe", "Europe_merged.gpkg")