# 📘 Corine Land Cover 2018 – Zonal Statistics by Municipality (Spain)
This notebook computes the area and percentage of each CLC category per municipality, separately for:

- Mainland Spain and the Balearic Islands (Península Ibérica e Illes Balears)

- Canary Islands

It works directly on layers already loaded in QGIS, using PyQGIS, and exports the final result as a CSV table ready for temporal analysis.

## 0. Imports

In [None]:
# Core QGIS classes
from qgis.core import QgsProject, QgsVectorLayer

# Zonal statistics (QGIS 3.x)
from qgis.analysis import QgsZonalStatistics

# Data handling
import pandas as pd


In [None]:
# Show the name of every layer currently loaded in the QGIS project
# (these are the strings the layer lookups below have to match exactly)
[lyr.name() for lyr in QgsProject.instance().mapLayers().values()]


## 1. Helper function: safely retrieve a layer by name
This avoids silent errors when layer names do not match exactly.

In [None]:
def get_layer_by_name(name):
    """
    Look up a layer of the current QGIS project by its exact name.

    Returns the first layer whose name equals ``name``.
    Raises ValueError when the project holds no layer with that name.
    """
    matches = QgsProject.instance().mapLayersByName(name)
    if matches:
        # Several layers may share a name; the first match is used.
        return matches[0]
    raise ValueError(f"Layer not found: {name}")


## 2. Main function: CLC zonal statistics -> DataFrame

In [None]:
def _reproject_and_fix(layer, target_crs):
    """Reproject *layer* to *target_crs* and repair its geometries (in-memory outputs)."""
    import processing

    reprojected = processing.run(
        "native:reprojectlayer",
        {
            "INPUT": layer,
            "TARGET_CRS": target_crs,
            "OUTPUT": "memory:"
        }
    )["OUTPUT"]

    # Invalid geometries make the intersection fail, so repair them first
    return processing.run(
        "native:fixgeometries",
        {
            "INPUT": reprojected,
            "OUTPUT": "memory:"
        }
    )["OUTPUT"]


def clc_vector_stats_to_df(
    municipalities_layer_name,
    clc_layer_name,
    clc_class_field,
    natcode_field="NATCODE",
    name_field="NAMEUNIT",
    year=2018,
    region="peninsula",
    target_crs="EPSG:3035"
):
    """
    Computes area and percentage of each CLC class per municipality
    using vector intersection (CLC is vector).

    Both layers are reprojected to ``target_crs`` and their geometries are
    fixed before the intersection to avoid topology errors. The areas of all
    intersection fragments that share the same municipality and CLC class
    are summed, so the result holds one row per (municipality, CLC class).

    Parameters:
        municipalities_layer_name: name of the municipality layer in the
            current QGIS project.
        clc_layer_name: name of the CLC vector layer in the current project.
        clc_class_field: field of the CLC layer holding the class code.
        natcode_field: field of the municipality layer holding its code.
        name_field: field of the municipality layer holding its name.
        year: value written to the ``year`` column of every row.
        region: value written to the ``region`` column of every row.
        target_crs: CRS used for the area computation. ``area_ha`` divides
            the planar area by 10,000, so the CRS units must be metres for
            the result to be hectares (the default, EPSG:3035, is an
            equal-area CRS in metres).

    Returns:
        pandas.DataFrame with the columns NATCODE, NAMEUNIT, clc_class,
        area_ha, year, region and pct_municipality. ``pct_municipality`` is
        relative to the sum of ``area_ha`` over all rows with the same
        NATCODE, i.e. to the part of the municipality that intersects the
        CLC layer, not to the area of the municipality polygon itself.
        When no intersection has a positive area, an empty DataFrame with
        these columns is returned.

    Raises:
        ValueError: if one of the layer names is not found in the project
            (raised by ``get_layer_by_name``).
    """

    import processing
    import pandas as pd

    output_columns = [
        "NATCODE",
        "NAMEUNIT",
        "clc_class",
        "area_ha",
        "year",
        "region",
        "pct_municipality",
    ]

    # --------------------------------------------------
    # Load layers
    # --------------------------------------------------
    municipalities = get_layer_by_name(municipalities_layer_name)
    clc = get_layer_by_name(clc_layer_name)

    # --------------------------------------------------
    # Reproject + fix geometries (CRITICAL STEP)
    # --------------------------------------------------
    mun_fixed = _reproject_and_fix(municipalities, target_crs)
    clc_fixed = _reproject_and_fix(clc, target_crs)

    # --------------------------------------------------
    # Intersection
    # --------------------------------------------------
    inter = processing.run(
        "native:intersection",
        {
            "INPUT": mun_fixed,
            "OVERLAY": clc_fixed,
            "OUTPUT": "memory:"
        }
    )["OUTPUT"]

    # --------------------------------------------------
    # Accumulate area per (municipality, CLC class)
    # --------------------------------------------------
    # The intersection yields one feature per municipality/CLC-polygon pair,
    # so the same class usually appears in many fragments of a municipality.
    area_by_key = {}

    for feat in inter.getFeatures():
        area_ha = feat.geometry().area() / 10_000

        # Skip empty or degenerate fragments
        if area_ha <= 0:
            continue

        key = (feat[natcode_field], feat[name_field], feat[clc_class_field])
        area_by_key[key] = area_by_key.get(key, 0.0) + area_ha

    # Nothing intersected: return an empty table with the expected schema
    # instead of failing on the missing NATCODE column below.
    if not area_by_key:
        return pd.DataFrame(columns=output_columns)

    df = pd.DataFrame([
        {
            "NATCODE": mun_code,
            "NAMEUNIT": mun_name,
            "clc_class": clc_class,
            "area_ha": area_ha,
            "year": year,
            "region": region
        }
        for (mun_code, mun_name, clc_class), area_ha in area_by_key.items()
    ])

    # --------------------------------------------------
    # Percentage of municipality
    # --------------------------------------------------
    total_area = (
        df.groupby("NATCODE")["area_ha"]
        .sum()
        .rename("mun_area_ha")
    )

    df = df.join(total_area, on="NATCODE")
    df["pct_municipality"] = (df["area_ha"] / df["mun_area_ha"]) * 100
    df = df.drop(columns="mun_area_ha")

    return df


## 3. Mainland Spain (Iberian Peninsula & Balearic Islands)

In [None]:
# Mainland + Balearic municipalities intersected with the CLC 2018 layer
df_peninsula = clc_vector_stats_to_df(
    "Municipios_peninbal_etrs89",  # municipalities_layer_name
    "CLC2018_ES — CLC18_ES",  # clc_layer_name
    "CODE_18",  # clc_class_field
    region="peninsula",
    year=2018,
)


## 4. Canary Islands

In [None]:
# Canary Islands municipalities intersected with the Canary Islands CLC 2018 layer
df_canarias = clc_vector_stats_to_df(
    "Municipios_canarias_regcan95",  # municipalities_layer_name
    "CLC2018_ES — CLC18_ES_Canarias",  # clc_layer_name
    "CODE_18",  # clc_class_field
    region="canarias",
    year=2018,
)

## 5. Union of both regions

In [None]:
import pandas as pd

# Stack both regional tables into one table with a fresh 0..n-1 index
df_clc_2018 = pd.concat(
    objs=[df_peninsula, df_canarias],
    axis=0,
    ignore_index=True,
)

## 6. CLC Levels (Legend)

In [None]:
# ==================================================
# CLC hierarchy: numeric → EN + ES (single source of truth)
# ==================================================

# ---------
# LEVEL 1
# ---------
# One row per class: (one-digit code, English name, Spanish name);
# the resulting dict maps code -> (EN, ES).
clc_lvl1 = {
    code: (name_en, name_es)
    for code, name_en, name_es in (
        (1, "Artificial surfaces", "Superficies artificiales"),
        (2, "Agricultural areas", "Zonas agrícolas"),
        (3, "Forests and semi-natural areas", "Zonas forestales y naturales"),
        (4, "Wetlands", "Zonas húmedas"),
        (5, "Water bodies", "Superficies de agua"),
    )
}

# ---------
# LEVEL 2
# ---------
# One row per class: (two-digit code, English name, Spanish name);
# the resulting dict maps code -> (EN, ES).
clc_lvl2 = {
    code: (name_en, name_es)
    for code, name_en, name_es in (
        # 1x - artificial surfaces
        (11, "Urban fabric", "Tejido urbano"),
        (12, "Industrial, commercial and transport units", "Zonas industriales, comerciales y de transporte"),
        (13, "Mine, dump and construction sites", "Zonas de extracción, vertederos y construcción"),
        (14, "Artificial, non-agricultural vegetated areas", "Zonas verdes artificiales"),
        # 2x - agricultural areas
        (21, "Arable land", "Tierras de labor"),
        (22, "Permanent crops", "Cultivos permanentes"),
        (23, "Pastures", "Praderas"),
        (24, "Heterogeneous agricultural areas", "Zonas agrícolas heterogéneas"),
        # 3x - forests and semi-natural areas
        (31, "Forests", "Bosques"),
        (32, "Scrub and/or herbaceous vegetation associations", "Vegetación arbustiva y herbácea"),
        (33, "Open spaces with little or no vegetation", "Espacios abiertos con poca o nula vegetación"),
        # 4x - wetlands
        (41, "Inland wetlands", "Zonas húmedas continentales"),
        (42, "Coastal wetlands", "Zonas húmedas litorales"),
        # 5x - water bodies
        (51, "Inland waters", "Aguas continentales"),
        (52, "Marine waters", "Aguas marinas"),
    )
}

# ---------
# LEVEL 3
# ---------
# One row per class: (three-digit code, English name, Spanish name);
# the resulting dict maps code -> (EN, ES).
clc_lvl3 = {
    code: (name_en, name_es)
    for code, name_en, name_es in (
        # 1xx - artificial surfaces
        (111, "Continuous urban fabric", "Tejido urbano continuo"),
        (112, "Discontinuous urban fabric", "Tejido urbano discontinuo"),
        (121, "Industrial or commercial units", "Zonas industriales o comerciales"),
        (122, "Road and rail networks and associated land", "Red viaria y ferroviaria"),
        (123, "Port areas", "Zonas portuarias"),
        (124, "Airports", "Aeropuertos"),
        (131, "Mineral extraction sites", "Zonas de extracción minera"),
        (132, "Dump sites", "Vertederos"),
        (133, "Construction sites", "Zonas en construcción"),
        (141, "Green urban areas", "Zonas verdes urbanas"),
        (142, "Sport and leisure facilities", "Instalaciones deportivas y recreativas"),
        # 2xx - agricultural areas
        (211, "Non-irrigated arable land", "Tierras de labor en secano"),
        (212, "Permanently irrigated land", "Tierras de labor en regadío"),
        (213, "Rice fields", "Arrozales"),
        (221, "Vineyards", "Viñedos"),
        (222, "Fruit trees and berry plantations", "Frutales"),
        (223, "Olive groves", "Olivares"),
        (231, "Pastures", "Praderas"),
        (241, "Annual crops associated with permanent crops", "Cultivos anuales asociados a cultivos permanentes"),
        (242, "Complex cultivation patterns", "Mosaico de cultivos"),
        (
            243,
            "Land principally occupied by agriculture, with significant areas of natural vegetation",
            "Terrenos agrícolas con importantes espacios naturales",
        ),
        (244, "Agro-forestry areas", "Sistemas agroforestales"),
        # 3xx - forests and semi-natural areas
        (311, "Broad-leaved forest", "Bosques de frondosas"),
        (312, "Coniferous forest", "Bosques de coníferas"),
        (313, "Mixed forest", "Bosque mixto"),
        (321, "Natural grasslands", "Pastizales naturales"),
        (322, "Moors and heathland", "Brezales y matorrales"),
        (323, "Sclerophyllous vegetation", "Vegetación esclerófila"),
        (324, "Transitional woodland-shrub", "Matorral y bosque en transición"),
        (331, "Beaches, dunes, sands", "Playas, dunas y arenales"),
        (332, "Bare rocks", "Roquedos"),
        (333, "Sparsely vegetated areas", "Espacios con vegetación escasa"),
        (334, "Burnt areas", "Zonas quemadas"),
        (335, "Glaciers and perpetual snow", "Glaciares y nieves perpetuas"),
        # 4xx - wetlands
        (411, "Inland marshes", "Marismas continentales"),
        (412, "Peat bogs", "Turberas"),
        (421, "Salt marshes", "Marismas salinas"),
        (422, "Salines", "Salinas"),
        (423, "Intertidal flats", "Zonas intermareales"),
        # 5xx - water bodies
        (511, "Water courses", "Cursos de agua"),
        (512, "Water bodies", "Láminas de agua"),
        (521, "Coastal lagoons", "Lagunas costeras"),
        (522, "Estuaries", "Estuarios"),
        (523, "Sea and ocean", "Mar y océano"),
    )
}

# ==================================================
# Attach the EN/ES names of every CLC level to each row
# ==================================================

def _clc_name_getter(table, position):
    """Build a code -> name lookup on *table* (position 0 = EN, 1 = ES; unknown codes give None)."""
    def _getter(code):
        return table.get(code, (None, None))[position]
    return _getter


# The lookup tables are keyed by integers, so the codes must be integers too
df_clc_2018["clc_class"] = df_clc_2018["clc_class"].astype(int)

# Level 3 (priority): the code itself is the key
_lvl3_codes = df_clc_2018["clc_class"]
df_clc_2018["clc_level_3"] = _lvl3_codes.map(_clc_name_getter(clc_lvl3, 0))
df_clc_2018["clc_level_3_es"] = _lvl3_codes.map(_clc_name_getter(clc_lvl3, 1))

# Level 2: first two characters of the code
_lvl2_codes = df_clc_2018["clc_class"].astype(str).str[:2].astype(int)
df_clc_2018["clc_level_2"] = _lvl2_codes.map(_clc_name_getter(clc_lvl2, 0))
df_clc_2018["clc_level_2_es"] = _lvl2_codes.map(_clc_name_getter(clc_lvl2, 1))

# Level 1: first character of the code
_lvl1_codes = df_clc_2018["clc_class"].astype(str).str[0].astype(int)
df_clc_2018["clc_level_1"] = _lvl1_codes.map(_clc_name_getter(clc_lvl1, 0))
df_clc_2018["clc_level_1_es"] = _lvl1_codes.map(_clc_name_getter(clc_lvl1, 1))

df_clc_2018.head()


In [None]:
# ==================================================
# Final column order (paper & GitHub ready)
# ==================================================

final_columns = [
    "NATCODE", "NAMEUNIT",  # identifiers
    "year",  # temporal
    "clc_class",  # CLC numeric code
    "area_ha", "pct_municipality",  # metrics
    "clc_level_3", "clc_level_2", "clc_level_1",  # CLC hierarchy (EN), 3 → 2 → 1
    "clc_level_3_es", "clc_level_2_es", "clc_level_1_es",  # CLC hierarchy (ES)
]

# Keep only the listed columns that actually exist, in the listed order;
# every other column (e.g. 'region') is dropped.
_available_columns = set(df_clc_2018.columns)
df_clc_2018 = df_clc_2018[
    [col for col in final_columns if col in _available_columns]
]

df_clc_2018.head()


## 7. Export

In [None]:
# Destination of the final table (raw string, so the backslashes are kept literally)
_output_csv = r"C:\Users\juanz\OneDrive\Desktop\UCM\RURIM ESCAPE\GeoSpatial\02_CLC\CLC2018_GPKG/CLC2018_municipios_ES_area_pct.csv"

# Write the table without the DataFrame index column
df_clc_2018.to_csv(_output_csv, index=False)