# ESA Land Cover

[ESA World Cover v200 2021](https://developers.google.com/earth-engine/datasets/catalog/ESA_WorldCover_v200)

- 10 : Tree cover
- 20 : Shrubland
- 30 : Grassland
- 40 : Cropland
- 50 : Built-up
- 60 : Bare / sparse vegetation
- 70 : Snow and ice
- 80 : Permanent water bodies
- 90 : Herbaceous wetland
- 95 : Mangroves
- 100 : Moss and lichen


In [1]:
import sys

sys.path.insert(0, "../../src")
from imports import *

init_notebook()

# Required libraries
import rasterio
from rasterio.merge import merge
import numpy as np
import glob
import os

# Merging and Wrangling ESA Files


### Google Earth Engine Download Code


```javascript
// Load the ESA WorldCover dataset (v200)
var esaWorldCoverCollection = ee.ImageCollection("ESA/WorldCover/v200");

// Define the region over France
// var franceRegion = ee.Geometry.Rectangle([-5.142, 41.333, 9.561, 51.124]);

// Create a list of pre-defined bounding boxes covering the region
// [xmin, ymax, xmax, ymin]
var subregionBounds = [
  [
    -5.094805241074378, 51.28451913909632, 2.6066107745506217, 46.62898523639934,
  ], // Top left quadrant
  [2.6066107745506217, 51.28451913909632, 9.74202276019696, 46.62898523639934], // Top right quadrant
  [2.6066107745506217, 46.62898523639934, 9.74202276019696, 41.29366630612533], // Bottom right quadrant
  [
    -5.094805241074378, 46.62898523639934, 2.6066107745506217, 41.29366630612533,
  ], // Bottom left quadrant
];

// Queue Drive export tasks for every ESA WorldCover image that intersects one
// subregion.
//   bounds: four-number array passed straight to ee.Geometry.Rectangle.
//   index:  position of the subregion; only used to build a label for the
//           "no images" message.
var exportImagesForSubregion = function (bounds, index) {
  var subregionGeometry = ee.Geometry.Rectangle(bounds);
  var subregionName = "Subregion_" + index;

  // Filter the ESA WorldCover collection for the subregion
  var subregionCollection =
    esaWorldCoverCollection.filterBounds(subregionGeometry);

  // Get a list of image IDs for the subregion
  var subregionImageIds = subregionCollection.aggregate_array("system:id");

  // Start one Drive export for a single image ID, clipped to this subregion
  var exportSubregionImage = function (imageId) {
    var image = ee.Image(imageId);

    // NOTE(review): the description is the image ID with "/" replaced by "_"
    // plus a trailing "_"; subregionName is not part of it, so the same image
    // exported for different subregions gets an identical description.
    // Confirm whether subregionName was meant to be appended.
    Export.image.toDrive({
      image: image,
      description: imageId.split("/").join("_") + "_",
      folder: "ee_land_cover_map_esa", // Change this to your desired folder name
      scale: 10, // Change this to your desired scale
      maxPixels: 1e13,
      region: subregionGeometry,
      crs: "EPSG:4326",
    });
  };

  // Use evaluate to retrieve image IDs for the subregion as an array;
  // the exports are started from inside the callback.
  subregionImageIds.evaluate(function (ids) {
    // Only export when the callback received a truthy result
    // (note: an empty array is truthy and simply exports nothing)
    if (ids) {
      ids.forEach(exportSubregionImage);
    } else {
      print("No image IDs found for subregion: " + subregionName);
    }
  });
};

// Loop through the subregion bounding boxes and export images for each subregion
subregionBounds.forEach(exportImagesForSubregion);
```


## Merge into one tiff file


In [2]:
def combine_tiff_files_and_modify_pixels(
    file_paths,
    binary_target,
    output_file,
):
    """
    Combine multiple TIFF files into a single TIFF file and turn it into a binary map.

    Every pixel whose land-cover class code belongs to ``binary_target`` is set
    to 1, all other pixels are set to 0.

    Parameters:
    - file_paths: List of strings. Paths to the TIFF files to be combined.
    - binary_target: String. Name of the class group that should become 1:
        "forest" (10), "built" (50), "crop" (40), "water" (80),
        "vegetation" (10, 20, 30, 90, 95, 100), "bare" (60, 70),
        "vegetation20", "vegetation30", "vegetation90", "vegetation95",
        "vegetation100", "bare60", "bare70" (the single code in the name).
    - output_file: String. The path where the combined and modified TIFF file will be saved.

    Raises:
    - ValueError: If ``binary_target`` is not one of the names above, or if
      ``file_paths`` contains no files.
    """

    # Class codes that map to 1 for each supported target name
    target_classes = {
        "forest": [10],
        "built": [50],
        "crop": [40],
        "water": [80],
        "vegetation": [10, 20, 30, 90, 95, 100],
        "bare": [70, 60],
        "vegetation20": [20],
        "vegetation30": [30],
        "vegetation90": [90],
        "vegetation95": [95],
        "vegetation100": [100],
        "bare60": [60],
        "bare70": [70],
    }

    # Check input (TypeError covers unhashable values such as a list)
    try:
        target_value = target_classes[binary_target]
    except (KeyError, TypeError):
        chime.error()
        raise ValueError("Invalid binary target") from None

    # List to hold the datasets
    src_files_to_mosaic = []

    try:
        # Open and append each raster to the list
        for fp in tqdm(file_paths):
            src_files_to_mosaic.append(rasterio.open(fp))

        if not src_files_to_mosaic:
            raise ValueError("No TIFF files to combine")

        # Merge function returns a single mosaic array and the transformation info
        print("Merging files...")
        mosaic, out_trans = merge(src_files_to_mosaic)

        # Modify the pixel values: any target class -> 1, others -> 0.
        # np.isin tests all class codes in one pass; the explicit cast makes
        # the array match the uint8 dtype declared in the metadata below.
        print(f"Creating binary for {binary_target}...")
        mosaic_modified = np.isin(mosaic, target_value).astype("uint8")

        # Copy the metadata of the last opened file as a template
        out_meta = src_files_to_mosaic[-1].meta.copy()

        # Update the metadata for the new dataset
        out_meta.update(
            {
                "driver": "GTiff",
                "height": mosaic_modified.shape[1],
                "width": mosaic_modified.shape[2],
                "transform": out_trans,
                "dtype": "uint8",  # Update data type to uint8 for binary data
                "compress": "lzw",
            }
        )

        # Write the modified mosaic raster to disk
        print("Saving file...")
        with rasterio.open(output_file, "w", **out_meta) as dest:
            dest.write(mosaic_modified)
    finally:
        # Close all rasterio opened files, also when merging or writing fails
        for src in src_files_to_mosaic:
            src.close()

    chime.success()
    print(f"Modified combined TIFF file saved to: {output_file}")

In [3]:
# Targets to process; uncomment the binary maps that should be (re)created
binary_targets = [
    # "vegetation",
    # "bare",
    # "vegetation20",
    # "vegetation30",
    # "vegetation90",
    # "vegetation95",
    # "vegetation100",
    # "bare60",
    # "bare70",
]

# Locations of the raw ESA tiles and of the project's land cover map folder
my_dir = "/Volumes/SAMSUNG 1TB/land_cover_esa_v200/raw"
new_dir = here("data/final/land_cover_maps").as_posix()

# Full paths of every .tif file found directly inside the raw folder
all_files = [
    f"{my_dir}/{file_name}"
    for file_name in os.listdir(my_dir)
    if file_name.endswith(".tif")
]

# Folder prefix (with trailing slash) under which the binary maps are written
output_file = "/Volumes/SAMSUNG 1TB/land_cover_esa_v200/processed/binary_maps/"

# Create one binary map per requested target
for binary_target in binary_targets:
    i_output_file = f"{output_file}esa-binary_{binary_target}.tif"
    combine_tiff_files_and_modify_pixels(
        file_paths=all_files,
        binary_target=binary_target,
        output_file=i_output_file,
    )

# Calculate Cover Percentages

- First extract zonal statistics via QGIS and then calculate percentages.
- Make sure to aggregate vegetation and bare indices!


In [24]:
files = glob.glob(
    "/Volumes/SAMSUNG 1TB/land_cover_esa_v200/processed/zonal_statistics/*.csv"
)
# display(files)

# Fail early with a clear message instead of a NameError on df_all further down
if not files:
    raise FileNotFoundError("No zonal statistics CSV files found")

df_all = None

for f in files:
    # Get variable name: fourth "_"-separated token of the file name,
    # with the file extension stripped
    v = f.split("/")[-1].split("_")[3].split(".")[0]
    # Read data
    df = pd.read_csv(f)
    # Calculate percentage of pixels with value 1 (sum / count), two decimals
    df[f"perc_{v}"] = (df["_sum"] / df["_count"] * 100).round(2)
    # Drop unnecessary columns
    df = df.drop(["_sum", "_count", "first_year"], axis=1, errors="ignore")
    # Attach to output; the first file defines which "idp" rows are kept
    if df_all is None:
        df_all = df
    else:
        df_all = df_all.merge(df, on=["idp"], how="left")

# Collapse the per-class columns (e.g. perc_vegetation20, perc_bare60) into
# one aggregate column per group
for total_col in ("perc_vegetation", "perc_bare"):
    # Component columns contain the group name but are not the aggregate itself,
    # so an aggregate column that already exists is neither summed nor dropped
    cols_to_sum = [c for c in df_all.columns if total_col in c and c != total_col]

    # Keep an existing aggregate untouched when there is nothing to sum;
    # otherwise (re)build it from its components (all zeros if there are none)
    if cols_to_sum or total_col not in df_all.columns:
        # Sum up cols rowwise
        df_all[total_col] = df_all[cols_to_sum].sum(axis=1)
        df_all = df_all.drop(cols_to_sum, axis=1)

# Save df for use in model
df_all.to_feather(
    "../../data/final/predictor_datasets/esa_landcover_percentages.feather"
)

# Quality Checks:
# Get row sum (added after saving, so it is not part of the feather file);
# small deviations from 100 come from rounding each class to two decimals
df_all["perc_sum"] = df_all[
    [
        "perc_forest",
        "perc_built",
        "perc_crop",
        "perc_water",
        "perc_vegetation",
        "perc_bare",
    ]
].sum(axis=1)

# Check df
display(df_all.describe())
df_all

Unnamed: 0,idp,perc_forest,perc_crop,perc_built,perc_water,perc_vegetation,perc_bare,perc_sum
count,46311.0,46311.0,46311.0,46311.0,46311.0,46311.0,46311.0,46311.0
mean,859769.3,67.216752,12.103552,1.261751,0.620658,18.662184,0.134611,99.999507
std,218127.9,24.839122,18.261503,3.705622,3.039256,16.63557,1.23143,0.005802
min,500002.0,0.16,0.0,0.0,0.0,0.0,0.0,99.97
25%,664849.5,48.245,0.0,0.0,0.0,5.02,0.0,100.0
50%,850719.0,70.93,2.62,0.17,0.0,14.46,0.0,100.0
75%,1022722.0,89.34,17.58,0.87,0.03,28.255,0.0,100.0
max,1231404.0,100.0,98.35,86.77,81.67,99.21,81.83,100.02


Unnamed: 0,idp,perc_forest,perc_crop,perc_built,perc_water,perc_vegetation,perc_bare,perc_sum
0,500008,98.32,0.00,0.04,1.50,0.14,0.00,100.00
1,500013,53.03,39.40,0.63,0.00,6.94,0.00,100.00
2,500098,44.68,34.61,3.05,0.00,17.60,0.07,100.01
3,500103,59.79,16.62,0.00,0.00,23.59,0.00,100.00
4,500137,90.69,4.99,0.00,0.00,4.31,0.00,99.99
...,...,...,...,...,...,...,...,...
46306,1231381,21.29,51.49,0.38,0.00,26.78,0.07,100.01
46307,1231387,90.29,1.40,0.00,0.00,8.31,0.00,100.00
46308,1231388,99.30,0.00,0.00,0.00,0.70,0.00,100.00
46309,1231392,98.67,0.00,0.00,0.00,1.33,0.00,100.00


## Aggregate from 10 to 30m


In [4]:
import rasterio
import numpy as np
from skimage.measure import block_reduce


def aggregate_map(input_file, output_file, original_pixel_size, target_pixel_size):
    """
    Aggregates a map from a finer to a coarser resolution by averaging square blocks of pixels.

    Each output pixel holds the mean of an ``agg_factor x agg_factor`` block of
    the first input band. For a binary (0/1) map this mean is the fraction
    (0-1) of '1' pixels in the block. The result is written as float32 so the
    fractional values are preserved.

    Parameters:
    - input_file: Path to the input binary TIFF file.
    - output_file: Path where the aggregated TIFF file will be saved.
    - original_pixel_size: The size of the pixels in the original map.
    - target_pixel_size: The size of the pixels in the aggregated map.

    Raises:
    - ValueError: If the target pixel size is smaller than the original one,
      i.e. the aggregation factor would be below 1.
    """
    import math

    # Calculate the factor of aggregation. Float ratios that are integers up to
    # rounding error (e.g. 0.3 / 0.1 == 2.999...) are snapped to that integer;
    # genuinely fractional ratios are truncated.
    ratio = target_pixel_size / original_pixel_size
    nearest = round(ratio)
    if math.isclose(ratio, nearest, rel_tol=1e-9):
        agg_factor = int(nearest)
    else:
        agg_factor = int(ratio)

    if agg_factor < 1:
        raise ValueError(
            "target_pixel_size must be at least as large as original_pixel_size"
        )

    with rasterio.open(input_file) as src:
        data = src.read(1)  # Read the first band
        meta = src.meta.copy()

    # Perform aggregation: calculate the mean in blocks of agg_factor x agg_factor
    # NOTE(review): when the raster size is not a multiple of agg_factor,
    # block_reduce pads the edge blocks (default padding value 0 per its
    # documentation), which lowers the mean there - confirm this is acceptable.
    aggregated_data = block_reduce(
        data, block_size=(agg_factor, agg_factor), func=np.mean
    ).astype("float32")

    # Update metadata for the aggregated dataset: new size, pixel size scaled
    # by agg_factor (origin unchanged), and a float dtype because the source
    # dtype may be an integer type that cannot hold fractional means
    transform = meta["transform"]
    meta.update(
        {
            "height": aggregated_data.shape[0],
            "width": aggregated_data.shape[1],
            "dtype": "float32",
            "transform": rasterio.Affine(
                transform.a * agg_factor,
                transform.b,
                transform.c,
                transform.d,
                transform.e * agg_factor,
                transform.f,
            ),
        }
    )

    # Write the aggregated data to a new file
    with rasterio.open(output_file, "w", **meta) as dest:
        dest.write(aggregated_data, 1)

In [None]:
# Settings for the aggregation run
# NOTE(review): `output_file` as assigned in the binary-map cell is a folder
# prefix ending in "/", not a .tif file - confirm which binary map is meant here.
input_file = output_file  # Path to the input binary TIFF file, from above
# Path where the aggregated TIFF file will be saved
output_file = here("data/final/land_cover_maps/esa_v200_10m_merged_aggregated.tif")
original_pixel_size = 10  # Original resolution (assuming 10m)
target_pixel_size = 30  # Target resolution (assuming 30m)

# Aggregate the map and report where the result was written
aggregate_map(
    input_file=input_file,
    output_file=output_file,
    original_pixel_size=original_pixel_size,
    target_pixel_size=target_pixel_size,
)
print(f"Aggregated TIFF file saved to: {output_file}")