In [None]:
## Load packages
import xarray as xr
import numpy as np
import geopandas as gpd
import matplotlib.pyplot as plt
import math
import pandas as pd
from scipy.stats import mode
import os

In [None]:
## Define paths
# Directory containing the yearly input land-cover NetCDF files; it is joined
# with each year's input file name further down.
# NOTE(review): the value looks like a placeholder - set the real directory
# before running.
path_lc = 'path-raw-LC-data'
# Directory that receives the resampled NetCDF files; it is joined with each
# year's output file name further down.
# NOTE(review): 'ooutput' is presumably a typo inside a placeholder - confirm
# the intended directory name.
output_path = 'ooutput-path-resampled-LC'

In [None]:
# Reclassify each yearly land-cover file into a reduced set of classes and
# prepare it for block resampling.
years = range(2000, 2020)

# Inclusive (lower, upper, new_class) ranges that map the original
# `lccs_class` values onto the reduced classes. Values that fall in none of
# the ranges keep the initial value 0. The table does not depend on the year,
# so it is built once instead of on every iteration.
classification_rules = [
    (0, 0, 0),  # 0
    (10, 49, 5),  # cropland
    (50, 69, 1),  # Broadleaf forests (assign 1)
    (70, 89, 2),  # Needleleaf forests (assign 2)
    (90, 119, 3),  # Mixed
    (120, 159, 4),  # shrubland
    (160, 220, 6),  # other
]

# Loop through each year and process the corresponding file
for year in years:
    # Years up to and including 2015 use file version v2.0.7, later years v2.1.1
    file_version = 'v2.0.7' if year <= 2015 else 'v2.1.1'

    # Build the input and output file names for this year
    # NOTE(review): v2.1.1 products are often distributed with a different
    # file-name prefix than the v2.0.7 ones - confirm the files on disk
    # really use the name below.
    input_filename = f"ESACCI-LC-L4-LCCS-Map-300m-P1Y-{year}-{file_version}.nc"
    output_filename = f"ESACCI-LC-L4-LCCS-Map-300m-P1Y-{year}-{file_version}_agg_1km.nc"

    # Construct input and output paths
    input_file = os.path.join(path_lc, input_filename)
    output_file = os.path.join(output_path, output_filename)

    # Open dataset
    data = xr.open_dataset(input_file)

    # Pull the land-cover classes into memory as a plain array
    lc_data = data.lccs_class
    lc_values = lc_data.values

    # Output starts as all zeros with the same shape and dtype as the input
    # values; allocating from the array avoids converting the DataArray again.
    new_classification = np.zeros_like(lc_values)

    # Apply classification rules (ranges are inclusive on both ends)
    for lower, upper, new_class in classification_rules:
        mask = (lc_values >= lower) & (lc_values <= upper)
        new_classification[mask] = new_class

    # Wrap the reclassified (time, lat, lon) array with the original coordinates
    lc_array = xr.DataArray(
        new_classification,
        coords=[data['time'].values, data['lat'].values, data['lon'].values],
        dims=["time", "lat", "lon"]
    )

    # Add the new classification as a variable to the dataset
    data["lccs_class_new"] = lc_array

    # Add attributes to the new variable
    data["lccs_class_new"].attrs = {
        "units": "dimensionless",
        "long_name": "Land Cover class new classification"
    }

    lc_new = data.lccs_class_new

    # Number of fine cells aggregated along each of lat and lon
    resample_factor = 3

    # Function to compute the mode within blocks
    def block_mode(data, resample_factor):
        """Return the most frequent value of every square block of `data`.

        Parameters
        ----------
        data : numpy.ndarray
            3-D array ordered (time, lat, lon).
        resample_factor : int
            Edge length of the square lat/lon blocks; must be >= 1 and divide
            both the lat and the lon dimension.

        Returns
        -------
        numpy.ndarray
            Array of shape (time, lat // resample_factor,
            lon // resample_factor) holding the mode of each block. Ties are
            resolved by `scipy.stats.mode`.

        Raises
        ------
        ValueError
            If `resample_factor` is smaller than 1, or if the lat/lon
            dimensions are not divisible by it.
        """
        time_dim, lat_dim, lon_dim = data.shape

        # Reject 0 / negative factors explicitly instead of failing later with
        # a ZeroDivisionError or an opaque reshape error.
        if resample_factor < 1:
            raise ValueError("The resample factor must be a positive integer.")

        # Ensure lat/lon dimensions are divisible by resample_factor
        if lat_dim % resample_factor != 0 or lon_dim % resample_factor != 0:
            raise ValueError("Latitude and Longitude dimensions must be divisible by the resample factor.")

        lat_blocks = lat_dim // resample_factor
        lon_blocks = lon_dim // resample_factor

        # Split lat and lon into (block index, position inside block)
        blocks = data.reshape(
            time_dim,
            lat_blocks, resample_factor,
            lon_blocks, resample_factor
        )

        # Move both within-block axes to the end and flatten them so every
        # block becomes one vector. Reducing over a single axis does not rely
        # on tuple-axis support in scipy.stats.mode.
        flat_blocks = blocks.transpose(0, 1, 3, 2, 4).reshape(
            time_dim, lat_blocks, lon_blocks,
            resample_factor * resample_factor
        )

        block_modes = mode(flat_blocks, axis=-1).mode

        # Older SciPy releases keep the reduced axis with length 1; reshaping
        # yields the same (time, lat, lon) result either way.
        return np.asarray(block_modes).reshape(time_dim, lat_blocks, lon_blocks)

    # Resample the data: one value (the block mode) per
    # resample_factor x resample_factor block of the reclassified map
    resampled_data = block_mode(lc_new.values, resample_factor)

    # Coordinates of the coarse grid. Each coarse cell is labelled with the
    # mean of the fine-cell coordinates it covers (the block centre). Taking
    # every resample_factor-th value instead would label the cell with its
    # first fine cell and shift the output grid.
    new_lat = data['lat'].values.reshape(-1, resample_factor).mean(axis=1)
    new_lon = data['lon'].values.reshape(-1, resample_factor).mean(axis=1)

    # Create a new DataArray with the resampled data. The lat/lon coordinate
    # attributes and the attributes set on lccs_class_new are carried over so
    # they end up in the output file.
    resampled_da = xr.DataArray(
        resampled_data,
        dims=['time', 'lat', 'lon'],
        coords={
            'time': data['time'],
            'lat': ('lat', new_lat, data['lat'].attrs),
            'lon': ('lon', new_lon, data['lon'].attrs),
        },
        name='lccs_class_new',
        attrs=lc_new.attrs,
    )

    # Save as new NetCDF file
    resampled_ds = resampled_da.to_dataset()
    resampled_ds.to_netcdf(output_file)

    # Release the input file handle before the next year's file is opened
    data.close()

    print(f"Processed and saved file: {output_file}")