In [1]:
import os
import numpy as np
import xarray as xr
import rioxarray
import zarr
from numcodecs import Blosc
import datetime
import glob
import re
import gc
import rasterio.errors


In [2]:
# start_year = 2019
# end_year = 2022

start_year = 2017
now = datetime.datetime.now()
end_year = np.datetime64(now).astype("datetime64[Y]").astype(int) + 1970


years = np.arange(start_year, end_year + 1, dtype="int32")
zarr_path = "../../eodc/products/eodc/clms_vegetation_total_productivity/CLMS.zarr"

In [3]:
test = np.arange(start_year, end_year+1, 1)
print(test.shape)

(9,)


In [4]:
store = zarr.storage.LocalStore(zarr_path)
compressor = zarr.codecs.BloscCodec()

In [5]:
# Run ONLY when creating the zarr storage!!
# root = zarr.group(store=store, overwrite=True)
root = zarr.group(store=store)

In [6]:
# Run ONLY when creating the new dataset group!!
dataset = "VPP"
ds = root.require_group(dataset)

In [7]:
x_extent = np.arange(4200000, 4900000, 10)
y_extent = np.arange(2500000, 3000000, 10)


season1 = "SEASON1"
season2 = "SEASON2"

In [8]:
# # Create Arrays for dataset

# ds.create_array(
#     name="time",
#     shape=(len(years),),
#     dtype="int32",
#     chunks=(len(years),),
#     dimension_names=["time"],
#     attributes={
#         "units": "year",  # store actual year values
#         "calendar": "noleap"
#     },
#     overwrite=True
# )[:] = years

# x_array = ds.create_array(
#     name="x",
#     shape=x_extent.shape,
#     dtype="int32",
#     chunks=(len(x_extent),),
#     dimension_names=["x"],
#     overwrite=True
# )
# x_array[:] = x_extent

# y_array = ds.create_array(
#     name="y",
#     shape=y_extent.shape,
#     dtype="int32",
#     chunks=(len(y_extent),),
#     dimension_names=["y"],
#     overwrite=True
# )
# y_array[:] = y_extent

# print(len(years))
# print(x_extent.shape)



In [9]:
# test = rioxarray.open_rasterio(f"../../eodc/private/openeo_platform/zarr_nacho/VPP_{productType}/VPP_2020_S2_E48N27-03035-010m_V101_s1_{productType}.tif")

# print(test)

In [10]:
fill_value_SPROD = 65535
scale_factor_SPROD = 0.1

attributes_SPROD={
        "TIFFTAG_COPYRIGHT": "Copernicus service information 2021",
        "scale_factor": 0.1,
        "input_time_window": "2016-10-01 to 2021-02-29",
        "_FillValue": fill_value_SPROD,
        "TIIFTAG_SOFTWARE": "Timesat : TIMESAT4.1.5",
        "PhysRange": "0 to 1095",
        "add_offset": 0.0,
        "long_name": "Season productivity (Small integral)",
    },


fill_value_RSLOPE = -32768
scale_factor_RSLOPE = 0.0001

attributes_RSLOPE={
        "TIFFTAG_COPYRIGHT": "Copernicus service information 2021",
        "scale_factor": scale_factor_RSLOPE,
        "input_time_window": "2016-10-01 to 2021-02-29",
        "_FillValue": fill_value_RSLOPE,
        "TIIFTAG_SOFTWARE": "Timesat : TIMESAT4.1.5",
        "PhysRange": "0 to 3",
        "add_offset": 0.0,
        "long_name": "Left slope (derivative)",
    },

fill_value_QFLAG = 0
scale_factor_QFLAG = 1.0

attributes_QFLAG={
        "TIFFTAG_COPYRIGHT": "Copernicus service information 2021",
        "scale_factor": scale_factor_QFLAG,
        "input_time_window": "2016-10-01 to 2021-02-29",
        "_FillValue": fill_value_QFLAG,
        "TIIFTAG_SOFTWARE": "Timesat : TIMESAT4.1.5",
        "Flag_value": "(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10)",
        "add_offset": 0.0,
        "long_name": "Vegetation and Phenological Production Quality Flage",
    },


fill_value_EOSD = 0
scale_factor_EOSD = 1.0

attributes_EOSD={
        "TIFFTAG_COPYRIGHT": "Copernicus service information 2021",
        "scale_factor": scale_factor_EOSD,
        "input_time_window": "2016-10-01 to 2021-02-29",
        "_FillValue": fill_value_EOSD,
        "TIIFTAG_SOFTWARE": "Timesat : TIMESAT4.1.5",
        "PhysRange": "-",
        "add_offset": 0.0,
        "long_name": "End of season date",
    },


fill_value_LSLOPE = -32768
scale_factor_LSLOPE = 0.0001

attributes_LSLOPE={
        "TIFFTAG_COPYRIGHT": "Copernicus service information 2021",
        "scale_factor": scale_factor_LSLOPE,
        "input_time_window": "2016-10-01 to 2021-02-29",
        "_FillValue": fill_value_LSLOPE,
        "TIIFTAG_SOFTWARE": "Timesat : TIMESAT4.1.5",
        "PhysRange": "0 to 3",
        "add_offset": 0.0,
        "long_name": "Left slope (derivative)",
    },


fill_value_SOSV = -32768
scale_factor_SOSV = 0.0001

attributes_SOSV={
        "TIFFTAG_COPYRIGHT": "Copernicus service information 2021",
        "scale_factor": scale_factor_SOSV,
        "input_time_window": "2016-10-01 to 2021-02-29",
        "_FillValue": fill_value_SOSV,
        "TIIFTAG_SOFTWARE": "Timesat : TIMESAT4.1.5",
        "PhysRange": "0 to 3",
        "add_offset": 0.0,
        "long_name": "Start of season value",
    },

fill_value_AMPL = -32768
scale_factor_AMPL = 0.0001

attributes_AMPL={
        "TIFFTAG_COPYRIGHT": "Copernicus service information 2021",
        "scale_factor": scale_factor_AMPL,
        "input_time_window": "2016-10-01 to 2021-02-29",
        "_FillValue": fill_value_AMPL,
        "TIIFTAG_SOFTWARE": "Timesat : TIMESAT4.1.8",
        "PhysRange": "0 to 3",
        "add_offset": 0.0,
        "long_name": "Seasonal amplitude",
    },

fill_value_MAXV = -32768
scale_factor_MAXV = 0.0001

attributes_MAXV={
        "TIFFTAG_COPYRIGHT": "Copernicus service information 2021",
        "scale_factor": scale_factor_MAXV,
        "input_time_window": "2016-10-01 to 2021-02-29",
        "_FillValue": fill_value_MAXV,
        "TIIFTAG_SOFTWARE": "Timesat : TIMESAT4.1.8",
        "PhysRange": "0 to 3",
        "add_offset": 0.0,
        "long_name": "Max of season value (value at peak)",
    },

fill_value_EOSV = -32768
scale_factor_EOSV = 0.0001

attributes_EOSV={
        "TIFFTAG_COPYRIGHT": "Copernicus service information 2021",
        "scale_factor": scale_factor_EOSV,
        "input_time_window": "2016-10-01 to 2021-02-29",
        "_FillValue": fill_value_EOSV,
        "TIIFTAG_SOFTWARE": "Timesat : TIMESAT4.1.8",
        "PhysRange": "0 to 3",
        "add_offset": 0.0,
        "long_name": "End of season value",
    },

fill_value_SOSD = 0
scale_factor_SOSD = 1.0

attributes_SOSD={
        "TIFFTAG_COPYRIGHT": "Copernicus service information 2021",
        "scale_factor": scale_factor_SOSD,
        "input_time_window": "2016-10-01 to 2021-02-29",
        "_FillValue": fill_value_SOSD,
        "TIIFTAG_SOFTWARE": "Timesat : TIMESAT4.1.8",
        "PhysRange": "-",
        "add_offset": 0.0,
        "long_name": "Start of season date",
    },

fill_value_MAXD = 0
scale_factor_MAXD = 1.0

attributes_MAXD={
        "TIFFTAG_COPYRIGHT": "Copernicus service information 2021",
        "scale_factor": scale_factor_MAXD,
        "file_creation": "2023:05:14 08:42:35",
        "input_time_window": "2016-10-01 to 2021-02-29",
        "_FillValue": fill_value_MAXD,
        "TIIFTAG_SOFTWARE": "Timesat : TIMESAT4.1.8",
        "PhysRange": "0 to 1095",
        "add_offset": 0.0,
        "long_name": " Max of season date (time at peak)",
    },

fill_value_TPROD = 65535
scale_factor_TPROD = 0.1

attributes_TPROD={
        "TIFFTAG_COPYRIGHT": "Copernicus service information 2021",
        "scale_factor": 0.1,
        "file_creation": "2021:07:23 13:22:50",
        "input_time_window": "2016-10-01 to 2021-02-29",
        "_FillValue": fill_value_TPROD,
        "TIIFTAG_SOFTWARE": "Timesat : TIMESAT4.1.5",
        "PhysRange": "0 to 1095",
        "add_offset": 0.0,
        "long_name": "Total productivity (Large integral)",
    },


fill_value_LENGTH = 0
scale_factor_LENGTH = 1.0

attributes_LENGTH={
        "TIFFTAG_COPYRIGHT": "Copernicus service information 2021",
        "scale_factor": scale_factor_LENGTH,
        "input_time_window": "2016-10-01 to 2021-02-29",
        "_FillValue": fill_value_LENGTH,
        "TIIFTAG_SOFTWARE": "Timesat : TIMESAT4.1.5",
        "PhysRange": "0 to 1096",
        "add_offset": 0.0,
        "long_name": "Season Length",
        "units": "days"
    },

fill_value_MINV = -32768
scale_factor_MINV = 0.0001

attributes_MINV={
        "TIFFTAG_COPYRIGHT": "Copernicus service information 2021",
        "scale_factor": scale_factor_MINV,
        "input_time_window": "2016-10-01 to 2021-02-29",
        "_FillValue": fill_value_MINV,
        "TIIFTAG_SOFTWARE": "Timesat : TIMESAT4.1.5",
        "PhysRange": "0 to 3",
        "add_offset": 0.0,
        "long_name": "Base Value (value at minimum)",
        "units": "days"
    },

In [11]:
parameters = ("MINV", "MAXD", "LENGTH", "SOSD", "QFLAG", "EOSV", "TPROD", "MAXV", "AMPL", "SOSV", "LSLOPE", "EOSD", "RSLOPE", "SPROD")

In [18]:
zarr.consolidate_metadata(store)

productType = "SPROD"

pt = ds.require_group(productType)


shape = (len(years), len(y_extent), len(x_extent))
chunk_shape = (1, 10000, 10000)

fv =  globals()[f"fill_value_{productType}"]

if -32768 <= fv < 0:
    dtype = "int16"
elif 0 <= fv <= 65535:
    dtype = "uint16"



In [19]:
# SEASON 1

data_array = pt.create_array(
    name=season1,
    shape=shape,
    chunks=chunk_shape,
    dtype=dtype,
    fill_value = globals()[f"fill_value_{productType}"],
    compressor=compressor,
    dimension_names=["time", "y", "x"],
    attributes=globals()[f"attributes_{productType}"],
    overwrite=True
)


  compressors = _parse_deprecated_compressor(


In [20]:
#current_start = 0 when we start in 2017
current_start = 2
current_end = current_start
s = "s1"
path = f"../../eodc/private/openeo_platform/zarr_nacho/VPP_{productType}/*.tif"

for y in years:
    print(f"year = {y}")

    tif_files = sorted([
        f for f in glob.glob(path)
        if "100m" not in f and f"_{y}_" in f and f"_{s}_" in os.path.basename(f)
    ])
        
    for filepath in tif_files:

        print(f"file: {filepath}")
        
        try:
            file = rioxarray.open_rasterio(filepath)
            tp = file.values.astype("float32")
        except Exception as e:
            print(f"Unexpected error with file: {filepath}")
            print(f"Error: {e}")
            continue


        # Extract both xmin and ymin
        match = re.search(r'E(\d+)N(\d+)', filepath)
        if match:
            xmin = int(match.group(1))*1000
            ymin = int(match.group(2))*1000
                
        else:
            print("No match found.")

        xmax = xmin + 1000
        ymax = ymin + 1000
        # print(f"xmin: {xmin}, xmax: {xmax}, ymin: {ymin}, ymax: {ymax}")

        num_years = tp.shape[0]
        current_end = current_start + num_years
            
        data_array[current_start:current_end, ymin:ymax, xmin:xmax] = tp
        # print(data_array[current_start:current_end, ymin:ymin+3, xmin:xmin+3])

        # Free memory
        del tp
        gc.collect()
    current_start = current_end
    print(f"current start = {current_start}")


year = 2017
current start = 2
year = 2018
current start = 2
year = 2019
file: ../../eodc/private/openeo_platform/zarr_nacho/VPP_SPROD/VPP_2019_S2_E42N25-03035-010m_V101_s1_SPROD.tif
file: ../../eodc/private/openeo_platform/zarr_nacho/VPP_SPROD/VPP_2019_S2_E42N26-03035-010m_V101_s1_SPROD.tif
file: ../../eodc/private/openeo_platform/zarr_nacho/VPP_SPROD/VPP_2019_S2_E42N27-03035-010m_V101_s1_SPROD.tif
file: ../../eodc/private/openeo_platform/zarr_nacho/VPP_SPROD/VPP_2019_S2_E42N28-03035-010m_V101_s1_SPROD.tif
file: ../../eodc/private/openeo_platform/zarr_nacho/VPP_SPROD/VPP_2019_S2_E43N25-03035-010m_V101_s1_SPROD.tif
file: ../../eodc/private/openeo_platform/zarr_nacho/VPP_SPROD/VPP_2019_S2_E43N26-03035-010m_V101_s1_SPROD.tif
file: ../../eodc/private/openeo_platform/zarr_nacho/VPP_SPROD/VPP_2019_S2_E43N27-03035-010m_V101_s1_SPROD.tif
file: ../../eodc/private/openeo_platform/zarr_nacho/VPP_SPROD/VPP_2019_S2_E43N28-03035-010m_V101_s1_SPROD.tif
file: ../../eodc/private/openeo_platform/zarr_na

In [21]:
# SEASON 2

data_array = pt.create_array(
    name=season2,
    shape=shape,
    chunks=chunk_shape,
    dtype=dtype,
    fill_value = globals()[f"fill_value_{productType}"],
    compressor=compressor,
    dimension_names=["time", "y", "x"],
    attributes=globals()[f"attributes_{productType}"],
    overwrite=True
)


#current_start = 0 when we start in 2017
current_start = 2
current_end = current_start
s = "s2"
path = f"../../eodc/private/openeo_platform/zarr_nacho/VPP_{productType}/*.tif"

for y in years:
    print(f"year = {y}")

    tif_files = sorted([
        f for f in glob.glob(path)
        if "100m" not in f and f"_{y}_" in f and f"_{s}_" in os.path.basename(f)
    ])
        
    for filepath in tif_files:

        print(f"file: {filepath}")
        
        try:
            file = rioxarray.open_rasterio(filepath)
            tp = file.values.astype("float32")
        except Exception as e:
            print(f"Unexpected error with file: {filepath}")
            print(f"Error: {e}")
            continue


        # Extract both xmin and ymin
        match = re.search(r'E(\d+)N(\d+)', filepath)
        if match:
            xmin = int(match.group(1))*1000
            ymin = int(match.group(2))*1000
                
        else:
            print("No match found.")

        xmax = xmin + 1000
        ymax = ymin + 1000
        # print(f"xmin: {xmin}, xmax: {xmax}, ymin: {ymin}, ymax: {ymax}")

        num_years = tp.shape[0]
        current_end = current_start + num_years
            
        data_array[current_start:current_end, ymin:ymax, xmin:xmax] = tp
        # print(data_array[current_start:current_end, ymin:ymin+3, xmin:xmin+3])

        # Free memory
        del tp
        gc.collect()
    current_start = current_end
    print(f"current start = {current_start}")


  compressors = _parse_deprecated_compressor(


year = 2017
current start = 2
year = 2018
current start = 2
year = 2019
file: ../../eodc/private/openeo_platform/zarr_nacho/VPP_SPROD/VPP_2019_S2_E42N25-03035-010m_V101_s2_SPROD.tif
file: ../../eodc/private/openeo_platform/zarr_nacho/VPP_SPROD/VPP_2019_S2_E42N26-03035-010m_V101_s2_SPROD.tif
file: ../../eodc/private/openeo_platform/zarr_nacho/VPP_SPROD/VPP_2019_S2_E42N27-03035-010m_V101_s2_SPROD.tif
file: ../../eodc/private/openeo_platform/zarr_nacho/VPP_SPROD/VPP_2019_S2_E42N28-03035-010m_V101_s2_SPROD.tif
file: ../../eodc/private/openeo_platform/zarr_nacho/VPP_SPROD/VPP_2019_S2_E43N25-03035-010m_V101_s2_SPROD.tif
file: ../../eodc/private/openeo_platform/zarr_nacho/VPP_SPROD/VPP_2019_S2_E43N26-03035-010m_V101_s2_SPROD.tif
file: ../../eodc/private/openeo_platform/zarr_nacho/VPP_SPROD/VPP_2019_S2_E43N27-03035-010m_V101_s2_SPROD.tif
file: ../../eodc/private/openeo_platform/zarr_nacho/VPP_SPROD/VPP_2019_S2_E43N28-03035-010m_V101_s2_SPROD.tif
file: ../../eodc/private/openeo_platform/zarr_na

In [None]:
zarr.consolidate_metadata(store)

z = zarr.open(zarr_path, mode="r")
vpp_group = z["VPP"]
minv_group = vpp_group[productType]

# Define a bounding box (for example, a small chunk)
time_slice = slice(2, 3)      
y_slice = slice(25000, 29000)  
x_slice = slice(42000, 48000)   

# Read only the chunk
season1_chunk = minv_group["SEASON1"][time_slice, y_slice, x_slice]

# Read the corresponding coordinates
time = vpp_group["time"][time_slice]
y = vpp_group["y"][y_slice]
x = vpp_group["x"][x_slice]

# Wrap as xarray DataArray
da = xr.DataArray(
    season1_chunk,
    dims=("time", "y", "x"),
    coords={"time": time, "y": y, "x": x},
    name="SEASON2"
)
print(da)
print(da.max().item())