In [1]:
import os
import numpy as np
import xarray as xr
import rioxarray
import zarr
from numcodecs import Blosc
import datetime
import glob
import re
import gc
import rasterio.errors


In [2]:
zarr_path = "../../eodc/products/eodc/clms_vegetation_total_productivity/CLMS.zarr"

start_year = 2017
now = datetime.datetime.now()
end_year = now.year

# Days of the month you want
days = [1, 11, 21]

# Generate all dates
dates = []
for year in range(start_year, end_year + 1):
    for month in range(1, 13):
        for day in days:
            try:
                dates.append(np.datetime64(datetime.date(year, month, day)))
            except ValueError:
                # In case of invalid dates like Feb 30
                pass

dates = np.array(dates)

In [3]:
# Convert datetime64[D] to int YYYYMMDD
dates_int = (dates.astype('datetime64[D]')
                 .astype('O'))  # convert to Python datetime.date
dates_int = np.array([d.year * 10000 + d.month * 100 + d.day for d in dates_int], dtype='int32')
dates_int

array([20170101, 20170111, 20170121, 20170201, 20170211, 20170221,
       20170301, 20170311, 20170321, 20170401, 20170411, 20170421,
       20170501, 20170511, 20170521, 20170601, 20170611, 20170621,
       20170701, 20170711, 20170721, 20170801, 20170811, 20170821,
       20170901, 20170911, 20170921, 20171001, 20171011, 20171021,
       20171101, 20171111, 20171121, 20171201, 20171211, 20171221,
       20180101, 20180111, 20180121, 20180201, 20180211, 20180221,
       20180301, 20180311, 20180321, 20180401, 20180411, 20180421,
       20180501, 20180511, 20180521, 20180601, 20180611, 20180621,
       20180701, 20180711, 20180721, 20180801, 20180811, 20180821,
       20180901, 20180911, 20180921, 20181001, 20181011, 20181021,
       20181101, 20181111, 20181121, 20181201, 20181211, 20181221,
       20190101, 20190111, 20190121, 20190201, 20190211, 20190221,
       20190301, 20190311, 20190321, 20190401, 20190411, 20190421,
       20190501, 20190511, 20190521, 20190601, 20190611, 20190

In [4]:
store = zarr.storage.LocalStore(zarr_path)
compressor = zarr.codecs.BloscCodec()

In [5]:
# Run ONLY when creating the zarr storage!!
# root = zarr.group(store=store, overwrite=True)
root = zarr.group(store=store)

In [6]:
# Run ONLY when creating the new dataset group!!
# dataset = "VPP"
dataset = "ST"
ds = root.require_group(dataset)

In [7]:
x_extent = np.arange(4200000, 4900000, 10)
y_extent = np.arange(2500000, 3000000, 10)


In [8]:
ds.create_array(
    name="time",
    shape=(len(dates_int),),
    dtype="int32",
    chunks=(len(dates_int),),
    dimension_names=["time"],
    attributes={
        "units": "YYYYMMDD",  
        "calendar": "noleap"
    },
    overwrite=True
)[:] = dates_int

x_array = ds.create_array(
    name="x",
    shape=x_extent.shape,
    dtype="int32",
    chunks=(len(x_extent),),
    dimension_names=["x"],
    overwrite=True
)
x_array[:] = x_extent

y_array = ds.create_array(
    name="y",
    shape=y_extent.shape,
    dtype="int32",
    chunks=(len(y_extent),),
    dimension_names=["y"],
    overwrite=True
)
y_array[:] = y_extent

print(len(dates_int))
print(x_extent.shape)

zarr.consolidate_metadata(store)

324
(70000,)




<Group file://../../eodc/products/eodc/clms_vegetation_total_productivity/CLMS.zarr>

In [8]:
productType = "PPI"

In [9]:
# test = rioxarray.open_rasterio(f"../../eodc/private/tempearth/CLMS/ST_{productType}/ST_20210311T000000_S2_E45N26-03035-010m_V105_{productType}.tif")

# print(test)

In [10]:
fill_value_PPI = -32768
scale_factor_PPI =  0.0001

attributes_PPI={
        "TIFFTAG_COPYRIGHT": "Copernicus service information 2023",
        "scale_factor": scale_factor_PPI,
        "_FillValue": fill_value_PPI,
        "TIIFTAG_SOFTWARE": "Timesat : TIMESAT4.1.8",
        "PhysRange": "0 to 3",
        "add_offset": 0.0,
        "long_name": "Plant Phenology Index obtained via timesat fitting",
    },


In [11]:
shape = (len(dates_int), len(y_extent), len(x_extent))
chunk_shape = (1, 10000, 10000)

fv =  globals()[f"fill_value_{productType}"]

if -32768 <= fv < 0:
    dtype = "int16"
elif 0 <= fv <= 65535:
    dtype = "uint16"

In [None]:
data_array = ds.create_array(
    name=productType,
    shape=shape,
    chunks=chunk_shape,
    dtype=dtype,
    fill_value = globals()[f"fill_value_{productType}"],
    compressor=compressor,
    dimension_names=["time", "y", "x"],
    attributes=globals()[f"attributes_{productType}"],
    # overwrite=True
)

  compressors = _parse_deprecated_compressor(


In [None]:
#current_start = 0 when we start in 2017
current_start = 72
current_end = current_start
path = f"../../eodc/private/tempearth/CLMS/ST_{productType}/*.tif"

for y in dates_int[72:91]:
    print(f"date = {y}")

    tif_files = sorted([
        f for f in glob.glob(path)
        if "100m" not in f and str(y) in os.path.basename(f)
    ])
        
    for filepath in tif_files:

        print(f"file: {filepath}")

        try:
            file = rioxarray.open_rasterio(filepath)
            tp = file.values.astype("float32")
        except Exception as e:
            print(f"Unexpected error with file: {filepath}")
            print(f"Error: {e}")
            continue


        # Extract both xmin and ymin
        match = re.search(r'E(\d+)N(\d+)', filepath)
        if match:
            xmin = int(match.group(1))*1000
            ymin = int(match.group(2))*1000
                
        else:
            print("No match found.")

        xmax = xmin + 1000
        ymax = ymin + 1000
        print(f"xmin: {xmin}, xmax: {xmax}, ymin: {ymin}, ymax: {ymax}")

        num_years = tp.shape[0]
        current_end = current_start + num_years
            
        data_array[current_start:current_end, ymin:ymax, xmin:xmax] = tp
        # print(data_array[current_start:current_end, ymin:ymin+3, xmin:xmin+3])

        # Free memory
        del tp
        gc.collect()
    current_start = current_end
    print(f"current start = {current_start}")

date = 20190101
file: ../../eodc/private/tempearth/CLMS/ST_PPI/ST_20190101T000000_S2_E42N25-03035-010m_V101_PPI.tif
xmin: 42000, xmax: 43000, ymin: 25000, ymax: 26000
file: ../../eodc/private/tempearth/CLMS/ST_PPI/ST_20190101T000000_S2_E42N26-03035-010m_V101_PPI.tif
xmin: 42000, xmax: 43000, ymin: 26000, ymax: 27000
file: ../../eodc/private/tempearth/CLMS/ST_PPI/ST_20190101T000000_S2_E42N27-03035-010m_V101_PPI.tif
xmin: 42000, xmax: 43000, ymin: 27000, ymax: 28000
file: ../../eodc/private/tempearth/CLMS/ST_PPI/ST_20190101T000000_S2_E42N28-03035-010m_V101_PPI.tif
Unexpected error with file: ../../eodc/private/tempearth/CLMS/ST_PPI/ST_20190101T000000_S2_E42N28-03035-010m_V101_PPI.tif
Error: Read failed. See previous exception for details.
file: ../../eodc/private/tempearth/CLMS/ST_PPI/ST_20190101T000000_S2_E43N25-03035-010m_V101_PPI.tif
xmin: 43000, xmax: 44000, ymin: 25000, ymax: 26000
file: ../../eodc/private/tempearth/CLMS/ST_PPI/ST_20190101T000000_S2_E43N26-03035-010m_V101_PPI.tif
xm