In [37]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [38]:
import pandas as pd
import rasterio as rio
from glob import glob
import numpy as np

targets = ["ndti", "ndwi", "ndci", "doc"]
dates = pd.date_range("2019-01-01", "2023-12-31").astype(str).to_list()
categories = {}
stats = {}

# calculate category thresholds
for i, target in enumerate(targets):
    means = []
    mins = []
    maxs = []

    for date in sorted(dates):
        p = glob(f"data/{target}_masked_{date}.tif")

        assert not len(p) > 1, f"More than one file found for {target} and {date}"

        if p:
            p = p[0]
            with rio.open(p) as src:
                arr = src.read(1)

            means.append(arr.mean())
            mins.append(arr.min())
            maxs.append(arr.max())
    means = np.array(means)
    stats[target] = {
        "mean": means.mean(),
        "std": means.std(),
        "min": means.min(),
        "max": means.max(),
        "means": means,
    }

    categories.update( {target: [
        stats[target]["min"],
        stats[target]["mean"] + 1.00 * stats[target]["std"],
        stats[target]["mean"] + 1.96 * stats[target]["std"],
        stats[target]["mean"] + 2.50 * stats[target]["std"],
        stats[target]["max"],
        ]})
    
categories

{'ndti': [-0.02790498223192541,
  -0.016661699973141333,
  -0.011586964235846795,
  -0.008732425383618616,
  -0.015575143095393804],
 'ndwi': [-0.011335533813099,
  0.013559669264651734,
  0.023558918543832148,
  0.02918349626337113,
  0.015192809193684244],
 'ndci': [0.008703268672375534,
  0.010975902793140343,
  0.0119403413064873,
  0.012482837970244964,
  0.011513991096771554],
 'doc': [2.792285665067632,
  4.028893156651285,
  4.6024729422898645,
  4.925111571711566,
  4.200584734477447]}

In [39]:
# relate categories thersholds with analytics labels
analytics = {}
for target in targets:
    target_stats = stats[target]
    analytics[target] = target_stats

analytics

{'ndti': {'mean': -0.021947883032823145,
  'std': 0.005286183059681811,
  'min': -0.02790498223192541,
  'max': -0.015575143095393804,
  'means': array([-0.0263741 , -0.01793731, -0.02790498, -0.01557514])},
 'ndwi': {'mean': 0.003143784598838805,
  'std': 0.01041588466581293,
  'min': -0.011335533813099,
  'max': 0.015192809193684244,
  'means': array([ 0.01519281, -0.00181277,  0.01053063, -0.01133553])},
 'ndci': {'mean': 0.009971279341737261,
  'std': 0.0010046234514030808,
  'min': 0.008703268672375534,
  'max': 0.011513991096771554,
  'means': array([0.00990986, 0.01151399, 0.00870327, 0.00975799])},
 'doc': {'mean': 3.431414213277764,
  'std': 0.5974789433735207,
  'min': 2.792285665067632,
  'max': 4.200584734477447,
  'means': array([2.90848477, 3.82430168, 2.79228567, 4.20058473])}}

In [40]:
# open an image and save it as nd.array:
p = glob("data/ndti_masked_2019-06-02.tif")[0]
with rio.open(p) as src:
    clip = src.read(1)
clip

array([[0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.]])

In [41]:
"Return stats per category."
categories = categories.copy()
stats = {}
stats["mean"] = np.nanmean(clip) if clip.size else None

names = ["N%d" % i for i in range(len(categories) - 1)]

clip_copy = clip.copy()

for i in range(1, len(categories)):
    mask: np.ndarray = (clip_copy >= categories['ndti'][i - 1]) & (
        clip_copy <= categories['ndti'][i]
    )

    stats[names[i - 1]] = {"pixels": np.nansum(mask)}

    "Modify float raster to ordinal."
    clip[mask] = i

stats

{'mean': -0.026374097739417544,
 'N0': {'pixels': 270},
 'N1': {'pixels': 97},
 'N2': {'pixels': 48}}

In [42]:
mask

array([[False, False, False, ..., False, False, False],
       [False, False, False, ..., False, False, False],
       [False, False, False, ..., False, False, False],
       ...,
       [False, False, False, ..., False, False, False],
       [False, False, False, ..., False, False, False],
       [False, False, False, ..., False, False, False]])

In [43]:
import os

# Define the output directory
output_dir = "data"
os.makedirs(output_dir, exist_ok=True)

# Iterate over the target layers
for target in targets:
    for date in sorted(dates):
        # Load the raster
        raster_path = f"data/{target}_masked_{date}.tif"
        if not os.path.exists(raster_path):
            continue
        with rio.open(raster_path) as src:
            arr = src.read(1)