In [1]:
import sys
# Add the parent directory to the import search path — presumably so the
# `chcfetch` package imported in the next cell resolves; verify repo layout.
sys.path.append('..')

In [2]:
import chcfetch.chcfetch

In [4]:
# Query the list of available years for the CHIRPSv3.RNL product.
available_years = chcfetch.chcfetch.query_list_of_available_years(
    product=chcfetch.chcfetch.Products.CHIRPSv3.RNL,
)

In [None]:
# Quick check: number of years returned by the query.
len(available_years)

In [None]:
# Build the table of file paths for every available year, using 16 jobs and
# (presumably) keeping only paths that end with '.tif'.
# NOTE(review): the helper is named `query_chirps_v2_global_daily` but is given
# a CHIRPSv3 product — confirm it is the intended entry point for v3 data.
paths_df = chcfetch.chcfetch.query_chirps_v2_global_daily(
    product=chcfetch.chcfetch.Products.CHIRPSv3.RNL,
    years=available_years,
    njobs=16,
    path_ends_with_list=['.tif'],
)

# Preview the first rows.
paths_df.head()

In [None]:
# Download every file listed in `paths_df` into the CHIRPS-v3.0 data folder
# using 16 jobs. Later cells read the 'date' and 'local_filepath' columns of
# the returned table.
downloads_df = chcfetch.chcfetch.download_files_from_paths_df(
    paths_df=paths_df,
    download_folderpath='/gpfs/data1/cmongp2/sasirajann/nh_crop_calendar/crop_calendar/data/CHIRPS-v3.0',
    njobs=16,
)

# Preview the first rows.
downloads_df.head()

In [37]:
# Persist the download table as `catalog.csv` in the same folder the files were
# downloaded to; the DataFrame index is not written.
# NOTE(review): this cell's execution count (37) is out of sequence with the
# surrounding cells, so it was last run out of order — re-check with
# Restart & Run All.
downloads_df.to_csv('/gpfs/data1/cmongp2/sasirajann/nh_crop_calendar/crop_calendar/data/CHIRPS-v3.0/catalog.csv', index=False)

In [8]:
import geopandas as gpd

import rsutils.modify_images

In [9]:
# Load the region-of-interest geometry used to clip the rasters below.
# The filename suggests a Togo admin-level-0 boundary — confirm against the data.
region_gdf = gpd.read_file('/gpfs/data1/cmongp1/sasirajann/togo/shapefiles/Shapefiles/tgo_admbnda_adm0_inseed_itos_20210107.shp')

In [None]:
# Inspect the 'date' column of the download table.
downloads_df['date']

In [None]:
# Load every downloaded raster together with `region_gdf` as the shapes input,
# using 120 jobs. Later cells treat each entry as indexable, with the image
# array at position 0 and the raster metadata at position 1.
data_meta_list = rsutils.modify_images.load_images(
    src_filepaths=downloads_df['local_filepath'],
    shapes_gdf=region_gdf,
    njobs=120,
)

In [12]:
# Guard: the time axis of the DataArray is taken from this column in row order,
# so the rows must already be in non-decreasing date order.
# `is_monotonic_increasing` tests exactly that in a single pass; comparing the
# index against `sort_values().index` relied on a non-stable sort and could
# report a false failure when two rows share the same date.
if not downloads_df['date'].is_monotonic_increasing:
    raise ValueError('dates are not in order')

In [13]:
# Dates as a plain NumPy array, used for the consecutive-gap check that follows.
dates = downloads_df['date'].to_numpy()

In [None]:
# Distinct gaps between consecutive dates; a single value means the series is
# evenly spaced with no missing steps.
set(dates[1:] - dates[:-1])

In [None]:
# Distinct shapes of the first item of each loaded entry (presumably the image
# array); a single shape means all rasters share the same grid.
{data_meta[0].shape for data_meta in data_meta_list}

In [16]:
import xarray as xr
import numpy as np

In [None]:
# Join the per-file image arrays along axis 0 and label that axis with the file
# dates, giving a (valid_time, x, y) DataArray.
# NOTE(review): the clustering cell unpacks this layout as
# (n_ts, height, width), so 'x' appears to index rows and 'y' columns — confirm
# before relying on the dimension names.
chirps_v3_togo_da = xr.DataArray(
    data=np.concatenate([entry[0] for entry in data_meta_list]),
    dims=('valid_time', 'x', 'y'),
    coords={'valid_time': downloads_df['date'].to_numpy()},
)

In [None]:
# First and last entries of the time axis.
chirps_v3_togo_da.valid_time[0], chirps_v3_togo_da.valid_time[-1]

In [None]:
# Display the DataArray summary.
chirps_v3_togo_da

In [20]:
# Attach a 'year' coordinate derived from valid_time so the series can be
# grouped by calendar year.
chirps_v3_togo_da = chirps_v3_togo_da.assign_coords(year=chirps_v3_togo_da.valid_time.dt.year)

In [None]:
# Display the DataArray again to confirm the new 'year' coordinate.
chirps_v3_togo_da

In [22]:
# Running total along valid_time, computed separately within each 'year' group
# so the accumulation restarts every calendar year.
cumulative = chirps_v3_togo_da.groupby("year").cumsum(dim="valid_time")

In [23]:
# Average the within-year cumulative totals across years for each day of year.
cumulative_doy_mean = (
    cumulative
    .groupby('valid_time.dayofyear')
    .mean('valid_time')
    .sel(dayofyear=slice(1, 365))  # keep DOY 1-365; DOY 366 is dropped
)

In [None]:
# Largest value anywhere in the cumulative day-of-year mean.
cumulative_doy_mean.max()

In [None]:
# Plot the day-of-year curve at one location. 'x' and 'y' were given no
# coordinate labels, so 50 / 20 presumably act as positional indices — confirm.
cumulative_doy_mean.sel(x=50, y=20).plot()

In [26]:
# Rolling-window length, in steps along valid_time.
window = 10

# Rolling sum over `window` steps; min_periods=window means positions with
# fewer than `window` values available are left as NaN.
# NOTE(review): the slice end "2025-01-01" is a label slice, which presumably
# keeps 2025-01-01 itself — confirm whether that single day should be included.
rolling = (
    chirps_v3_togo_da
    .rolling(valid_time=window, min_periods=window)
    .sum()
    .sel(valid_time=slice(None, "2025-01-01"))
)

# Average the rolling sums across years for each day of year (DOY 366 dropped).
rolling_doy_mean = (
    rolling
    .groupby('valid_time.dayofyear')
    .mean('valid_time')
    .sel(dayofyear=slice(1, 365))
)

In [None]:
# Plot the rolling-sum day-of-year curve at the same location used for the
# cumulative plot (x=50, y=20).
rolling_doy_mean.sel(x=50, y=20).plot()

In [28]:
import sklearn.cluster
import rsutils.utils
import pandas as pd
import os

In [29]:
def relabel_clusters_by_count(cluster_ids:np.ndarray):
    """Renumber cluster labels by descending frequency.

    The most frequent label becomes 0, the next most frequent 1, and so on.
    Labels with equal counts keep their ascending original-label order (the
    sort is stable), so the mapping is deterministic for any number of
    clusters.

    Parameters
    ----------
    cluster_ids : np.ndarray
        Array (any shape) of cluster labels; array-likes are accepted.

    Returns
    -------
    np.ndarray
        Integer array with the same shape as `cluster_ids` holding the new
        labels in the range ``0 .. n_unique - 1``. Empty input yields an empty
        integer array.
    """
    cluster_ids = np.asarray(cluster_ids)
    # `inverse` maps every element to the position of its label in the sorted
    # unique labels, which lets the relabel be a single lookup.
    _ids, inverse, _counts = np.unique(
        cluster_ids, return_inverse=True, return_counts=True,
    )
    # Stable argsort on negated counts: descending by count, ties in ascending
    # label order.
    order = np.argsort(-_counts, kind='stable')
    # rank[i] is the new id of the i-th unique label.
    rank = np.empty(order.shape[0], dtype=int)
    rank[order] = np.arange(order.shape[0])
    # Flatten `inverse` first: its shape for n-d input differs across NumPy versions.
    return rank[inverse.reshape(-1)].reshape(cluster_ids.shape).astype(int)

In [30]:
# Output folder for the cluster plot (.png) and cluster raster (.tif) written
# by later cells.
export_folderpath = '/gpfs/data1/cmongp2/sasirajann/nh_crop_calendar/crop_calendar/data/togo'

In [None]:
# --- Choose which day-of-year climatology to cluster -------------------------
# Alternative input (cumulative rainfall); uncomment these three lines and
# comment out the rolling block below to switch.
# filename = 'cumulative_doy_mean'
# band_name = 'cumulative rainfall DOY mean'
# data = cumulative_doy_mean

filename = f'rolling{window}_doy_mean'
band_name = f'rolling sum (window={window}) rainfall DOY mean'
data = rolling_doy_mean

# --- Flatten to one time series per pixel -----------------------------------
# (n_ts, height, width) -> (height*width, n_ts): each row is a pixel's DOY curve.
n_ts, height, width = data.values.shape
data_2d = data.values.reshape(n_ts, height * width).T

# Upper y-limit for the plots: the data maximum rounded up.
y_max = np.ceil(data.max().data)

# --- Cluster the pixel curves ------------------------------------------------
n_clusters = 3
nrows, ncols = 2, 2

# Fixed random_state keeps the clustering repeatable; labels are then
# renumbered so that cluster 0 is the largest.
cluster_ids = relabel_clusters_by_count(
    cluster_ids=sklearn.cluster.MiniBatchKMeans(
        n_clusters=n_clusters,
        random_state=42,
    ).fit(data_2d).labels_,
)

# --- Plot per-cluster line plots and save them as a PNG ----------------------
rsutils.utils.plot_clustered_lineplots(
    crop_name='',
    band_name=band_name,
    timeseries=data_2d,
    cluster_ids=cluster_ids,
    y_min=-1,
    y_max=y_max,
    nrows=nrows,
    ncols=ncols,
    x=range(1, n_ts + 1),
    x_label='DOY',
    save_filepath=os.path.join(export_folderpath, f'{filename}.png'),
    alpha=0.02,
    aspect_ratio=1.5,
)

In [None]:
# NOTE(review): stray scratch cell — evaluates the literal 1 and affects no
# later cell; candidate for deletion.
1

In [35]:
import rasterio

In [36]:
# Raster profile for the cluster map: start from the metadata of the first
# loaded image and override dtype/nodata. A new dict is built so the metadata
# stored inside `data_meta_list` is left untouched (the previous in-place
# `update` changed that shared entry on every run of this cell).
meta = {
    **data_meta_list[0][1],
    'dtype': 'uint8',
    'nodata': 0,
}

# NOTE(review): cluster ids start at 0 (the largest cluster after relabelling),
# which equals the declared nodata value — readers that honour nodata will
# treat that cluster as missing. Confirm this is intended.
with rasterio.open(os.path.join(export_folderpath, f'{filename}.tif'), 'w', **meta) as dst:
    # Restore the (height, width) grid and write the labels to band 1.
    dst.write(cluster_ids.reshape(height, width).astype(np.uint8), 1)