In [1]:
import numpy as np
import pandas as pd
import sys
from datetime import datetime
import skmap_bindings
from pathlib import Path
import time
import os
# --- Environment ------------------------------------------------------------
# PROJ looks up its data files through PROJ_LIB; set it before any raster I/O.
os.environ['PROJ_LIB'] = '/opt/conda/share/proj/'

# --- GDAL options handed to every skmap_bindings read/write call -------------
gdal_opts = dict(
    GDAL_HTTP_VERSION='1.0',
    CPL_VSIL_CURL_ALLOWED_EXTENSIONS='.tif',
)

# --- Processing parameters ---------------------------------------------------
n_threads = 96                  # thread count passed to the skmap_bindings calls
percentiles = [25., 50., 75.]   # percentiles computed for each bimonthly window
bands_list = [1,]               # raster band(s) requested from each input file

# Window passed to the read/write calls: offset first, then size.
x_off, y_off = 0, 0
x_size = 120960
y_size = 47040

# --- Output settings -----------------------------------------------------------
nodata_val = -1
# Command passed to the writer together with the data; the name suggests it is
# used to compress the written rasters (confirm in skmap_bindings).
compression_command = (
    f"gdal_translate -a_nodata {nodata_val} "
    "-co COMPRESS=deflate -co TILED=TRUE "
    "-co BLOCKXSIZE=1024 -co BLOCKYSIZE=1024 -co NUM_THREADS=8"
)
tmp_write_dir = '../data/tmp'

In [2]:
# Build the index of input rasters: one row per file, indexed by acquisition date.
#
# The listing holds one file name per line. The date is taken from the first
# eight characters (YYYYMMDD) of the fourth '_'-separated token of the name.
# Files are read from the local ../data directory; an earlier variant pointed
# at http://192.168.49.30:8333/tmp-global-geomorpho/GDP/ instead.
df = (
    pd.read_csv("/mnt/nibble/VITO_GDP/new_list.in", header=None)
    .rename(columns={0: 'file'})
    .assign(
        # 'date' is derived from the bare file name, before the path prefix is added.
        date=lambda listing: listing['file'].apply(lambda name: name.split('_')[3][0:8]),
        file=lambda listing: listing['file'].apply(lambda name: f"../data/{name}"),
    )
    .sort_values('date')
    .assign(dt1=lambda listing: pd.to_datetime(listing['date'], format='%Y%m%d'))
    .set_index(['dt1'])
)

In [None]:
# Bimonthly percentile aggregation.
#
# For every year and every two-month window: collect the rasters dated inside
# the window, load them into one (n_rasters, n_pixels) array, compute the
# configured percentiles (the array shapes suggest per pixel, across the
# window's rasters), and write one output raster per percentile into
# `tmp_write_dir`.
bimonth_start = ('01-01', '03-01', '05-01', '07-01', '09-01', '11-01')
bimonth_end = ('02-28', '04-30', '06-30', '08-31', '10-31', '12-31')
years = range(2014, 2024)
for year in years:
    # Full Gregorian rule rather than `year % 4`, so the loop stays correct if
    # `years` is ever extended across a century boundary.
    is_leap_year = pd.Timestamp(year=year, month=1, day=1).is_leap_year
    for bimonth in range(0,6):
        print(f"Processing {year} from {bimonth_start[bimonth]} to {bimonth_end[bimonth]}", flush=True)
        # In leap years the first window must also pick up rasters dated 29 Feb.
        # Only the selection is widened: the log line above and the output file
        # names below keep the nominal '02-28' label.
        if is_leap_year and bimonth == 0:
            window_end = f'{year}-02-29'
        else:
            window_end = f'{year}-{bimonth_end[bimonth]}'
        files_list = df.loc[f'{year}-{bimonth_start[bimonth]}':window_end]['file'].tolist()
        start = time.time()
        n_rasters = len(files_list)
        if n_rasters == 0:
            # Nothing to aggregate; continuing would fail on files_list[0] below
            # after handing zero-row arrays to the native bindings.
            print(f"No input files for {year} from {bimonth_start[bimonth]} to {bimonth_end[bimonth]}, skipping", flush=True)
            continue
        file_order = np.arange(n_rasters)
        shape = (n_rasters, x_size * y_size)
        array = np.empty(shape, dtype=np.float32)
        skmap_bindings.readData(array, n_threads, files_list, file_order, x_off, y_off, x_size, y_size, bands_list, gdal_opts)
        print(f"Read files in {time.time() - start:.2f} s", flush=True)

        start = time.time()
        # Transpose to (n_pixels, n_rasters) before computing percentiles, which
        # fills an (n_pixels, n_percentiles) array.
        array_t = np.empty((shape[1], shape[0]), dtype=np.float32)
        array_perc_t = np.empty((shape[1], len(percentiles)), dtype=np.float32)
        skmap_bindings.transposeArray(array, n_threads, array_t)
        # Each buffer is tens of GB at this raster size; drop it as soon as it
        # has been consumed so it does not overlap with later allocations.
        del array
        skmap_bindings.computePercentiles(array_t, n_threads, array_perc_t, percentiles)
        del array_t
        print(f"Computed percentiles in {time.time() - start:.2f} s", flush=True)

        start = time.time()
        # Back to (n_percentiles, n_pixels): one row per output raster.
        array_perc = np.empty((len(percentiles), shape[1]), dtype=np.float32)
        skmap_bindings.transposeArray(array_perc_t, n_threads, array_perc)
        del array_perc_t
        start_date = f'{year}-{bimonth_start[bimonth]}'.replace('-', '')
        end_date = f'{year}-{bimonth_end[bimonth]}'.replace('-', '')
        # The first input of the window is passed to the writer as the base raster.
        base_raster = files_list[0]
        out_files = [f'c.gls.GDMP300.agg.m.bimonthly_p{p:.0f}_30m_s_{start_date}_{end_date}_global_epsg.4326_v20240307' for p in percentiles]
        write_idx = range(0, len(out_files))
        skmap_bindings.writeInt16Data(array_perc, n_threads, gdal_opts, base_raster, tmp_write_dir, out_files, write_idx,
            x_off, y_off, x_size, y_size, nodata_val, compression_command)
        del array_perc
        print(f"Saved files in {time.time() - start:.2f} s", flush=True)
        

Processing 2014 from 01-01 to 02-28
Read files in 31.64 s
Computed percentiles in 41.26 s
Saved files in 148.47 s
Processing 2014 from 03-01 to 04-30
Read files in 29.77 s
Computed percentiles in 38.74 s
Saved files in 144.39 s
Processing 2014 from 05-01 to 06-30
Read files in 32.48 s
Computed percentiles in 43.32 s
Saved files in 150.91 s
Processing 2014 from 07-01 to 08-31
Read files in 33.45 s
Computed percentiles in 38.85 s
Saved files in 152.09 s
Processing 2014 from 09-01 to 10-31
Read files in 34.62 s
Computed percentiles in 43.44 s
Saved files in 150.13 s
Processing 2014 from 11-01 to 12-31
Read files in 29.20 s
Computed percentiles in 38.80 s
Saved files in 139.47 s
Processing 2015 from 01-01 to 02-28
Read files in 30.73 s
Computed percentiles in 38.47 s
Saved files in 144.24 s
Processing 2015 from 03-01 to 04-30
Read files in 31.62 s
Computed percentiles in 38.74 s
Saved files in 145.63 s
Processing 2015 from 05-01 to 06-30
Read files in 34.36 s
Computed percentiles in 41.51 