In [31]:
import os
import shutil
# Process-wide environment tweaks. They are applied here, ahead of the
# third-party imports that follow, so the libraries see them at import time.
os.environ.update({
    'USE_PYGEOS': '0',
    'PROJ_LIB': '/opt/conda/share/proj/',
    # Both numexpr knobs are pinned to the same thread count.
    'NUMEXPR_MAX_THREADS': '96',
    'NUMEXPR_NUM_THREADS': '96',
})
# OpenMP limits are currently disabled:
#os.environ['OMP_THREAD_LIMIT'] = '96'
#os.environ['OMP_NUM_THREADS'] = '96'

from datetime import datetime
from osgeo import gdal, gdal_array
from pathlib import Path
from typing import Callable, Iterator, List,        Union
import bottleneck as bn
import geopandas as gpd
import numpy as np
import pandas as pd
import skmap_bindings
import tempfile
import time
import sys



# GDAL configuration options handed to skmap_bindings.readData as a
# {option_name: value} mapping. Only the two entries below are active.
gdal_opts = {
    'GDAL_HTTP_VERSION': '1.0',
    'CPL_VSIL_CURL_ALLOWED_EXTENSIONS': '.tif',
}
# Options that have been tried and are currently switched off:
#   'GDAL_HTTP_MULTIRANGE': 'SINGLE_GET',
#   'GDAL_HTTP_MERGE_CONSECUTIVE_RANGES': 'NO',
#   'GDAL_DISABLE_READDIR_ON_OPEN': 'EMPTY_DIR',
#   'VSI_CACHE': 'FALSE',
#   'GDAL_HTTP_CONNECTTIMEOUT': '320',
#   'CPL_VSIL_CURL_USE_HEAD': 'NO',
#   'GDAL_HTTP_TIMEOUT': '320',
#   'CPL_CURL_GZIP': 'NO'

# NOTE(review): presumably GeoTIFF creation options for written rasters;
# not referenced by the cells visible here -- confirm before removing.
co = [
    'TILED=YES',
    'BIGTIFF=YES',
    'COMPRESS=DEFLATE',
    'BLOCKXSIZE=1024',
    'BLOCKYSIZE=1024',
]

# Placeholder; nothing in the visible cells assigns or uses it.
executor = None

def ttprint(*args, **kwargs):
    """Print a message prefixed with the current wall-clock time.

    The output has the form ``[HH:MM:SS] <message>``, where ``<message>`` is
    whatever ``print(*args, **kwargs)`` would have produced.

    Parameters
    ----------
    *args
        Positional arguments forwarded to the built-in ``print``.
    **kwargs
        Keyword arguments forwarded to the built-in ``print`` (``sep``,
        ``end``, ``file``, ``flush``). ``flush`` defaults to ``True`` so
        progress lines show up immediately, but an explicit value is honoured.
    """
    # Default to flushing without clashing with a caller-supplied ``flush``
    # (passing it twice to print() would raise TypeError).
    kwargs.setdefault('flush', True)

    # Send the timestamp to the same stream as the message; file=None makes
    # print() fall back to sys.stdout.
    print(f'[{datetime.now():%H:%M:%S}] ', end='', file=kwargs.get('file'))
    print(*args, **kwargs)

def make_tempdir(basedir='skmap', make_subdir = True):
    """Create (if needed) and return a working directory under ``TMP_DIR``.

    Parameters
    ----------
    basedir
        Directory name joined onto the module-level ``TMP_DIR``.
    make_subdir
        When true, a randomly named sub-directory is created inside
        ``TMP_DIR/basedir`` and returned instead of ``TMP_DIR/basedir`` itself.

    Returns
    -------
    pathlib.Path
        The directory, which exists when the function returns.

    Notes
    -----
    ``TMP_DIR`` is looked up at call time, so it must be defined before the
    first call.
    """
    tempdir = Path(TMP_DIR).joinpath(basedir)
    if make_subdir:
        # NamedTemporaryFile serves only as a source of a unique name. The
        # context manager closes (and thereby removes) the throw-away file
        # right away instead of leaving that to garbage collection.
        with tempfile.NamedTemporaryFile() as handle:
            tempdir = tempdir.joinpath(Path(handle.name).name)
    tempdir.mkdir(parents=True, exist_ok=True)
    return tempdir

def make_tempfile(basedir='skmap', prefix='', suffix='', make_subdir = False):
    """Return a unique file path inside a working directory.

    The directory comes from ``make_tempdir(basedir, make_subdir=make_subdir)``.
    Only the path is returned; no file is created at that location.

    Parameters
    ----------
    basedir
        Forwarded to ``make_tempdir``.
    prefix, suffix
        Prepended / appended to the randomly generated file name.
    make_subdir
        Forwarded to ``make_tempdir``.

    Returns
    -------
    pathlib.Path
        ``<tempdir>/<prefix><random><suffix>``.
    """
    tempdir = make_tempdir(basedir, make_subdir=make_subdir)
    # NamedTemporaryFile is used purely as a unique-name generator; the
    # context manager closes and removes the throw-away file immediately
    # rather than relying on garbage collection.
    with tempfile.NamedTemporaryFile(prefix=prefix, suffix=suffix) as handle:
        unique_name = Path(handle.name).name
    return tempdir.joinpath(unique_name)
                


In [43]:
import numpy as np
# --- Run configuration -------------------------------------------------------
# Slice bounds applied to the TILE column of the tiling system
# (tiles_csv['TILE'][start_tile:end_tile]).
start_tile = 1
end_tile = 2

# Root directory used by make_tempdir / make_tempfile.
TMP_DIR = tempfile.gettempdir()

tiles_fn = '/mnt/slurm/jobs/wri_pasture_class/gpw_tiles.gpkg'

years = range(1997, 2023)

# Window offset/size, thread count, band list and no-data value passed to
# skmap_bindings.readData for every file.
x_size, y_size = (4004, 4004)
x_off, y_off = (0, 0)
n_threads = 96
no_data = 255.
bands_list = [1,]

# Pieces of the per-file URL:
#   {gaia_prefix}<host octet>:8333/prod-landsat-ard2/{tile}{*_file_prefix}<dates>{file_ending}
mask_file_prefix = '/seasconv/clear_sky_mask.SeasConv.ard2_m_30m_s_'
band_file_prefix = '/seasconv/green_glad.SeasConv.ard2_m_30m_s_'
file_ending = '_go_epsg.4326_v20230908.tif'
gaia_prefix = '/vsicurl/http://192.168.49.'

# --- Tiling system -----------------------------------------------------------
# Announce the read before doing it, so the log timestamps bracket the I/O.
ttprint("Reading tiling system")
tiles_csv = gpd.read_file(tiles_fn)
tiles = tiles_csv['TILE'][start_tile:end_tile].to_list()
ttprint(f"Processing {len(tiles)} tiles")

# --- Bimonthly periods -------------------------------------------------------
# MMDD start/end stamps of the six periods used in the file names; the two
# lists are indexed in parallel (month_start[i] pairs with month_end[i]).
month_start = ['0101', '0301', '0501', '0701', '0901', '1101']
month_end = ['0228', '0430', '0630', '0831', '1031', '1231']

def _count_value_frequencies(data, minlength=256):
    """Histogram the integer-truncated values of ``data`` one row at a time.

    Produces the same counts as
    ``np.bincount(data.astype(int).flatten(), minlength=minlength)`` but never
    materialises an integer copy of the whole array: ``astype`` and
    ``flatten`` each allocate a full-size copy, whereas here only a single
    row is converted at any moment.

    Parameters
    ----------
    data
        1-D or 2-D numeric array holding non-negative values.
    minlength
        Minimum length of the returned histogram.

    Returns
    -------
    numpy.ndarray
        ``counts[v]`` is the number of elements whose integer value is ``v``.

    Raises
    ------
    ValueError
        Propagated from ``np.bincount`` when a row contains negative values.
    """
    counts = np.zeros(minlength, dtype=np.intp)
    for row in np.atleast_2d(data):
        row_counts = np.bincount(row.astype(np.intp), minlength=minlength)
        # bincount grows past minlength when a row holds larger values;
        # keep whichever histogram is longer and add the other into it.
        if row_counts.size > counts.size:
            row_counts[:counts.size] += counts
            counts = row_counts
        else:
            counts[:row_counts.size] += row_counts
    return counts


for tile in tiles:
    # One mask raster per (year, bimonth), in year-major order. The last
    # octet of the host address is 30 + (bimonth + 6*year) % 13, i.e. it
    # cycles through 30..42 (presumably to spread requests over several
    # storage hosts -- confirm).
    mask_files = [
        f'{gaia_prefix}{30+((bimonth+6*year)%13)}:8333/prod-landsat-ard2/{tile}{mask_file_prefix}{year}{month_start[bimonth]}_{year}{month_end[bimonth]}{file_ending}'
        for year in years
        for bimonth in range(6)
    ]
    # Band reading is currently disabled; the list is kept so the commented
    # code below can be re-enabled without further changes.
    band_files = []
    # band_files.append(f'{gaia_prefix}{30+((bimonth+6*year)%13)}:8333/prod-landsat-ard2/{tile}{band_file_prefix}{year}{month_start[bimonth]}_{year}{month_end[bimonth]}{file_ending}')

    n_files = len(mask_files)
    mask_idx = range(n_files)
    # band_idx = range(n_files)

    # One row per file, one column per pixel of the x_size * y_size window.
    mask_data = np.empty((n_files, x_size*y_size), dtype=np.float32)
    # band_data = np.empty((n_files, x_size*y_size), dtype=np.float32)

    start = time.time()
    skmap_bindings.readData(mask_data, n_threads, mask_files, mask_idx, x_off, y_off, x_size, y_size, bands_list, gdal_opts, no_data, no_data)
    ttprint(f"Tile {tile} - Reading mask: {(time.time() - start):.2f} segs")

    # start = time.time()
    # skmap_bindings.readData(band_data, n_threads, band_files, band_idx, x_off, y_off, x_size, y_size, bands_list, gdal_opts, no_data, np.nan)
    # ttprint(f"Tile {tile} - Reading band: {(time.time() - start):.2f} segs")

    start = time.time()

    # Count how often each integer value occurs across all mask rasters,
    # accumulating row by row to keep peak memory at one extra row.
    frequencies = _count_value_frequencies(mask_data, minlength=256)
    ttprint(f"Tile {tile} - Computing frequencies: {(time.time() - start):.2f} segs")

    # Print frequencies
    for i, freq in enumerate(frequencies):
        print(f"Value {i}: Frequency {freq}")
        

[14:15:03] Processing 1 tiles
[14:15:03] Reading tiling system
[14:15:36] Tile 124W_74N - Reading mask: 33.16 segs


: 

: 

: 