In [1]:
import os
import shutil
# Process-wide settings, applied here so they are already in place when the
# imports that follow this block are executed.
os.environ.update({
    'USE_PYGEOS': '0',
    'PROJ_LIB': '/opt/conda/share/proj/',
})
# Every threading-related variable gets the same value ('48').
os.environ.update(dict.fromkeys(
    (
        'NUMEXPR_MAX_THREADS',
        'NUMEXPR_NUM_THREADS',
        'OMP_THREAD_LIMIT',
        "OMP_NUM_THREADS",
        "OPENBLAS_NUM_THREADS",
        "MKL_NUM_THREADS",
        "VECLIB_MAXIMUM_THREADS",
    ),
    '48',
))
import gc
from datetime import datetime
from osgeo import gdal, gdal_array
from pathlib import Path
from typing import Callable, Iterator, List,        Union
import bottleneck as bn
import geopandas as gpd
import numpy as np
import pandas as pd
import skmap_bindings
import tempfile
import time
import sys
import csv
from scipy.signal import savgol_coeffs
import numpy as np
from skmap.io import process
import matplotlib.pyplot as plt
import multiprocessing

# GDAL configuration options; handed to skmap_bindings.readData further down.
gdal_opts = dict(
    GDAL_HTTP_VERSION='1.0',
    CPL_VSIL_CURL_ALLOWED_EXTENSIONS='.tif',
)

# NOTE(review): these look like GeoTIFF creation options; the list is not used
# anywhere in the visible cells.
co = [
    'TILED=YES',
    'BIGTIFF=YES',
    'COMPRESS=DEFLATE',
    'BLOCKXSIZE=1024',
    'BLOCKYSIZE=1024',
]

# Placeholder, unset in the visible cells.
executor = None

def ttprint(*args, **kwargs):
    """Print a message prefixed with the current wall-clock time.

    The prefix has the form ``[HH:MM:SS] `` and is written to the same stream
    as the message.

    Parameters
    ----------
    *args
        Objects to print, forwarded to the built-in ``print``.
    **kwargs
        Keyword arguments forwarded to ``print`` (``sep``, ``end``, ``file``,
        ``flush``). ``flush`` defaults to ``True`` but may be overridden.
    """
    # Use setdefault so that a caller-supplied ``flush`` does not collide with
    # ours (passing it twice raises TypeError).
    kwargs.setdefault('flush', True)
    # Send the timestamp to the caller's stream; ``file=None`` means stdout.
    print(f'[{datetime.now():%H:%M:%S}] ', end='', file=kwargs.get('file'))
    print(*args, **kwargs)

def make_tempdir(basedir='skmap', make_subdir = True):
    """Create (if needed) and return a working directory for temporary data.

    The directory lives under the notebook-level ``TMP_DIR`` when that global
    is defined and non-empty; otherwise the system temporary directory
    (``tempfile.gettempdir()``) is used, so the function also works on a fresh
    kernel where ``TMP_DIR`` has not been set.

    Parameters
    ----------
    basedir : str
        Name of the directory created below the temporary root.
    make_subdir : bool
        When true, a new uniquely named sub-directory is created inside
        ``basedir`` and returned instead of ``basedir`` itself.

    Returns
    -------
    pathlib.Path
        Path of the existing directory.
    """
    root = globals().get('TMP_DIR') or tempfile.gettempdir()
    tempdir = Path(root).joinpath(basedir)
    tempdir.mkdir(parents=True, exist_ok=True)
    if make_subdir:
        # mkdtemp picks the unique name and creates the directory in one step,
        # without leaving a throwaway file in the system temp directory.
        tempdir = Path(tempfile.mkdtemp(dir=str(tempdir)))
    return tempdir

def make_tempfile(basedir='skmap', prefix='', suffix='', make_subdir = False):
    """Return a unique file path inside a temporary working directory.

    Only the path is produced: the directory is created by ``make_tempdir``,
    but no file is created at the returned location.

    Parameters
    ----------
    basedir : str
        Forwarded to ``make_tempdir``.
    prefix, suffix : str
        Prefix and suffix of the generated file name.
    make_subdir : bool
        Forwarded to ``make_tempdir``.

    Returns
    -------
    pathlib.Path
        ``<temporary directory>/<prefix><random part><suffix>``.
    """
    tempdir = make_tempdir(basedir, make_subdir=make_subdir)
    # NamedTemporaryFile serves only as a source of a unique name; the
    # with-block closes the placeholder right away instead of leaving the
    # open handle to the garbage collector.
    with tempfile.NamedTemporaryFile(prefix=prefix, suffix=suffix) as placeholder:
        unique_name = Path(placeholder.name).name
    return tempdir.joinpath(unique_name)

# Size and offset of the raster window passed to skmap_bindings.readData.
x_size, y_size = (4800, 4800)
x_off, y_off = (0, 0)
n_threads = multiprocessing.cpu_count()
# Integer class codes; referenced only by the commented-out export cell.
no_data, delc, mod_decl, stress, stable, incr = (0, 1, 2, 3, 4, 5)
bands_list = [1,]

data_dir = 'data_mirova'
years = range(2000, 2012)
n_years = len(years)
assert years[0] >= 2000, "Data not available before 2000, the first year must be 2000 or later"
# The last year must already be over, i.e. strictly before the current year.
assert years[-1] < datetime.now().year, "The last year must be earlier than the current year"

In [None]:
start = time.time()
# Group the MOD13Q1 NDVI GeoTIFF paths found in data_dir by year.
files_by_year = {year: [] for year in years}
# os.listdir returns entries in arbitrary order; sorting keeps the file order
# (and therefore the run) reproducible across machines and file systems.
for filename in sorted(os.listdir(data_dir)):
    if filename.startswith('MOD13Q1.A') and filename.endswith('_ndvi.tif'):
        # Characters 9-12 of the file name hold the four-digit year.
        year = int(filename[9:13])
        if year in years:
            file_path = os.path.join(data_dir, filename)
            files_by_year[year].append(file_path)

# Check that all files are there and compute the NDVI integrals
NDVI_integrals = np.empty((n_years, x_size * y_size), dtype=np.float32)
for i, year in enumerate(years):
    n_year_files = len(files_by_year[year])
    # 20 files are expected for 2000 and 23 for every other year.
    n_expected_files = 20 if year == 2000 else 23
    assert n_year_files == n_expected_files, (
        f"Error in the data download, expected {n_expected_files} files "
        f"for the year {year} but found {n_year_files}"
    )
    # One row per file, one column per pixel of the window.
    NDVI_tmp = np.empty((n_year_files, x_size * y_size), dtype=np.float32)
    NDVI_tmp_t = np.empty(NDVI_tmp.shape[::-1], dtype=np.float32)
    # NOTE(review): the last two arguments presumably map the value -3000 to NaN
    # while reading -- confirm against the skmap_bindings.readData signature.
    skmap_bindings.readData(NDVI_tmp, n_threads, files_by_year[year], range(n_year_files), x_off, y_off, x_size, y_size, bands_list, gdal_opts, -3000, np.nan)
    skmap_bindings.transposeArray(NDVI_tmp, n_threads, NDVI_tmp_t)
    # The yearly value is written into row i of NDVI_integrals.
    skmap_bindings.nanMean(NDVI_tmp_t, n_threads, NDVI_integrals[i])
ttprint(f"Reading data and computing NDVI integrals: {(time.time() - start):.2f} sec")



In [5]:
# Linear regression of the yearly NDVI values against the year index.
start = time.time()
# Pixel-major copy of NDVI_integrals: one row per pixel, one column per year.
NDVI_integrals_t = np.empty(NDVI_integrals.shape[::-1], dtype=np.float32)
skmap_bindings.transposeArray(NDVI_integrals, n_threads, NDVI_integrals_t)
# Time axis 0, 1, ..., n_years - 1 as float32.
time_samples = np.arange(n_years, dtype=np.float32)
# One output value per pixel for each regression coefficient.
intercept = np.empty(NDVI_integrals_t.shape[:1], dtype=np.float32)
slope = np.empty(NDVI_integrals_t.shape[:1], dtype=np.float32)
skmap_bindings.linearRegression(NDVI_integrals_t, n_threads, time_samples, intercept, slope)
ttprint(f"Computing C++ linear regression: {(time.time() - start):.2f} sec")

# Mann-Kendall p-values, one per pixel, from the same pixel-major array.
start = time.time()
p_values = np.empty(NDVI_integrals_t.shape[:1], dtype=np.float32)
skmap_bindings.computeMannKendallPValues(NDVI_integrals_t, n_threads, p_values)
ttprint(f"Computing p-values: {(time.time() - start):.2f} sec")


[10:49:21] Computing C++ linear regression: 0.29 sec
[10:49:22] Computing p-values: 1.47 sec


In [None]:
# Quick-look maps of the per-pixel results, one titled figure per layer.
for layer_title, layer_values in (
    ("Mann-Kendall p-value", p_values),
    (f"NDVI integral, year {years[0]}", NDVI_integrals[0]),
    ("NDVI linear-regression slope", slope),
):
    fig, ax = plt.subplots()
    # An image array is (rows, columns) = (y_size, x_size).
    # NOTE(review): assumes skmap_bindings.readData fills pixels row by row;
    # identical to the previous (x_size, y_size) for the current square tile.
    image = ax.imshow(np.reshape(layer_values, (y_size, x_size)))
    fig.colorbar(image, ax=ax)
    ax.set_title(layer_title)
    plt.show()

In [None]:
# ttprint(f"Processing file")

# start = time.time()
# out_data = NDVI_tmp.copy()
# # Some random example to be compliant with the required format
# out_data[NDVI_tmp>8000] = incr
# out_data[out_data>6000] = stable
# out_data[out_data>4000] = stress
# out_data[out_data>2000] = mod_decl
# out_data[NDVI_tmp<=2000] = delc
# out_data[NDVI_tmp==-3000] = no_data
# ttprint(f"Processing data: {(time.time() - start):.2f} segs")

# out_raseter = [f'test_Mirova_prod_subind_{i}' for i in range(len(out_data))]
# base_raseter = [files_by_year[years[0]][0] for i in range(len(out_data))]

# start = time.time()
# skmap_bindings.writeByteData(out_data, n_threads, gdal_opts, base_raseter, f'/home/dconsoli/Desktop', out_raseter, range(len(out_raseter)),
#     x_off, y_off, x_size, y_size, 0)
# ttprint(f"Saving data: {(time.time() - start):.2f} segs")
