In [None]:
import os
import shutil
# Process-wide environment configuration, applied ahead of the remaining
# imports in this cell.
os.environ.update({
    # Open MPI / geospatial library settings.
    'OMPI_MCA_rmaps_base_oversubscribe': '1',
    'USE_PYGEOS': '0',
    'PROJ_LIB': '/opt/conda/share/proj/',
    # Thread-count settings for NumExpr, OpenMP, OpenBLAS, MKL and vecLib;
    # every one of them is set to 48.
    'NUMEXPR_MAX_THREADS': '48',
    'NUMEXPR_NUM_THREADS': '48',
    'OMP_THREAD_LIMIT': '48',
    'OMP_NUM_THREADS': '48',
    'OPENBLAS_NUM_THREADS': '48',
    'MKL_NUM_THREADS': '48',
    'VECLIB_MAXIMUM_THREADS': '48',
})
import gc
from datetime import datetime
from osgeo import gdal, gdal_array
from pathlib import Path
from typing import Callable, Iterator, List,        Union
import bottleneck as bn
import geopandas as gpd
import numpy as np
import pandas as pd
import skmap_bindings
import tempfile
import time
import sys
import random
import csv
from scipy.signal import savgol_coeffs
import numpy as np
from skmap.io import process
import matplotlib.pyplot as plt
from mpi4py import MPI

# GDAL configuration options handed to the skmap_bindings calls below.
gdal_opts = dict(
    GDAL_HTTP_VERSION='1.0',
    CPL_VSIL_CURL_ALLOWED_EXTENSIONS='.tif',
)

# Presumably GeoTIFF creation options for written rasters; not referenced
# anywhere in the visible part of this notebook -- TODO confirm usage.
co = [
    'TILED=YES',
    'BIGTIFF=YES',
    'COMPRESS=DEFLATE',
    'BLOCKXSIZE=1024',
    'BLOCKYSIZE=1024',
]

# Placeholder; nothing in the visible code assigns a real executor.
executor = None

def ttprint(*args, **kwargs):
    """Print a message prefixed with the current wall-clock time.

    The output has the form ``[HH:MM:SS] <message>``. All positional and
    keyword arguments are forwarded to the built-in ``print``.

    Parameters
    ----------
    *args
        Objects to print, exactly as for ``print``.
    **kwargs
        Keyword arguments accepted by ``print`` (``sep``, ``end``, ``file``,
        ``flush``). ``flush`` defaults to ``True`` but may be overridden.
    """
    from datetime import datetime

    # Default to flushing, but let the caller override it; passing
    # ``flush=True`` unconditionally alongside **kwargs raised a TypeError
    # whenever the caller supplied ``flush`` themselves.
    kwargs.setdefault('flush', True)

    # Send the timestamp to the same stream as the message so that
    # ``file=...`` does not split the line between stdout and the target.
    print(f'[{datetime.now():%H:%M:%S}] ', end='', file=kwargs.get('file'))
    print(*args, **kwargs)

def make_tempdir(basedir='skmap', make_subdir = True):
    """Create (if needed) and return a temporary working directory.

    The directory lives under ``TMP_DIR/<basedir>``. When the module-level
    name ``TMP_DIR`` has not been defined, the system temporary directory
    reported by ``tempfile.gettempdir()`` is used as the root instead of
    failing with ``NameError``.

    Parameters
    ----------
    basedir : str
        Name of the directory created under the temporary root.
    make_subdir : bool
        When true, a randomly named sub-directory is appended to the path.

    Returns
    -------
    pathlib.Path
        The directory path; it exists when the function returns.
    """
    try:
        root = TMP_DIR
    except NameError:
        # TMP_DIR is expected to be provided by the notebook; fall back to
        # the platform default so the helper remains usable without it.
        root = tempfile.gettempdir()

    tempdir = Path(root).joinpath(basedir)
    if make_subdir:
        # The temporary file is only used as a source of a unique name.
        # Close (and thereby delete) it deterministically rather than
        # leaving the open handle to the garbage collector.
        with tempfile.NamedTemporaryFile() as scratch:
            name = Path(scratch.name).name
        tempdir = tempdir.joinpath(name)
    tempdir.mkdir(parents=True, exist_ok=True)
    return tempdir

def make_tempfile(basedir='skmap', prefix='', suffix='', make_subdir = False):
    """Return a unique file path inside a temporary working directory.

    The containing directory is obtained from ``make_tempdir`` and exists on
    return; the file itself is NOT created -- only its path is returned.

    Parameters
    ----------
    basedir : str
        Forwarded to ``make_tempdir``.
    prefix, suffix : str
        Prefix and suffix of the generated file name.
    make_subdir : bool
        Forwarded to ``make_tempdir``.

    Returns
    -------
    pathlib.Path
        ``<tempdir>/<prefix><random><suffix>``.
    """
    tempdir = make_tempdir(basedir, make_subdir=make_subdir)
    # The NamedTemporaryFile is used purely as a unique-name generator. Close
    # it explicitly so the scratch file is removed right away instead of
    # whenever the garbage collector finalises the open handle.
    with tempfile.NamedTemporaryFile(prefix=prefix, suffix=suffix) as scratch:
        filename = Path(scratch.name).name
    return tempdir.joinpath(filename)

# --- Run configuration -----------------------------------------------------
# Years covered: 2001 through 2009 inclusive.
years = range(2001, 2010)
n_years = len(years)

# Tile window: full 4004 x 4004 pixels starting at the origin.
x_size = y_size = 4004
x_off = y_off = 0
n_pix = x_size * y_size

n_threads = 48
no_data = 255
bands_list = [1]
file_ending = '_go_epsg.4326_v20230908.tif'

# Number of time steps: images per year times number of years.
n_imag_per_year = 6
out_index_offset = 0
n_s = n_years * n_imag_per_year

# Last day of each month, January..December (28 for February).
m_end = [31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31]

# tiles = ['009E_04N', '009E_51N', '013E_61N', '050W_07S', '085W_52N', '091W_37N', '115E_03S', '127E_42N']
tile = '085W_52N'

ttprint(f"Processing tile {tile}")

# Build three parallel lists with one entry per (year, month) pair.
# The MODIS inputs currently point at one fixed file for every entry; the
# per-month paths they stand in for are kept as comments inside the loop.
modis_file = f'data/{tile}/modis/modis_ndvi_20000701.tif'
modis_mosaic = f'../../Desktop/veg_ndvi_mod13q1.v061_p50_250m_s0..0cm_2000.07.01..2000.07.31_v1.tif'

modis_mosaics, modis_files, tile_files = [], [], []
for year in years:
    # The Landsat aggregate path depends on the year only, so the same path
    # is repeated for all twelve months of that year.
    landsat_file = f"data/{tile}/landsat_agg/blue_glad.ard2_m_30m_s_{year}0101_{year}0228_go_epsg.4326_v20230908.tif"
    for m in range(12):
        # modis_files.append(f'data/{tile}/modis/modis_ndvi_{year}{str(m+1).zfill(2)}01.tif')
        modis_files.append(modis_file)
        # modis_mosaics.append(f'/vsicurl/http://192.168.1.{random.randint(30,43)}:8333/global/veg/veg_ndvi_mod13q1.v061_p50_250m_s0..0cm_{year}.{str(m+1).zfill(2)}.01..{year}.{str(m+1).zfill(2)}.{m_end[m]}_v1.tif')
        modis_mosaics.append(modis_mosaic)
        tile_files.append(landsat_file)

# Output buffers, one row per input file and one column per tile pixel.
# With n_imag_per_year = 6 the row count n_s*2 equals the twelve entries per
# year appended to modis_files / modis_mosaics above.
# Both buffers use n_pix for the column count so they follow x_size / y_size
# instead of a separately hard-coded 4004*4004.
modis_month_data = np.empty((n_s*2, n_pix), dtype=np.float32)
warp_data = np.empty((n_s*2, n_pix), dtype=np.float32)

# Read every file in modis_files into modis_month_data and time the call.
# NOTE(review): the trailing arguments (-32768, np.nan) look like a nodata
# value and its replacement -- confirm against the skmap_bindings API.
start = time.time()
skmap_bindings.readData(modis_month_data, n_threads, modis_files, range(len(modis_files)), x_off, y_off, x_size, y_size, bands_list, gdal_opts, -32768, np.nan)
ttprint(f"Tile {tile} - Reading data: {(time.time() - start):.2f} segs")

# This process must be the only member of COMM_WORLD; otherwise abort.
comm = MPI.COMM_WORLD
rank, size = comm.Get_rank(), comm.Get_size()
if size != 1:
    print("This example requires exactly one parent process.")
    comm.Abort(1)

# One child process is spawned per entry of modis_mosaics.
n_proc = len(modis_mosaics)

# Launch child.py with the current interpreter. The two file lists travel as
# comma-joined command-line arguments, followed by the thread count and the
# number of pixels per tile. Only the spawn call itself is timed.
start = time.time()
intercomm = MPI.COMM_SELF.Spawn(
    sys.executable,
    args=[
        'child.py',
        ",".join(tile_files),
        ",".join(modis_mosaics),
        str(n_threads),
        str(n_pix),
    ],
    maxprocs=n_proc,
)
print(f"Time spawn: {(time.time() - start):.2f} segs")


# start = time.time()
# # Gather results from child processes
# data = np.empty((n_proc, n_pix), dtype=np.float32)
# time_tmp = 0
# for i in range(n_proc):
#     received_array = np.empty(n_pix, dtype=np.float32)
#     intercomm.Recv(received_array, source=MPI.ANY_SOURCE, tag=i)
#     start_tmp = time.time()
#     data[i, :] = received_array
#     time_tmp += time.time() - start_tmp
# print(f"Time copy Python: {time_tmp:.2f} segs")
# print(f"Time gathering: {(time.time() - start):.2f} segs")


# Collect one n_pix-long float32 vector per spawned child into `data`.
# `start` times the whole gathering phase; `time_tmp` accumulates only the
# time spent inside skmap_bindings.copyVecInMatrixRow.
start = time.time()
time_tmp = 0
# Result matrix: one row per child process, one column per pixel.
data = np.empty((n_proc, n_pix), dtype=np.float32)
for i in range(n_proc):
    # Fresh receive buffer for this message.
    received_array = np.empty(n_pix, dtype=np.float32)
    # Messages are matched by tag i from whichever child sends it, so rows are
    # filled in tag order regardless of the order in which children finish.
    # NOTE(review): presumably each child tags its message with its own
    # rank -- confirm in child.py.
    intercomm.Recv(received_array, source=MPI.ANY_SOURCE, tag=i)
    start_tmp = time.time()
    # Presumably copies received_array into row i of data using n_threads
    # threads -- confirm against the skmap_bindings API.
    skmap_bindings.copyVecInMatrixRow(data, n_threads, received_array, i)
    time_tmp += time.time() - start_tmp
print(f"Time copy C++: {time_tmp:.2f} segs")
print(f"Time gathering: {(time.time() - start):.2f} segs")

# Tear down the connection to the children, then shut MPI down.
# NOTE(review): once MPI.Finalize() has run, MPI presumably cannot be used
# again in this kernel, so re-running the MPI cells requires a kernel restart
# -- confirm.
intercomm.Disconnect()
MPI.Finalize()

In [None]:
# Time a single warpTile call on the first tile file / mosaic pair.
# NOTE(review): warp_data is passed to warpTile here, but the comparison
# below uses `data` (gathered from the MPI children), not warp_data --
# confirm this is intended.
start = time.time()
skmap_bindings.warpTile(warp_data, n_threads, gdal_opts, tile_files[0], modis_mosaics[0])
ttprint(f"Tile {tile} - Warping data: {(time.time() - start):.2f} segs")

# Compare the first gathered row with the first row read by readData.
# Only row 0 is rounded: rounding the whole (n_proc x n_pix) matrix just to
# keep one row allocates a full-size temporary for no benefit.
test_data = np.round(data[0])
ref_data = modis_month_data[0]
diff_data = test_data - ref_data

# Summary statistics of the per-pixel difference.
print(f"Maximum pixel difference {np.max(np.abs(diff_data))}")
print(f"Relative norm error {np.linalg.norm(diff_data)/np.linalg.norm(ref_data)*100} %")
print(f"Different pixels {np.count_nonzero(diff_data)/n_pix * 100} %")

In [None]:
# Show the reference image, the test image and their difference as three
# separate figures. Only the difference figure gets a colorbar.
# The flat pixel vectors are reshaped using the configured tile size rather
# than a hard-coded 4004 x 4004.
# NOTE(review): assumes row-major layout (rows = y_size, cols = x_size);
# the tile is square today, so the order only matters for non-square tiles.
panels = (
    (ref_data, "Bash", False),
    (test_data, "C++", False),
    (diff_data, "Difference", True),
)
for image, title, with_colorbar in panels:
    plt.imshow(image.reshape((y_size, x_size)))
    if with_colorbar:
        plt.colorbar()
    plt.title(title)
    plt.show()