# Set up the production pipeline

In [3]:
import rasterio
from osgeo import gdal, gdal_array

import numpy as np
import multiprocessing
import ctypes
from multiprocessing import RawArray
import time
import concurrent
from concurrent.futures.process import ProcessPoolExecutor
from concurrent.futures.thread import ThreadPoolExecutor
from multiprocessing import shared_memory
import math
import tempfile
from pathlib import Path
import SharedArray as sa

import multiprocessing as mp
TMP_DIR = tempfile.gettempdir()

from skmap.mapper import LandMapper
from skmap.misc import find_files
from skmap.misc import vrt_warp, ttprint
from skmap.mapper import LandMapper
from skmap.misc import find_files
import geopandas as gpd
import numexpr as ne
import numpy as np
import traceback
import os

from pathlib import Path
import time
from typing import Callable, Iterator, List,    Union
from osgeo import gdal, gdal_array
import numpy as np
import SharedArray as sa
# GDAL configuration options, keyed by option name. Only the uncommented
# entries are active; the commented ones are kept as alternatives.
# NOTE(review): nothing in the visible cells reads this dict -- presumably it is
# applied with gdal.SetConfigOption (or passed to a reader) elsewhere; confirm.
gdal_opts = {
 #'GDAL_HTTP_MULTIRANGE': 'SINGLE_GET',
 #'GDAL_HTTP_MERGE_CONSECUTIVE_RANGES': 'NO',
 'GDAL_HTTP_VERSION': '1.0',
 #'GDAL_DISABLE_READDIR_ON_OPEN': 'EMPTY_DIR',
 #'VSI_CACHE': 'FALSE',
 'CPL_VSIL_CURL_ALLOWED_EXTENSIONS': '.tif',
 #'GDAL_HTTP_CONNECTTIMEOUT': '320',
 #'CPL_VSIL_CURL_USE_HEAD': 'NO',
 #'GDAL_HTTP_TIMEOUT': '320',
 #'CPL_CURL_GZIP': 'NO'
}

In [4]:
def _model_input(tile, start_year = 2000, end_year = 2022, bands = ['blue', 'green', 'red', 'nir', 'swir1', 'swir2', 'thermal'], base_url='http://192.168.1.30:8333'):
    prediction_layers = []
    
    for year in range(start_year, end_year + 1):
        for band in bands:
            prediction_layers += [
                f'{base_url}/prod-landsat-ard2/{tile}/seasconv/{band}_glad.SeasConv.ard2_m_30m_s_' + '{year}0101_{year}0228_go_epsg.4326_v20230908.tif',
                f'{base_url}/prod-landsat-ard2/{tile}/seasconv/{band}_glad.SeasConv.ard2_m_30m_s_' + '{year}0301_{year}0430_go_epsg.4326_v20230908.tif',
                f'{base_url}/prod-landsat-ard2/{tile}/seasconv/{band}_glad.SeasConv.ard2_m_30m_s_' + '{year}0501_{year}0630_go_epsg.4326_v20230908.tif',
                f'{base_url}/prod-landsat-ard2/{tile}/seasconv/{band}_glad.SeasConv.ard2_m_30m_s_' + '{year}0701_{year}0831_go_epsg.4326_v20230908.tif',
                f'{base_url}/prod-landsat-ard2/{tile}/seasconv/{band}_glad.SeasConv.ard2_m_30m_s_' + '{year}0901_{year}1031_go_epsg.4326_v20230908.tif',
                f'{base_url}/prod-landsat-ard2/{tile}/seasconv/{band}_glad.SeasConv.ard2_m_30m_s_' + '{year}1101_{year}1231_go_epsg.4326_v20230908.tif'
            ]
    
    raster_files = []
    dict_layers_newnames = {}
    for l in prediction_layers:
    
        key = Path(l).stem.replace('{year}', '')
        value = Path(l).stem.replace('{year}', str(year))
        dict_layers_newnames[key] = value
        raster_files.append('/vsicurl/' + l.replace('{year}', str(year)))
    
    hosts = [ f'192.168.49.{i}:8333' for i in range(30,43) ]
    raster_files = [str(r).replace("192.168.49.30", f"192.168.49.{30 + int.from_bytes(Path(r).stem.encode(), 'little') % len(hosts)}") for r in raster_files]
    
    return raster_files, dict_layers_newnames

In [5]:
def make_tempdir(basedir='skmap', make_subdir = True):
    """
    Create (if missing) and return a working directory under ``TMP_DIR``.

    :param basedir: Directory name joined onto ``TMP_DIR``.
    :param make_subdir: When true, a uniquely named subdirectory is added
        below ``basedir`` and that subdirectory is returned instead.
    :returns: ``pathlib.Path`` of the directory, which exists on return.
    """
    tempdir = Path(TMP_DIR).joinpath(basedir)
    if make_subdir:
        # Borrow a unique name from tempfile. The context manager closes the
        # placeholder file deterministically instead of leaving it to the
        # garbage collector.
        with tempfile.NamedTemporaryFile() as placeholder:
            name = Path(placeholder.name).name
        tempdir = tempdir.joinpath(name)
    tempdir.mkdir(parents=True, exist_ok=True)
    return tempdir


In [6]:
def make_tempfile(basedir='skmap', prefix='', suffix='', make_subdir = False):
    """
    Return a unique file path inside a working directory from ``make_tempdir``.

    Only the parent directory is created; this function does not create the
    returned file itself.

    :param basedir: Forwarded to ``make_tempdir``.
    :param prefix: Prefix of the generated file name.
    :param suffix: Suffix of the generated file name.
    :param make_subdir: Forwarded to ``make_tempdir``.
    :returns: ``pathlib.Path`` of the directory joined with the generated name.
    """
    tempdir = make_tempdir(basedir, make_subdir=make_subdir)
    # Borrow a unique name from tempfile. The context manager closes the
    # placeholder file deterministically instead of leaving it to the
    # garbage collector.
    with tempfile.NamedTemporaryFile(prefix=prefix, suffix=suffix) as placeholder:
        filename = Path(placeholder.name).name
    return tempdir.joinpath(filename)

In [7]:

# Load the tile index and select the tile/year handled by this run.
print("Reading tiles gpkg")
tiles = gpd.read_file('ard2_final_status.gpkg')

tile = '047W_11S'
year = 2020
# Bounds of the first row whose 'TILE' column equals `tile`;
# .iloc[0] raises IndexError when no row matches.
minx, miny, maxx, maxy = tiles[tiles['TILE'] == tile].iloc[0].geometry.bounds

# Passing `year` as both start and end restricts the Landsat list to that year.
landsat_files, dict_layers_newnames = _model_input(tile, year, year)
# Static layers are the *.vrt files found under ./static, converted to plain strings.
static_files = find_files('./static', '*.vrt')
static_files = [ str(f) for f in static_files ]
n_features = 172

# NOTE(review): the active shape (3, n_features) looks like a small stand-in for
# the commented-out (4000, 4000, n_features) layout -- confirm before production use.
# shape = (4000, 4000, n_features)
shape = (3, n_features)
# Name of the shared array: a 'file://' prefix plus a unique path from make_tempfile.
array_fn = 'file://' + str(make_tempfile(prefix='shm_array'))
array = sa.create(array_fn, shape, dtype=float)
# Re-attach by name; the second argument is presumably SharedArray's read-only
# flag (False = writable) -- TODO confirm.
array = sa.attach(array_fn, False)
# Keep only the first three columns of the second axis.
# array = array[:,:,0:169]
array = array[:,0:3]
    
# The remaining steps (reading static and Landsat rasters into the shared array,
# building the layer-name lookup, in-memory calculations) are currently disabled.
# start = time.time()
# array = read_rasters(static_files, array_fn=array_fn, minx=minx, maxy=maxy)
# print(f"Reading static: {(time.time() - start):.2f} segs")

# start = time.time()
# i = len(static_files) 
# array = read_rasters(landsat_files, array_i=i, array_fn=array_fn, minx=minx, maxy=maxy)
# print(f"Reading landsat: {(time.time() - start):.2f} segs")

# lockup = [ Path(l).stem for l in static_files ] + \
#              [ Path(l).stem.replace(f'{year}', '') for l in landsat_files ]
# lockup = { lockup[i]: i for i in range(0, len(lockup)) }

# start = time.time()
# lockup, array = in_mem_calc(lockup, array)
# print(f"In memory calc: {(time.time() - start):.2f} segs")
# print(f"Number of feature: {len(lockup)}")
# print(f"Array shape: {array.shape}")


Reading tiles gpkg


# Fill the shared array with data

In [7]:
# import matplotlib.pyplot as plt
# import sys
# import numpy as np
# import os
# from skmap.data import toy
# from skmap.io import process
# import importlib
# import glob
# import ctypes


# n_pix = 5
# n_feat = 12
# d_type = np.double
# data = np.random.rand(n_pix, n_feat).astype(d_type)
# data[:,0:4] = np.nan
# libfile = glob.glob('build/*/skmap_bindings*.so')[0]
# skmap_bindings = ctypes.CDLL(libfile)
# filled = np.empty(data.shape, dtype=d_type)
# filled_qa = np.empty(data.shape, dtype=d_type)
# skmap_bindings.parReadFiles.restype = ctypes.c_int
# skmap_bindings.parReadFiles.argtypes = \
#     [ctypes.c_uint,
#     ctypes.c_uint,
#     np.ctypeslib.ndpointer(dtype=d_type, ndim=2, flags='WRITEABLE')]
# res = skmap_bindings.parReadFiles(n_pix, n_feat, data[:])

In [2]:
import numpy as np
import skmap_bindings
import time
# Parameters of the parReadFiles test in the next cell.
# n_feat and n_pix are the two dimensions of the test buffer.
n_feat = 6
# NOTE(review): presumably the pixel count of one 4004 x 4004 tile -- confirm.
n_pix = 4004*4004
# Thread count passed to skmap_bindings.parReadFiles.
n_threads = 48

ImportError: /home/dconsoli/Documents/scikit-map-pasture-class/skmap_bindings.cpython-38-x86_64-linux-gnu.so: undefined symbol: _ZN11GDALDataset8RasterIOE10GDALRWFlagiiiiPvii12GDALDataTypeiPixxxP20GDALRasterIOExtraArg

In [None]:
# Time one call to the native parReadFiles binding on a float32 buffer.
# The buffer is C-ordered, shaped (n_feat, n_pix) and pre-filled with ones.
test_np = np.ones((n_feat,n_pix), dtype=np.float32, order='C')
print("Before")
start = time.time()
# Arguments: the buffer, its two dimensions, and the thread count.
skmap_bindings.parReadFiles(test_np, test_np.shape[0], test_np.shape[1], n_threads)
print(f"After C++ in {time.time() - start}")
# Presumably printed to inspect what the binding wrote into the buffer -- TODO confirm.
print(test_np)