In [1]:
%load_ext autoreload
%autoreload 2


In [2]:
# %load_ext cudf.pandas
# import pandas as pd
# print(pd)


In [3]:
# Optimizations
# GDAL optimizations
import multiprocessing as mp
import os

# Leave two cores free for the rest of the system, but never hand GDAL a zero
# or negative thread count on machines that only have one or two cores.
cpu_count: int = mp.cpu_count()
num_cores: int = max(1, cpu_count - 2)
os.environ["GDAL_NUM_THREADS"] = f"{num_cores}"
# Raster block cache size; GDAL documents values below 100000 as megabytes.
os.environ["GDAL_CACHEMAX"] = "1024"


## Libraries

In [4]:
# Imports
from pathlib import Path
import numpy as np


In [5]:
# Add root to path
import sys

sys.path.append("..")
from component.script.utilities.file_filter import (
    list_files_by_extension,
    filter_files_by_keywords,
    filter_files_by_keywords_strict,
)


## Set user parameters

In [6]:
# Project identifier; it names the project sub-folder created under the data folder.
project_name = "test"


In [7]:
# Forest data source identifier.
# NOTE(review): not read by any code visible in this notebook; presumably it only
# has to agree with the rasters prepared upstream - confirm.
forest_source = "gfc"  # one of: "gfc", "tmf"
# Tree cover threshold.
# NOTE(review): looks like a percentage (file names carry "_10_") - confirm.
tree_cover_threshold = 10
# Reference years. create_full_period_dict() reads them by position
# (years[0], years[1], years[2]) and rejects lists with fewer than three entries;
# ascending order is expected so that time intervals come out positive.
years = [2015, 2020, 2024]


In [8]:
# Variables looked up once, without any year token in the file search
# (create_full_period_dict() calls find_file(..., dynamic=False) for these).
static_variables = ["altitude", "slope", "pa", "subj", "dist_rivers", "dist_roads"]
# Variables whose raster depends on the period: the file search adds the
# relevant year(s) to the keywords (find_file(..., dynamic=True)).
dynamic_variables = [
    "forest",
    "deforestation",
    "forest_edge",
    "dist_towns",
]


In [9]:
# Arguments forwarded to sample() further down in the notebook.
# Requested number of points per class (passed as `nsamp`); it is overridden
# inside sample() whenever `adapt` is True.
n_samples = 10000
# Seed given to np.random.seed() (passed as `seed`).
random_seed = 1
# Side of the square spatial cells, in km (passed as `csize`).
spatial_cell_size_km = 4
# When True, sample() derives the number of points from the forest area.
adapt = True


## Connect folders

In [10]:
# The repository root is taken to be the parent of the current working
# directory, so the notebook must be started from its own folder.
root_folder: Path = Path.cwd().parent
# All project data lives under <root>/data; created on first run.
downloads_folder: Path = root_folder / "data"
downloads_folder.mkdir(parents=True, exist_ok=True)


In [11]:
# <data>/<project_name>: root folder of this project.
project_folder = downloads_folder / project_name
project_folder.mkdir(parents=True, exist_ok=True)
# <project>/data: rasters searched by list_files_by_extension() below.
processed_data_folder = project_folder / "data"
processed_data_folder.mkdir(parents=True, exist_ok=True)
# <project>/far_samples: sample() writes one sub-folder per period here.
sampling_folder = project_folder / "far_samples"
sampling_folder.mkdir(parents=True, exist_ok=True)


## Select forest cover change file

In [12]:
# List all raster files in the processed data folder
input_raster_files = list_files_by_extension(processed_data_folder, [".tiff", ".tif"])
# Keep the forest-cover-change stack(s) and fail with a clear message when none
# is present, instead of a bare IndexError on the [0] lookup.
defostack_files = filter_files_by_keywords(input_raster_files, ["defostack"])
if not defostack_files:
    raise FileNotFoundError(
        f"No 'defostack' raster (.tif/.tiff) found in {processed_data_folder}"
    )
forest_change_file = defostack_files[0]
forest_change_file


PosixPath('/home/jose/workspace/deforisk-jupyter-nb-v2/data/test/data/test_defostack_gfc_10_2015_2020_2024_reprojected.tif')

## Period dictionaries

In [13]:
import re
from pathlib import Path


def create_full_period_dict(
    years: list[int],
    period: str,
    processed_data_folder: Path,
    static_variables: list[str],
    dynamic_variables: list[str],
) -> dict:
    """
    Create a comprehensive dictionary for a given modeling period.
    Handles period-independent and multi-temporal variables separately.

    The result combines the period metadata (``period``, ``train_period``,
    ``initial_year``, ``final_year``, ``defor_value``, ``time_interval``,
    ``var_year``) with one entry per requested variable, whose value is the
    matching raster file found in ``processed_data_folder`` or ``None`` when
    no file could be selected.

    :param years: Reference years; only the first three positions are used.
    :param period: One of ``"calibration"``, ``"validation"``,
        ``"historical"`` or ``"forecast"``.
    :param processed_data_folder: Folder searched for ``.tif``/``.tiff`` files.
    :param static_variables: Variable names searched without year tokens.
    :param dynamic_variables: Variable names searched together with the
        year(s) relevant for the period.

    :raises ValueError: If ``years`` has fewer than three elements or
        ``period`` is not a known period name.

    :return: The period dictionary described above.
    """

    if len(years) < 3:
        raise ValueError("The 'years' list must contain at least three elements.")

    # For each period: which positions of `years` bound it, which deforestation
    # value(s) it is associated with, and which year selects dynamic variables.
    configs = {
        "calibration": {
            "train_period": "calibration",
            "initial_idx": 0,
            "final_idx": 1,
            "defor_value": 1,
            "var_idx": 0,
        },
        "validation": {
            "train_period": "calibration",
            "initial_idx": 1,
            "final_idx": 2,
            "defor_value": 1,
            "var_idx": 1,
        },
        "historical": {
            "train_period": "historical",
            "initial_idx": 0,
            "final_idx": 2,
            "defor_value": [1, 2],
            "var_idx": 0,
        },
        "forecast": {
            "train_period": "historical",
            "initial_idx": 0,
            "final_idx": 2,
            "defor_value": [1, 2],
            "var_idx": 2,
        },
    }

    if period not in configs:
        # The original message referenced an undefined name (`config`), which
        # turned this intended ValueError into a NameError.
        raise ValueError(
            f"Unknown period '{period}'. Must be one of: {list(configs)}."
        )

    c = configs[period]

    # --- Base period dictionary ---
    period_dict = {
        "period": period,
        "train_period": c["train_period"],
        "initial_year": years[c["initial_idx"]],
        "final_year": years[c["final_idx"]],
        "defor_value": c["defor_value"],
        "time_interval": years[c["final_idx"]] - years[c["initial_idx"]],
        "var_year": years[c["var_idx"]],
    }

    # Year tokens as strings, ready to be used as file-name keywords
    initial_year = str(period_dict["initial_year"])
    final_year = str(period_dict["final_year"])
    var_year = str(period_dict["var_year"])
    period_name = str(period_dict["period"])

    variable_file_mapping = {"period": period}
    input_raster_files = list_files_by_extension(
        processed_data_folder, [".tiff", ".tif"]
    )

    # --- Modular file search ---
    def _is_token_separate_in_name(token: str, name: str) -> bool:
        """
        Return True if `token` occurs in `name` as a separate "word".

        A purely numeric token only needs to occur as a substring. Any other
        token must not be directly preceded or followed by an ASCII letter or
        digit; underscores and other punctuation count as separators.
        """
        if token.isdigit():  # years or other numbers: plain substring search
            return token in name
        # Regex ensuring the token is not glued to letters or digits
        pattern = rf"(?<![0-9A-Za-z]){re.escape(token)}(?![0-9A-Za-z])"
        return re.search(pattern, name) is not None

    def _strict_candidate_filter(candidates, tokens):
        """
        Keep only the candidates whose lower-cased path contains every token
        as a separate "word" (see _is_token_separate_in_name).
        """
        filtered = []
        for p in candidates:
            s = str(p).lower()
            if all(_is_token_separate_in_name(tok.lower(), s) for tok in tokens):
                filtered.append(p)
        return filtered

    def find_file(var_name, dynamic=False):
        """
        Find the raster file matching `var_name`.

        The variable name is split on "_" into keywords. For dynamic
        variables the relevant year(s) of the period are added to the
        keywords. Returns the selected path, or None when nothing matches or
        several candidates remain that the strict filter cannot separate.
        """
        parts = var_name.split("_")
        include_terms = []
        # Single-word names (e.g. "forest") must not pick up derived layers
        if len(parts) == 1:
            exclude_terms = ["distance", "edge"]
        else:
            exclude_terms = None

        if dynamic:
            if period_name != "forecast":
                # Search for files carrying the years of the period
                if "deforestation" in parts:
                    include_terms = [*parts, initial_year, final_year]
                else:
                    include_terms = [*parts, initial_year]
            elif period_name == "forecast":
                include_terms = [*parts, var_year]
        else:
            # NOTE: `include_terms` aliases `parts` here, so the "distance"
            # keyword appended below is also seen by the strict filter.
            include_terms = parts

        # Distance layers: require "distance" when the name says "dist"
        if "dist" in parts and "distance" not in include_terms:
            include_terms.append("distance")

        # NOTE(review): the positional flags (False, ..., True) belong to the
        # project helper filter_files_by_keywords; confirm their meaning there.
        files = filter_files_by_keywords(
            input_raster_files, include_terms, False, exclude_terms, True
        )
        # Forecast fallback: nothing found for the forecast year, so retry
        # with the middle year (years[1])
        if not files and period_name == "forecast":
            include_terms = [*parts, str(years[1])]
            files = filter_files_by_keywords(
                input_raster_files, include_terms, False, exclude_terms, True
            )
        # Still nothing: report the variable as missing
        if not files:
            return None

        # A single candidate is unambiguous
        if len(files) == 1:
            return files[0]
        strict = _strict_candidate_filter(files, parts)
        if strict:
            # If several candidates remain, return the first one (heuristic)
            return strict[0]
        # Several loose matches but no strict match: treat as not found
        return None

    # --- Look up period-independent variables ---
    for var in static_variables:
        variable_file_mapping[var] = find_file(var, dynamic=False)

    # --- Look up multi-temporal variables ---
    for var in dynamic_variables:
        variable_file_mapping[var] = find_file(var, dynamic=True)

    # --- Final merge ---
    period_dict.update(variable_file_mapping)
    return period_dict


In [14]:
# Period dictionary for the calibration period (first to second reference year)
calibration_files = create_full_period_dict(
    years=years,
    period="calibration",
    processed_data_folder=processed_data_folder,
    static_variables=static_variables,
    dynamic_variables=dynamic_variables,
)
calibration_files


{'period': 'calibration',
 'train_period': 'calibration',
 'initial_year': 2015,
 'final_year': 2020,
 'defor_value': 1,
 'time_interval': 5,
 'var_year': 2015,
 'altitude': PosixPath('/home/jose/workspace/deforisk-jupyter-nb-v2/data/test/data/test_altitude_reprojected.tif'),
 'slope': PosixPath('/home/jose/workspace/deforisk-jupyter-nb-v2/data/test/data/test_slope_reprojected.tif'),
 'pa': PosixPath('/home/jose/workspace/deforisk-jupyter-nb-v2/data/test/data/test_pa_reprojected.tif'),
 'subj': PosixPath('/home/jose/workspace/deforisk-jupyter-nb-v2/data/test/data/test_subj_reprojected.tif'),
 'dist_rivers': PosixPath('/home/jose/workspace/deforisk-jupyter-nb-v2/data/test/data/test_rivers_reprojected_distance.tif'),
 'dist_roads': PosixPath('/home/jose/workspace/deforisk-jupyter-nb-v2/data/test/data/test_roads_reprojected_distance.tif'),
 'forest': PosixPath('/home/jose/workspace/deforisk-jupyter-nb-v2/data/test/data/test_forest_gfc_10_2015_reprojected.tif'),
 'deforestation': PosixPath

In [15]:
# Period dictionary for the validation period (second to third reference year)
validation_files = create_full_period_dict(
    years=years,
    period="validation",
    processed_data_folder=processed_data_folder,
    static_variables=static_variables,
    dynamic_variables=dynamic_variables,
)
validation_files


{'period': 'validation',
 'train_period': 'calibration',
 'initial_year': 2020,
 'final_year': 2024,
 'defor_value': 1,
 'time_interval': 4,
 'var_year': 2020,
 'altitude': PosixPath('/home/jose/workspace/deforisk-jupyter-nb-v2/data/test/data/test_altitude_reprojected.tif'),
 'slope': PosixPath('/home/jose/workspace/deforisk-jupyter-nb-v2/data/test/data/test_slope_reprojected.tif'),
 'pa': PosixPath('/home/jose/workspace/deforisk-jupyter-nb-v2/data/test/data/test_pa_reprojected.tif'),
 'subj': PosixPath('/home/jose/workspace/deforisk-jupyter-nb-v2/data/test/data/test_subj_reprojected.tif'),
 'dist_rivers': PosixPath('/home/jose/workspace/deforisk-jupyter-nb-v2/data/test/data/test_rivers_reprojected_distance.tif'),
 'dist_roads': PosixPath('/home/jose/workspace/deforisk-jupyter-nb-v2/data/test/data/test_roads_reprojected_distance.tif'),
 'forest': PosixPath('/home/jose/workspace/deforisk-jupyter-nb-v2/data/test/data/test_forest_gfc_10_2020_reprojected.tif'),
 'deforestation': PosixPath(

In [16]:
# Period dictionary for the historical period (first to third reference year)
historical_files = create_full_period_dict(
    years=years,
    period="historical",
    processed_data_folder=processed_data_folder,
    static_variables=static_variables,
    dynamic_variables=dynamic_variables,
)
historical_files


{'period': 'historical',
 'train_period': 'historical',
 'initial_year': 2015,
 'final_year': 2024,
 'defor_value': [1, 2],
 'time_interval': 9,
 'var_year': 2015,
 'altitude': PosixPath('/home/jose/workspace/deforisk-jupyter-nb-v2/data/test/data/test_altitude_reprojected.tif'),
 'slope': PosixPath('/home/jose/workspace/deforisk-jupyter-nb-v2/data/test/data/test_slope_reprojected.tif'),
 'pa': PosixPath('/home/jose/workspace/deforisk-jupyter-nb-v2/data/test/data/test_pa_reprojected.tif'),
 'subj': PosixPath('/home/jose/workspace/deforisk-jupyter-nb-v2/data/test/data/test_subj_reprojected.tif'),
 'dist_rivers': PosixPath('/home/jose/workspace/deforisk-jupyter-nb-v2/data/test/data/test_rivers_reprojected_distance.tif'),
 'dist_roads': PosixPath('/home/jose/workspace/deforisk-jupyter-nb-v2/data/test/data/test_roads_reprojected_distance.tif'),
 'forest': PosixPath('/home/jose/workspace/deforisk-jupyter-nb-v2/data/test/data/test_forest_gfc_10_2015_reprojected.tif'),
 'deforestation': PosixP

In [17]:
# Period dictionary for the forecast (dynamic variables taken at the last year)
forecast_files = create_full_period_dict(
    years=years,
    period="forecast",
    processed_data_folder=processed_data_folder,
    static_variables=static_variables,
    dynamic_variables=dynamic_variables,
)
forecast_files


{'period': 'forecast',
 'train_period': 'historical',
 'initial_year': 2015,
 'final_year': 2024,
 'defor_value': [1, 2],
 'time_interval': 9,
 'var_year': 2024,
 'altitude': PosixPath('/home/jose/workspace/deforisk-jupyter-nb-v2/data/test/data/test_altitude_reprojected.tif'),
 'slope': PosixPath('/home/jose/workspace/deforisk-jupyter-nb-v2/data/test/data/test_slope_reprojected.tif'),
 'pa': PosixPath('/home/jose/workspace/deforisk-jupyter-nb-v2/data/test/data/test_pa_reprojected.tif'),
 'subj': PosixPath('/home/jose/workspace/deforisk-jupyter-nb-v2/data/test/data/test_subj_reprojected.tif'),
 'dist_rivers': PosixPath('/home/jose/workspace/deforisk-jupyter-nb-v2/data/test/data/test_rivers_reprojected_distance.tif'),
 'dist_roads': PosixPath('/home/jose/workspace/deforisk-jupyter-nb-v2/data/test/data/test_roads_reprojected_distance.tif'),
 'forest': PosixPath('/home/jose/workspace/deforisk-jupyter-nb-v2/data/test/data/test_forest_gfc_10_2024_reprojected.tif'),
 'deforestation': PosixPat

## Generate sample for the period

In [18]:
"""Sample points and extract raster values."""

# Import
from glob import glob  # To explore files in a folder
import os  # Operating system interfaces
import sys  # To read and write files
import uuid

# Third party imports
import numpy as np  # For arrays
from osgeo import gdal  # GIS libraries
import pandas as pd  # To export result as a pandas DF

# Local imports
from forestatrisk.misc import makeblock, progress_bar


def sample(
    nsamp: int = 10000,
    adapt: bool = True,
    seed: int = 1234,
    csize: int = 10,
    var_dictionary="dictionary_of_files.txt",
    variable2sample: str = "deforestation",
    blk_rows: int = 0,
    verbose: bool = True,
):
    """Sample points and extract raster values.

    This function (i) randomly draws spatial points in deforested and
    forested areas and (ii) extracts environmental variable values for
    each spatial point.

    Three files are written to ``sampling_folder / var_dictionary["period"]``
    (``sampling_folder`` is a notebook-level variable): ``csize_icar.txt``
    (cell size in km), ``sample.txt`` (all sampled points, including rows
    with missing values) and ``sample_size.csv`` (number of deforested and
    forest points left after dropping rows with missing values).

    :param nsamp: Number of random spatial points per class.

    :param adapt: Boolean. Adapt ``nsamp`` to forest area: 1000 for 1 Mha of
        forest, with min=10000 and max=50000. Default to ``True``.

    :param seed: Seed for random number generator.

    :param csize: Spatial cell size in km.

    :param var_dictionary: Period dictionary with a ``"period"`` entry and
        one entry per variable; every value that is a ``Path`` is used as a
        raster band. The string default is a legacy placeholder and cannot
        be used as is: a dictionary must be passed.

    :param variable2sample: Key of the raster in which pixels are drawn.
        Pixels equal to 0 are treated as deforested and pixels equal to 1
        as forest.

    :param blk_rows: If > 0, number of lines per block.

    :param verbose: Toggle progress bar and messages.

    :raises ValueError: If the raster to sample is missing from
        ``var_dictionary``, if one of the two classes has no pixel, or if
        an input raster has no NoData value.

    :raises FileNotFoundError: If GDAL cannot open the raster to sample.

    :return: A Pandas DataFrame, each row being one observation. Rows with
        missing values are removed and a ``trial`` column set to 1 is added.

    """

    period_output_folder = sampling_folder / var_dictionary["period"]
    period_output_folder.mkdir(parents=True, exist_ok=True)

    # Set random seed
    np.random.seed(seed)

    # =============================================
    # Sampling pixels
    # =============================================

    if verbose:
        text = "Sample 2x {} pixels (deforested vs. forest)"
        print(text.format(nsamp))

    # Read defor raster
    forest_raster_file = var_dictionary[variable2sample]
    if forest_raster_file is None:
        raise ValueError(
            f"No raster file available for variable '{variable2sample}'"
        )
    # Plain strings are accepted by every GDAL version, Path objects are not
    forest_raster_file = str(forest_raster_file)
    forestR = gdal.Open(forest_raster_file)
    if forestR is None:
        raise FileNotFoundError(
            f"GDAL could not open raster file {forest_raster_file}"
        )
    forestB = forestR.GetRasterBand(1)

    # Make blocks
    blockinfo = makeblock(forest_raster_file, blk_rows=blk_rows)
    nblock = blockinfo[0]
    nblock_x = blockinfo[1]
    x = blockinfo[3]
    y = blockinfo[4]
    nx = blockinfo[5]
    ny = blockinfo[6]
    if verbose:
        text = "Divide region in {} blocks"
        print(text.format(nblock))

    # Number of defor/forest pixels by block
    if verbose:
        text = "Compute number of deforested and forest pixels per block"
        print(text)
    ndc = 0
    ndc_block = np.zeros(nblock, dtype=int)
    nfc = 0
    nfc_block = np.zeros(nblock, dtype=int)

    # Loop on blocks of data
    for b in range(nblock):
        # Progress bar
        if verbose:
            progress_bar(nblock, b + 1)
        # Position in 1D-arrays
        px = b % nblock_x
        py = b // nblock_x
        # Read the data
        forest = forestB.ReadAsArray(x[px], y[py], nx[px], ny[py])
        # Identify pixels (x/y coordinates) which are deforested
        deforpix = np.nonzero(forest == 0)
        ndc_block[b] = len(deforpix[0])  # Number of defor pixels
        ndc += len(deforpix[0])
        # Identify pixels (x/y coordinates) which are forest
        forpix = np.nonzero(forest == 1)
        nfc_block[b] = len(forpix[0])  # Number of forest pixels
        nfc += len(forpix[0])

    # Both classes are needed to build the block probabilities below;
    # fail early with an explicit message instead of a NaN-probability error
    if ndc == 0 or nfc == 0:
        raise ValueError(
            f"Cannot sample '{variable2sample}': found {ndc} deforested "
            f"and {nfc} forest pixels, both must be > 0"
        )

    # Adapt nsamp to forest area
    if adapt is True:
        gt = forestR.GetGeoTransform()
        pix_area = gt[1] * (-gt[5])
        farea = pix_area * (nfc + ndc) / 10000  # farea in ha
        nsamp_prop = 1000 * farea / 1e6  # 1000 per 1Mha
        if nsamp_prop >= 50000:
            nsamp = 50000
        elif nsamp_prop <= 10000:
            nsamp = 10000
        else:
            nsamp = int(np.rint(nsamp_prop))

    # Proba of drawing a block
    if verbose:
        print("Draw blocks at random")
    proba_block_d = ndc_block / ndc
    proba_block_f = nfc_block / nfc
    # Draw block number nsamp times
    block_draw_d = np.random.choice(
        list(range(nblock)), size=nsamp, replace=True, p=proba_block_d
    )
    block_draw_f = np.random.choice(
        list(range(nblock)), size=nsamp, replace=True, p=proba_block_f
    )
    # Number of times the block is drawn
    nblock_draw_d = np.bincount(block_draw_d, minlength=nblock)
    nblock_draw_f = np.bincount(block_draw_f, minlength=nblock)

    # Draw defor/forest pixels in blocks
    if verbose:
        print("Draw pixels at random in blocks")
    # Object to store coordinates of selected pixels
    deforselect = np.empty(shape=(0, 2), dtype=int)
    forselect = np.empty(shape=(0, 2), dtype=int)
    # Loop on blocks of data
    for b in range(nblock):
        # Progress bar
        if verbose:
            progress_bar(nblock, b + 1)
        # nbdraw
        nbdraw_d = nblock_draw_d[b]
        nbdraw_f = nblock_draw_f[b]
        # Position in 1D-arrays
        px = b % nblock_x
        py = b // nblock_x
        # Read the data
        forest = forestB.ReadAsArray(x[px], y[py], nx[px], ny[py])
        # Identify pixels (x/y coordinates) which are deforested
        # !! Values returned in row-major, C-style order (y/x) !!
        deforpix = np.nonzero(forest == 0)
        deforpix = np.transpose((x[px] + deforpix[1], y[py] + deforpix[0]))
        n_defor_pix = len(deforpix)
        # Identify pixels (x/y coordinates) which are forested
        forpix = np.nonzero(forest == 1)
        forpix = np.transpose((x[px] + forpix[1], y[py] + forpix[0]))
        n_for_pix = len(forpix)
        # Sample deforested pixels
        if nbdraw_d > 0:
            if nbdraw_d < n_defor_pix:
                i = np.random.choice(n_defor_pix, size=nbdraw_d, replace=False)
                deforselect = np.concatenate((deforselect, deforpix[i]), axis=0)
            else:
                # More draws than pixels: keep every deforested pixel
                deforselect = np.concatenate((deforselect, deforpix), axis=0)
        # Sample forest pixels
        if nbdraw_f > 0:
            if nbdraw_f < n_for_pix:
                i = np.random.choice(n_for_pix, size=nbdraw_f, replace=False)
                forselect = np.concatenate((forselect, forpix[i]), axis=0)
            else:
                # More draws than pixels: keep every forest pixel
                forselect = np.concatenate((forselect, forpix), axis=0)

    # =============================================
    # Compute center of pixel coordinates
    # =============================================
    if verbose:
        print("Compute center of pixel coordinates")

    # Landscape variables from forest raster
    gt = forestR.GetGeoTransform()
    ncol_r = forestR.RasterXSize
    nrow_r = forestR.RasterYSize
    Xmin = gt[0]
    Xmax = gt[0] + gt[1] * ncol_r
    Ymin = gt[3] + gt[5] * nrow_r
    Ymax = gt[3]

    # Dereference driver
    forestB = None
    del forestR

    # Concatenate selected pixels
    select = np.concatenate((deforselect, forselect), axis=0)

    # Offsets and coordinates #
    xOffset = select[:, 0]
    yOffset = select[:, 1]
    pts_x = (xOffset + 0.5) * gt[1] + gt[0]  # +0.5 for center of pixels
    pts_y = (yOffset + 0.5) * gt[5] + gt[3]  # +0.5 for center of pixels

    # ================================================
    # Compute cell number for spatial autocorrelation
    # ================================================

    # Cell number from region
    if verbose:
        text = "Compute number of {} x {} km spatial cells"
        print(text.format(csize, csize))
    csize_m = csize * 1000  # Transform km in m
    ncol = int(np.ceil((Xmax - Xmin) / csize_m))
    nrow = int(np.ceil((Ymax - Ymin) / csize_m))
    ncell = ncol * nrow
    if verbose:
        text = "... {} cells ({} x {})"
        print(text.format(ncell, nrow, ncol))
    # bigI and bigJ are the coordinates of the cells and start at zero
    if verbose:
        print("Identify cell number from XY coordinates")
    bigJ = ((pts_x - Xmin) / csize_m).astype(int)
    bigI = ((Ymax - pts_y) / csize_m).astype(int)
    cell = bigI * ncol + bigJ  # Cell number starts at zero

    # =============================================
    # Extract values from rasters
    # =============================================

    # Raster list
    # Keep only the entries that hold a file path (metadata entries and
    # variables without a file are skipped) and sort the keys
    sorted_keys = sorted(
        [key for key, value in var_dictionary.items() if isinstance(value, Path)]
    )

    # Retrieve the corresponding file paths based on the sorted keys
    raster_list = [str(var_dictionary[key]) for key in sorted_keys]

    # Make vrt with gdal.BuildVRT
    # Note: Extent and resolution from forest raster!
    if verbose:
        text = "Make virtual raster with variables as raster bands"
        print(text)
    param = gdal.BuildVRTOptions(
        resolution="user",
        outputBounds=(Xmin, Ymin, Xmax, Ymax),
        xRes=gt[1],
        yRes=-gt[5],
        separate=True,
    )
    rand_uuid = uuid.uuid4()
    vrt_file = f"/vsimem/var_{rand_uuid}.vrt"
    gdal.BuildVRT(vrt_file, raster_list, options=param)
    stack = gdal.Open(vrt_file)

    try:
        # List of nodata values
        nband = stack.RasterCount
        bandND = np.zeros(nband)
        for k in range(nband):
            # Check before storing: None cannot be written into a float array
            nodata = stack.GetRasterBand(k + 1).GetNoDataValue()
            if nodata is None:
                raise ValueError(
                    "NoData value is not specified for input raster file "
                    f"{raster_list[k]}"
                )
            bandND[k] = nodata

        # Numpy array to store values
        nobs = select.shape[0]
        val = np.zeros(shape=(nobs, nband), dtype=float)

        # Extract raster values
        if verbose:
            text = "Extract raster values for selected pixels"
            print(text)
        for i in range(nobs):
            # Progress bar
            if verbose:
                progress_bar(nobs, i + 1)
            # ReadArray for extract
            extract = stack.ReadAsArray(int(xOffset[i]), int(yOffset[i]), 1, 1)
            val[i, :] = extract.reshape(
                nband,
            )
    finally:
        # Close stack and remove the in-memory VRT so repeated calls do not
        # accumulate files under /vsimem
        stack = None
        gdal.Unlink(vrt_file)

    # Replace NA
    # NB: ReadAsArray return float32 type
    bandND = bandND.astype(np.float32)
    for k in range(nband):
        val[val[:, k] == bandND[k], k] = np.nan

    # Add XY coordinates and cell number
    pts_x = pts_x.reshape(nobs, 1)
    pts_y = pts_y.reshape(nobs, 1)
    cell = cell.reshape(nobs, 1)
    val = np.concatenate((val, pts_x, pts_y, cell), axis=1)

    # =============================================
    # Export and return value
    # =============================================

    # Save csize for interpolation of rhos
    ofile = os.path.join(period_output_folder, "csize_icar.txt")
    with open(ofile, "w", encoding="utf-8") as f:
        csize_km = csize_m / 1000
        f.write(str(csize_km))
    output_file = str(period_output_folder / "sample.txt")
    if verbose:
        text = "Export results to file {}"
        print(text.format(output_file))

    # Write to file by row; columns are the variable keys plus X, Y and cell
    colname = [*sorted_keys, "X", "Y", "cell"]
    varname = ",".join(colname)
    np.savetxt(output_file, val, header=varname, fmt="%s", delimiter=",", comments="")

    # Convert to pandas DataFrame
    val_df = pd.DataFrame(val, columns=colname)

    # Remove NA from data-set (otherwise scale() and
    # model_binomial_iCAR don't work)
    dataset = val_df.dropna(axis=0).copy()
    print(f"Number of samples: {len(dataset)}")
    # Set number of trials to one for far.model_binomial_iCAR()
    dataset.loc[:, "trial"] = 1  # Sample size
    ndefor = int((dataset[variable2sample] == 0).sum())
    nfor = int((dataset[variable2sample] == 1).sum())
    ifile = str(period_output_folder / "sample_size.csv")
    with open(ifile, "w", encoding="utf-8") as file:
        file.write("Var, n\n")
        file.write(f"ndefor, {ndefor}\n")
        file.write(f"nfor, {nfor}\n")
    print(f"Sample size: ndefor = {ndefor}, nfor = {nfor}")

    # Return the cleaned sample, as promised by the docstring and expected by
    # the callers that assign the result
    return dataset


# End


In [19]:
# Draw and export the sample for the calibration period
calibration_samples = sample(
    nsamp=n_samples,
    adapt=adapt,
    seed=random_seed,
    csize=spatial_cell_size_km,
    var_dictionary=calibration_files,
    variable2sample="deforestation",
)


Sample 2x 10000 pixels (deforested vs. forest)
Divide region in 960 blocks
Compute number of deforested and forest pixels per block
forestatrisk: 0...10...20...30...40...50...60...70...80...90...100 - done
Draw blocks at random
Draw pixels at random in blocks
forestatrisk: 0...10...20...30...40...50...60...70...80...90...100 - done
Compute center of pixel coordinates
Compute number of 4 x 4 km spatial cells
... 3477 cells (61 x 57)
Identify cell number from XY coordinates
Make virtual raster with variables as raster bands
Extract raster values for selected pixels
forestatrisk: 0...10...20...30...40...50...60...70...80...90...100 - done
Export results to file /home/jose/workspace/deforisk-jupyter-nb-v2/data/test/far_samples/calibration/sample.txt
Number of samples: 19980
Sample size: ndefor = 9998, nfor = 9982


In [20]:
# Draw and export the sample for the historical period
historical_samples = sample(
    nsamp=n_samples,
    adapt=adapt,
    seed=random_seed,
    csize=spatial_cell_size_km,
    var_dictionary=historical_files,
    variable2sample="deforestation",
)


Sample 2x 10000 pixels (deforested vs. forest)
Divide region in 960 blocks
Compute number of deforested and forest pixels per block
forestatrisk: 0...10...20...30...40...50...60...70...80...90...100 - done
Draw blocks at random
Draw pixels at random in blocks
forestatrisk: 0...10...20...30...40...50...60...70...80...90...100 - done
Compute center of pixel coordinates
Compute number of 4 x 4 km spatial cells
... 3477 cells (61 x 57)
Identify cell number from XY coordinates
Make virtual raster with variables as raster bands
Extract raster values for selected pixels
forestatrisk: 0...10...20...30...40...50...60...70...80...90...100 - done
Export results to file /home/jose/workspace/deforisk-jupyter-nb-v2/data/test/far_samples/historical/sample.txt
Number of samples: 19988
Sample size: ndefor = 9998, nfor = 9990
