# Roll-out data preparation

This notebook runs feature engineering on the target municipalities to produce the feature table needed for model roll-out.

### Input
- Features for the priority municipalities

### Output
- Feature table for roll-out

In [1]:
# Enable IPython's autoreload extension in mode 2 so that imported modules are
# reloaded automatically before each cell is executed.
%load_ext autoreload
%autoreload 2

# Imports and Set-up

In [2]:
import sys
import polars as pl
import pandas as pd
from loguru import logger

import geopandas as gpd



In [3]:
sys.path.append("../../")  # include parent directory
from src import bing_tile_utils
from src.polars_utils import log_condition, log_duplicates

from src.settings import DATA_DIR

In [4]:
# Sub-directories of DATA_DIR.
MODEL_DIR = DATA_DIR / "models"  # the roll-out parquet is written beneath this directory
VECTOR_DIR = DATA_DIR / "vectors"  # NOTE(review): not referenced in the cells visible here
OUTPUT_DIR = DATA_DIR / "output/component_1"  # NOTE(review): not referenced in the cells visible here

In [5]:
# Consolidated, aligned feature table keyed by Bing tile quadkey.
FEATURES_FPATH = (
    DATA_DIR / "aligned/parquets/aligned_dataset_consolidated_20240503.parquet"
)
# Bing tile grid holding the quadkey together with its x/y/z tile coordinates.
BINGTILE_FPATH = (
    DATA_DIR / "admin_bounds/grids_landslide_w_xyz_zoomlevel18_20240320.parquet"
)

# Grid of the target municipalities with their administrative attributes (GeoPackage).
ADM_GRIDS = DATA_DIR / "admin_bounds/grids_target_muni_wadm_zoomlevel18_20240304.gpkg"

In [6]:
# lattice variables
BING_TILE_ZOOM_LEVEL = 18  # zoom level handed to the bing_tile_utils helpers
USE_WEIGHTED_LATTICE = False  # switches the weighted-lattice expressions / Chebyshev-distance columns on or off
LATTICE_RADIUS = 3  # radius handed to the lattice helpers; also used in column suffixes and the output file name
CHEBYSHEV_DIST_COL = "chebyshev_dist_col"  # column name passed to get_lattice_weight_exprs

# The output file name carries today's date (YYYYMMDD), so a run on a new day
# writes to a new file.
OUTPUT_VERSION = pd.to_datetime("today").strftime("%Y%m%d")
OUTPUT_FPATH = (
    MODEL_DIR
    / f"rollout_data/rollout_data_w_lattice{LATTICE_RADIUS}_{OUTPUT_VERSION}.parquet"
)

## Import Data


In [7]:
# Load the Bing tile grid (quadkey plus x/y/z tile coordinates).
bingtiles = pl.read_parquet(BINGTILE_FPATH)

In [8]:
# Preview the first rows together with the (rows, columns) shape.
bingtiles.head(), bingtiles.shape

(shape: (5, 4)
 ┌────────────────────┬───────┬────────┬─────┐
 │ quadkey            ┆ x     ┆ y      ┆ z   │
 │ ---                ┆ ---   ┆ ---    ┆ --- │
 │ str                ┆ i64   ┆ i64    ┆ i64 │
 ╞════════════════════╪═══════╪════════╪═════╡
 │ 032232230000210321 ┆ 74773 ┆ 130086 ┆ 18  │
 │ 032232230000210330 ┆ 74774 ┆ 130086 ┆ 18  │
 │ 032232230000210331 ┆ 74775 ┆ 130086 ┆ 18  │
 │ 032232230000211220 ┆ 74776 ┆ 130086 ┆ 18  │
 │ 032232230000210323 ┆ 74773 ┆ 130087 ┆ 18  │
 └────────────────────┴───────┴────────┴─────┘,
 (1798240, 4))

In [9]:
# Load the administrative grid of the target municipalities as a polars frame.
# The geometry is not used in the cells shown in this notebook, so it is
# skipped at read time instead of being loaded and then dropped in place:
# with ignore_geometry=True geopandas returns a plain pandas DataFrame that
# contains only the attribute columns.
adm_grids = pl.from_pandas(gpd.read_file(ADM_GRIDS, ignore_geometry=True))

In [10]:
# Preview the administrative grid and its (rows, columns) shape.
adm_grids.head(), adm_grids.shape

(shape: (5, 15)
 ┌───────────┬───────────┬───────────┬───────────┬───┬───────────┬───────────┬───────────┬──────────┐
 │ quadkey   ┆ MPIO_CCNC ┆ MPIO_CNMB ┆ MPIO_CNMB ┆ … ┆ MPIO_NARE ┆ MPIO_NANO ┆ SHAPE_ARE ┆ SHAPE_LE │
 │ ---       ┆ T         ┆ R         ┆ R_EN      ┆   ┆ A         ┆ ---       ┆ A         ┆ N        │
 │ str       ┆ ---       ┆ ---       ┆ ---       ┆   ┆ ---       ┆ i64       ┆ ---       ┆ ---      │
 │           ┆ str       ┆ str       ┆ str       ┆   ┆ f64       ┆           ┆ f64       ┆ f64      │
 ╞═══════════╪═══════════╪═══════════╪═══════════╪═══╪═══════════╪═══════════╪═══════════╪══════════╡
 │ 032232230 ┆ 86749     ┆ SIBUNDOY  ┆ SIBUNDOY  ┆ … ┆ 97.734625 ┆ 2020      ┆ 0.007922  ┆ 0.511382 │
 │ 100333331 ┆           ┆           ┆           ┆   ┆           ┆           ┆           ┆          │
 │ 032232230 ┆ 86749     ┆ SIBUNDOY  ┆ SIBUNDOY  ┆ … ┆ 97.734625 ┆ 2020      ┆ 0.007922  ┆ 0.511382 │
 │ 100333323 ┆           ┆           ┆           ┆   ┆           ┆

In [11]:
# Load the consolidated feature table; it also feeds the lattice aggregations further down.
features_df = pl.read_parquet(FEATURES_FPATH)

In [12]:
# Preview the feature table and its (rows, columns) shape.
features_df.head(), features_df.shape

(shape: (5, 50)
 ┌───────────┬───────────┬───────────┬───────────┬───┬───────────┬───────────┬───────────┬──────────┐
 │ quadkey   ┆ MPIO_CCNC ┆ MPIO_CNMB ┆ MPIO_CNMB ┆ … ┆ hillshade ┆ hillshade ┆ distance_ ┆ distance │
 │ ---       ┆ T         ┆ R         ┆ R_EN      ┆   ┆ _count    ┆ _median   ┆ m_roads   ┆ _m_river │
 │ str       ┆ ---       ┆ ---       ┆ ---       ┆   ┆ ---       ┆ ---       ┆ ---       ┆ s        │
 │           ┆ str       ┆ str       ┆ str       ┆   ┆ i64       ┆ f64       ┆ f64       ┆ ---      │
 │           ┆           ┆           ┆           ┆   ┆           ┆           ┆           ┆ f64      │
 ╞═══════════╪═══════════╪═══════════╪═══════════╪═══╪═══════════╪═══════════╪═══════════╪══════════╡
 │ 032232230 ┆ 52001     ┆ PASTO     ┆ PASTO     ┆ … ┆ 256       ┆ 0.0       ┆ 654.41195 ┆ 440.4481 │
 │ 000210321 ┆           ┆           ┆           ┆   ┆           ┆           ┆ 7         ┆ 32       │
 │ 032232230 ┆ 52001     ┆ PASTO     ┆ PASTO     ┆ … ┆ 256       ┆

In [13]:
# Quadkeys of every grid tile listed in the administrative grid, as a Python list.
priority_cities_quadkeys = adm_grids["quadkey"].to_list()

In [14]:
# Keep only the feature rows whose quadkey belongs to a priority municipality.
in_priority_city = pl.col("quadkey").is_in(priority_cities_quadkeys)
priority_city_feat = features_df.filter(in_priority_city)

In [15]:
# Preview the subset for the priority municipalities and its (rows, columns) shape.
priority_city_feat.head(), priority_city_feat.shape

(shape: (5, 50)
 ┌───────────┬───────────┬───────────┬───────────┬───┬───────────┬───────────┬───────────┬──────────┐
 │ quadkey   ┆ MPIO_CCNC ┆ MPIO_CNMB ┆ MPIO_CNMB ┆ … ┆ hillshade ┆ hillshade ┆ distance_ ┆ distance │
 │ ---       ┆ T         ┆ R         ┆ R_EN      ┆   ┆ _count    ┆ _median   ┆ m_roads   ┆ _m_river │
 │ str       ┆ ---       ┆ ---       ┆ ---       ┆   ┆ ---       ┆ ---       ┆ ---       ┆ s        │
 │           ┆ str       ┆ str       ┆ str       ┆   ┆ i64       ┆ f64       ┆ f64       ┆ ---      │
 │           ┆           ┆           ┆           ┆   ┆           ┆           ┆           ┆ f64      │
 ╞═══════════╪═══════════╪═══════════╪═══════════╪═══╪═══════════╪═══════════╪═══════════╪══════════╡
 │ 032232230 ┆ 86749     ┆ SIBUNDOY  ┆ SIBUNDOY  ┆ … ┆ 289       ┆ 0.0       ┆ null      ┆ 7235.630 │
 │ 100333331 ┆           ┆           ┆           ┆   ┆           ┆           ┆           ┆ 84       │
 │ 032232230 ┆ 86749     ┆ SIBUNDOY  ┆ SIBUNDOY  ┆ … ┆ 256       ┆

In [16]:
# Attach the Bing tile x/y/z coordinates to every target-municipality tile
# (inner join: tiles without a match in the grid are discarded).
priority_city_feat = priority_city_feat.join(bingtiles, on="quadkey", how="inner")

In [17]:
# Number of nulls in each column (one-row frame).
priority_city_feat.null_count()

quadkey,MPIO_CCNCT,MPIO_CNMBR,MPIO_CNMBR_EN,DPTO_CNMBR,DPTO_CNMBR_EN,Municipio,Municipio_EN,DPTO_CCDGO,MPIO_CCDGO,MPIO_CRSLC,MPIO_NAREA,MPIO_NANO,SHAPE_AREA,SHAPE_LEN,slope_min,slope_max,slope_count,slope_median,aspect_min,aspect_max,aspect_count,aspect_median,soil_class,elevation_min,elevation_max,elevation_count,elevation_median,ndvi2023_min,ndvi2023_max,ndvi2023_count,ndvi2023_median,rainfall_mm_min,rainfall_mm_max,rainfall_mm_count,rainfall_mm_median,__index_level_0__,lithology_type,sand_5-15cm_mean,sand_100-200cm_mean,silt_5-15cm_mean,silt_100-200cm_mean,clay_5-15cm_mean,clay_100-200cm_mean,hillshade_min,hillshade_max,hillshade_count,hillshade_median,distance_m_roads,distance_m_rivers,x,y,z
u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


## Impute missing values

In [None]:
# Remove the four NDVI 2023 summary columns.
ndvi_cols = [f"ndvi2023_{stat}" for stat in ("min", "max", "count", "median")]
priority_city_feat = priority_city_feat.drop(ndvi_cols)

In [None]:
# Replace missing rainfall statistics with zero.
rainfall_cols = [f"rainfall_mm_{stat}" for stat in ("min", "max", "count", "median")]
priority_city_feat = priority_city_feat.with_columns(
    pl.col(rainfall_cols).fill_null(strategy="zero")
)

In [None]:
# Mark missing road / river distances with the sentinel value -99.
sentinel_filled_cols = ["distance_m_roads", "distance_m_rivers"]
priority_city_feat = priority_city_feat.with_columns(
    pl.col(sentinel_filled_cols).fill_null(-99)
)

In [None]:
# rainfall median: negative values are replaced by 0, all other values are kept
rainfall_median = pl.col("rainfall_mm_median")
non_negative_rainfall_median = (
    pl.when(rainfall_median < 0).then(pl.lit(0)).otherwise(rainfall_median)
)
priority_city_feat = priority_city_feat.with_columns(
    non_negative_rainfall_median.alias("rainfall_mm_median")
)

In [None]:
# Sanity check: list the rows whose rainfall median is still negative.
# An empty frame is the expected result once negative medians have been
# replaced by 0. (Using with_columns here would instead overwrite the column
# with booleans in the displayed frame and show every row.)
priority_city_feat.filter(pl.col("rainfall_mm_median") < 0)

# Get lattices

## Get weight expressions

In [18]:
# Ask the helper for the polars expressions used by the lattice aggregations.
exprs_dict = bing_tile_utils.get_lattice_weight_exprs(
    radius=LATTICE_RADIUS,
    use_weighted_lattice=USE_WEIGHTED_LATTICE,
    chebyshev_dist_col=CHEBYSHEV_DIST_COL,
    group_by_cols="center_quadkey",
)

# Unpack the three entries that the aggregation cells refer to by name.
chebyshev_count_exprs, lattice_weight_exprs, lattice_weight_multiplier = (
    exprs_dict[key]
    for key in (
        "chebyshev_count_exprs",
        "lattice_weight_exprs",
        "lattice_weight_multiplier",
    )
)

In [19]:
# De-duplicate the roll-out tiles and discard rows containing any null; the
# two helpers log how many rows each check finds.
columns = ["quadkey"]
has_any_null = pl.any_horizontal([pl.col("*").is_null()])

landslide_quadkeys = log_duplicates(priority_city_feat, columns).unique()
landslide_quadkeys = log_condition(landslide_quadkeys, has_any_null).drop_nulls()
landslide_quadkeys.head()

2024-05-07 16:18:09.158 | INFO     | src.polars_utils:log_duplicates:51 - There are 0 duplicate rows based on these columns ['quadkey']
2024-05-07 16:18:09.237 | INFO     | src.polars_utils:log_condition:17 - There are 0 rows meeting condition *.is_null().any_horizontal()


quadkey,MPIO_CCNCT,MPIO_CNMBR,MPIO_CNMBR_EN,DPTO_CNMBR,DPTO_CNMBR_EN,Municipio,Municipio_EN,DPTO_CCDGO,MPIO_CCDGO,MPIO_CRSLC,MPIO_NAREA,MPIO_NANO,SHAPE_AREA,SHAPE_LEN,slope_min,slope_max,slope_count,slope_median,aspect_min,aspect_max,aspect_count,aspect_median,soil_class,elevation_min,elevation_max,elevation_count,elevation_median,ndvi2023_min,ndvi2023_max,ndvi2023_count,ndvi2023_median,rainfall_mm_min,rainfall_mm_max,rainfall_mm_count,rainfall_mm_median,__index_level_0__,lithology_type,sand_5-15cm_mean,sand_100-200cm_mean,silt_5-15cm_mean,silt_100-200cm_mean,clay_5-15cm_mean,clay_100-200cm_mean,hillshade_min,hillshade_max,hillshade_count,hillshade_median,distance_m_roads,distance_m_rivers,x,y,z
str,str,str,str,str,str,str,str,str,str,str,f64,i64,f64,f64,f64,f64,i64,f64,f64,f64,i64,f64,f64,f64,f64,i64,f64,f64,f64,i64,f64,f64,f64,f64,f64,i64,f64,f64,f64,f64,f64,f64,f64,f64,f64,i64,f64,f64,f64,i64,i64,i64
"""03223223010033…","""86749""","""SIBUNDOY""","""SIBUNDOY""","""PUTUMAYO""","""PUTUMAYO""","""Sibundoy""","""Sibundoy""","""86""","""749""","""Decreto 1871 d…",97.734625,2020,0.007922,0.511382,0.0,26.0,36,0.0,0.0,142.0,256,0.0,3.0,0.0,3689.0,256,0.0,0.620972,0.708093,28,0.657662,2267.173584,2267.173584,1.0,2267.173584,82312,177.0,350.5,388.0,388.0,317.0,261.5,295.0,0.0,133.0,256,0.0,-99.0,7357.343708,75069,130111,18
"""03223223010211…","""86749""","""SIBUNDOY""","""SIBUNDOY""","""PUTUMAYO""","""PUTUMAYO""","""Sibundoy""","""Sibundoy""","""86""","""749""","""Decreto 1871 d…",97.734625,2020,0.007922,0.511382,0.0,26.0,42,14.5,0.0,176.0,289,126.0,3.0,0.0,3539.0,289,3493.0,0.757433,0.83594,166,0.789813,2267.173584,2267.173584,1.0,2267.173584,83049,177.0,328.0,376.0,406.5,315.5,265.5,308.5,0.0,177.0,289,121.0,9832.161935,6826.373141,75071,130114,18
"""03223223010211…","""86749""","""SIBUNDOY""","""SIBUNDOY""","""PUTUMAYO""","""PUTUMAYO""","""Sibundoy""","""Sibundoy""","""86""","""749""","""Decreto 1871 d…",97.734625,2020,0.007922,0.511382,8.0,29.0,36,18.5,67.0,144.0,256,111.5,3.0,3569.0,3634.0,256,3602.0,0.653631,0.810933,225,0.733945,2267.173584,2267.173584,1.0,2267.173584,83535,177.0,329.75,379.0,404.75,323.25,265.75,297.5,109.0,157.0,256,133.0,9469.616296,6999.996487,75068,130116,18
"""03223223010211…","""86749""","""SIBUNDOY""","""SIBUNDOY""","""PUTUMAYO""","""PUTUMAYO""","""Sibundoy""","""Sibundoy""","""86""","""749""","""Decreto 1871 d…",97.734625,2020,0.007922,0.511382,16.0,31.0,36,24.0,88.0,125.0,256,107.0,3.0,3453.0,3523.0,256,3479.0,0.761498,0.84216,225,0.795787,2267.173584,2267.173584,1.0,2267.173584,83537,177.0,322.25,369.5,400.25,316.25,277.75,314.0,97.0,133.0,256,120.0,9507.463646,6754.365909,75070,130116,18
"""03223223010211…","""86749""","""SIBUNDOY""","""SIBUNDOY""","""PUTUMAYO""","""PUTUMAYO""","""Sibundoy""","""Sibundoy""","""86""","""749""","""Decreto 1871 d…",97.734625,2020,0.007922,0.511382,10.0,24.0,42,17.0,90.0,196.0,272,149.0,3.0,3640.0,3707.0,272,3668.0,0.628219,0.779647,225,0.691238,2267.173584,2267.173584,1.0,2267.173584,83778,177.0,326.5,381.5,402.0,319.5,271.5,299.0,125.0,192.0,272,150.0,9289.096724,7164.983954,75066,130117,18


In [20]:
# Side length of the lattice cluster for this zoom level and radius, as
# computed by the helper (presumably in metres, going by the helper's name).
bing_tile_utils.get_bing_cluster_tile_length_m(BING_TILE_ZOOM_LEVEL, LATTICE_RADIUS)

1070.1179504394531

In [21]:
# Expand every centre tile (given by its x/y coordinates) into its lattice of
# surrounding tiles.
center_tiles_xy = landslide_quadkeys.select("x", "y")
lattice_df = bing_tile_utils.generate_lattice(
    center_tiles_xy,
    LATTICE_RADIUS,
    zoom_level=BING_TILE_ZOOM_LEVEL,
    include_chebyshev_dist=USE_WEIGHTED_LATTICE,
)
# No row may appear twice in the generated lattice.
assert not lattice_df.is_duplicated().any()

print(len(lattice_df))
lattice_df.head()

10836448


center_x,center_y,lattice_x,lattice_y,lattice_quadkey
i64,i64,i64,i64,str
75069,130111,75066,130108,"""03223223010033…"
75069,130111,75066,130109,"""03223223010033…"
75069,130111,75066,130110,"""03223223010033…"
75069,130111,75066,130111,"""03223223010033…"
75069,130111,75066,130112,"""03223223010211…"


In [22]:
# Swap the centre x/y coordinates for the centre quadkey and keep only the key
# columns (plus the Chebyshev-distance columns when the lattice is weighted).
columns = ["center_quadkey", "lattice_quadkey"]
if USE_WEIGHTED_LATTICE:
    columns.extend(bing_tile_utils.CHEBYSHEV_DIST_COLS)

rename_dict = {"quadkey": "center_quadkey", "x": "center_x", "y": "center_y"}
center_tiles = landslide_quadkeys.rename(rename_dict)
lattice_df = lattice_df.join(
    center_tiles, on=["center_x", "center_y"], how="left"
).select(columns)
print(len(lattice_df))
lattice_df.head()

10836448


center_quadkey,lattice_quadkey
str,str
"""03223223010033…","""03223223010033…"
"""03223223010033…","""03223223010033…"
"""03223223010033…","""03223223010033…"
"""03223223010033…","""03223223010033…"
"""03223223010033…","""03223223010211…"


# Calculate lattice features

## Features that need the average 

In [23]:
# Continuous features that are averaged over each tile's lattice.
median_cols = [
    f"{name}_median"
    for name in ("elevation", "slope", "aspect", "hillshade", "rainfall_mm")
]
# Soil fractions at both depths: sand, silt, clay; shallow depth first.
soil_cols = [
    f"{fraction}_{depth}_mean"
    for fraction in ("sand", "silt", "clay")
    for depth in ("5-15cm", "100-200cm")
]
proximity_cols = ["distance_m_roads", "distance_m_rivers"]

aggregated_cols = median_cols + soil_cols + proximity_cols

In [24]:
# Per centre tile: mean of every aggregated column, each scaled by the lattice
# weight multiplier, taken over the tiles of its lattice.
agg_expr = [
    (pl.col(name) * lattice_weight_multiplier).mean() for name in aggregated_cols
]

# NOTE(review): the lattice values are read from features_df, not from the
# imputed priority_city_feat — confirm this is intended.
lattice_feature_values = features_df.select(["quadkey"] + aggregated_cols).rename(
    {"quadkey": "lattice_quadkey"}
)
aggregated_metrics = (
    lattice_feature_values.join(
        lattice_df, on="lattice_quadkey", how="inner", validate="1:m"
    )
    .drop("lattice_quadkey")
    .with_columns(chebyshev_count_exprs)
    .with_columns(lattice_weight_exprs)
    .group_by("center_quadkey")
    .agg(agg_expr)
    .rename({"center_quadkey": "quadkey"})
    .sort(by="quadkey")
)
print(len(aggregated_metrics))
aggregated_metrics.head()

221152


quadkey,elevation_median,slope_median,aspect_median,hillshade_median,rainfall_mm_median,sand_5-15cm_mean,sand_100-200cm_mean,silt_5-15cm_mean,silt_100-200cm_mean,clay_5-15cm_mean,clay_100-200cm_mean,distance_m_roads,distance_m_rivers
str,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64
"""03223013331131…",616.535714,16.321429,103.142857,115.053571,1768.910867,331.160714,300.901786,279.017857,263.821429,345.0,390.848214,898.256797,5096.987494
"""03223013331131…",607.95,14.183333,97.933333,112.6,1738.617017,336.033333,306.341667,279.058333,266.008333,343.075,386.166667,961.398781,5101.554011
"""03223013331131…",636.142857,16.257143,104.6,121.071429,1751.852609,334.671429,307.05,280.364286,268.042857,349.078571,389.378571,885.60206,5047.859659
"""03223013331131…",561.956522,16.652174,88.304348,106.021739,1834.327074,323.75,298.847826,281.891304,271.391304,350.836957,386.478261,746.03176,5054.777113
"""03223013331131…",618.76,17.72,100.84,113.94,1796.676309,332.25,304.12,284.63,271.58,353.05,394.54,814.063313,5079.217917


In [25]:
# Add a copy of every column under a name that carries the lattice-radius suffix.
lattice_suffix = f"_lattice_{LATTICE_RADIUS}"
aggregated_metrics = aggregated_metrics.with_columns(
    pl.all().name.suffix(lattice_suffix)
)

In [26]:
# Keep only the suffixed columns, then give the join key its plain name back.
suffixed_key = f"quadkey_lattice_{LATTICE_RADIUS}"
aggregated_metrics = aggregated_metrics.select(
    f"^.*_lattice_{LATTICE_RADIUS}$"
).rename({suffixed_key: "quadkey"})

## Aggregation for lithology type

In [27]:
# Per centre tile: the most frequent lithology type(s) among its lattice tiles.
# mode() yields a list because several values can tie.
lattice_lithology = features_df.select(["quadkey", "lithology_type"]).rename(
    {"quadkey": "lattice_quadkey"}
)
lithology_lattice_col = f"lithology_type_lattice_{LATTICE_RADIUS}"
lithology_metrics = (
    lattice_lithology.join(
        lattice_df, on="lattice_quadkey", how="inner", validate="1:m"
    )
    .drop("lattice_quadkey")
    .with_columns(chebyshev_count_exprs)
    .with_columns(lattice_weight_exprs)
    .group_by("center_quadkey")
    .agg(pl.col("lithology_type").mode())
    .rename({"center_quadkey": "quadkey", "lithology_type": lithology_lattice_col})
    .sort(by="quadkey")
)
print(len(lithology_metrics))
lithology_metrics.head()

221152


quadkey,lithology_type_lattice_3
str,list[f64]
"""03223013331131…",[42.0]
"""03223013331131…",[42.0]
"""03223013331131…",[42.0]
"""03223013331131…",[42.0]
"""03223013331131…",[42.0]


In [28]:
# get first element from mode list
# The lattice lithology column holds a list per tile; `.list.first()` takes its
# first entry directly, so no temporary row index / explode / gather is needed.
# NOTE(review): when several lithology types tie, which one comes first depends
# on the order returned by mode() — confirm that this is acceptable.
lithology_metrics = lithology_metrics.with_columns(
    pl.col(f"lithology_type_lattice_{LATTICE_RADIUS}").list.first()
)
lithology_metrics.head()

quadkey,lithology_type_lattice_3
str,f64
"""03223013331131…",42.0
"""03223013331131…",42.0
"""03223013331131…",42.0
"""03223013331131…",42.0
"""03223013331131…",42.0


In [29]:
# Attach the lattice lithology and the lattice averages to every tile
# (inner joins on quadkey).
lattice_features = priority_city_feat.join(
    lithology_metrics, on="quadkey", how="inner"
).join(aggregated_metrics, on="quadkey", how="inner")

In [30]:
# Preview the combined feature table and its (rows, columns) shape.
lattice_features.head(), lattice_features.shape

(shape: (5, 67)
 ┌───────────┬───────────┬───────────┬───────────┬───┬───────────┬───────────┬───────────┬──────────┐
 │ quadkey   ┆ MPIO_CCNC ┆ MPIO_CNMB ┆ MPIO_CNMB ┆ … ┆ clay_5-15 ┆ clay_100- ┆ distance_ ┆ distance │
 │ ---       ┆ T         ┆ R         ┆ R_EN      ┆   ┆ cm_mean_l ┆ 200cm_mea ┆ m_roads_l ┆ _m_river │
 │ str       ┆ ---       ┆ ---       ┆ ---       ┆   ┆ attice_3  ┆ n_lattice ┆ attice_3  ┆ s_lattic │
 │           ┆ str       ┆ str       ┆ str       ┆   ┆ ---       ┆ _3        ┆ ---       ┆ e_3      │
 │           ┆           ┆           ┆           ┆   ┆ f64       ┆ ---       ┆ f64       ┆ ---      │
 │           ┆           ┆           ┆           ┆   ┆           ┆ f64       ┆           ┆ f64      │
 ╞═══════════╪═══════════╪═══════════╪═══════════╪═══╪═══════════╪═══════════╪═══════════╪══════════╡
 │ 032230133 ┆ 54250     ┆ EL TARRA  ┆ EL TARRA  ┆ … ┆ 345.0     ┆ 390.84821 ┆ 898.25679 ┆ 5096.987 │
 │ 311313231 ┆           ┆           ┆           ┆   ┆           ┆

In [31]:
# Remove the "__index_level_0__" column that came in with the feature table
# (presumably a leftover pandas index — verify against the source parquet).
lattice_features = lattice_features.drop("__index_level_0__")

## Impute missing values for lattice features

In [32]:
# Number of nulls in each column of the final table (one-row frame).
lattice_features.null_count()

quadkey,MPIO_CCNCT,MPIO_CNMBR,MPIO_CNMBR_EN,DPTO_CNMBR,DPTO_CNMBR_EN,Municipio,Municipio_EN,DPTO_CCDGO,MPIO_CCDGO,MPIO_CRSLC,MPIO_NAREA,MPIO_NANO,SHAPE_AREA,SHAPE_LEN,slope_min,slope_max,slope_count,slope_median,aspect_min,aspect_max,aspect_count,aspect_median,soil_class,elevation_min,elevation_max,elevation_count,elevation_median,ndvi2023_min,ndvi2023_max,ndvi2023_count,ndvi2023_median,rainfall_mm_min,rainfall_mm_max,rainfall_mm_count,rainfall_mm_median,lithology_type,sand_5-15cm_mean,sand_100-200cm_mean,silt_5-15cm_mean,silt_100-200cm_mean,clay_5-15cm_mean,clay_100-200cm_mean,hillshade_min,hillshade_max,hillshade_count,hillshade_median,distance_m_roads,distance_m_rivers,x,y,z,lithology_type_lattice_3,elevation_median_lattice_3,slope_median_lattice_3,aspect_median_lattice_3,hillshade_median_lattice_3,rainfall_mm_median_lattice_3,sand_5-15cm_mean_lattice_3,sand_100-200cm_mean_lattice_3,silt_5-15cm_mean_lattice_3,silt_100-200cm_mean_lattice_3,clay_5-15cm_mean_lattice_3,clay_100-200cm_mean_lattice_3,distance_m_roads_lattice_3,distance_m_rivers_lattice_3
u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [35]:
# Create the destination folder first: the write fails when the parent
# directory is missing. OUTPUT_FPATH is built with `/` from DATA_DIR, so it is
# expected to be a pathlib.Path — verify against src.settings.
OUTPUT_FPATH.parent.mkdir(parents=True, exist_ok=True)
lattice_features.write_parquet(OUTPUT_FPATH)