# Model Roll-out

This notebook uses the most recent model to predict susceptibility for the target municipalities

### Input
- Features for target municipality
- Model instance

### Output
- predicted landslide susceptibility for each quadkey in target municipalities

In [19]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


# Imports and Set Up

In [20]:
import sys
import polars as pl
import pandas as pd

from shapely import wkt
import geopandas as gpd
from skops.io import load as load_model

In [21]:
sys.path.append("../../")  # include parent directory

from src.settings import DATA_DIR

In [4]:
MODEL_DIR = DATA_DIR / "models"

FEATURES_VERSION = "20240504"
LATTICE_RADIUS = 3
FEATURES_FPATH = (
    MODEL_DIR
    / f"rollout_data/rollout_data_w_lattice{LATTICE_RADIUS}_{FEATURES_VERSION}.parquet"
)


ADM_GRIDS = DATA_DIR / "admin_bounds/grids_target_muni_wadm_zoomlevel18_20240304.gpkg"

MODEL_VERSION = "20240507"
MODEL_TYPE = "classification"
MODEL_NAME = "xgboost"
LABEL = "multiclass"
MODEL_FPATH = (
    MODEL_DIR / f"pkl/{MODEL_VERSION}_{MODEL_TYPE}_{MODEL_NAME}_{LABEL}.parquet"
)

OUTPUT_VERSION = pd.to_datetime("today").strftime("%Y%m%d")
OUTPUT_FPATH = DATA_DIR / f"output/component_1/{OUTPUT_VERSION}_rollout_preds.gpkg"

## Load Data

In [5]:
features_df = pl.read_parquet(FEATURES_FPATH)

In [6]:
features_df.head(), features_df.shape

(shape: (5, 76)
 ┌───────────┬───────────┬───────────┬───────────┬───┬───────────┬───────────┬───────────┬──────────┐
 │ quadkey   ┆ MPIO_CCNC ┆ MPIO_CNMB ┆ MPIO_CNMB ┆ … ┆ clay_5-15 ┆ clay_100- ┆ distance_ ┆ distance │
 │ ---       ┆ T         ┆ R         ┆ R_EN      ┆   ┆ cm_mean_l ┆ 200cm_mea ┆ m_roads_l ┆ _m_river │
 │ str       ┆ ---       ┆ ---       ┆ ---       ┆   ┆ attice_3_ ┆ n_lattice ┆ attice_3_ ┆ s_lattic │
 │           ┆ str       ┆ str       ┆ str       ┆   ┆ right     ┆ _3_ri…    ┆ right     ┆ e_3_righ │
 │           ┆           ┆           ┆           ┆   ┆ ---       ┆ ---       ┆ ---       ┆ …        │
 │           ┆           ┆           ┆           ┆   ┆ f64       ┆ f64       ┆ f64       ┆ ---      │
 │           ┆           ┆           ┆           ┆   ┆           ┆           ┆           ┆ f64      │
 ╞═══════════╪═══════════╪═══════════╪═══════════╪═══╪═══════════╪═══════════╪═══════════╪══════════╡
 │ 032230133 ┆ 54250     ┆ EL TARRA  ┆ EL TARRA  ┆ … ┆ 345.0     ┆

In [7]:
adm_grids = gpd.read_file(ADM_GRIDS)
adm_grids["geometry"] = adm_grids.geometry.apply(lambda x: wkt.dumps(x))
adm_grids = pl.from_pandas(adm_grids)


Geometry column does not contain geometry.



In [8]:
adm_grids.head()

quadkey,MPIO_CCNCT,MPIO_CNMBR,MPIO_CNMBR_EN,DPTO_CNMBR,DPTO_CNMBR_EN,Municipio,Municipio_EN,DPTO_CCDGO,MPIO_CCDGO,MPIO_CRSLC,MPIO_NAREA,MPIO_NANO,SHAPE_AREA,SHAPE_LEN,geometry
str,str,str,str,str,str,str,str,str,str,str,f64,i64,f64,f64,str
"""03223223010033…","""86749""","""SIBUNDOY""","""SIBUNDOY""","""PUTUMAYO""","""PUTUMAYO""","""Sibundoy""","""Sibundoy""","""86""","""749""","""Decreto 1871 d…",97.734625,2020,0.007922,0.511382,"""POLYGON ((-76.…"
"""03223223010033…","""86749""","""SIBUNDOY""","""SIBUNDOY""","""PUTUMAYO""","""PUTUMAYO""","""Sibundoy""","""Sibundoy""","""86""","""749""","""Decreto 1871 d…",97.734625,2020,0.007922,0.511382,"""POLYGON ((-76.…"
"""03223223010033…","""86749""","""SIBUNDOY""","""SIBUNDOY""","""PUTUMAYO""","""PUTUMAYO""","""Sibundoy""","""Sibundoy""","""86""","""749""","""Decreto 1871 d…",97.734625,2020,0.007922,0.511382,"""POLYGON ((-76.…"
"""03223223010033…","""86749""","""SIBUNDOY""","""SIBUNDOY""","""PUTUMAYO""","""PUTUMAYO""","""Sibundoy""","""Sibundoy""","""86""","""749""","""Decreto 1871 d…",97.734625,2020,0.007922,0.511382,"""POLYGON ((-76.…"
"""03223223010122…","""86749""","""SIBUNDOY""","""SIBUNDOY""","""PUTUMAYO""","""PUTUMAYO""","""Sibundoy""","""Sibundoy""","""86""","""749""","""Decreto 1871 d…",97.734625,2020,0.007922,0.511382,"""POLYGON ((-76.…"


## Load Model

In [9]:
model = load_model(MODEL_FPATH, trusted=True)
model

In [10]:
model_settings = model.model_settings_
model_settings

{'features': ['slope_median',
  'aspect_median',
  'soil_class',
  'elevation_median',
  'rainfall_mm_median',
  'lithology_type',
  'sand_5-15cm_mean',
  'sand_100-200cm_mean',
  'silt_5-15cm_mean',
  'silt_100-200cm_mean',
  'clay_5-15cm_mean',
  'clay_100-200cm_mean',
  'hillshade_median',
  'distance_m_roads',
  'distance_m_rivers',
  'lithology_type_lattice_3',
  'soil_class_lattice_3',
  'elevation_median_lattice_3',
  'slope_median_lattice_3',
  'aspect_median_lattice_3',
  'hillshade_median_lattice_3',
  'rainfall_mm_median_lattice_3',
  'sand_5-15cm_mean_lattice_3',
  'sand_100-200cm_mean_lattice_3',
  'silt_5-15cm_mean_lattice_3',
  'silt_100-200cm_mean_lattice_3',
  'clay_5-15cm_mean_lattice_3',
  'clay_100-200cm_mean_lattice_3',
  'distance_m_roads_lattice_3',
  'distance_m_rivers_lattice_3'],
 'apply_log_transform': False,
 'reverse_log_transform': False,
 'label_column': 'label_multiclass'}

# Model Roll-out

In [11]:
# the target coverage for the prediction interval is 1 - alpha
PRED_INTERVAL_ALPHA = 0.1

In [12]:
# Set up for model inference
X_rollout = features_df.select(model_settings["features"])
X_rollout.head()

slope_median,aspect_median,soil_class,elevation_median,rainfall_mm_median,lithology_type,sand_5-15cm_mean,sand_100-200cm_mean,silt_5-15cm_mean,silt_100-200cm_mean,clay_5-15cm_mean,clay_100-200cm_mean,hillshade_median,distance_m_roads,distance_m_rivers,lithology_type_lattice_3,soil_class_lattice_3,elevation_median_lattice_3,slope_median_lattice_3,aspect_median_lattice_3,hillshade_median_lattice_3,rainfall_mm_median_lattice_3,sand_5-15cm_mean_lattice_3,sand_100-200cm_mean_lattice_3,silt_5-15cm_mean_lattice_3,silt_100-200cm_mean_lattice_3,clay_5-15cm_mean_lattice_3,clay_100-200cm_mean_lattice_3,distance_m_roads_lattice_3,distance_m_rivers_lattice_3
f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64
0.0,0.0,10.0,0.0,1782.841309,42.0,262.75,232.25,223.75,208.75,263.5,309.25,0.0,1057.158119,5317.707214,42.0,0.0,616.535714,16.321429,103.142857,115.053571,1768.910867,331.160714,300.901786,279.017857,263.821429,345.0,390.848214,898.256797,5096.987494
0.0,0.0,0.0,0.0,1730.637817,42.0,355.0,314.0,297.0,274.0,348.0,412.0,0.0,1142.302649,5310.725048,42.0,0.0,607.95,14.183333,97.933333,112.6,1738.617017,336.033333,306.341667,279.058333,266.008333,343.075,386.166667,961.398781,5101.554011
22.0,146.0,0.0,807.0,1730.637817,42.0,348.0,311.0,296.5,278.0,355.5,411.5,147.0,996.862308,5209.769126,42.0,0.0,636.142857,16.257143,104.6,121.071429,1751.852609,334.671429,307.05,280.364286,268.042857,349.078571,389.378571,885.60206,5047.859659
0.0,0.0,0.0,0.0,1950.365967,42.0,353.0,313.0,299.0,269.0,348.0,418.0,0.0,987.113411,5334.302642,42.0,0.0,561.956522,16.652174,88.304348,106.021739,1834.327074,323.75,298.847826,281.891304,271.391304,350.836957,386.478261,746.03176,5054.777113
0.0,0.0,0.0,0.0,1873.968262,42.0,265.0,232.25,223.25,206.5,261.75,311.25,0.0,1011.826638,5323.990161,42.0,0.0,618.76,17.72,100.84,113.94,1796.676309,332.25,304.12,284.63,271.58,353.05,394.54,814.063313,5079.217917


In [13]:
%%time
pred = pl.Series("y_val_pred", model.predict(X_rollout.to_pandas()))
pred_proba = pl.Series("y_val_pred", model.predict_proba(X_rollout.to_pandas()))

CPU times: user 4.29 s, sys: 218 ms, total: 4.51 s
Wall time: 791 ms


In [14]:
label_col = model_settings["label_column"]

rollout_df = features_df.with_columns(
    pl.Series(name=f"{label_col}_pred_class", values=pred)
)
rollout_df = rollout_df.with_columns(
    pl.Series(name=f"{label_col}_pred_proba", values=pred_proba)
)
rollout_df = rollout_df.with_columns(
    pl.col(f"{label_col}_pred_proba").list.to_struct()
).unnest(f"{label_col}_pred_proba")

# Generating rename mapping based on the number of classes
proba_length = len(pred_proba[0])
rename_mapping = {f"field_{i}": f"pred_proba_{i}" for i in range(proba_length)}

# Renaming the columns
rollout_df = rollout_df.rename(rename_mapping)

In [15]:
rollout_df.head()

quadkey,MPIO_CCNCT,MPIO_CNMBR,MPIO_CNMBR_EN,DPTO_CNMBR,DPTO_CNMBR_EN,Municipio,Municipio_EN,DPTO_CCDGO,MPIO_CCDGO,MPIO_CRSLC,MPIO_NAREA,MPIO_NANO,SHAPE_AREA,SHAPE_LEN,slope_min,slope_max,slope_count,slope_median,aspect_min,aspect_max,aspect_count,aspect_median,soil_class,elevation_min,elevation_max,elevation_count,elevation_median,rainfall_mm_min,rainfall_mm_max,rainfall_mm_count,rainfall_mm_median,lithology_type,sand_5-15cm_mean,sand_100-200cm_mean,silt_5-15cm_mean,silt_100-200cm_mean,…,distance_m_roads,distance_m_rivers,x,y,z,lithology_type_lattice_3,elevation_median_lattice_3,slope_median_lattice_3,aspect_median_lattice_3,hillshade_median_lattice_3,rainfall_mm_median_lattice_3,sand_5-15cm_mean_lattice_3,sand_100-200cm_mean_lattice_3,silt_5-15cm_mean_lattice_3,silt_100-200cm_mean_lattice_3,clay_5-15cm_mean_lattice_3,clay_100-200cm_mean_lattice_3,distance_m_roads_lattice_3,distance_m_rivers_lattice_3,soil_class_lattice_3,elevation_median_lattice_3_right,slope_median_lattice_3_right,aspect_median_lattice_3_right,hillshade_median_lattice_3_right,rainfall_mm_median_lattice_3_right,sand_5-15cm_mean_lattice_3_right,sand_100-200cm_mean_lattice_3_right,silt_5-15cm_mean_lattice_3_right,silt_100-200cm_mean_lattice_3_right,clay_5-15cm_mean_lattice_3_right,clay_100-200cm_mean_lattice_3_right,distance_m_roads_lattice_3_right,distance_m_rivers_lattice_3_right,label_multiclass_pred_class,pred_proba_0,pred_proba_1,pred_proba_2
str,str,str,str,str,str,str,str,str,str,str,f64,i64,f64,f64,f64,f64,i64,f64,f64,f64,i64,f64,f64,f64,f64,i64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,…,f64,f64,i64,i64,i64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,i64,f32,f32,f32
"""03223013331131…","""54250""","""EL TARRA""","""EL TARRA""","""NORTE DE SANTA…","""NORTE DE SANTA…","""El Tarra""","""El Tarra""","""54""","""250""","""Ordenanza 4 de…",704.100287,2020,0.057828,1.421222,0.0,24.0,36,0.0,0.0,165.0,272,0.0,10.0,0.0,850.0,272,0.0,1782.841309,1782.841309,1.0,1782.841309,42.0,262.75,232.25,223.75,208.75,…,1057.158119,5317.707214,77819,124718,18,42.0,616.535714,16.321429,103.142857,115.053571,1768.910867,331.160714,300.901786,279.017857,263.821429,345.0,390.848214,898.256797,5096.987494,0.0,616.535714,16.321429,103.142857,115.053571,1768.910867,331.160714,300.901786,279.017857,263.821429,345.0,390.848214,898.256797,5096.987494,2,0.103754,0.055635,0.840611
"""03223013331131…","""54250""","""EL TARRA""","""EL TARRA""","""NORTE DE SANTA…","""NORTE DE SANTA…","""El Tarra""","""El Tarra""","""54""","""250""","""Ordenanza 4 de…",704.100287,2020,0.057828,1.421222,0.0,25.0,36,0.0,0.0,201.0,256,0.0,0.0,0.0,834.0,256,0.0,1730.637817,1730.637817,1.0,1730.637817,42.0,355.0,314.0,297.0,274.0,…,1142.302649,5310.725048,77818,124719,18,42.0,607.95,14.183333,97.933333,112.6,1738.617017,336.033333,306.341667,279.058333,266.008333,343.075,386.166667,961.398781,5101.554011,0.0,607.95,14.183333,97.933333,112.6,1738.617017,336.033333,306.341667,279.058333,266.008333,343.075,386.166667,961.398781,5101.554011,2,0.247863,0.294107,0.45803
"""03223013331131…","""54250""","""EL TARRA""","""EL TARRA""","""NORTE DE SANTA…","""NORTE DE SANTA…","""El Tarra""","""El Tarra""","""54""","""250""","""Ordenanza 4 de…",704.100287,2020,0.057828,1.421222,0.0,33.0,36,22.0,0.0,185.0,272,146.0,0.0,0.0,835.0,272,807.0,1730.637817,1730.637817,1.0,1730.637817,42.0,348.0,311.0,296.5,278.0,…,996.862308,5209.769126,77819,124719,18,42.0,636.142857,16.257143,104.6,121.071429,1751.852609,334.671429,307.05,280.364286,268.042857,349.078571,389.378571,885.60206,5047.859659,0.0,636.142857,16.257143,104.6,121.071429,1751.852609,334.671429,307.05,280.364286,268.042857,349.078571,389.378571,885.60206,5047.859659,0,0.413763,0.327544,0.258693
"""03223013331131…","""54250""","""EL TARRA""","""EL TARRA""","""NORTE DE SANTA…","""NORTE DE SANTA…","""El Tarra""","""El Tarra""","""54""","""250""","""Ordenanza 4 de…",704.100287,2020,0.057828,1.421222,0.0,41.0,36,0.0,0.0,24.0,272,0.0,0.0,0.0,799.0,272,0.0,1935.738037,1964.993896,2.0,1950.365967,42.0,353.0,313.0,299.0,269.0,…,987.113411,5334.302642,77821,124716,18,42.0,561.956522,16.652174,88.304348,106.021739,1834.327074,323.75,298.847826,281.891304,271.391304,350.836957,386.478261,746.03176,5054.777113,0.0,561.956522,16.652174,88.304348,106.021739,1834.327074,323.75,298.847826,281.891304,271.391304,350.836957,386.478261,746.03176,5054.777113,1,0.37501,0.386905,0.238084
"""03223013331131…","""54250""","""EL TARRA""","""EL TARRA""","""NORTE DE SANTA…","""NORTE DE SANTA…","""El Tarra""","""El Tarra""","""54""","""250""","""Ordenanza 4 de…",704.100287,2020,0.057828,1.421222,0.0,14.0,36,0.0,0.0,121.0,256,0.0,0.0,0.0,845.0,256,0.0,1873.968262,1873.968262,1.0,1873.968262,42.0,265.0,232.25,223.25,206.5,…,1011.826638,5323.990161,77820,124717,18,42.0,618.76,17.72,100.84,113.94,1796.676309,332.25,304.12,284.63,271.58,353.05,394.54,814.063313,5079.217917,0.0,618.76,17.72,100.84,113.94,1796.676309,332.25,304.12,284.63,271.58,353.05,394.54,814.063313,5079.217917,2,0.081544,0.077903,0.840552


In [16]:
# add  geometry
rollout_df = rollout_df.join(adm_grids.select(["quadkey", "geometry"]), on="quadkey")

In [17]:
rollout_df = rollout_df.to_pandas()

In [18]:
# convert to gdf
rollout_gdf = gpd.GeoDataFrame(
    rollout_df, geometry=rollout_df.geometry.apply(wkt.loads), crs="EPSG:4326"
)

In [19]:
if OUTPUT_FPATH.exists():
    !rm -rf $OUTPUT_FPATH
    rollout_gdf.to_file(OUTPUT_FPATH, driver="GPKG")
else:
    rollout_gdf.to_file(OUTPUT_FPATH, driver="GPKG")


pandas.Int64Index is deprecated and will be removed from pandas in a future version. Use pandas.Index with the appropriate dtype instead.



In [20]:
rollout_gdf.to_csv(DATA_DIR / f"output/component_1/{OUTPUT_VERSION}_rollout_preds.csv")