# Post Processing

This notebook demonstrates how to use the `history.postprocessing` module. For more details on the post-processing workflow, see the [README](README.md).


In [None]:
# Enable IPython's autoreload extension in mode 2 so that edits made to
# imported modules are picked up live, without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [None]:
import history.postprocessing as pp
from history.utils import log_to_file
from pathlib import Path
import pandas as pd
import logging
import sys

## ⚙️ General Settings

In [None]:
# Root of all experiment outputs. Every directory below derives from it, so
# relocating the data only requires editing this single line.
OUTPUT_ROOT = Path("/mnt/summer/USERS/DEHECQA/history/output")

# level 1 directories
raw_dir = OUTPUT_ROOT / "raw"
extracted_dir = OUTPUT_ROOT / "extracted"
proc_dir = OUTPUT_ROOT / "processing"
plot_dir = OUTPUT_ROOT / "plots"

# level 2 directories (all located inside the processing directory)
symlinks_dir = proc_dir / "symlinks"
raw_dems_dir = proc_dir / "raw_dems"
coreg_dems_dir = proc_dir / "coregistered_dems"
before_coreg_ddems_dir = proc_dir / "ddems" / "before_coregistration"
after_coreg_ddems_dir = proc_dir / "ddems" / "after_coregistration"
std_dems_dir = proc_dir / "std_dems"

# Flags and tuning values passed to the pipeline calls further down.
OVERWRITE = False
DRY_RUN = False  # set this to True to skip running point2dem
PDAL_EXEC_PATH = "/home/godinlu/micromamba/envs/pdal/bin/pdal"
MAX_WORKERS = 4

## ⚙️ Reference Data Settings
For each (site, dataset) pair, assign three reference files: the reference DEM, its mask, and the landcover.

In [None]:
# Directory holding every reference raster; edit this single line to relocate them.
aux_data_dir = "/mnt/summer/USERS/DEHECQA/history/output/aux_data"


def _reference_files(site_prefix, dem_variant, landcover_variant):
    """Build the reference-file entry of one (site, dataset) pair.

    Args:
        site_prefix: site name as used in the file names (e.g. "casagrande").
        dem_variant: variant part of the reference DEM file name (e.g. "zoom_5m").
        landcover_variant: variant part of the landcover file name (e.g. "zoom").

    Returns:
        A dict with the "ref_dem", "ref_dem_mask" and "landcover" paths (as strings).
    """
    return {
        "ref_dem": f"{aux_data_dir}/{site_prefix}_ref_dem_{dem_variant}.tif",
        "ref_dem_mask": f"{aux_data_dir}/{site_prefix}_ref_dem_{dem_variant}_mask.tif",
        "landcover": f"{aux_data_dir}/{site_prefix}_landcover_{landcover_variant}.tif",
    }


# Keys are (site, dataset) pairs. Note that the Casa Grande files use the
# "casagrande" prefix (no underscore) while the site key is "casa_grande".
references_data_mapping = {
    ("casa_grande", "aerial"): _reference_files("casagrande", "zoom_5m", "zoom"),
    ("casa_grande", "kh9mc"): _reference_files("casagrande", "large", "large"),
    ("iceland", "aerial"): _reference_files("iceland", "zoom_5m", "zoom"),
    ("iceland", "kh9mc"): _reference_files("iceland", "large", "large"),
}

# Same reference files for KH-9 MC and KH-9 PC: the kh9pc keys point to the
# very same dict objects as the kh9mc keys.
references_data_mapping[("casa_grande", "kh9pc")] = references_data_mapping[("casa_grande", "kh9mc")]
references_data_mapping[("iceland", "kh9pc")] = references_data_mapping[("iceland", "kh9mc")]


# Wrap the mapping in the object that the pipeline, stats and viz calls below receive.
references_data = pp.ReferencesData(
    references_data_mapping,
)

## ⚙️ Logger Configuration

The `history.postprocessing` module uses a dedicated logger. To display its messages, the logger level is set to `INFO`, and a handler is attached to send output to the standard console.

You can also redirect logs to a file using the `history.utils.log_to_file` function.

In [None]:
# Dedicated logger of the post-processing module, shown from INFO upwards.
logger = logging.getLogger("history.postprocessing")
logger.setLevel(logging.INFO)

# Attach the console handler only when the logger has none yet, so that
# re-running this cell does not duplicate every message.
if len(logger.handlers) == 0:
    handler = logging.StreamHandler(sys.stdout)
    handler.setFormatter(
        logging.Formatter("[%(levelname)s] %(name)s: %(message)s")
    )
    logger.addHandler(handler)

## ⚙️ [optional] Inliers Configuration

This step is not mandatory, but in this experiment it helps enhance the visibility and interpretability of certain plots.

In [None]:
# Submission codes excluded from the "inliers" variant of the plots and STD DEMs.
outlier_ids = [
    # Casa Grande, aerial
    "HIATUS_CG_AI_PP_CY_GY_PN_MN",
    "HIATUS_CG_AI_PP_CY_GY_PN_MY",
    "HSfM_CG_AI_PP_CY_GN_PY_MN",
    "fdahle_CG_AI_PP_CN_GY_PN_MN",
    "fdahle_CG_AI_PP_CY_GN_PN_MN",
    "lkugl_CG_AI_PP_CN_GN_PY_MN_V1",
    "lkugl_CG_AI_PP_CN_GN_PY_MN_V2",
    "lkugl_CG_AI_PP_CN_GN_PY_MN_V3",
    "lpierm_CG_AI_PP_CY_GY_PN_MN",
    # Iceland, aerial
    "fdahle_IL_AI_PP_CY_GN_PN_MN",
    "fdahle_IL_AI_PP_CY_GY_PN_MN",
]

### 🧩 Step 1 — Extract archives

In [None]:
# Extract the submissions of raw_dir into extracted_dir.
pp.pipeline.uncompress_all_submissions(
    raw_dir,
    extracted_dir,
    OVERWRITE,
    MAX_WORKERS,
)

### 🧩 Step 2 — Analyse submissions and create symlinks

In [None]:
# Index the extracted submissions and create the symlinks under symlinks_dir.
pp.pipeline.index_submissions_and_link_files(
    extracted_dir,
    symlinks_dir,
)

**Visualization**: Create a bar plot of the point count of each dense point-cloud file.

In [None]:
# Every entry of the "dense_pointclouds" symlink folder is treated as a point-cloud file.
pointcloud_files = [*(symlinks_dir / "dense_pointclouds").iterdir()]

# Per-file statistics; the "point_count" variable is the one plotted below.
df = pp.stats.compute_pcs_statistics_df(pointcloud_files)

pp.viz.barplot_var(df, plot_dir / "pointcloud_point_count.png", "point_count", "Point count in dense point-cloud file")

**Visualization**: Visualize all the files found for each submission.

In [None]:
# Each entry directly under symlinks_dir is handed to the presence map.
symlinks_directories = [*symlinks_dir.iterdir()]
pp.viz.visualize_files_presence_map(symlinks_directories)

### 🧩 Step 4 — Convert point clouds to DEMs

Use `history.utils.log_to_file` to redirect the logs to a log file.

In [None]:
# Re-list the dense point clouds so this cell does not depend on the plotting cell above.
pointcloud_files = [*(symlinks_dir / "dense_pointclouds").iterdir()]

# While the context manager is active, the module logger is redirected to a
# file located in the "logs" sub-folder of raw_dems_dir.
with log_to_file(raw_dems_dir / "logs", logger) as log_path:
    pp.pipeline.process_pointclouds_to_dems(
        pointcloud_files, raw_dems_dir, references_data,
        PDAL_EXEC_PATH,
        OVERWRITE, DRY_RUN, MAX_WORKERS,
    )
    # Tell the user where the redirected log ended up.
    print(f"Log file save at {log_path}")

For submissions where the point-cloud-to-DEM conversion does not work, add the manually provided DEMs instead. Each provided DEM is reprojected onto the corresponding reference DEM.

In [None]:
# File names, inside the "dems" symlink folder, of the DEMs provided directly
# with the submissions.
provided_dem_names = [
    "CoSP_CG_PC_PP_CY_GN_PN_MN_dem.tif",
    "CoSP_IL_PC_PP_CY_GN_PN_MN_dem.tif",
    "HIATUS_CG_AI_PP_CY_GY_PN_MN_dem.tif",
    "HIATUS_CG_AI_PP_CY_GY_PN_MY_dem.tif",
    "JB_CG_AI_PP_CY_GN_PN_MN_3m_EPSG4326_zmae_DEM.tif",
    "JB_CG_MC_PP_CY_GN_PN_MN_20m_EPSG4326_zmae_DEM.tif",
    "JB_IL_AI_PP_CY_GN_PN_MN_5m_EPSG4326_zmae_DEM.tif",
    "JB_IL_MC_PP_CY_GN_PN_MN_20m_EPSG4326_zmae_DEM.tif",
]

# Resolve the names against the configured symlinks_dir instead of repeating
# its absolute path, so the list follows the settings cell. str() keeps the
# entries as plain string paths, exactly as before.
dem_files = [str(symlinks_dir / "dems" / name) for name in provided_dem_names]

pp.pipeline.add_provided_dems(dem_files, raw_dems_dir, references_data)

**Visualization (~ 5 min)**:
- Create a bar plot of the raw DEM nodata percentage.
- For each (site, dataset) pair, create a mosaic plot of the raw DEMs.

In [None]:
# Basic statistics of the raw DEMs (can take around 5 min the first time).
df = pp.stats.compute_dems_statistics_df(
    raw_dems_dir.glob("*-DEM.tif"),
    max_workers=MAX_WORKERS,
)

# Bar plot of the nodata percentage of each raw DEM.
pp.viz.barplot_var(df, plot_dir / "raw_dem_voids.png", "percent_nodata", "Raw DEM nodata percent")

# One mosaic per (site, dataset) group.
for (site, dataset), group in df.groupby(["site", "dataset"]):
    # Shared color range of the group: median of the per-DEM minima and maxima.
    vmin = group["min"].median()
    vmax = group["max"].median()
    pp.viz.generate_dems_mosaic(
        group["file"].to_dict(),
        plot_dir / f"{site}_{dataset}" / "mosaic" / "mosaic_raw_dem.png",
        vmin,
        vmax,
        f"({site} {dataset}) Mosaic Raw DEMs",
    )

### 🧩 Step 5 — Coregister DEMs

In [None]:
# Coregister every raw DEM; the results are written to coreg_dems_dir.
pp.pipeline.coregister_dems(
    raw_dems_dir.glob("*-DEM.tif"),
    coreg_dems_dir,
    references_data,
    OVERWRITE,
    MAX_WORKERS,
)

**Visualization (~ 5 min)**: For each (site, dataset) pair, create a mosaic plot of the coregistered DEMs.

In [None]:
# Basic statistics of the coregistered DEMs.
df = pp.stats.compute_dems_statistics_df(
    coreg_dems_dir.glob("*-DEM.tif"),
    max_workers=MAX_WORKERS,
)

# One mosaic per (site, dataset) group.
for (site, dataset), group in df.groupby(["site", "dataset"]):
    # Shared color range of the group: median of the per-DEM minima and maxima.
    vmin = group["min"].median()
    vmax = group["max"].median()
    pp.viz.generate_dems_mosaic(
        group["file"].to_dict(),
        plot_dir / f"{site}_{dataset}" / "mosaic" / "mosaic_coreg_dem.png",
        vmin,
        vmax,
        f"({site} {dataset}) Mosaic Coregistered DEMs",
    )

**Visualization (~ 5 min)**: For each (site, dataset) pair, create a bar plot of the coregistration shifts (plus an inliers-only version).

In [None]:
# Get the coregistration shifts (should be in the raster tags of the coregistered DEMs).
df = pp.stats.get_coregistration_statistics_df(coreg_dems_dir.glob("*-DEM.tif"))

# One pair of plots per (site, dataset) group.
for (site, dataset), group in df.groupby(["site", "dataset"]):
    output_path = plot_dir / f"{site}_{dataset}" / "coregistration_shifts.png"
    title = f"({site} {dataset}) Coregistration shifts"
    pp.viz.generate_plot_coreg_shifts(group, output_path, title)

    # Same plot restricted to the inliers. Its title carries an
    # "(inliers only)" suffix, like the landcover plots, so the two figures
    # can be told apart once taken out of their folder.
    output_path_inliers = plot_dir / f"{site}_{dataset}" / "coregistration_shifts_inliers.png"
    title_inliers = f"{title} (inliers only)"
    pp.viz.generate_plot_coreg_shifts(
        group[~group.index.isin(outlier_ids)], output_path_inliers, title_inliers
    )


**Visualization**: For each submission, visualize all the files found together with the generated raw DEMs and coregistered DEMs.


In [None]:
# Symlink sub-folders first, then the two DEM output folders.
symlinks_directories = [*symlinks_dir.iterdir(), raw_dems_dir, coreg_dems_dir]
pp.viz.visualize_files_presence_map(symlinks_directories)

### 🧩 Step 6 — Generate Differential DEMs (DDEMs)


In [None]:
# Generate the DDEMs twice: from the raw DEMs (before coregistration), then
# from the coregistered DEMs (after coregistration).
for dems_dir, ddems_dir in (
    (raw_dems_dir, before_coreg_ddems_dir),
    (coreg_dems_dir, after_coreg_ddems_dir),
):
    pp.pipeline.generate_ddems(
        dems_dir.glob("*-DEM.tif"),
        ddems_dir,
        references_data,
        OVERWRITE,
        MAX_WORKERS,
    )

**Visualization (~ 5 min)**: Generate a bar plot of the DDEM NMAD after coregistration.

In [None]:
# Statistics of the DDEMs before and after coregistration, computed with the
# "ddem_before_" and "ddem_after_" prefixes respectively.
ddem_before_df = pp.stats.compute_dems_statistics_df(
    before_coreg_ddems_dir.glob("*-DDEM.tif"),
    "ddem_before_",
    MAX_WORKERS,
)
ddem_after_df = pp.stats.compute_dems_statistics_df(
    after_coreg_ddems_dir.glob("*-DDEM.tif"),
    "ddem_after_",
    MAX_WORKERS,
)

# Stack both frames, then collapse the rows sharing an index label into one,
# keeping the first non-null value of every column.
df = pd.concat([ddem_before_df, ddem_after_df]).groupby(level=0).first()

pp.viz.barplot_var(df, plot_dir / "nmad_after_coregistration.png", "ddem_after_nmad", "NMAD of Altitude differences with ref DEM after coregistration by code")

**Visualization**: For each (site, dataset) pair, generate a bar plot of the DDEM NMAD before and after coregistration (plus an inliers-only version).

In [None]:
# One pair of plots per (site, dataset) group.
for (site, dataset), group in df.groupby(["site", "dataset"]):
    output_path = plot_dir / f"{site}_{dataset}" / "nmad_before_vs_after_coregistration.png"
    title = f"({site} {dataset}) NMAD of DEM differences before vs after coregistration"

    pp.viz.generate_plot_nmad_before_vs_after(group, output_path, title)

    # Same plot restricted to the inliers. Its title carries an
    # "(inliers only)" suffix, like the landcover plots, so the two figures
    # can be told apart once taken out of their folder.
    output_path_inliers = plot_dir / f"{site}_{dataset}" / "nmad_before_vs_after_coregistration_inliers.png"
    title_inliers = f"{title} (inliers only)"
    pp.viz.generate_plot_nmad_before_vs_after(
        group[~group.index.isin(outlier_ids)], output_path_inliers, title_inliers
    )

**Visualization (~ 3 min)**: For each coregistered DEM, generate a plot showing the DDEM before coregistration and the DDEM after.

In [None]:
# Individual plots go to one "coregistrations" folder per (site, dataset) group.
for (site, dataset), group in df.groupby(["site", "dataset"]):
    pp.viz.generate_coregistration_individual_plots(
        group,
        plot_dir / f"{site}_{dataset}" / "coregistrations",
        OVERWRITE,
    )

**Visualization (~ 4 min)**: For each (site, dataset) pair, generate the following mosaic plots:
- DDEM after coregistration
- slope DDEM after coregistration
- hillshades DDEM after coregistration

In [None]:
# Three mosaics (DDEM, slopes, hillshades) per (site, dataset) group.
for (site, dataset), group in df.groupby(["site", "dataset"]):
    # Submissions without a DDEM after coregistration are left out.
    ddem_files_dict = group["ddem_after_file"].dropna().to_dict()
    mosaic_dir = plot_dir / f"{site}_{dataset}" / "mosaic"

    pp.viz.generate_ddems_mosaic(
        ddem_files_dict,
        output_path=mosaic_dir / "mosaic_ddem.png",
        title=f"({site} {dataset}) Mosaic of DDEMs after coregistration",
    )
    pp.viz.generate_slopes_mosaic(
        ddem_files_dict,
        output_path=mosaic_dir / "mosaic_slopes_ddem.png",
        title=f"({site} {dataset}) Mosaic slopes of DDEMs after coregistration",
    )
    pp.viz.generate_hillshades_mosaic(
        ddem_files_dict,
        output_path=mosaic_dir / "mosaic_hillshades_ddem.png",
        title=f"({site} {dataset}) Mosaic hillshades of DDEMs after coregistration",
    )

### 🧩 Step 7 — Compute landcover-based statistics

**Visualization (~ 6 min)**: For each (site, dataset) pair, generate the following plots:
- Boxplot of the altitude difference with the reference DEM by code/landcover (plus an inliers-only version)
- NMAD of the altitude difference with the reference DEM by code/landcover (plus an inliers-only version)

In [None]:
# Landcover statistics computed from the DDEMs after coregistration.
landcover_df = pp.stats.compute_landcover_statistics(
    after_coreg_ddems_dir.glob("*-DDEM.tif"),
    references_data,
    MAX_WORKERS,
)

for (site, dataset), group in landcover_df.groupby(["site", "dataset"]):
    # All the plots of one group are stored in the same sub-folder.
    sub_dir = plot_dir / f"{site}_{dataset}"

    # Plots with every submission of the group.
    pp.viz.generate_landcover_grouped_boxplot(
        group, sub_dir / "landcover_grouped_boxplot.png",
        title=f"({site} {dataset}) Boxplot of Altitude difference with ref DEM by code/landcover",
    )
    pp.viz.generate_landcover_nmad(
        group, sub_dir / "landcover_nmad.png",
        title=f"({site} {dataset}) NMAD of Altitude difference with ref DEM by code/landcover",
    )

    # Same two plots after dropping the rows whose "code" is a listed outlier.
    group_inliers = group[~group["code"].isin(outlier_ids)]
    pp.viz.generate_landcover_grouped_boxplot(
        group_inliers, sub_dir / "landcover_grouped_boxplot_inliers.png",
        title=f"({site} {dataset}) Boxplot of Altitude difference with ref DEM by code/landcover (inliers only)",
    )
    pp.viz.generate_landcover_nmad(
        group_inliers, sub_dir / "landcover_nmad_inliers.png",
        title=f"({site} {dataset}) NMAD of Altitude difference with ref DEM by code/landcover (inliers only)",
    )


### 🧩 Step 8 — Generate STD DEMs
For each (site, dataset) pair, generate two STD DEMs: one from all the DEMs and one from the inlier DEMs only.

In [None]:
# File paths of the coregistered DEMs, exposed in the "coreg_dem_file" column.
df = pp.io.get_filepaths_df(coreg_dem_file = coreg_dems_dir.glob("*-DEM.tif"))

for (site, dataset), group in df.groupby(["site", "dataset"]):
    # Both variants are passed as plain lists. The inliers variant used to be
    # a pandas Series, which was inconsistent with the "all DEMs" call.
    dem_files = group["coreg_dem_file"].tolist()
    dem_files_inliers = group.loc[~group.index.isin(outlier_ids), "coreg_dem_file"].tolist()

    # One STD DEM from all the DEMs, one from the inliers only.
    pp.pipeline.create_std_dem(dem_files, std_dems_dir / f"std_dem_{site}_{dataset}.tif", OVERWRITE)
    pp.pipeline.create_std_dem(dem_files_inliers, std_dems_dir / f"std_dem_{site}_{dataset}_inliers.tif", OVERWRITE)

In [None]:
# Plot every STD DEM into the folder of its (site, dataset) pair.
for std_dem_path in std_dems_dir.glob("*.tif"):
    # "std_dem_<site>_<dataset>[_inliers]" -> "<site>_<dataset>"
    site_dataset = std_dem_path.stem.replace("std_dem_", "").replace("_inliers", "")
    # The plot keeps the name of the raster, with a .png extension.
    png_name = std_dem_path.with_suffix(".png").name
    pp.viz.generate_std_dem_plots(std_dem_path, plot_dir / site_dataset / png_name)

### 🧩 Step 9 — Compute landcover-based statistics on STD DEMs

In [None]:
# Landcover statistics of every STD DEM raster found in std_dems_dir.
std_lc_df = pp.stats.compute_landcover_statistics_on_std_dems(
    std_dems_dir.glob("*.tif"),
    references_data,
    MAX_WORKERS,
)

# NOTE(review): this first plot receives the unfiltered frame, which presumably
# also contains the rows of the "_inliers" STD DEMs. Confirm this is intended.
pp.viz.generate_landcover_grouped_boxplot_from_std_dems(std_lc_df, plot_dir / "landcover_boxplot_from_std_dems.png")

# Second plot: only the rows whose STD DEM file name contains "_inliers".
is_inliers_row = std_lc_df["std_dem_file"].str.contains("_inliers")
std_lc_df_inliers = std_lc_df.loc[is_inliers_row]
pp.viz.generate_landcover_grouped_boxplot_from_std_dems(std_lc_df_inliers, plot_dir / "landcover_boxplot_from_std_dems_inliers.png")