analysis/feature-importance.Rmd

---
title: "Feature importance analysis"
output: 
  workflowr::wflow_html:
    includes:
      in_header: header.html
editor_options:
  chunk_output_type: console
author: "Patrick Schratz"
---

```{r setup, include=FALSE}
# Setup chunk: sets knitr chunk defaults, attaches packages, sources the
# project's R/ directory and loads the drake targets.

# Defaults applied to the chunks of this document: figure resolution,
# alignment and size, output width, and `echo = FALSE` so the rendered report
# does not show the chunk source code.
knitr::opts_chunk$set(
  fig.retina = 3,
  fig.align = "center",
  fig.width = 6.93,
  fig.height = 6.13,
  out.width = "100%",
  echo = FALSE
)

# Packages attached for the analysis.
library("dplyr")
library("tidyr")
library("ggpubr")
library("purrr")
library("glue")
library("fs")
# Source the R scripts found in the "R" directory (path is relative to the
# working directory used when knitting).
R.utils::sourceDirectory("R")

# load drake objects
# (the three feature-importance targets are made available by name)
drake::loadd(fi_permut_vi, fi_permut_nri, fi_permut_hr)

# NOTE(review): dplyr is already attached above, so this second call is
# redundant (harmless, attaching twice is a no-op).
library(dplyr)
# hsdar is used for `hsdar::PROSAIL()` in the plotting chunk.
library(hsdar)
```


```{r }
# Purpose of this chunk: plot the rank of the most important features of each
# feature set against their wavelength and overlay a simulated vegetation
# spectral signature (dotted line) in the same panels.

# create datasets of most imp features for each feature set
# NOTE(review): names, ranks and wavelengths below look like placeholders
# ("FeatureA", ...) -- confirm they are meant to be replaced by values derived
# from the loaded `fi_permut_*` objects.
data_vi <- data.frame(
  class = c("VI", "VI", "VI"),
  rank = c(1, 3, 3),
  wavelength = c(404, 404, 788),
  feature = c("FeatureA", "FeatureB", "FeatureC")
)

data_nri <- data.frame(
  class = c("NRI", "NRI", "NRI"),
  rank = c(1, 3, 3),
  # NOTE(review): 4050 lies outside the 400-1000 nm x-axis limits used below,
  # so this feature is dropped from the plot -- possibly a typo (405?); confirm.
  wavelength = c(4050, 470, 600),
  feature = c("FeatureA", "FeatureB", "FeatureC")
)

# create spectral signature of vegetation
# PROSAIL is an algorithm simulating spectral signatures of vegetation, see
# `?hsdar::PROSAIL`
# reflectance is scaled to 0-10 to be able to plot it in the same plot as the
# filter rankings -> the y-axes need to be the same
# PROSAIL returns a spectral signature from 400 nm to 2500 nm -> we take the
# values only and subset to wavelengths below 1000 nm
# because we order from 1 - 10 with 1 being the best, we have to reverse the
# scaling of the reflectance values
spectra_sim <- hsdar::PROSAIL()
spectra_df <- data.frame(
  reflectance = as.vector(spectra_sim@spectra@spectra_ma),
  wavelength = seq(400, 2500, 1)
) %>%
  dplyr::filter(wavelength < 1000) %>%
  # scale the reflectance to fit into the range of the y-axis for the filter
  # ranking (the `10 -` is to reverse the scale).
  # Plain arithmetic is used instead of `scale()`: `scale()` returns a
  # one-column matrix with attributes, which would turn `reflectance` into a
  # matrix column of the data frame. The numeric result is the same.
  dplyr::mutate(
    reflectance = 10 - reflectance / (max(reflectance, na.rm = TRUE) / 10)
  )

# full-range (400-2500 nm) reflectance and its 0-10 scaled version, kept as
# stand-alone objects; the plot in this chunk does not reference them
reflectance <- as.vector(spectra_sim@spectra@spectra_ma)
reflectance_scaled <- scale(
  reflectance,
  center = FALSE,
  scale = max(reflectance, na.rm = TRUE) / 10
)

# here we bind the simulated data with the filter rankings
# Important: Append the feature set class to the simulated data so when merged
# all together the facetting works (only works if we have class levels for all
# entries)
# This is also the reason why we cannot create an initial data.frame containing
# the results of all feature sets
data_vi_merged <- bind_rows(spectra_df, data_vi) %>%
  mutate(class = "VI")

data_nri_merged <- bind_rows(spectra_df, data_nri) %>%
  mutate(class = "NRI")

data_all <- bind_rows(data_vi_merged, data_nri_merged)

# Give every layer only the rows it can actually draw:
# - feature rows carry `rank`/`feature` but no `reflectance`
# - signature rows carry `reflectance` but no `rank`
# Feeding the full merged table to all layers makes the point/segment/label
# layers drop the signature rows with warnings, and the feature rows (NA
# reflectance) end up inside the wavelength-sorted line where they interrupt
# the dotted signature.
features_df <- dplyr::filter(data_all, !is.na(rank))
signature_df <- dplyr::filter(data_all, !is.na(reflectance))

ggscatter(features_df,
  x = "wavelength", y = "rank", color = "class",
  # add = "segments",
  palette = "nejm",
  size = 3,
  facet.by = "class"
) +
  # vertical stem from the bottom of the (reversed) axis up to each point
  geom_segment(
    aes(x = wavelength, y = 10, xend = wavelength, yend = rank),
    color = "grey"
  ) +
  scale_x_continuous(
    limits = c(400, 1000),
    breaks = scales::pretty_breaks()
  ) +
  scale_y_reverse(
    limits = c(10L, 0L),
    breaks = scales::pretty_breaks(),
    # The signature is drawn at y = 10 - 10 * r / max(r); the secondary axis
    # applies the exact inverse so its ticks show r / max(r) (1 at the top,
    # 0 at the bottom) without hand-written labels.
    # NOTE(review): the ticks are a 0-1 fraction of the maximum reflectance in
    # the plotted range, while the title says "[%]" -- confirm intended unit.
    sec.axis = sec_axis(
      ~ 1 - . / 10,
      name = "Reflectance [%]"
    )
  ) +
  labs(y = "Importance", x = "Wavelength [nm]", color = "Feature set") +
  geom_line(
    data = signature_df,
    aes(x = wavelength, y = reflectance),
    linetype = "dotted"
  ) +
  # label mapped through `aes()` so it always lines up with the layer's rows
  ggrepel::geom_label_repel(
    aes(label = feature),
    nudge_x = 0.5,
    nudge_y = 0.5
  )
```