/
feature-importance.Rmd
107 lines (93 loc) · 3.58 KB
/
feature-importance.Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
---
title: "Feature importance analysis"
output:
  workflowr::wflow_html:
    includes:
      in_header: header.html
editor_options:
  chunk_output_type: console
author: "Patrick Schratz"
---
```{r setup, include=FALSE}
# Setup chunk: sets default knitr chunk options, attaches the packages used by
# this document, sources the project's helper scripts and loads cached drake
# targets into the session.

# Default options applied to every subsequent chunk: figure resolution,
# alignment and size, output width, and `echo = FALSE` to hide the chunk code
# in the rendered document.
knitr::opts_chunk$set(
fig.retina = 3,
fig.align = "center",
fig.width = 6.93,
fig.height = 6.13,
out.width = "100%",
echo = FALSE
)
# Attach packages. The order matters for function masking, so it is kept as is.
library("dplyr")
library("tidyr")
library("ggpubr")
library("purrr")
library("glue")
library("fs")
# Source the R scripts found in the project's "R" directory.
R.utils::sourceDirectory("R")
# Load the listed targets from the drake cache into the session.
# NOTE(review): presumably these hold the permutation-based feature importance
# results for the VI, NRI and HR feature sets -- confirm against the drake plan.
# None of the three objects is referenced by the plotting chunk below, which
# uses hard-coded values instead.
drake::loadd(fi_permut_vi, fi_permut_nri, fi_permut_hr)
# dplyr is already attached above, so this second call has no effect.
library(dplyr)
# hsdar provides PROSAIL(), used below to simulate a vegetation spectrum.
library(hsdar)
```
```{r }
# Hard-coded tables of the most important features, one table per feature set.
# Columns:
#   class      - feature set label, used later for colouring and facetting
#   rank       - importance rank plotted on the y-axis
#   wavelength - position on the x-axis, in nm
#   feature    - text shown in the point label
data_vi <- data.frame(
  class = rep("VI", 3),
  rank = c(1, 3, 3),
  wavelength = c(404, 404, 788),
  feature = c("FeatureA", "FeatureB", "FeatureC")
)

# NOTE(review): the first NRI wavelength (4050) lies outside the 400-1000 nm
# x-axis limits used when plotting, so that point cannot appear in the figure.
# Probably a typo (405?) -- confirm against the real importance results.
data_nri <- data.frame(
  class = rep("NRI", 3),
  rank = c(1, 3, 3),
  wavelength = c(4050, 470, 600),
  feature = c("FeatureA", "FeatureB", "FeatureC")
)
# Simulate a spectral signature of vegetation with PROSAIL
# (see `?hsdar::PROSAIL`).
#
# The simulated signature covers 400 nm to 2500 nm in 1 nm steps; only the
# values below 1000 nm are kept. The reflectance is then rescaled to 0-10 so it
# can share the y-axis with the importance ranks, and flipped (`10 - ...`)
# because rank 1 (the best) sits at the top of the reversed y-axis.
spectra_sim <- hsdar::PROSAIL()
spectra_df <- data.frame(
  reflectance = as.vector(spectra_sim@spectra@spectra_ma),
  wavelength = seq(400, 2500, 1)
) %>%
  dplyr::filter(wavelength < 1000) %>%
  # Plain arithmetic instead of `scale()`: `scale()` returns a one-column
  # matrix with a "scaled:scale" attribute, which would be stored as a matrix
  # column in the data frame. The numeric result is the same
  # (x / (max(x) / 10)), but the column stays an ordinary numeric vector for
  # `bind_rows()` and ggplot2.
  dplyr::mutate(
    reflectance = 10 - reflectance / (max(reflectance, na.rm = TRUE) / 10)
  )

# Full-range (400-2500 nm) reflectance and its 0-10 rescaled version, kept as
# separate objects.
# NOTE(review): neither object is referenced by the plotting code in this
# chunk; the secondary axis is computed from the primary axis values instead.
reflectance <- as.vector(spectra_sim@spectra@spectra_ma)
reflectance_scaled <- scale(reflectance, center = FALSE, scale = max(reflectance, na.rm = TRUE) / 10)
# Combine the simulated spectrum with the feature rankings.
#
# The spectrum rows are stacked on top of each feature table and every row of
# the result is tagged with that table's feature set. Facetting by `class`
# only works if every row carries a class level, so the spectrum has to be
# duplicated once per feature set. This is also why the feature tables cannot
# be merged into a single data frame before the spectrum is appended.
data_vi_merged <- mutate(bind_rows(spectra_df, data_vi), class = "VI")
data_nri_merged <- mutate(bind_rows(spectra_df, data_nri), class = "NRI")

# One long table holding both feature sets, ready for the facetted plot.
data_all <- bind_rows(data_vi_merged, data_nri_merged)
# Facetted scatter plot of feature importance rank against wavelength, one
# panel per feature set, with the simulated (rescaled) vegetation spectrum
# drawn as a dotted line behind the points.
ggscatter(data_all,
  x = "wavelength", y = "rank", color = "class",
  # add = "segments",
  palette = "nejm",
  size = 3,
  facet.by = "class"
) +
  # Vertical grey stem from the bottom of the reversed axis (y = 10) up to
  # each point.
  geom_segment(
    aes(x = wavelength, y = 10, xend = wavelength, yend = rank),
    color = "grey"
  ) +
  # scale_y_continuous(limits = c(0L, 10L), breaks = scales::pretty_breaks()) +
  scale_x_continuous(limits = c(400, 1000), breaks = scales::pretty_breaks()) +
  # Reversed primary axis so that rank 1 is at the top. The secondary axis
  # rescales the primary values by their maximum. Its labels are assigned by
  # hand (this relies on the secondary axis producing five breaks) and are
  # listed in descending order so the largest reflectance sits at the top.
  # They must form the evenly spaced sequence 1, 0.75, 0.5, 0.25, 0; the
  # earlier values `10` and `0.55` broke that sequence.
  scale_y_reverse(
    limits = c(10L, 0L),
    breaks = scales::pretty_breaks(),
    sec.axis = sec_axis(
      ~ scale(-., center = FALSE, scale = max(., na.rm = TRUE) / -1),
      labels = c(1, 0.75, 0.5, 0.25, 0),
      name = "Reflectance [%]"
    )
  ) +
  labs(y = "Importance", x = "Wavelength [nm]", color = "Feature set") +
  geom_line(aes(x = wavelength, y = reflectance), linetype = "dotted") +
  # Map the label through `aes()` rather than passing `data_all$feature` as a
  # constant parameter, so each label stays attached to its own row whatever
  # ggplot2 does with the layer data (facetting, dropping rows).
  ggrepel::geom_label_repel(
    aes(label = feature),
    nudge_x = 0.5,
    nudge_y = 0.5
  )
```