In [4]:
# Attach the modelling stack, the example data set, and the
# variable-importance plotting helpers (same attach order as before).
library(tidymodels)
library(palmerpenguins)
library(vip)

# Use a larger base font size for every ggplot drawn in this notebook.
theme_set(
  theme_gray(base_size = 16)
)

"package 'vip' was built under R version 4.4.2"

Attaching package: 'vip'


The following object is masked from 'package:utils':

    vi




In [5]:
# Keep only the rows of `penguins` that have no missing values.
penguins2 <- drop_na(penguins)

# Preview the first rows of the cleaned data.
head(penguins2)

species,island,bill_length_mm,bill_depth_mm,flipper_length_mm,body_mass_g,sex,year
<fct>,<fct>,<dbl>,<dbl>,<int>,<int>,<fct>,<int>
Adelie,Torgersen,39.1,18.7,181,3750,male,2007
Adelie,Torgersen,39.5,17.4,186,3800,female,2007
Adelie,Torgersen,40.3,18.0,195,3250,female,2007
Adelie,Torgersen,36.7,19.3,193,3450,female,2007
Adelie,Torgersen,39.3,20.6,190,3650,male,2007
Adelie,Torgersen,38.9,17.8,181,3625,female,2007


## Use random forests to model `bill_length_mm`

In [6]:
# Random forest regression spec using the ranger engine.
# `importance = "impurity"` is forwarded to the engine so that variable
# importance scores are available from the fitted model later on.
mod <- rand_forest(mode = "regression") |>
  set_engine("ranger", importance = "impurity")

In [7]:
# Preprocessing: predict bill_length_mm from every other column, drop `year`,
# and convert the three factor predictors into dummy (indicator) columns.
rec_steps <- recipe(bill_length_mm ~ ., data = penguins2) |>
  step_rm(year) |>
  step_dummy(sex, island, species)

# Train the recipe on penguins2 and keep the processed training set so it can
# be pulled back out with juice().
rec <- prep(rec_steps, retain = TRUE)

# Preview the processed training data.
head(juice(rec))

bill_depth_mm,flipper_length_mm,body_mass_g,bill_length_mm,sex_male,island_Dream,island_Torgersen,species_Chinstrap,species_Gentoo
<dbl>,<int>,<int>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
18.7,181,3750,39.1,1,0,1,0,0
17.4,186,3800,39.5,0,0,1,0,0
18.0,195,3250,40.3,0,0,1,0,0
19.3,193,3450,36.7,0,0,1,0,0
20.6,190,3650,39.3,1,0,1,0,0
17.8,181,3625,38.9,0,0,1,0,0


In [None]:
# `rec` was already trained with prep() in an earlier cell, and add_recipe()
# refuses a trained recipe because the workflow preps the recipe itself during
# fit(). Build an untrained copy of the same preprocessing for the workflow;
# `rec` is left as-is because later cells still call juice() on it.
wf_rec <- recipe(bill_length_mm ~ ., data = penguins2) |>
  step_rm(year) |>
  step_dummy(sex, island, species)

# Bundle preprocessing and the model specification into one workflow.
wf <- workflow() |>
  add_recipe(wf_rec) |>
  add_model(mod)

# Fitting trains the recipe on penguins2 and then fits the random forest on
# the processed data.
wf_fit <- wf |> fit(data = penguins2)

wf_fit

In [None]:
# Pull the underlying parsnip fit out of the fitted workflow; it expects data
# that has already been preprocessed.
forest_fit <- extract_fit_parsnip(wf_fit)

# Processed training data retained inside the prepped recipe.
processed_train <- juice(rec)

# Attach model predictions (`.pred`) to the processed training rows.
outcomes <- augment(forest_fit, new_data = processed_train)

head(outcomes)

In [None]:
# Size of the rendered plot in the notebook.
options(repr.plot.width = 6, repr.plot.height = 5)

# Observed bill length (default points) vs. predicted bill length (blue
# points) against bill depth, with a red segment joining each prediction to
# its observed value.
ggplot(outcomes, aes(x = bill_depth_mm)) +
  geom_point(aes(y = bill_length_mm)) +
  geom_point(
    aes(y = .pred),
    color = "steelblue",
    size = 3,
    alpha = 0.5
  ) +
  geom_segment(
    aes(xend = bill_depth_mm, y = .pred, yend = bill_length_mm),
    color = "firebrick",
    alpha = 0.25
  )

## Visualize feature importance

In [None]:
# Size of the rendered plot in the notebook.
options(repr.plot.width = 6, repr.plot.height = 4)

# Variable importance plot for the fitted random forest, drawn from the
# parsnip fit stored inside the workflow.
importance_fit <- extract_fit_parsnip(wf_fit)
vip(importance_fit)