diff --git a/NAMESPACE b/NAMESPACE
index 82bfb94..7ee9f61 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -5,6 +5,7 @@ S3method(bake,step_collapse_stringdist)
 S3method(bake,step_discretize_cart)
 S3method(bake,step_discretize_xgb)
 S3method(bake,step_embed)
+S3method(bake,step_lencode)
 S3method(bake,step_lencode_bayes)
 S3method(bake,step_lencode_glm)
 S3method(bake,step_lencode_mixed)
@@ -18,6 +19,7 @@ S3method(prep,step_collapse_stringdist)
 S3method(prep,step_discretize_cart)
 S3method(prep,step_discretize_xgb)
 S3method(prep,step_embed)
+S3method(prep,step_lencode)
 S3method(prep,step_lencode_bayes)
 S3method(prep,step_lencode_glm)
 S3method(prep,step_lencode_mixed)
@@ -31,6 +33,7 @@ S3method(print,step_collapse_stringdist)
 S3method(print,step_discretize_cart)
 S3method(print,step_discretize_xgb)
 S3method(print,step_embed)
+S3method(print,step_lencode)
 S3method(print,step_lencode_bayes)
 S3method(print,step_lencode_glm)
 S3method(print,step_lencode_mixed)
@@ -44,6 +47,7 @@ S3method(required_pkgs,step_collapse_stringdist)
 S3method(required_pkgs,step_discretize_cart)
 S3method(required_pkgs,step_discretize_xgb)
 S3method(required_pkgs,step_embed)
+S3method(required_pkgs,step_lencode)
 S3method(required_pkgs,step_lencode_bayes)
 S3method(required_pkgs,step_lencode_glm)
 S3method(required_pkgs,step_lencode_mixed)
@@ -57,6 +61,7 @@ S3method(tidy,step_collapse_stringdist)
 S3method(tidy,step_discretize_cart)
 S3method(tidy,step_discretize_xgb)
 S3method(tidy,step_embed)
+S3method(tidy,step_lencode)
 S3method(tidy,step_lencode_bayes)
 S3method(tidy,step_lencode_glm)
 S3method(tidy,step_lencode_mixed)
@@ -83,6 +88,7 @@ export(step_discretize_cart)
 export(step_discretize_xgb)
 export(step_embed)
 export(step_feature_hash)
+export(step_lencode)
 export(step_lencode_bayes)
 export(step_lencode_glm)
 export(step_lencode_mixed)
diff --git a/NEWS.md b/NEWS.md
index c4542c4..baf3618 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -2,6 +2,8 @@
 
 * `step_feature_hash()` has been fully deprecated in favor of `textrecipes::step_dummy_hash()`. (#253)
 
+* Adds `step_lencode()` to perform analytical likelihood encoding. (#258)
+
 # embed 1.1.5
 
 ## Improvements
diff --git a/R/aaa.R b/R/aaa.R
index b33f824..d66b755 100644
--- a/R/aaa.R
+++ b/R/aaa.R
@@ -7,6 +7,7 @@ utils::globalVariables(
     ".",
     "..level",
     "..order",
+    "..value",
     ".group",
     ".rows",
     "col_names",
diff --git a/R/lencode.R b/R/lencode.R
new file mode 100644
index 0000000..a987cfc
--- /dev/null
+++ b/R/lencode.R
@@ -0,0 +1,311 @@
+#' Likelihood encoding using analytical formula
+#'
+#' `step_lencode()` creates a *specification* of a recipe step that will convert
+#' a nominal (i.e. factor) predictor into a single set of scores derived
+#' analytically.
+#'
+#' @param recipe A recipe object. The step will be added to the sequence of
+#'   operations for this recipe.
+#' @param ... One or more selector functions to choose variables. For
+#'   `step_lencode()`, this indicates the variables to be encoded into a
+#'   numeric format. See [recipes::selections()] for more details. For the
+#'   `tidy` method, these are not currently used.
+#' @param role Not used by this step since no new variables are created.
+#' @param outcome A call to `vars` to specify which variable is used as the
+#'   outcome. Only numeric and two-level factors are currently supported.
+#' @param mapping A list of tibble results that define the encoding. This is
+#'   `NULL` until the step is trained by [recipes::prep()].
+#' @param skip A logical. Should the step be skipped when the recipe is baked by
+#'   [recipes::bake()]? While all operations are baked when [recipes::prep()] is
+#'   run, some operations may not be able to be conducted on new data (e.g.
+#'   processing the outcome variable(s)). Care should be taken when using `skip
+#'   = TRUE` as it may affect the computations for subsequent operations
+#' @param trained A logical to indicate if the quantities for preprocessing have
+#'   been estimated.
+#' @param id A character string that is unique to this step to identify it.
+#' @return An updated version of `recipe` with the new step added to the
+#'   sequence of existing steps (if any). For the `tidy` method, a tibble with
+#'   columns `terms` (the selectors or variables for encoding), `level` (the
+#'   factor levels), and `value` (the encodings).
+#' @keywords datagen
+#' @concept preprocessing encoding
+#' @details
+#'
+#' Each selected nominal predictor will be replaced by a numeric predictor.
+#' Each unique value of the nominal predictor is replaced by a numeric value.
+#' Thse values are calculated differently depending on the type of the outcome.
+#'
+#' For **numeric** outcomes each value is the average value of the outcome
+#' inside each of the levels of the predictor. Unseen levels of the predictor
+#' will be using the global mean of the predictor.
+#' If case weights are used then a weighted mean is calculated instead.
+#'
+#' For **nominal** outcomes each value is the log odds of the of the first level
+#' of the outcome variable being present, within each level of the levels of the
+#' predictor. Unseen levels will be replaced by the global log odds without
+#' stratification.
+#' If case weights are used then a weighted log odds is calculated.
+#'
+#' If no or all occurances happens then the log odds is calculated using
+#' `p = (2 * nrow(data) - 1) / (2 * nrow(data))` to avoid infinity that would
+#' happen by taking the log of `0`.
+#'
+#' # Tidying
+#'
+#' When you [`tidy()`][recipes::tidy.recipe] this step, a tibble is returned
+#' with columns `level`, `value`, `terms`, and `id`:
+#'
+#' \describe{
+#'   \item{level}{character, the factor levels}
+#'   \item{value}{numeric, the encoding}
+#'   \item{terms}{character, the selectors or variables selected}
+#'   \item{id}{character, id of this step}
+#' }
+#'
+#' @template case-weights-supervised
+#'
+#' @references
+#'
+#' Micci-Barreca D (2001) "A preprocessing scheme for high-cardinality
+#' categorical attributes in classification and prediction problems," ACM SIGKDD
+#' Explorations Newsletter, 3(1), 27-32.
+#'
+#' Zumel N and Mount J (2017) "vtreat: a data.frame Processor for Predictive
+#' Modeling," arXiv:1611.09477
+#'
+#' @examplesIf rlang::is_installed("modeldata")
+#' library(recipes)
+#' library(dplyr)
+#' library(modeldata)
+#'
+#' data(grants)
+#'
+#' set.seed(1)
+#' grants_other <- sample_n(grants_other, 500)
+#' reencoded <- recipe(class ~ sponsor_code, data = grants_other) |>
+#'   step_lencode(sponsor_code, outcome = vars(class)) |>
+#'   prep()
+#'
+#' bake(reencoded, grants_other)
+#'
+#' tidy(reencoded, 1)
+#' @export
+step_lencode <-
+  function(
+    recipe,
+    ...,
+    role = NA,
+    trained = FALSE,
+    outcome = NULL,
+    mapping = NULL,
+    skip = FALSE,
+    id = rand_id("lencode")
+  ) {
+    if (is.null(outcome)) {
+      cli::cli_abort("Please list a variable in {.arg outcome}.")
+    }
+    add_step(
+      recipe,
+      step_lencode_new(
+        terms = enquos(...),
+        role = role,
+        trained = trained,
+        outcome = outcome,
+        mapping = mapping,
+        skip = skip,
+        id = id,
+        case_weights = NULL
+      )
+    )
+  }
+
+step_lencode_new <-
+  function(terms, role, trained, outcome, mapping, skip, id, case_weights) {
+    step(
+      subclass = "lencode",
+      terms = terms,
+      role = role,
+      trained = trained,
+      outcome = outcome,
+      mapping = mapping,
+      skip = skip,
+      id = id,
+      case_weights = case_weights
+    )
+  }
+
+#' @export
+prep.step_lencode <- function(x, training, info = NULL, ...) {
+  col_names <- recipes_eval_select(x$terms, training, info)
+
+  wts <- get_case_weights(info, training)
+  were_weights_used <- are_weights_used(wts)
+  if (isFALSE(were_weights_used) || is.null(wts)) {
+    wts <- NULL
+  }
+
+  if (length(col_names) > 0) {
+    check_type(training[, col_names], types = c("string", "factor", "ordered"))
+    y_name <- recipes_eval_select(x$outcome, training, info)
+    res <- purrr::map(
+      training[, col_names],
+      lencode_calc,
+      y = training[[y_name]],
+      wts = wts
+    )
+  } else {
+    res <- list()
+  }
+  step_lencode_new(
+    terms = x$terms,
+    role = x$role,
+    trained = TRUE,
+    outcome = x$outcome,
+    mapping = res,
+    skip = x$skip,
+    id = x$id,
+    case_weights = were_weights_used
+  )
+}
+lencode_calc <- function(x, y, wts = NULL) {
+  if (!is.numeric(y) && !is.factor(y) && !is.character(y)) {
+    cli::cli_abort(
+      "Only works nominal or numeric {.arg outcome}, 
+      not {.obj_type_friendly {y}}."
+    )
+  }
+
+  data <- tibble::new_tibble(
+    list(..level = x, ..value = y, wts = wts)
+  )
+
+  if (is.numeric(y)) {
+    if (is.null(wts)) {
+      res <- dplyr::summarise(
+        data,
+        ..value = mean(..value),
+        .by = ..level
+      )
+      unseen_value <- mean(data$..value)
+    } else {
+      res <- dplyr::summarise(
+        data,
+        ..value = stats::weighted.mean(..value, wts),
+        .by = ..level
+      )
+      unseen_value <- stats::weighted.mean(data$..value, data$wts)
+    }
+  }
+
+  if (is.factor(y) || is.character(y)) {
+    inf_estimate_p <- (2 * nrow(data) - 1) / (2 * nrow(data))
+    inf_estimate_log_odds <- log(inf_estimate_p / (1 - inf_estimate_p))
+    if (is.null(wts)) {
+      res <- dplyr::summarize(
+        data,
+        p = (sum(..value == levels(..value)[1])) / n(),
+        .by = ..level
+      )
+
+      global_p <- (sum(data$..value == levels(data$..value)[1])) / nrow(data)
+    } else {
+      data$wts <- as.numeric(data$wts)
+      res <- dplyr::summarize(
+        data,
+        p = (sum((..value == levels(..value)[1]) * wts)) / sum(wts),
+        .by = ..level
+      )
+
+      global_p <- (sum((data$..value == levels(data$..value)[1]) * data$wts)) /
+        sum(data$wts)
+    }
+
+    res <- res |>
+      dplyr::mutate(..value = log(p / (1 - p))) |>
+      dplyr::mutate(
+        ..value = dplyr::if_else(
+          is.infinite(..value),
+          inf_estimate_log_odds,
+          ..value
+        )
+      ) |>
+      dplyr::select(-p)
+
+    unseen_value <- log(global_p / (1 - global_p))
+  }
+
+  unseen <- tibble::new_tibble(
+    list(
+      ..level = "..new",
+      ..value = unseen_value
+    )
+  )
+
+  dplyr::bind_rows(res, unseen)
+}
+
+#' @export
+bake.step_lencode <- function(object, new_data, ...) {
+  col_names <- names(object$mapping)
+  check_new_data(col_names, object, new_data)
+
+  for (col_name in col_names) {
+    new_data[[col_name]] <- map_glm_coef(
+      dat = new_data[, col_name], # map_glm_coef() expects a tibble
+      mapping = object$mapping[[col_name]]
+    )
+  }
+
+  new_data
+}
+
+#' @export
+print.step_lencode <-
+  function(x, width = max(20, options()$width - 31), ...) {
+    title <- "Linear embedding for factors via GLM for "
+    print_step(
+      names(x$mapping),
+      x$terms,
+      x$trained,
+      title,
+      width,
+      case_weights = x$case_weights
+    )
+    invisible(x)
+  }
+
+#' @rdname step_lencode
+#' @usage NULL
+#' @export
+tidy.step_lencode <- function(x, ...) {
+  if (is_trained(x)) {
+    if (length(x$mapping) == 0) {
+      res <- tibble(
+        terms = character(),
+        level = character(),
+        value = double()
+      )
+    } else {
+      for (i in seq_along(x$mapping)) {
+        x$mapping[[i]]$terms <- names(x$mapping)[i]
+      }
+      res <- bind_rows(x$mapping)
+      names(res) <- gsub("^\\.\\.", "", names(res))
+    }
+  } else {
+    term_names <- sel2char(x$terms)
+    res <- tibble(
+      terms = term_names,
+      level = rep(na_chr, length(term_names)),
+      value = rep(na_dbl, length(term_names))
+    )
+  }
+  res$id <- x$id
+  res
+}
+
+#' @rdname required_pkgs.embed
+#' @export
+required_pkgs.step_lencode <- function(x, ...) {
+  c("embed")
+}
diff --git a/_pkgdown.yml b/_pkgdown.yml
index 6691947..6cd1e4b 100644
--- a/_pkgdown.yml
+++ b/_pkgdown.yml
@@ -15,6 +15,7 @@ reference:
   contents:
     - step_embed
     - step_feature_hash
+    - step_lencode
     - step_lencode_bayes
     - step_lencode_glm
     - step_lencode_mixed
diff --git a/man/required_pkgs.embed.Rd b/man/required_pkgs.embed.Rd
index ed60312..b2267c1 100644
--- a/man/required_pkgs.embed.Rd
+++ b/man/required_pkgs.embed.Rd
@@ -1,14 +1,15 @@
 % Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/collapse_cart.R, R/collapse_stringdist.R,
-%   R/discretize_cart.R, R/discretize_xgb.R, R/embed.R, R/lencode_bayes.R,
-%   R/lencode_glm.R, R/lencode_mixed.R, R/pca_sparse.R, R/pca_sparse_bayes.R,
-%   R/pca_truncated.R, R/umap.R, R/woe.R
+%   R/discretize_cart.R, R/discretize_xgb.R, R/embed.R, R/lencode.R,
+%   R/lencode_bayes.R, R/lencode_glm.R, R/lencode_mixed.R, R/pca_sparse.R,
+%   R/pca_sparse_bayes.R, R/pca_truncated.R, R/umap.R, R/woe.R
 \name{required_pkgs.step_collapse_cart}
 \alias{required_pkgs.step_collapse_cart}
 \alias{required_pkgs.step_collapse_stringdist}
 \alias{required_pkgs.step_discretize_cart}
 \alias{required_pkgs.step_discretize_xgb}
 \alias{required_pkgs.step_embed}
+\alias{required_pkgs.step_lencode}
 \alias{required_pkgs.step_lencode_bayes}
 \alias{required_pkgs.step_lencode_glm}
 \alias{required_pkgs.step_lencode_mixed}
@@ -29,6 +30,8 @@
 
 \method{required_pkgs}{step_embed}(x, ...)
 
+\method{required_pkgs}{step_lencode}(x, ...)
+
 \method{required_pkgs}{step_lencode_bayes}(x, ...)
 
 \method{required_pkgs}{step_lencode_glm}(x, ...)
diff --git a/man/step_lencode.Rd b/man/step_lencode.Rd
new file mode 100644
index 0000000..87d1062
--- /dev/null
+++ b/man/step_lencode.Rd
@@ -0,0 +1,125 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/lencode.R
+\name{step_lencode}
+\alias{step_lencode}
+\alias{tidy.step_lencode}
+\title{Likelihood encoding using analytical formula}
+\usage{
+step_lencode(
+  recipe,
+  ...,
+  role = NA,
+  trained = FALSE,
+  outcome = NULL,
+  mapping = NULL,
+  skip = FALSE,
+  id = rand_id("lencode")
+)
+}
+\arguments{
+\item{recipe}{A recipe object. The step will be added to the sequence of
+operations for this recipe.}
+
+\item{...}{One or more selector functions to choose variables. For
+\code{step_lencode()}, this indicates the variables to be encoded into a
+numeric format. See \code{\link[recipes:selections]{recipes::selections()}} for more details. For the
+\code{tidy} method, these are not currently used.}
+
+\item{role}{Not used by this step since no new variables are created.}
+
+\item{trained}{A logical to indicate if the quantities for preprocessing have
+been estimated.}
+
+\item{outcome}{A call to \code{vars} to specify which variable is used as the
+outcome. Only numeric and two-level factors are currently supported.}
+
+\item{mapping}{A list of tibble results that define the encoding. This is
+\code{NULL} until the step is trained by \code{\link[recipes:prep]{recipes::prep()}}.}
+
+\item{skip}{A logical. Should the step be skipped when the recipe is baked by
+\code{\link[recipes:bake]{recipes::bake()}}? While all operations are baked when \code{\link[recipes:prep]{recipes::prep()}} is
+run, some operations may not be able to be conducted on new data (e.g.
+processing the outcome variable(s)). Care should be taken when using \code{skip = TRUE} as it may affect the computations for subsequent operations}
+
+\item{id}{A character string that is unique to this step to identify it.}
+}
+\value{
+An updated version of \code{recipe} with the new step added to the
+sequence of existing steps (if any). For the \code{tidy} method, a tibble with
+columns \code{terms} (the selectors or variables for encoding), \code{level} (the
+factor levels), and \code{value} (the encodings).
+}
+\description{
+\code{step_lencode()} creates a \emph{specification} of a recipe step that will convert
+a nominal (i.e. factor) predictor into a single set of scores derived
+analytically.
+}
+\details{
+Each selected nominal predictor will be replaced by a numeric predictor.
+Each unique value of the nominal predictor is replaced by a numeric value.
+Thse values are calculated differently depending on the type of the outcome.
+
+For \strong{numeric} outcomes each value is the average value of the outcome
+inside each of the levels of the predictor. Unseen levels of the predictor
+will be using the global mean of the predictor.
+If case weights are used then a weighted mean is calculated instead.
+
+For \strong{nominal} outcomes each value is the log odds of the of the first level
+of the outcome variable being present, within each level of the levels of the
+predictor. Unseen levels will be replaced by the global log odds without
+stratification.
+If case weights are used then a weighted log odds is calculated.
+
+If no or all occurances happens then the log odds is calculated using
+\code{p = (2 * nrow(data) - 1) / (2 * nrow(data))} to avoid infinity that would
+happen by taking the log of \code{0}.
+}
+\section{Tidying}{
+When you \code{\link[recipes:tidy.recipe]{tidy()}} this step, a tibble is returned
+with columns \code{level}, \code{value}, \code{terms}, and \code{id}:
+
+\describe{
+\item{level}{character, the factor levels}
+\item{value}{numeric, the encoding}
+\item{terms}{character, the selectors or variables selected}
+\item{id}{character, id of this step}
+}
+}
+
+\section{Case weights}{
+
+
+This step performs an supervised operation that can utilize case weights.
+To use them, see the documentation in \link[recipes:case_weights]{recipes::case_weights} and the examples on
+\code{tidymodels.org}.
+}
+
+\examples{
+\dontshow{if (rlang::is_installed("modeldata")) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf}
+library(recipes)
+library(dplyr)
+library(modeldata)
+
+data(grants)
+
+set.seed(1)
+grants_other <- sample_n(grants_other, 500)
+reencoded <- recipe(class ~ sponsor_code, data = grants_other) |>
+  step_lencode(sponsor_code, outcome = vars(class)) |>
+  prep()
+
+bake(reencoded, grants_other)
+
+tidy(reencoded, 1)
+\dontshow{\}) # examplesIf}
+}
+\references{
+Micci-Barreca D (2001) "A preprocessing scheme for high-cardinality
+categorical attributes in classification and prediction problems," ACM SIGKDD
+Explorations Newsletter, 3(1), 27-32.
+
+Zumel N and Mount J (2017) "vtreat: a data.frame Processor for Predictive
+Modeling," arXiv:1611.09477
+}
+\concept{preprocessing encoding}
+\keyword{datagen}
diff --git a/tests/testthat/_snaps/lencode.md b/tests/testthat/_snaps/lencode.md
new file mode 100644
index 0000000..ac8225a
--- /dev/null
+++ b/tests/testthat/_snaps/lencode.md
@@ -0,0 +1,147 @@
+# factor outcome - factor predictor
+
+    Code
+      new_values_ch <- bake(class_test, new_data = new_dat_ch)
+    Condition
+      Warning in `bake()`:
+      ! There was 1 column that was a factor when the recipe was prepped:
+      * `x3`
+      i This may cause errors when processing new data.
+
+# factor outcome - character predictor
+
+    Code
+      new_values <- bake(class_test, new_data = new_dat_ch)
+
+# numeric outcome - factor predictor
+
+    Code
+      new_values_ch <- bake(reg_test, new_data = new_dat_ch)
+    Condition
+      Warning in `bake()`:
+      ! There was 1 column that was a factor when the recipe was prepped:
+      * `x3`
+      i This may cause errors when processing new data.
+
+# bad args
+
+    Code
+      prep(step_lencode(recipe(Species ~ ., data = three_class), Sepal.Length,
+      outcome = vars(Species)), training = three_class, retain = TRUE)
+    Condition
+      Error in `step_lencode()`:
+      Caused by error in `prep()`:
+      x All columns selected for the step should be string, factor, or ordered.
+      * 1 double variable found: `Sepal.Length`
+
+---
+
+    Code
+      prep(step_lencode(recipe(Species ~ ., data = three_class), Species, outcome = vars(
+        logical)), training = three_class, retain = TRUE)
+    Condition
+      Error in `step_lencode()`:
+      Caused by error in `purrr::map()`:
+      i In index: 1.
+      i With name: Species.
+      Caused by error in `.f()`:
+      ! Only works nominal or numeric `outcome`, not a logical vector.
+
+# case weights
+
+    Code
+      class_test
+    Message
+      
+      -- Recipe ----------------------------------------------------------------------
+      
+      -- Inputs 
+      Number of variables by role
+      outcome:      1
+      predictor:    3
+      case_weights: 1
+      
+      -- Training information 
+      Training data contained 500 data points and no incomplete rows.
+      
+      -- Operations 
+      * Linear embedding for factors via GLM for: x3 | Trained, weighted
+
+# bake method errors when needed non-standard role columns are missing
+
+    Code
+      bake(rec_trained, new_data = ex_dat[, -3])
+    Condition
+      Error in `step_lencode()`:
+      ! The following required column is missing from `new_data`: x3.
+
+# empty printing
+
+    Code
+      rec
+    Message
+      
+      -- Recipe ----------------------------------------------------------------------
+      
+      -- Inputs 
+      Number of variables by role
+      outcome:    1
+      predictor: 10
+      
+      -- Operations 
+      * Linear embedding for factors via GLM for: <none>
+
+---
+
+    Code
+      rec
+    Message
+      
+      -- Recipe ----------------------------------------------------------------------
+      
+      -- Inputs 
+      Number of variables by role
+      outcome:    1
+      predictor: 10
+      
+      -- Training information 
+      Training data contained 32 data points and no incomplete rows.
+      
+      -- Operations 
+      * Linear embedding for factors via GLM for: <none> | Trained
+
+# printing
+
+    Code
+      print(rec)
+    Message
+      
+      -- Recipe ----------------------------------------------------------------------
+      
+      -- Inputs 
+      Number of variables by role
+      outcome:   1
+      predictor: 3
+      
+      -- Operations 
+      * Linear embedding for factors via GLM for: x3
+
+---
+
+    Code
+      prep(rec)
+    Message
+      
+      -- Recipe ----------------------------------------------------------------------
+      
+      -- Inputs 
+      Number of variables by role
+      outcome:   1
+      predictor: 3
+      
+      -- Training information 
+      Training data contained 500 data points and no incomplete rows.
+      
+      -- Operations 
+      * Linear embedding for factors via GLM for: x3 | Trained
+
diff --git a/tests/testthat/test-lencode.R b/tests/testthat/test-lencode.R
new file mode 100644
index 0000000..f284f64
--- /dev/null
+++ b/tests/testthat/test-lencode.R
@@ -0,0 +1,439 @@
+test_that("factor outcome - factor predictor", {
+  class_test <- recipe(x2 ~ ., data = ex_dat) |>
+    step_lencode(x3, outcome = vars(x2), id = "id") |>
+    prep(training = ex_dat, retain = TRUE)
+  tr_values <- bake(class_test, new_data = NULL)$x3
+  new_values <- bake(class_test, new_data = new_dat)
+  expect_snapshot(
+    new_values_ch <- bake(class_test, new_data = new_dat_ch)
+  )
+  key <- class_test$steps[[1]]$mapping
+  td_obj <- tidy(class_test, number = 1)
+
+  expect_equal("x3", names(key))
+
+  expect_equal(
+    length(unique(ex_dat$x3)) + 1,
+    nrow(key$x3)
+  )
+  expect_true(sum(key$x3$..level == "..new") == 1)
+
+  expect_true(is.numeric(tr_values))
+
+  expect_equal(
+    new_values$x3[1],
+    key$x3$..value[key$x3$..level == "..new"]
+  )
+  expect_equal(
+    new_values$x3[2],
+    key$x3$..value[key$x3$..level == levels(ex_dat$x3)[1]]
+  )
+  expect_equal(
+    new_values$x3[3],
+    key$x3$..value[key$x3$..level == "..new"]
+  )
+  expect_equal(
+    new_values_ch$x3[1],
+    key$x3$..value[key$x3$..level == "..new"]
+  )
+  expect_equal(
+    new_values_ch$x3[2],
+    key$x3$..value[key$x3$..level == levels(ex_dat$x3)[1]]
+  )
+  expect_equal(
+    new_values_ch$x3[3],
+    key$x3$..value[key$x3$..level == "..new"]
+  )
+
+  expect_equal(
+    td_obj$level,
+    key$x3$..level
+  )
+  expect_equal(
+    td_obj$value,
+    key$x3$..value
+  )
+
+  new_values
+})
+
+test_that("factor outcome - character predictor", {
+  class_test <- recipe(x2 ~ ., data = ex_dat_ch) |>
+    step_lencode(x3, outcome = vars(x2)) |>
+    prep(training = ex_dat_ch, retain = TRUE)
+  tr_values <- bake(class_test, new_data = NULL)$x3
+  expect_snapshot(
+    new_values <- bake(class_test, new_data = new_dat_ch)
+  )
+  new_values_fc <- bake(class_test, new_data = new_dat)
+  key <- class_test$steps[[1]]$mapping
+  td_obj <- tidy(class_test, number = 1)
+
+  expect_equal("x3", names(key))
+
+  expect_equal(
+    length(unique(ex_dat$x3)) + 1,
+    nrow(key$x3)
+  )
+  expect_true(sum(key$x3$..level == "..new") == 1)
+
+  expect_true(is.numeric(tr_values))
+
+  expect_equal(
+    new_values$x3[1],
+    key$x3$..value[key$x3$..level == "..new"]
+  )
+  expect_equal(
+    new_values$x3[2],
+    key$x3$..value[key$x3$..level == levels(ex_dat$x3)[1]]
+  )
+  expect_equal(
+    new_values$x3[3],
+    key$x3$..value[key$x3$..level == "..new"]
+  )
+  expect_equal(
+    new_values_fc$x3[1],
+    key$x3$..value[key$x3$..level == "..new"]
+  )
+  expect_equal(
+    new_values_fc$x3[2],
+    key$x3$..value[key$x3$..level == levels(ex_dat$x3)[1]]
+  )
+  expect_equal(
+    new_values_fc$x3[3],
+    key$x3$..value[key$x3$..level == "..new"]
+  )
+
+  expect_equal(
+    td_obj$level,
+    key$x3$..level
+  )
+  expect_equal(
+    td_obj$value,
+    key$x3$..value
+  )
+
+  unseen_level <- data.frame(
+    x1 = 0,
+    x2 = factor("a", levels = c("a", "b")),
+    x3 = "unseen-level",
+    x4 = factor("A", levels = c("A", "B", "C", "D", "E"))
+  )
+
+  expect_equal(
+    bake(class_test, unseen_level)$x3,
+    0
+  )
+})
+
+test_that("numeric outcome - factor predictor", {
+  reg_test <- recipe(x1 ~ ., data = ex_dat) |>
+    step_lencode(x3, outcome = vars(x1)) |>
+    prep(training = ex_dat, retain = TRUE)
+  tr_values <- bake(reg_test, new_data = NULL)$x3
+  new_values <- bake(reg_test, new_data = new_dat)
+  expect_snapshot(
+    new_values_ch <- bake(reg_test, new_data = new_dat_ch)
+  )
+  td_obj <- tidy(reg_test, number = 1)
+
+  key <- reg_test$steps[[1]]$mapping
+
+  expect_equal("x3", names(key))
+
+  expect_equal(
+    length(unique(ex_dat$x3)) + 1,
+    nrow(key$x3)
+  )
+  expect_true(sum(key$x3$..level == "..new") == 1)
+
+  expect_true(is.numeric(tr_values))
+
+  expect_equal(
+    new_values$x3[1],
+    key$x3$..value[key$x3$..level == "..new"]
+  )
+  expect_equal(
+    new_values$x3[2],
+    key$x3$..value[key$x3$..level == levels(ex_dat$x3)[1]]
+  )
+  expect_equal(
+    new_values$x3[3],
+    key$x3$..value[key$x3$..level == "..new"]
+  )
+  expect_equal(
+    new_values_ch$x3[1],
+    key$x3$..value[key$x3$..level == "..new"]
+  )
+  expect_equal(
+    new_values_ch$x3[2],
+    key$x3$..value[key$x3$..level == levels(ex_dat$x3)[1]]
+  )
+  expect_equal(
+    new_values_ch$x3[3],
+    key$x3$..value[key$x3$..level == "..new"]
+  )
+
+  expect_equal(
+    td_obj$level,
+    key$x3$..level
+  )
+  expect_equal(
+    td_obj$value,
+    key$x3$..value
+  )
+})
+
+test_that("numeric outcome - character predictor", {
+  reg_test <- recipe(x1 ~ ., data = ex_dat_ch) |>
+    step_lencode(x3, outcome = vars(x1)) |>
+    prep(training = ex_dat_ch, retain = TRUE)
+  tr_values <- bake(reg_test, new_data = NULL)$x3
+  new_values <- bake(reg_test, new_data = new_dat_ch)
+  new_values_fc <- bake(reg_test, new_data = new_dat)
+  key <- reg_test$steps[[1]]$mapping
+  td_obj <- tidy(reg_test, number = 1)
+
+  expect_equal("x3", names(key))
+
+  expect_equal(
+    length(unique(ex_dat$x3)) + 1,
+    nrow(key$x3)
+  )
+  expect_true(sum(key$x3$..level == "..new") == 1)
+
+  expect_true(is.numeric(tr_values))
+
+  expect_equal(
+    new_values$x3[1],
+    key$x3$..value[key$x3$..level == "..new"]
+  )
+  expect_equal(
+    new_values$x3[2],
+    key$x3$..value[key$x3$..level == levels(ex_dat$x3)[1]]
+  )
+  expect_equal(
+    new_values$x3[3],
+    key$x3$..value[key$x3$..level == "..new"]
+  )
+  expect_equal(
+    new_values_fc$x3[1],
+    key$x3$..value[key$x3$..level == "..new"]
+  )
+  expect_equal(
+    new_values_fc$x3[2],
+    key$x3$..value[key$x3$..level == levels(ex_dat$x3)[1]]
+  )
+  expect_equal(
+    new_values_fc$x3[3],
+    key$x3$..value[key$x3$..level == "..new"]
+  )
+
+  expect_equal(
+    td_obj$level,
+    key$x3$..level
+  )
+  expect_equal(
+    td_obj$value,
+    key$x3$..value
+  )
+
+  unseen_level <- data.frame(
+    x1 = 0,
+    x2 = factor("a", levels = c("a", "b")),
+    x3 = "unseen-level",
+    x4 = factor("A", levels = c("A", "B", "C", "D", "E"))
+  )
+
+  expect_equal(
+    bake(reg_test, unseen_level)$x3,
+    mean(ex_dat_ch$x1)
+  )
+})
+
+test_that("non occurring events doesn't result in infinities", {
+  data <- data.frame(
+    outcome = c("a", "a", "b", "b"),
+    predictor = c("a", "a", "a", "b")
+  )
+
+  res <- recipe(outcome ~ ., data = data) |>
+    step_lencode(predictor, outcome = vars(outcome)) |>
+    prep() |>
+    tidy(1)
+
+  exp <- c(
+    log(2 / 3 / (1 - 2 / 3)),
+    log(
+      (2 * nrow(data) - 1) /
+        (2 * nrow(data)) /
+        (1 - (2 * nrow(data) - 1) / (2 * nrow(data)))
+    ),
+    log(0.5 / (1 - 0.5))
+  )
+
+  expect_identical(res$value, exp)
+  expect_identical(res$level, c("a", "b", "..new"))
+})
+
+test_that("non occurring events doesn't result in infinities - case weights", {
+  data <- data.frame(
+    outcome = c("a", "a", "b", "b"),
+    predictor = c("a", "a", "a", "b"),
+    wts = importance_weights(rep(1, 4))
+  )
+
+  res <- recipe(outcome ~ ., data = data) |>
+    step_lencode(predictor, outcome = vars(outcome)) |>
+    prep() |>
+    tidy(1)
+
+  exp <- c(
+    log(2 / 3 / (1 - 2 / 3)),
+    log(
+      (2 * nrow(data) - 1) /
+        (2 * nrow(data)) /
+        (1 - (2 * nrow(data) - 1) / (2 * nrow(data)))
+    ),
+    log(0.5 / (1 - 0.5))
+  )
+
+  expect_identical(res$value, exp)
+  expect_identical(res$level, c("a", "b", "..new"))
+})
+
+
+test_that("bad args", {
+  three_class <- iris
+  three_class$fac <- rep(letters[1:3], 50)
+  three_class$logical <- rep(c(TRUE, FALSE), 75)
+
+  expect_snapshot(
+    error = TRUE,
+    recipe(Species ~ ., data = three_class) |>
+      step_lencode(Sepal.Length, outcome = vars(Species)) |>
+      prep(training = three_class, retain = TRUE)
+  )
+
+  expect_snapshot(
+    error = TRUE,
+    recipe(Species ~ ., data = three_class) |>
+      step_lencode(Species, outcome = vars(logical)) |>
+      prep(training = three_class, retain = TRUE)
+  )
+})
+
+test_that("case weights", {
+  wts_int <- rep(c(0.9, 1), times = c(100, 400))
+
+  ex_dat_cw <- ex_dat |>
+    mutate(wts = importance_weights(wts_int))
+
+  class_test <- recipe(x2 ~ ., data = ex_dat_cw) |>
+    step_lencode(x3, outcome = vars(x2), id = "id") |>
+    prep(training = ex_dat_cw, retain = TRUE)
+
+  ref_mod <- glm(
+    x2 ~ 0 + x3,
+    data = ex_dat_cw,
+    family = binomial,
+    na.action = na.omit,
+    weights = ex_dat_cw$wts
+  )
+
+  inf_estimate_p <- (2 * nrow(ex_dat_cw) - 1) / (2 * nrow(ex_dat_cw))
+  inf_estimate_log_odds <- log(inf_estimate_p / (1 - inf_estimate_p))
+
+  exp <- tibble(
+    ..level = names(coef(ref_mod)),
+    ..value = unname(coef(ref_mod))
+  ) |>
+    mutate(
+      ..level = gsub("^x3", "", ..level),
+      ..value = -..value,
+      ..value = if_else(abs(..value) < 0.0001, 0, ..value),
+      ..value = if_else(
+        abs(round(..value, 0.4)) == max(abs(round(..value, 0.4))),
+        inf_estimate_log_odds,
+        ..value
+      )
+    ) |>
+    arrange(..level)
+
+  res <- slice_head(class_test$steps[[1]]$mapping$x3, n = -1) |>
+    arrange(..level)
+
+  expect_equal(
+    res,
+    exp,
+    tolerance = 0.00001
+  )
+
+  expect_snapshot(class_test)
+})
+
+# Infrastructure ---------------------------------------------------------------
+
+test_that("bake method errors when needed non-standard role columns are missing", {
+  rec <- recipe(x2 ~ ., data = ex_dat) |>
+    step_lencode(x3, outcome = vars(x2)) |>
+    update_role(x3, new_role = "potato") |>
+    update_role_requirements(role = "potato", bake = FALSE)
+
+  rec_trained <- prep(rec, training = ex_dat, verbose = FALSE)
+
+  expect_snapshot(
+    error = TRUE,
+    bake(rec_trained, new_data = ex_dat[, -3])
+  )
+})
+
+test_that("empty printing", {
+  rec <- recipe(mpg ~ ., mtcars)
+  rec <- step_lencode(rec, outcome = vars(mpg))
+
+  expect_snapshot(rec)
+
+  rec <- prep(rec, mtcars)
+
+  expect_snapshot(rec)
+})
+
+test_that("empty selection prep/bake is a no-op", {
+  rec1 <- recipe(mpg ~ ., mtcars)
+  rec2 <- step_lencode(rec1, outcome = vars(mpg))
+
+  rec1 <- prep(rec1, mtcars)
+  rec2 <- prep(rec2, mtcars)
+
+  baked1 <- bake(rec1, mtcars)
+  baked2 <- bake(rec2, mtcars)
+
+  expect_identical(baked1, baked2)
+})
+
+test_that("empty selection tidy method works", {
+  rec <- recipe(mpg ~ ., mtcars)
+  rec <- step_lencode(rec, outcome = vars(mpg))
+
+  expect <- tibble(
+    terms = character(),
+    level = character(),
+    value = double(),
+    id = character()
+  )
+
+  expect_identical(tidy(rec, number = 1), expect)
+
+  rec <- prep(rec, mtcars)
+
+  expect_identical(tidy(rec, number = 1), expect)
+})
+
+test_that("printing", {
+  rec <- recipe(x2 ~ ., data = ex_dat_ch) |>
+    step_lencode(x3, outcome = vars(x2))
+
+  expect_snapshot(print(rec))
+  expect_snapshot(prep(rec))
+})