|
# TODO: If implementing `class::knn()`, mention that it has neither the
# distance parameter (it always uses Euclidean distance) nor the
# `weight_func` parameter.
| 4 | + |
#' General Interface for K-Nearest Neighbor Models
#'
#' `nearest_neighbor()` is a way to generate a _specification_ of a model
#' before fitting and allows the model to be created using
#' different packages in R. The main arguments for the
#' model are:
#' \itemize{
#'   \item \code{neighbors}: The number of neighbors considered at
#'   each prediction.
#'   \item \code{weight_func}: The type of kernel function that weights the
#'   distances between samples.
#'   \item \code{dist_power}: The parameter used when calculating the Minkowski
#'   distance. This corresponds to the Manhattan distance with `dist_power = 1`
#'   and the Euclidean distance with `dist_power = 2`.
#' }
#' These arguments are converted to their specific names at the
#' time that the model is fit. Other options and arguments can be
#' set using the `others` argument. If left to their defaults
#' here (`NULL`), the values are taken from the underlying model
#' functions. If parameters need to be modified, `update()` can be used
#' in lieu of recreating the object from scratch.
#'
#' @param mode A single character string for the type of model.
#' Possible values for this model are `"unknown"`, `"regression"`, or
#' `"classification"`.
#'
#' @param neighbors A single integer for the number of neighbors
#' to consider (often called `k`).
#'
#' @param weight_func A *single* character for the type of kernel function used
#' to weight distances between samples. Valid choices are: `"rectangular"`,
#' `"triangular"`, `"epanechnikov"`, `"biweight"`, `"triweight"`,
#' `"cos"`, `"inv"`, `"gaussian"`, `"rank"`, or `"optimal"`.
#'
#' @param dist_power A single number for the parameter used in
#' calculating Minkowski distance.
#'
#' @param others A named list of arguments to be used by the
#' underlying models (e.g., `kknn::train.kknn`). These are not evaluated
#' until the model is fit and will be substituted into the model
#' fit expression. `NULL` entries are dropped.
#'
#' @param ... Used for S3 method consistency. Any arguments passed to
#' the ellipses will result in an error. Use `others` instead.
#'
#' @return A list with elements `args` (holding `neighbors`, `weight_func`,
#' and `dist_power`), `others`, `mode`, `method`, and `engine`, whose class
#' is set by `make_classes("nearest_neighbor")`. `method` and `engine` are
#' initialised to `NULL`.
#'
#' @details
#' The model can be created using the `fit()` function using the
#' following _engines_:
#' \itemize{
#'   \item \pkg{R}: `"kknn"`
#' }
#'
#' Engines may have pre-set default arguments when executing the
#' model fit call. These can be changed by using the `others`
#' argument to pass in the preferred values. For this type of
#' model, the template of the fit calls are:
#'
#' \pkg{kknn} (classification or regression)
#'
#' \Sexpr[results=rd]{parsnip:::show_fit(parsnip:::nearest_neighbor(), "kknn")}
#'
#' @note
#' For `kknn`, the underlying modeling function used is a restricted
#' version of `train.kknn()` and not `kknn()`. It is set up in this way so that
#' `parsnip` can utilize the underlying `predict.train.kknn` method to predict
#' on new data. This also means that only a single value of that function's
#' `kernel` argument (a.k.a. `weight_func` here) can be supplied.
#'
#' @seealso [varying()], [fit()]
#'
#' @examples
#' nearest_neighbor()
#'
#' @export
nearest_neighbor <- function(mode = "unknown",
                             neighbors = NULL,
                             weight_func = NULL,
                             dist_power = NULL,
                             others = list(),
                             ...) {

  check_empty_ellipse(...)

  ## TODO: make a utility function here
  # The length test comes first so the `if` condition is always a scalar,
  # even when a vector of modes is supplied by mistake.
  if (length(mode) != 1L || !(mode %in% nearest_neighbor_modes)) {
    stop("`mode` should be one of: ",
         paste0("'", nearest_neighbor_modes, "'", collapse = ", "),
         call. = FALSE)
  }

  # `isTRUE()` turns an `NA` result (e.g. from `neighbors = NA_real_` or
  # `Inf`) into a validation failure instead of an opaque `if` error.
  if (is.numeric(neighbors) && !isTRUE(positive_int_scalar(neighbors))) {
    stop("`neighbors` must be a length 1 positive integer.", call. = FALSE)
  }

  # Zero-length character vectors are rejected too, matching the message.
  if (is.character(weight_func) && length(weight_func) != 1L) {
    stop("The length of `weight_func` must be 1.", call. = FALSE)
  }

  args <- list(
    neighbors = neighbors,
    weight_func = weight_func,
    dist_power = dist_power
  )

  # Keep only the engine-specific options that were actually given a value.
  has_value <- !vapply(others, is.null, logical(1))
  others <- others[has_value]

  # write a constructor function
  out <- list(args = args, others = others,
              mode = mode, method = NULL, engine = NULL)
  # TODO: make_classes has wrong order; go from specific to general
  class(out) <- make_classes("nearest_neighbor")
  out
}
| 119 | + |
# Print method for nearest neighbor specifications: writes a header with the
# model mode, delegates the body to `model_printer()`, and, when fit arguments
# are present under `x$method$fit$args`, also prints the fit call template.
# Returns `x` invisibly.
#' @export
print.nearest_neighbor <- function(x, ...) {
  cat("K-Nearest Neighbor Model Specification (", x$mode, ")\n\n", sep = "")
  model_printer(x, ...)

  fit_args <- x$method$fit$args
  has_template <- !is.null(fit_args)

  if (has_template) {
    cat("Model fit template:\n")
    fit_call <- show_call(x)
    print(fit_call)
  }

  invisible(x)
}
| 131 | + |
| 132 | +# ------------------------------------------------------------------------------ |
| 133 | + |
# Update the arguments of an existing nearest neighbor specification.
#
# Arguments:
#   object       The specification to modify; its `args` and `others`
#                elements are updated.
#   neighbors    If numeric, must be a single positive whole number.
#   weight_func  If character, must have length 1.
#   dist_power   Stored as given (not validated here).
#   others       Named list of extra options. When non-empty it replaces
#                `object$others` (`fresh = TRUE`) or is merged into it by name
#                (`fresh = FALSE`). An empty list leaves `object$others`
#                untouched in both cases.
#   fresh        If TRUE, `object$args` is replaced wholesale by the three
#                main arguments (including any left as `NULL`). If FALSE,
#                only the arguments for which `null_value()` is FALSE are
#                written over the existing ones.
#   ...          Must be empty; checked by `check_empty_ellipse()`.
#
# Returns the modified `object`.
#' @export
update.nearest_neighbor <- function(object,
                                    neighbors = NULL,
                                    weight_func = NULL,
                                    dist_power = NULL,
                                    others = list(),
                                    fresh = FALSE,
                                    ...) {

  check_empty_ellipse(...)

  # `isTRUE()` turns an `NA` result (e.g. from `neighbors = NA_real_` or
  # `Inf`) into a validation failure instead of an opaque `if` error.
  if (is.numeric(neighbors) && !isTRUE(positive_int_scalar(neighbors))) {
    stop("`neighbors` must be a length 1 positive integer.", call. = FALSE)
  }

  # Zero-length character vectors are rejected too, matching the message.
  if (is.character(weight_func) && length(weight_func) != 1L) {
    stop("The length of `weight_func` must be 1.", call. = FALSE)
  }

  args <- list(
    neighbors = neighbors,
    weight_func = weight_func,
    dist_power = dist_power
  )

  if (fresh) {
    object$args <- args
  } else {
    # Only overwrite the arguments the caller actually supplied.
    null_args <- map_lgl(args, null_value)
    if (any(null_args)) {
      args <- args[!null_args]
    }
    if (length(args) > 0) {
      object$args[names(args)] <- args
    }
  }

  if (length(others) > 0) {
    if (fresh) {
      object$others <- others
    } else {
      object$others[names(others)] <- others
    }
  }

  object
}
| 178 | + |
| 179 | + |
# Is `x` a single, finite, strictly positive whole number?
#
# Always returns a length-1 `TRUE` or `FALSE`, never `NA`: non-numeric input,
# vectors whose length is not 1, and `NA`/`NaN`/`Inf` values all yield `FALSE`,
# so the result is safe to use directly as an `if` condition.
# Whole-valued doubles (e.g. `5`) count as integers here.
positive_int_scalar <- function(x) {
  is.numeric(x) &&
    length(x) == 1L &&
    is.finite(x) &&
    x > 0 &&
    x %% 1 == 0
}
0 commit comments