#' @name .fit.nnet
#' @title Neural Network regression for \code{tidyfit}
#' @description Fits a single-hidden-layer neural network on a 'tidyFit' \code{R6} class.
#' The function can be used with \code{\link{regress}} and \code{\link{classify}}.
#'
#' @param self a 'tidyFit' R6 class.
#' @param data a data frame, data frame extension (e.g. a tibble), or a lazy data frame (e.g. from dbplyr or dtplyr).
#' @return A fitted 'tidyFit' class model.
#'
#' @details **Hyperparameters:**
#'
#' - \code{size} *(number of units in the hidden layer)*
#' - \code{decay} *(parameter for weight decay)*
#' - \code{maxit} *(maximum number of iterations)*
#'
#' **Important method arguments (passed to \code{\link{m}})**
#'
#' The function provides a wrapper for \code{nnet::nnet.formula}. See \code{?nnet} for more details.
#'
#' **Implementation**
#'
#' For \code{\link{regress}}, linear output units (\code{linout = TRUE}) are used, while \code{\link{classify}} implements
#' the default logic of \code{nnet} (\code{entropy = TRUE} for 2 target classes and \code{softmax = TRUE} for more classes).
#'
#' @author Phil Holzmeister
#'
#' @examples
#' # Load data
#' data <- tidyfit::Factor_Industry_Returns
#'
#' # Stand-alone function
#' fit <- m("nnet", Return ~ ., data)
#' fit
#'
#' # Within 'regress' function
#' fit <- regress(data, Return ~ ., m("nnet", decay = 0.5, size = 8),
#'                .mask = c("Date", "Industry"))
#'
#' # Within 'classify' function
#' fit <- classify(iris, Species ~ ., m("nnet", decay = 0.5, size = 8))
#'
#' @importFrom purrr safely quietly
#' @importFrom stats model.frame model.matrix model.response
#' @importFrom methods formalArgs
#' @importFrom vctrs vec_as_names
.fit.nnet <- function(
    self,
    data = NULL
) {
  # create the model matrix to derive defaults for the weights and size args;
  # nnet.formula is ultimately called rather than nnet.default, since it
  # handles the factor encoding for classification
  mf <- stats::model.frame(self$formula, data)
  x <- stats::model.matrix(self$formula, mf)
  incl_intercept <- "(Intercept)" %in% colnames(x)
  if (incl_intercept) x <- x[, -1]
  if (self$mode == "regression") {
    self$set_args(linout = TRUE, overwrite = FALSE)
  }
  # the hidden layer size argument has no default value,
  # so set it to 2x the number of input neurons if not provided
  self$set_args(size = 2 * NCOL(x), overwrite = FALSE)
  # nnet defaults weights to 1 when the argument is missing, but not when
  # it is NULL, so set the default explicitly here
  if (is.null(self$args$weights)) {
    self$set_args(weights = rep(1, NROW(x)), overwrite = TRUE)
  }
  ctr <- self$args[names(self$args) %in% methods::formalArgs(nnet::nnet.default)]
  # drop duplicate arguments that nnet.formula itself passes on to nnet.default;
  # if these need to be user-specified via the classify interface, switch to
  # nnet.default (but that requires handling the categorical variable encoding here)
  ctr <- ctr[!(names(ctr) %in% c("softmax", "entropy"))]
  eval_fun_ <- function(...) {
    args <- list(...)
    do.call(nnet::nnet.formula, args)
  }
  eval_fun <- purrr::safely(purrr::quietly(eval_fun_))
  res <- do.call(eval_fun,
                 append(list(formula = self$formula, data = data), ctr))
  .store_on_self(self, res)
  self$estimator <- "nnet::nnet"
  invisible(self)
}
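
# Illustrative sketch (not part of the package interface): how the
# safely(quietly()) composition above shapes the object handed to
# .store_on_self(). purrr::quietly() captures printed output, warnings and
# messages alongside the return value, and purrr::safely() wraps that in a
# (result, error) pair, so the fitted model sits at res$result$result.
demo_safely_quietly <- function() {
  f <- function(x) {
    warning("a warning")
    sqrt(x)
  }
  g <- purrr::safely(purrr::quietly(f))
  res <- g(4)
  res$result$result    # 2 -- the actual return value
  res$result$warnings  # "a warning"
  res$error            # NULL, since f() did not error
}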
.predict.nnet <- function(object, data, self = NULL, ...) {
  response_var <- all.vars(self$formula)[1]
  if (response_var %in% colnames(data)) {
    truth_vec <- data[[response_var]]
  } else {
    truth_vec <- NULL
  }
  if (self$mode == "regression") {
    pred <- dplyr::tibble(
      prediction = drop(stats::predict(object, data)),
      truth = truth_vec
    )
  }
  if (self$mode == "classification") {
    pred <- stats::predict(object, data) %>%
      dplyr::as_tibble(.name_repair = ~ vctrs::vec_as_names(..., repair = "unique", quiet = TRUE)) %>%
      dplyr::mutate(truth = truth_vec) %>%
      tidyr::pivot_longer(-dplyr::any_of("truth"), names_to = "class", values_to = "prediction") %>%
      dplyr::select(dplyr::any_of(c("class", "prediction", "truth")))
  }
  return(pred)
}
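
# Illustrative sketch (hypothetical helper, not exported): the raw
# predict.nnet() output that .predict.nnet() tidies. For a multi-class fit
# nnet uses softmax, so stats::predict() returns a numeric matrix with one
# column per class; pivoting yields the long (class, prediction, truth)
# layout produced above. Uses the iris data shipped with base R.
demo_predict_shape <- function() {
  fit <- nnet::nnet(Species ~ ., data = iris, size = 4, decay = 0.5,
                    trace = FALSE)
  raw <- stats::predict(fit, iris)  # 150 x 3 matrix of class probabilities
  raw %>%
    dplyr::as_tibble() %>%
    dplyr::mutate(truth = iris$Species) %>%
    tidyr::pivot_longer(-dplyr::any_of("truth"), names_to = "class",
                        values_to = "prediction")
}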
.fitted.nnet <- function(object, self = NULL, ...) {
  if (self$mode == "regression") {
    fitted <- dplyr::tibble(
      fitted = drop(stats::predict(object))
    )
  }
  if (self$mode == "classification") {
    fitted <- dplyr::tibble(
      fitted = stats::predict(object, type = "class")
    )
  }
  return(fitted)
}
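
# Illustrative sketch (hypothetical helper, not exported): predict.nnet()
# with type = "class" returns a character vector of predicted class labels
# (rather than the raw probability matrix), which .fitted.nnet() stores
# directly in the fitted column.
demo_fitted_classes <- function() {
  fit <- nnet::nnet(Species ~ ., data = iris, size = 4, decay = 0.5,
                    trace = FALSE)
  head(stats::predict(fit, type = "class"))  # e.g. "setosa" "setosa" ...
}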
.resid.nnet <- function(object, self = NULL, ...) {
  residuals <- dplyr::tibble(
    residual = drop(object$residuals)
  )
  return(residuals)
}
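
# Illustrative sketch (hypothetical helper, not exported): nnet stores
# residuals on the fitted object as an n x k matrix (one column per output
# unit), so drop() above collapses the single-output regression case to a
# plain numeric vector.
demo_residual_shape <- function() {
  fit <- nnet::nnet(mpg ~ wt + hp, data = mtcars, size = 2, linout = TRUE,
                    trace = FALSE)
  dim(fit$residuals)        # 32 x 1
  str(drop(fit$residuals))  # numeric vector of length 32
}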