5050# '
5151# ' rand_forest(mode = "classification", mtry = expr(n_cols - 2))
5252# ' }
53- # '
53+ # '
5454# ' When no instance of `expr` is found in any of the argument
5555# ' values, the descriptor calculation code will not be executed.
56- # '
56+ # '
5757NULL
5858
5959get_descr_form <- function (formula , data ) {
@@ -66,24 +66,37 @@ get_descr_form <- function(formula, data) {
6666}
6767
6868get_descr_df <- function (formula , data ) {
69-
69+
7070 tmp_dat <- convert_form_to_xy_fit(formula , data , indicators = FALSE )
71-
71+
7272 if (is.factor(tmp_dat $ y )) {
73- n_levs <- table(tmp_dat $ y , dnn = NULL )
74- } else n_levs <- NA
75-
76- n_cols <- ncol(tmp_dat $ x )
77- n_preds <- ncol(convert_form_to_xy_fit(formula , data , indicators = TRUE )$ x )
78- n_obs <- nrow(data )
79- n_facts <- sum(vapply(tmp_dat $ x , is.factor , logical (1 )))
80-
73+ n_levs <- function () {
74+ table(tmp_dat $ y , dnn = NULL )
75+ }
76+ } else n_levs <- function () { NA }
77+
78+ n_cols <- function () {
79+ ncol(tmp_dat $ x )
80+ }
81+
82+ n_preds <- function () {
83+ ncol(convert_form_to_xy_fit(formula , data , indicators = TRUE )$ x )
84+ }
85+
86+ n_obs <- function () {
87+ nrow(data )
88+ }
89+
90+ n_facts <- function () {
91+ sum(vapply(tmp_dat $ x , is.factor , logical (1 )))
92+ }
93+
8194 list (
82- cols = n_cols ,
83- preds = n_preds ,
84- obs = n_obs ,
85- levs = n_levs ,
86- facts = n_facts
95+ n_cols = n_cols ,
96+ n_preds = n_preds ,
97+ n_obs = n_obs ,
98+ n_levs = n_levs ,
99+ n_facts = n_facts
87100 )
88101}
89102
@@ -93,9 +106,9 @@ get_descr_df <- function(formula, data) {
93106# ' @importFrom rlang syms sym
94107# ' @importFrom utils head
95108get_descr_spark <- function (formula , data ) {
96-
109+
97110 all_vars <- all.vars(formula )
98-
111+
99112 if (" ." %in% all_vars ){
100113 tmpdata <- dplyr :: collect(head(data , 1000 ))
101114 f_terms <- stats :: terms(formula , data = tmpdata )
@@ -106,11 +119,11 @@ get_descr_spark <- function(formula, data) {
106119 term_data <- dplyr :: select(data , !!! rlang :: syms(f_cols ))
107120 tmpdata <- dplyr :: collect(head(term_data , 1000 ))
108121 }
109-
122+
110123 f_term_labels <- attr(f_terms , " term.labels" )
111124 y_ind <- attr(f_terms , " response" )
112125 y_col <- f_cols [y_ind ]
113-
126+
114127 classes <- purrr :: map(tmpdata , class )
115128 icats <- purrr :: map_lgl(classes , ~ .x == " character" )
116129 cats <- classes [icats ]
@@ -119,14 +132,14 @@ get_descr_spark <- function(formula, data) {
119132 cat_levels <- imap(
120133 cats ,
121134 ~ {
122- p <- dplyr :: group_by(data , !! rlang :: sym(.y ))
135+ p <- dplyr :: group_by(data , !! rlang :: sym(.y ))
123136 p <- dplyr :: summarise(p )
124137 dplyr :: pull(p )
125138 }
126- )
139+ )
127140 numeric_pred <- length(f_term_labels ) - length(cat_levels )
128-
129-
141+
142+
130143 if (length(cat_levels ) > 0 ){
131144 n_dummies <- purrr :: map_dbl(cat_levels , ~ length(.x ) - 1 )
132145 n_dummies <- sum(n_dummies )
@@ -136,27 +149,27 @@ get_descr_spark <- function(formula, data) {
136149 factor_pred <- 0
137150 all_preds <- numeric_pred
138151 }
139-
152+
140153 out_cats <- classes [icats ]
141154 out_cats <- out_cats [names(out_cats ) == y_col ]
142-
155+
143156 outs <- purrr :: imap(
144157 out_cats ,
145158 ~ {
146- p <- dplyr :: group_by(data , !! sym(.y ))
147- p <- dplyr :: tally(p )
159+ p <- dplyr :: group_by(data , !! sym(.y ))
160+ p <- dplyr :: tally(p )
148161 dplyr :: collect(p )
149162 }
150- )
151-
163+ )
164+
152165 if (length(outs ) > 0 ){
153166 outs <- outs [[1 ]]
154167 y_vals <- purrr :: as_vector(outs [,2 ])
155168 names(y_vals ) <- purrr :: as_vector(outs [,1 ])
156169 y_vals <- y_vals [order(names(y_vals ))]
157170 y_vals <- as.table(y_vals )
158171 } else y_vals <- NA
159-
172+
160173 list (
161174 cols = length(f_term_labels ),
162175 preds = all_preds ,
@@ -170,15 +183,15 @@ get_descr_xy <- function(x, y) {
170183 if (is.factor(y )) {
171184 n_levs <- table(y , dnn = NULL )
172185 } else n_levs <- NA
173-
186+
174187 n_cols <- ncol(x )
175188 n_preds <- ncol(x )
176189 n_obs <- nrow(x )
177190 n_facts <- if (is.data.frame(x ))
178191 sum(vapply(x , is.factor , logical (1 )))
179192 else
180193 sum(apply(x , 2 , is.factor )) # would this always be zero?
181-
194+
182195 list (
183196 cols = n_cols ,
184197 preds = n_preds ,
0 commit comments