# R Caret Sample

In [3]:
#install.packages(c("caret", "xgboost", "mlbench", "pROC"))

In [5]:
library(caret)
library(xgboost)
library(mlbench)
library(pROC)
library(dplyr)

In [6]:
set.seed(42)

In [7]:
# 1) Data -------------------------------------------------------------------------
data(BreastCancer)
df <- BreastCancer %>%
  select(-Id) %>%
  mutate(across(-Class, ~ as.numeric(as.character(.)))) %>%  # convert to numeric
  na.omit()
df

    Cl.thickness Cell.size Cell.shape Marg.adhesion Epith.c.size Bare.nuclei Bl.cromatin Normal.nucleoli Mitoses     Class
1              5         1          1             1            2           1           3               1       1    benign
2              5         4          4             5            7          10           3               2       1    benign
3              3         1          1             1            2           2           3               1       1    benign
4              6         8          8             1            3           4           3               7       1    benign
5              4         1          1             3            2           1           3               1       1    benign
6              8        10         10             8            7          10           9               7       1 malignant
7              1         1          1             1            2          10           3               1       1    benign
8              2

In [8]:
# Make positive class the first level for caret's twoClassSummary/ROC
df$Class <- factor(df$Class, levels = c("malignant", "benign"))

In [31]:
set.seed(42)
train_idx <- createDataPartition(df$Class, p = 0.5, list = FALSE)
train_df  <- df[train_idx, ]
test_df   <- df[-train_idx, ]

In [32]:
library(gmodels)
CrossTable(train_df$Class)


 
   Cell Contents
|-------------------------|
|                       N |
|         N / Table Total |
|-------------------------|

 
Total Observations in Table:  342 

 
          | malignant |    benign | 
          |-----------|-----------|
          |       120 |       222 | 
          |     0.351 |     0.649 | 
          |-----------|-----------|



 


In [33]:
# caret requires a classification summary setup with class probabilities
ctrl_cv <- trainControl(
  method = "cv",
  number = 5,
  summaryFunction = twoClassSummary,
  classProbs = TRUE,
  verboseIter = TRUE,
  allowParallel = TRUE,
  search = "random"        # <-- random search for the first pass
)

In [34]:
# 2) RANDOMIZED SEARCH (caret, method = "xgbTree") --------------------------------
# caret's random search uses internal ranges for xgbTree when tuneLength is set.
set.seed(42)
rand_fit <- train(
  Class ~ .,
  data = train_df,
  method = "xgbTree",
  metric = "ROC",
  trControl = ctrl_cv,
  tuneLength = 5         # increase for broader random coverage
)

+ Fold1: eta=0.07137, max_depth= 7, gamma=9.889, colsample_bytree=0.6623, min_child_weight= 7, subsample=0.9300, nrounds=228 
- Fold1: eta=0.07137, max_depth= 7, gamma=9.889, colsample_bytree=0.6623, min_child_weight= 7, subsample=0.9300, nrounds=228 
+ Fold1: eta=0.27791, max_depth=10, gamma=5.603, colsample_bytree=0.3330, min_child_weight= 4, subsample=0.8747, nrounds=321 
- Fold1: eta=0.27791, max_depth=10, gamma=5.603, colsample_bytree=0.3330, min_child_weight= 4, subsample=0.8747, nrounds=321 
+ Fold1: eta=0.28552, max_depth= 4, gamma=9.467, colsample_bytree=0.4788, min_child_weight= 2, subsample=0.7088, nrounds=146 
- Fold1: eta=0.28552, max_depth= 4, gamma=9.467, colsample_bytree=0.4788, min_child_weight= 2, subsample=0.7088, nrounds=146 
+ Fold1: eta=0.56407, max_depth= 1, gamma=9.040, colsample_bytree=0.5057, min_child_weight=19, subsample=0.2555, nrounds=153 
- Fold1: eta=0.56407, max_depth= 1, gamma=9.040, colsample_bytree=0.5057, min_child_weight=19, subsample=0.2555, nroun

In [35]:
rand_fit$bestTune

  nrounds max_depth       eta    gamma colsample_bytree min_child_weight subsample
5      74         8 0.5869576 1.387102        0.4560814                1 0.4057442

In [36]:
max(rand_fit$results$ROC)

[1] 0.9878788

In [37]:
# Evaluate on test set
rand_prob <- predict(rand_fit, test_df, type = "prob")[, "malignant"]
rand_pred <- ifelse(rand_prob >= 0.5, "malignant", "benign") |> factor(levels = levels(df$Class))
cat("\nRandom search — Test AUC:", auc(response = test_df$Class, predictor = rand_prob), "\n")
cat("Random search — Test Accuracy:", mean(rand_pred == test_df$Class), "\n")

Setting levels: control = malignant, case = benign
Setting direction: controls > cases

Random search — Test AUC: 0.9923158 
Random search — Test Accuracy: 0.9530792 


In [38]:
# 3) GRID SEARCH REFINEMENT AROUND RANDOM BEST ------------------------------------
best <- rand_fit$bestTune
best

  nrounds max_depth       eta    gamma colsample_bytree min_child_weight subsample
5      74         8 0.5869576 1.387102        0.4560814                1 0.4057442

In [39]:
# helper to make a small numeric grid around a value (additive step or multiplicative spread)
around_num <- function(v, spread = 0.35, min_v = -Inf, max_v = Inf, step = NULL, round_int = FALSE) {
  vals <- if (!is.null(step)) c(v - step, v, v + step) else c(v * (1 - spread), v, v * (1 + spread))
  vals <- pmin(pmax(vals, min_v), max_v)
  vals <- unique(vals)
  if (round_int) vals <- unique(pmax(round(vals), min_v))
  sort(vals)
}

In [40]:
# Build a focused grid around the random winner
grid_refine <- expand.grid(
  nrounds           = unique(pmax(round(around_num(best$nrounds, step = 150, min_v = 100, max_v = 3000)), 1)),
  max_depth         = unique(pmax(round(c(best$max_depth - 1, best$max_depth, best$max_depth + 1)), 2)),
  eta               = around_num(best$eta,       spread = 0.35, min_v = 0.0001, max_v = 0.5),
  gamma             = around_num(best$gamma,     spread = 0.35, min_v = 0.0,    max_v = 10.0),
  colsample_bytree  = around_num(best$colsample_bytree, spread = 0.15, min_v = 0.5, max_v = 1.0),
  min_child_weight  = unique(pmax(round(c(best$min_child_weight - 1, best$min_child_weight, best$min_child_weight + 1)), 1)),
  subsample         = around_num(best$subsample, spread = 0.15, min_v = 0.5, max_v = 1.0)
)
grid_refine

    nrounds max_depth       eta     gamma colsample_bytree min_child_weight subsample
1       100         7 0.3815225 0.9016161        0.5000000                1       0.5
2       224         7 0.3815225 0.9016161        0.5000000                1       0.5
3       100         8 0.3815225 0.9016161        0.5000000                1       0.5
4       224         8 0.3815225 0.9016161        0.5000000                1       0.5
5       100         9 0.3815225 0.9016161        0.5000000                1       0.5
6       224         9 0.3815225 0.9016161        0.5000000                1       0.5
7       100         7 0.5000000 0.9016161        0.5000000                1       0.5
8       224         7 0.5000000 0.9016161        0.5000000                1       0.5
9       100         8 0.5000000 0.9016161        0.5000000                1       0.5
10      224         8 0.5000000 0.9016161        0.5000000                1       0.5
11      100         9 0.5000000 0.9016161        0.500

In [41]:
ctrl_grid <- trainControl(
  method = "cv",
  number = 5,
  summaryFunction = twoClassSummary,
  classProbs = TRUE,
  verboseIter = TRUE,
  allowParallel = TRUE,
  search = "grid"           # explicit grid search now
)

In [42]:
grid_fit <- train(
  Class ~ .,
  data = train_df,
  method = "xgbTree",
  metric = "ROC",
  trControl = ctrl_grid,
  tuneGrid = grid_refine
)

+ Fold1: eta=0.3815, max_depth=7, gamma=0.9016, colsample_bytree=0.5000, min_child_weight=1, subsample=0.5, nrounds=224 
- Fold1: eta=0.3815, max_depth=7, gamma=0.9016, colsample_bytree=0.5000, min_child_weight=1, subsample=0.5, nrounds=224 
+ Fold1: eta=0.3815, max_depth=7, gamma=0.9016, colsample_bytree=0.5000, min_child_weight=2, subsample=0.5, nrounds=224 
- Fold1: eta=0.3815, max_depth=7, gamma=0.9016, colsample_bytree=0.5000, min_child_weight=2, subsample=0.5, nrounds=224 
+ Fold1: eta=0.3815, max_depth=7, gamma=0.9016, colsample_bytree=0.5245, min_child_weight=1, subsample=0.5, nrounds=224 
- Fold1: eta=0.3815, max_depth=7, gamma=0.9016, colsample_bytree=0.5245, min_child_weight=1, subsample=0.5, nrounds=224 
+ Fold1: eta=0.3815, max_depth=7, gamma=0.9016, colsample_bytree=0.5245, min_child_weight=2, subsample=0.5, nrounds=224 
- Fold1: eta=0.3815, max_depth=7, gamma=0.9016, colsample_bytree=0.5245, min_child_weight=2, subsample=0.5, nrounds=224 
+ Fold1: eta=0.3815, max_depth=7

In [43]:
best

  nrounds max_depth       eta    gamma colsample_bytree min_child_weight subsample
5      74         8 0.5869576 1.387102        0.4560814                1 0.4057442

In [44]:
grid_fit$bestTune

  nrounds max_depth       eta     gamma colsample_bytree min_child_weight subsample
5     100         7 0.3815225 0.9016161        0.5244936                1       0.5

In [45]:
max(grid_fit$results$ROC)

[1] 0.9910059

In [46]:
# Evaluate on test set
grid_prob <- predict(grid_fit, test_df, type = "prob")[, "malignant"]
grid_pred <- ifelse(grid_prob >= 0.5, "malignant", "benign") |> factor(levels = levels(df$Class))
cat("\nGrid search — Test AUC:", auc(response = test_df$Class, predictor = grid_prob), "\n")
cat("Grid search — Test Accuracy:", mean(grid_pred == test_df$Class), "\n")

Setting levels: control = malignant, case = benign
Setting direction: controls > cases

Grid search — Test AUC: 0.9957226 
Grid search — Test Accuracy: 0.9618768 


In [47]:
# 4) FINAL REFIT WITH EARLY STOPPING (native xgboost) -----------------------------
# Use a small validation split from the training set to drive early stopping.
set.seed(123)
val_idx <- createDataPartition(train_df$Class, p = 0.2, list = FALSE)
val_df  <- train_df[val_idx, ]
trn_df  <- train_df[-val_idx, ]

In [48]:
# Prepare matrices
x_tr  <- as.matrix(trn_df %>% select(-Class))
y_tr  <- ifelse(trn_df$Class == "malignant", 1, 0)
x_val <- as.matrix(val_df %>% select(-Class))
y_val <- ifelse(val_df$Class == "malignant", 1, 0)
x_te  <- as.matrix(test_df %>% select(-Class))
y_te  <- ifelse(test_df$Class == "malignant", 1, 0)

dtr  <- xgb.DMatrix(x_tr,  label = y_tr)
dval <- xgb.DMatrix(x_val, label = y_val)
dte  <- xgb.DMatrix(x_te,  label = y_te)

bt <- grid_fit$bestTune

In [53]:
params <- list(
  objective        = "binary:logistic",
  eval_metric      = "auc",
  tree_method      = "hist",           # use "gpu_hist" if you have a GPU + recent xgboost
  max_depth        = bt$max_depth,
  eta              = bt$eta,
  subsample        = bt$subsample,
  colsample_bytree = bt$colsample_bytree,
  min_child_weight = bt$min_child_weight,
  gamma            = bt$gamma
)

In [54]:
final <- xgb.train(
  params = params,
  data = dtr,
  nrounds = max(1000, bt$nrounds),     # give room; early stopping will cut it
  watchlist = list(train = dtr, eval = dval),
  early_stopping_rounds = 50,
  verbose = 1
)

[1]	train-auc:0.976724	eval-auc:0.983796 
Multiple eval metrics are present. Will use eval_auc for early stopping.
Will train until eval_auc hasn't improved in 50 rounds.

[2]	train-auc:0.984699	eval-auc:0.984259 
[3]	train-auc:0.990025	eval-auc:0.989352 
[4]	train-auc:0.993468	eval-auc:0.989352 
[5]	train-auc:0.993879	eval-auc:0.989815 
[6]	train-auc:0.993526	eval-auc:0.989815 
[7]	train-auc:0.993468	eval-auc:0.991667 
[8]	train-auc:0.993114	eval-auc:0.991667 
[9]	train-auc:0.992232	eval-auc:0.997222 
[10]	train-auc:0.994821	eval-auc:0.997222 
[11]	train-auc:0.994762	eval-auc:0.997222 
[12]	train-auc:0.994409	eval-auc:0.997222 
[13]	train-auc:0.995115	eval-auc:0.997222 
[14]	train-auc:0.995527	eval-auc:0.996296 
[15]	train-auc:0.995822	eval-auc:0.991667 
[16]	train-auc:0.995822	eval-auc:0.987963 
[17]	train-auc:0.995998	eval-auc:0.991667 
[18]	train-auc:0.995998	eval-auc:0.991667 
[19]	train-auc:0.995998	eval-auc:0.991667 
[20]	train-auc:0.995939	eval-auc:0.988889 
[21]	train-auc:0.99

In [55]:
final_pred_prob <- predict(final, dte)
final_auc <- auc(y_te, final_pred_prob)
final_pred <- ifelse(final_pred_prob >= 0.5, 1, 0)
final_acc <- mean(final_pred == y_te)

Setting levels: control = 0, case = 1
Setting direction: controls < cases


In [56]:
cat("\nFinal (early stopping) — Test AUC:", final_auc, "\n")
cat("Final (early stopping) — Test Accuracy:", final_acc, "\n")
cat("Best nrounds (after ES):", final$best_iteration, "\n")


Final (early stopping) — Test AUC: 0.9946249 
Final (early stopping) — Test Accuracy: 0.9677419 
Best nrounds (after ES): 9 


In [67]:
install.packages(c("neuralnet", "mlbench", "pROC", "dplyr", "caret"))

trying URL 'https://cran.rstudio.com/bin/windows/contrib/4.5/neuralnet_1.44.2.zip'
Content type 'application/zip' length 124816 bytes (121 KB)
downloaded 121 KB



package ‘neuralnet’ successfully unpacked and MD5 sums checked

The downloaded binary packages are in
	C:\Users\Yura\AppData\Local\Temp\RtmpywwSCo\downloaded_packages


In [68]:
# Neural network tuning with neuralnet: Random search + Grid refinement (R only)
# -----------------------------------------------------------------------------
library(neuralnet)
library(mlbench)
library(pROC)
library(dplyr)
library(caret)

set.seed(42)

# 1) Data -----------------------------------------------------------------------
data(BreastCancer)
df <- BreastCancer %>%
  select(-Id) %>%
  mutate(across(-Class, ~ as.numeric(as.character(.)))) %>%
  na.omit()

# Target: malignant = 1, benign = 0
df$y <- as.integer(df$Class == "malignant")
df <- df %>% select(-Class)

# Split (stratified)
train_idx <- createDataPartition(df$y, p = 0.5, list = FALSE)
train_df  <- df[train_idx, ]
test_df   <- df[-train_idx, ]



Attaching package: ‘neuralnet’

The following object is masked from ‘package:dplyr’:

    compute



In [69]:
# Min–max scaling based on train only
range_fit <- function(x) c(min = min(x), max = max(x))
ranges <- sapply(train_df %>% select(-y), range_fit)

scale_minmax <- function(M, ranges) {
  num <- sweep(M, 2, ranges["min", ], FUN = "-")
  den <- sweep(num, 2, (ranges["max", ] - ranges["min", ]), FUN = "/")
  den[, colSums(is.finite(as.matrix(den))) == nrow(den), drop = FALSE]  # guard if constant
}

In [70]:
x_train <- as.matrix(train_df %>% select(-y))
x_test  <- as.matrix(test_df %>% select(-y))
x_train_sc <- scale_minmax(x_train, ranges)
x_test_sc  <- scale_minmax(x_test,  ranges)
train_nn <- data.frame(x_train_sc, y = train_df$y)
test_nn  <- data.frame(x_test_sc,  y = test_df$y)

In [71]:
# Build a formula: y ~ x1 + x2 + ... + xP
feat_names <- colnames(x_train_sc)
fml <- as.formula(paste("y ~", paste(feat_names, collapse = " + ")))
fml

y ~ Cl.thickness + Cell.size + Cell.shape + Marg.adhesion + Epith.c.size + 
    Bare.nuclei + Bl.cromatin + Normal.nucleoli + Mitoses

In [60]:
set.seed(42)
train_idx <- createDataPartition(df$Class, p = 0.8, list = FALSE)
train_df  <- df[train_idx, ]
test_df   <- df[-train_idx, ]

x_train <- as.matrix(train_df %>% select(-Class))
y_train <- as.numeric(train_df$Class == "malignant")
x_test  <- as.matrix(test_df %>% select(-Class))
y_test  <- as.numeric(test_df$Class == "malignant")

In [72]:
# 2) Utilities ------------------------------------------------------------------
# Fit one neuralnet model with given hyperparameters
fit_neuralnet <- function(formula, data, hidden, learningrate = 0.01,
                          act.fct = "logistic", algorithm = "rprop+",
                          stepmax = 1e6, threshold = 0.01, lifesign = "none") {
  neuralnet(
    formula = formula,
    data = data,
    hidden = hidden,
    learningrate = learningrate,
    act.fct = act.fct,            # "logistic" or "tanh"
    linear.output = FALSE,        # classification
    algorithm = algorithm,        # "rprop+" or "backprop"
    stepmax = stepmax,
    threshold = threshold,
    lifesign = lifesign
  )
}

In [73]:
# Predict probabilities for y=1
predict_proba <- function(nn, newdata, act.fct = "logistic") {
  p <- compute(nn, newdata[, feat_names, drop = FALSE])$net.result[, 1]
  # neuralnet returns activation outputs; for tanh, map to [0,1]
  if (act.fct == "tanh") p <- (p + 1) / 2
  p <- pmin(pmax(p, 1e-9), 1 - 1e-9)  # clip
  as.numeric(p)
}

In [75]:
# K-fold CV AUC for a given hyperparameter set
cv_auc <- function(k = 5, data, hidden, learningrate, act.fct, algorithm,
                   stepmax = 1e6, threshold = 0.01) {
  folds <- createFolds(data$y, k = k, list = TRUE, returnTrain = FALSE)
  aucs <- numeric(k)
  for (i in seq_along(folds)) {
    val_idx <- folds[[i]]
    tr <- data[-val_idx, , drop = FALSE]
    va <- data[val_idx, , drop = FALSE]

    nn <- try(
      fit_neuralnet(fml, tr, hidden, learningrate, act.fct, algorithm, stepmax, threshold),
      silent = TRUE
    )
    if (inherits(nn, "try-error")) { aucs[i] <- NA; next }

    p <- predict_proba(nn, va, act.fct)
    aucs[i] <- as.numeric(tryCatch(auc(va$y, p), error = function(e) NA))
  }
  mean(aucs, na.rm = TRUE)
}

In [78]:
# 3) Random search space ---------------------------------------------------------
sample_arch <- function(max_layers = 4, min_width = 8, max_width = 64) {
  L <- sample(1:max_layers, 1)         # 1..4 hidden layers
  width <- sample(seq(min_width, max_width, by = 8), 1)
  rep(width, L)                         # e.g., c(32,32,32)
}

In [79]:
random_search <- function(n_iter = 25, data) {
  results <- vector("list", n_iter)
  for (i in seq_len(n_iter)) {
    hidden       <- sample_arch()
    learningrate <- 10 ^ runif(1, -3.5, -1.5)     # ~ 0.0003 .. 0.03
    act.fct      <- sample(c("logistic", "tanh"), 1)
    algorithm    <- sample(c("rprop+", "backprop"), 1)
    stepmax      <- sample(c(5e5, 1e6, 2e6), 1)

    auc_cv <- cv_auc(
      k = 5, data = data, hidden = hidden,
      learningrate = learningrate, act.fct = act.fct, algorithm = algorithm,
      stepmax = stepmax, threshold = 0.01
    )

    results[[i]] <- list(
      hidden = hidden, learningrate = learningrate, act.fct = act.fct,
      algorithm = algorithm, stepmax = stepmax, auc_cv = auc_cv
    )
    
    cat(sprintf("[%02d/%02d] hidden=%s lr=%.5f act=%s alg=%s stepmax=%.0f | CV AUC=%.4f\n",
                i, n_iter, paste(hidden, collapse = "-"), learningrate, act.fct,
                algorithm, stepmax, auc_cv))
  }
  # sort by AUC desc
  results[order(sapply(results, `[[`, "auc_cv"), decreasing = TRUE)]
}


In [80]:
# 4) Grid refinement around best -------------------------------------------------
around_int <- function(v, delta = 8, min_v = 4, max_v = 128) {
  sort(unique(pmin(pmax(c(v - delta, v, v + delta), min_v), max_v)))
}
around_lr <- function(v) sort(unique(c(v / 2, v, v * 1.5)))

In [81]:
make_refine_grid <- function(best) {
  # vary: width (±), layers (±1), lr (×{0.5,1,1.5}), act.fct and algorithm around winner
  base_width <- best$hidden[1]
  width_grid <- around_int(base_width, delta = 8, min_v = 8, max_v = 96)
  layer_grid <- sort(unique(pmin(pmax(length(best$hidden) + c(-1, 0, +1), 1), 5)))
  lr_grid    <- around_lr(best$learningrate)
  act_grid   <- unique(best$act.fct)
  alg_grid   <- unique(best$algorithm)
  step_grid  <- unique(best$stepmax)

  # build hidden vectors as equal-width per layer
  expand.grid(
    layers = layer_grid,
    width = width_grid,
    lr = lr_grid,
    act = act_grid,
    alg = alg_grid,
    stepmax = step_grid,
    KEEP.OUT.ATTRS = FALSE
  ) %>%
    mutate(hidden = lapply(seq_len(n()), function(i) rep(width[i], layers[i])))
}

In [82]:
grid_search_refine <- function(grid_tbl, data) {
  out <- vector("list", nrow(grid_tbl))
  for (i in seq_len(nrow(grid_tbl))) {
    g <- grid_tbl[i, ]
    auc_cv <- cv_auc(
      k = 5, data = data, hidden = g$hidden[[1]],
      learningrate = g$lr, act.fct = g$act, algorithm = g$alg,
      stepmax = g$stepmax, threshold = 0.01
    )
    out[[i]] <- list(
      hidden = g$hidden[[1]], learningrate = g$lr, act.fct = g$act,
      algorithm = g$alg, stepmax = g$stepmax, auc_cv = auc_cv
    )
    cat(sprintf(" refine [%03d/%03d] hidden=%s lr=%.5f act=%s alg=%s | CV AUC=%.4f\n",
                i, nrow(grid_tbl), paste(g$hidden[[1]], collapse = "-"),
                g$lr, g$act, g$alg, auc_cv))
  }
  out[order(sapply(out, `[[`, "auc_cv"), decreasing = TRUE)]
}

In [86]:
# 5) Run random search -----------------------------------------------------------
cat("=== Random search ===\n")
rand_res <- random_search(n_iter = 2, data = train_nn)
best_rand <- rand_res[[1]]
cat("\nBest (random):\n")
print(best_rand)

=== Random search ===
Setting levels: control = 0, case = 1
Setting direction: controls < cases
Setting levels: control = 0, case = 1
Setting direction: controls < cases
Setting levels: control = 0, case = 1
Setting direction: controls < cases
Setting levels: control = 0, case = 1
Setting direction: controls < cases
Setting levels: control = 0, case = 1
Setting direction: controls < cases
[01/02] hidden=24-24-24 lr=0.00172 act=logistic alg=rprop+ stepmax=2000000 | CV AUC=0.9935
Setting levels: control = 0, case = 1
Setting direction: controls < cases
Setting levels: control = 0, case = 1
Setting direction: controls < cases
Setting levels: control = 0, case = 1
Setting direction: controls < cases
Setting levels: control = 0, case = 1
Setting direction: controls < cases
Setting levels: control = 0, case = 1
Setting direction: controls < cases
[02/02] hidden=32-32 lr=0.00136 act=logistic alg=rprop+ stepmax=1000000 | CV AUC=0.9929

Best (random):
$hidden
[1] 24 24 24

$learningrate
[1] 0.0

In [87]:
# 6) Grid refinement -------------------------------------------------------------
cat("\n=== Grid refinement ===\n")
grid_tbl <- make_refine_grid(best_rand)
grid_res <- grid_search_refine(grid_tbl, data = train_nn)
best_grid <- grid_res[[1]]
cat("\nBest (grid):\n")
print(best_grid)


=== Grid refinement ===
Setting levels: control = 0, case = 1
Setting direction: controls < cases
Setting levels: control = 0, case = 1
Setting direction: controls < cases
Setting levels: control = 0, case = 1
Setting direction: controls < cases
Setting levels: control = 0, case = 1
Setting direction: controls < cases
Setting levels: control = 0, case = 1
Setting direction: controls < cases
 refine [001/027] hidden=16-16 lr=0.00086 act=logistic alg=rprop+ | CV AUC=0.9945
Setting levels: control = 0, case = 1
Setting direction: controls < cases
Setting levels: control = 0, case = 1
Setting direction: controls < cases
Setting levels: control = 0, case = 1
Setting direction: controls < cases
Setting levels: control = 0, case = 1
Setting direction: controls < cases
Setting levels: control = 0, case = 1
Setting direction: controls < cases
 refine [002/027] hidden=16-16-16 lr=0.00086 act=logistic alg=rprop+ | CV AUC=0.9959
Setting levels: control = 0, case = 1
Setting direction: controls < 

In [88]:
# 7) Final training on full train, evaluate on test ------------------------------
final_nn <- fit_neuralnet(
  fml,
  data = train_nn,
  hidden = best_grid$hidden,
  learningrate = best_grid$learningrate,
  act.fct = best_grid$act.fct,
  algorithm = best_grid$algorithm,
  stepmax = best_grid$stepmax,
  threshold = 0.01,
  lifesign = "minimal"
)

hidden: 32, 32, 32, 32    thresh: 0.01    rep: 1/1    steps:     101	error: 0.02032	time: 0.44 secs


In [89]:
# Evaluate on test
p_test <- predict_proba(final_nn, test_nn, best_grid$act.fct)
final_auc <- as.numeric(auc(test_nn$y, p_test))
final_acc <- mean(ifelse(p_test >= 0.5, 1, 0) == test_nn$y)

cat(sprintf("\nFinal Test AUC: %.4f\n", final_auc))
cat(sprintf("Final Test ACC: %.3f\n", final_acc))


Setting levels: control = 0, case = 1
Setting direction: controls < cases

Final Test AUC: 0.9807
Final Test ACC: 0.956


In [90]:
# Optional: show top-5 configs from each stage
topk <- function(lst, k = 5) {
  df <- do.call(rbind, lapply(lst[1:min(k, length(lst))], function(x) {
    data.frame(
      hidden = paste(x$hidden, collapse = "-"),
      learningrate = x$learningrate,
      act = x$act.fct,
      alg = x$algorithm,
      stepmax = x$stepmax,
      cv_auc = x$auc_cv
    )
  }))
  rownames(df) <- NULL
  df
}

In [91]:
cat("\nTop-5 (Random search):\n"); print(topk(rand_res, 5))
cat("\nTop-5 (Grid refinement):\n"); print(topk(grid_res, 5))


Top-5 (Random search):
    hidden learningrate      act    alg stepmax    cv_auc
1 24-24-24  0.001716238 logistic rprop+   2e+06 0.9934856
2    32-32  0.001361014 logistic rprop+   1e+06 0.9929219

Top-5 (Grid refinement):
       hidden learningrate      act    alg stepmax    cv_auc
1 32-32-32-32 0.0008581189 logistic rprop+   2e+06 0.9970377
2 24-24-24-24 0.0025743568 logistic rprop+   2e+06 0.9969148
3       32-32 0.0017162379 logistic rprop+   2e+06 0.9968524
4    32-32-32 0.0017162379 logistic rprop+   2e+06 0.9964208
5       32-32 0.0025743568 logistic rprop+   2e+06 0.9963960
