-
Notifications
You must be signed in to change notification settings - Fork 47
Description
Dear, @topepo
Link to download the database used: https://github.com/forecastingEDs/Forecasting-of-admissions-in-the-emergency-departments/blob/131bd23723a39724ad4f88ad6b8e5a58f42a7960/datasets.xlsx
Reproducible example
Copy your code to the clipboard and run:
Load the following R packages ----
library(recipes)
library(tune)
library(keras)
library (kernlab)
library(modeltime.ensemble)
library(modeltime)
library(lubridate)
library(tidyquant)
library(yardstick)
library(plotly)
library(rsample)
library(targets)
library(tidymodels)
library(modeltime.resample)
library(timetk)
library(tidyverse)
library(tidyquant)
library(parsnip)
library(ranger)
library(readxl)
data_tbl <- datasets %>%
select(id, Date, attendences, average_temperature, min, max, sunday, monday, tuesday, wednesday, thursday, friday, saturday, Jan, Feb, Mar, Apr, May, Jun, Jul, Aug, Sep, Oct, Nov, Dec) %>%
set_names(c("id", "date", "value","tempe_verage", "tempemin", "tempemax", "sunday", "monday", "tuesday", "wednesday", "thursday", "friday", "saturday", "Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"))
data_tbl
##---##
emergency_tscv <- data_tbl %>% filter(id == "davis") %>%
time_series_cv (
assess = "22 days",
skip = "30 days",
cumulative = TRUE,
slice_limit = 5
)
emergency_tscv
recipe_spec <- recipe(value ~ .,
data = training(emergency_tscv$splits[[1]])) %>%
step_timeseries_signature(date) %>%
step_rm(matches("(.iso$)|(.xts$)|(.lbl$)|(hour)|(minute)|(second)|(am.pm)|(date_year$)|(date_day$)|(id$)")) %>%
step_mutate(data = factor(value, ordered = TRUE))%>%
step_dummy(all_nominal(), one_hot = TRUE)%>%
step_normalize (date_index.num, date_mday7, date_week4, date_week3, date_week2, date_week,date_mweek, date_yday, date_qday, date_mday, date_wday,date_month,date_quarter,date_half, tempe_verage,tempemin,tempemax, -all_outcomes())
recipe_spec %>% prep() %>% juice() %>% glimpse()
Model 1: grid search SVM -----
wflw_svm_rbf <- workflow() %>%
add_model(
svm_rbf ("regression", cost = tune(), rbf_sigma = tune(), margin =tune()) %>% set_engine("kernlab", num.threads = 20))%>%
add_recipe(recipe_spec %>% step_rm(date)) %>%
tune_grid(grid = 25, recipe_spec, resamples = emergency_tscv, control = control_grid(verbose = TRUE, parallel_over = "resamples", allow_par = TRUE),
metrics = metric_set(rmse))
svm_rbf_tune <- wflw_svm_rbf
show_best(svm_rbf_tune, metric = "rmse", n =15)
Model 2: grid search Random Forest by ranger ----
wflw_fit_rf <- workflow() %>%
add_model(
rand_forest("regression", mtry = tune(), trees = tune(), min_n = tune()) %>% set_engine("ranger", num.threads = 20)
) %>%
add_recipe(recipe_spec %>% step_rm(date)) %>% # Add preprocessing steps (Note that "date" column is removed since Machine Learning algorithms don't typically know how to deal with date or date-time features)
tune_grid(grid = 25, recipe_spec, resamples = emergency_tscv, control = control_grid(verbose = TRUE, parallel_over = "resamples", allow_par = TRUE),
metrics = metric_set(rmse))
RF_tune_All_22 <- wflw_fit_rf
show_best(RF_tune_All_22, metric = "rmse", n =5)
The problem is in creating new dummy variables with the one-hot coding preprocessing step.
When running the grid search for "kernlab" and "ranger" I get the error:
i Creating pre-processing data to finalize unknown parameter: mtry
i Slice1: preprocessor 1/1
✓ Slice1: preprocessor 1/1
i Slice1: preprocessor 1/1, model 1/25
✓ Slice1: preprocessor 1/1, model 1/25
i Slice1: preprocessor 1/1, model 1/25 (extracts)
i Slice1: preprocessor 1/1, model 1/25 (predictions)
! Slice1: preprocessor 1/1, model 1/25 (predictions): There are new levels in a factor: 166, 167
x Slice1: preprocessor 1/1, model 1/25 (predictions): Error: Missing data in columns: data_001, data_002, data_003, data_004, data_005, data_006, data_007, ...
i Slice1: preprocessor 1/1, model 2/25
✓ Slice1: preprocessor 1/1, model 2/25
i Slice1: preprocessor 1/1, model 2/25 (extracts)
i Slice1: preprocessor 1/1, model 2/25 (predictions)
! Slice1: preprocessor 1/1, model 2/25 (predictions): There are new levels in a factor: 166, 167
x Slice1: preprocessor 1/1, model 2/25 (predictions): Error: Missing data in columns: data_001, data_002, data_003, data_004, data_005, data_006, data_007, ...
i Slice1: preprocessor 1/1, model 3/25
✓ Slice1: preprocessor 1/1, model 3/25
i Slice1: preprocessor 1/1, model 3/25 (extracts)
i Slice1: preprocessor 1/1, model 3/25 (predictions)
! Slice1: preprocessor 1/1, model 3/25 (predictions): There are new levels in a factor: 166, 167
x Slice1: preprocessor 1/1, model 3/25 (predictions): Error: Missing data in columns: data_001, data_002, data_003, data_004, data_005, data_006, data_007, ...
i Slice1: preprocessor 1/1, model 4/25
✓ Slice1: preprocessor 1/1, model 4/25
i Slice1: preprocessor 1/1, model 4/25 (extracts)
i Slice1: preprocessor 1/1, model 4/25 (predictions)
! Slice1: preprocessor 1/1, model 4/25 (predictions): There are new levels in a factor: 166, 167
x Slice1: preprocessor 1/1, model 4/25 (predictions): Error: Missing data in columns: data_001, data_002, data_003, data_004, data_005, data_006, data_007, ...
Generates the result of only 3 of the 5 validation folds.
A tibble: 15 × 8
trees min_n .metric .estimator mean n std_err .config
1 480 3 rmse standard 15.0 3 3.05 Preprocessor1_Model19
2 746 18 rmse standard 15.1 3 3.18 Preprocessor1_Model05
3 907 11 rmse standard 15.1 3 3.14 Preprocessor1_Model10
4 1562 7 rmse standard 15.1 3 3.18 Preprocessor1_Model15
5 496 24 rmse standard 15.2 3 3.21 Preprocessor1_Model09
6 1844 5 rmse standard 15.2 3 3.18 Preprocessor1_Model24
7 1406 15 rmse standard 15.2 3 3.18 Preprocessor1_Model18
8 986 26 rmse standard 15.2 3 3.13 Preprocessor1_Model12
9 1299 17 rmse standard 15.2 3 3.14 Preprocessor1_Model20
10 1118 20 rmse standard 15.2 3 3.18 Preprocessor1_Model07
11 1179 27 rmse standard 15.2 3 3.30 Preprocessor1_Model02
12 874 8 rmse standard 15.2 3 3.20 Preprocessor1_Model21
13 1662 21 rmse standard 15.2 3 3.21 Preprocessor1_Model04
14 1926 38 rmse standard 15.3 3 3.28 Preprocessor1_Model17
15 174 6 rmse standard 15.3 3 3.27 Preprocessor1_Model03