# Random Forest Models: Classification and Regression

This notebook has a similar content and structure as `Rand_Forest_ML.ipynb`, which describes how to implement Zero-Inflated Random Forest models to predict board counts at each bus stop.

 * Requiered libraries:

In [None]:
library(randomForest)
library(mlbench)
library(caret)
library(e1071)
library(dplyr)
library(tidyr)
library(readr)
library(rFerns)
library(ranger)

In [18]:
pre_board_train = read_csv('data/jmartinez/Data_for_RF_Models/Board_Counts/route_4/direction0/bus_stop_12/pre_lock_train_data.csv')
pre_board_test = read_csv('data/jmartinez/Data_for_RF_Models/Board_Counts/route_4/direction0/bus_stop_12/pre_lock_test_data.csv')


[36m──[39m [1m[1mColumn specification[1m[22m [36m────────────────────────────────────────────────────────[39m
cols(
  month = [32mcol_double()[39m,
  service_kind = [31mcol_character()[39m,
  hour = [32mcol_double()[39m,
  board_count = [32mcol_double()[39m,
  mean_temp = [32mcol_double()[39m,
  mean_precip = [32mcol_double()[39m,
  month_average_board_count = [32mcol_double()[39m,
  surrounding_board_count = [32mcol_double()[39m
)



[36m──[39m [1m[1mColumn specification[1m[22m [36m────────────────────────────────────────────────────────[39m
cols(
  month = [32mcol_double()[39m,
  service_kind = [31mcol_character()[39m,
  hour = [32mcol_double()[39m,
  board_count = [32mcol_double()[39m,
  mean_temp = [32mcol_double()[39m,
  mean_precip = [32mcol_double()[39m,
  month_average_board_count = [32mcol_double()[39m,
  surrounding_board_count = [32mcol_double()[39m
)




In [185]:
RF_Ferns_and_Ranger <- function(rt, di, st, part){
    path = paste0('data', '/', 'jmartinez', '/', 'Data_for_RF_Models', '/', 'Board_Counts', '/',
                  paste('route', rt, sep = '_'), '/', paste('direction', di, sep = ''), '/',
                  paste('bus_stop', st, sep = '_'), '/')
    if(part == 'pre'){
        file_path_train = paste(path, 'pre_lock_train_data.csv', sep = '/')
        file_path_test = paste(path, 'pre_lock_test_data.csv', sep = '/')
        
        board_train = read_csv(file_path_train)
        board_test = read_csv(file_path_test)
        
        board_train$month = factor(board_train$month)
        board_train$service_kind = factor(board_train$service_kind)
        board_train$hour = factor(board_train$hour)

        board_test$month = factor(board_test$month)
        board_test$service_kind = factor(board_test$service_kind)
        board_test$hour = factor(board_test$hour)
    }
    else if(part == 'post'){
        
        file_path_train = paste(path, 'post_lock_train_data.csv', sep = '/')
        file_path_test = paste(path, 'post_lock_test_data.csv', sep = '/')
        
        board_train = read_csv(file_path_train)
        board_test = read_csv(file_path_test)
        
        board_train$month = factor(board_train$month)
        board_train$service_kind = factor(board_train$service_kind)
        board_train$hour = factor(board_train$hour)

        board_test$month = factor(board_test$month)
        board_test$service_kind = factor(board_test$service_kind)
        board_test$hour = factor(board_test$hour)
    }
    else{
        file_path_train = paste(path, 'train_data.csv', sep = '/')
        file_path_test = paste(path, 'test_data.csv', sep = '/')
        
        board_train = read_csv(file_path_train)
        board_test = read_csv(file_path_test)
        
        board_train$month = factor(board_train$month)
        board_train$service_kind = factor(board_train$service_kind)
        board_train$hour = factor(board_train$hour)

        board_test$month = factor(board_test$month)
        board_test$service_kind = factor(board_test$service_kind)
        board_test$hour = factor(board_test$hour)
    }
    
    train_month_levels = length(levels(board_train$month))
    train_service_kind_levels = length(levels(board_train$service_kind))
    train_hour_levels = length(levels(board_train$hour))    
    
    board_test = board_test %>%
        filter(hour %in% intersect(unique(board_test$hour), unique(board_train$hour)))
    
    if(train_month_levels > 1){
        if(train_service_kind_levels > 1){
            if(train_hour_levels > 1){
                board_train = board_train 
                
                board_test = board_test 
            }
            else{
                board_train = board_train %>%
                    summarise(board_count, service_kind, month, mean_temp, mean_precip, month_average_board_count, surrounding_board_count)
                
                board_test = board_test %>%
                    summarise(board_count, service_kind, month, mean_temp, mean_precip, month_average_board_count, surrounding_board_count)
            }
        }
        else{
            if(train_hour_levels > 1){
                
                board_train = board_train %>%
                    summarise(board_count, hour, month, mean_temp, mean_precip, month_average_board_count, surrounding_board_count)
                
                board_test = board_test %>%
                    summarise(board_count, hour, month, mean_temp, mean_precip, month_average_board_count, surrounding_board_count)
            }
        }
    }
    else{
        if(train_service_kind_levels > 1){
            if(train_hour_levels > 1){
                board_train = board_train %>%
                    summarise(board_count, hour, service_kind, mean_temp, mean_precip, month_average_board_count, surrounding_board_count)
                
                board_test = board_test %>%
                    summarise(board_count, hour, service_kind, mean_temp, mean_precip, month_average_board_count, surrounding_board_count)
            }
            else{
                board_train = board_train %>%
                    summarise(board_count, service_kind, mean_temp, mean_precip, month_average_board_count, surrounding_board_count)
                
                board_test = board_test %>%
                    summarise(board_count, service_kind, mean_temp, mean_precip, month_average_board_count, surrounding_board_count)
            }
        }
        else{
            if(train_hour_levels > 1){
                board_train = board_train %>%
                    summarise(board_count, hour, mean_temp, mean_precip, month_average_board_count, surrounding_board_count)
                
                board_test = board_test %>%
                    summarise(board_count, hour, mean_temp, mean_precip, month_average_board_count, surrounding_board_count)
            }
            else{
                board_train = board_train %>%
                    summarise(board_count, mean_temp, mean_precip, month_average_board_count, surrounding_board_count)
                
                board_test = board_test %>%
                    summarise(board_count, mean_temp, mean_precip, month_average_board_count, surrounding_board_count)
            }
        }
    }
    
    n_row_train = nrow(board_train)
    
    if(n_row_train < 60){
        
        return('Insufficient data for analysis!')
    }
    else{
        y_clf_train = board_train$board_count
        y_clf_train = factor(if_else(y_clf_train == 0, 0, 1))
        
        y_clf_test = board_test$board_count
        y_clf_test = factor(if_else(y_clf_test == 0, 0, 1))
        
        Board_train_clf <- data.frame(cbind(y_clf_train, board_train[, -c(1)]))
        Board_test_clf <- data.frame(cbind(y_clf_test, board_test[, -c(1)]))
        
        #---------------------------------------------------------------------------------
        # Training characteristics for model tuning:
        #---------------------------------------------------------------------------------
        
        control <- trainControl(method='repeatedcv', 
                                number=10, 
                                repeats=3,
                                search = 'random')
        
        #---------------------------------------------------------------------------------
        # Classification using Random Ferns:
        #---------------------------------------------------------------------------------
        
        set.seed(1)
        rf_random <- train(y_clf_train ~ .,
                           data = Board_train_clf,
                           method = 'rFerns',
                           metric = 'Accuracy',
                           tuneLength  = 20, 
                           trControl = control)
        
        RF_Ferns <- print(rf_random)
                
        rf_random_pred <- predict(rf_random, newdata = Board_test_clf)
        
        rf_random_conf_mat <- confusionMatrix(y_clf_test, rf_random_pred)
        rf_random_conf_mat <- data.frame(rf_random_conf_mat[4])
        colnames(rf_random_conf_mat) <- c('Value')
        # Index for regression data:
        
        index_for_reg <- which(rf_random_pred == '1', arr.ind = T)
        #---------------------------------------------------------------------------------
        # Regression Model using Ranger:
        
                
        set.seed(1)
        rf_reg_ranger <- train(board_count ~ .,
                               data = (board_train %>% filter(board_count > 0)),
                               method = 'ranger',
                               metric = 'RMSE',
                               tuneLength  = 15, 
                               trControl = control)
        
        RF_Ranger <- print(rf_reg_ranger)
                
        #----------------------------------------------------------------------------------
        # Validation:
        Board_Test_Val = board_test
        nrow_test = (1:nrow(board_test))
        
        Board_Test_Val$index = nrow_test
        Board_test_reg = Board_Test_Val[index_for_reg, ]
        
        rf_reg_ranger_pred <- predict(rf_reg_ranger, newdata = Board_test_reg)
        Board_test_reg$Ranger_Pred = rf_reg_ranger_pred
        
        Board_Test_Val = left_join(Board_Test_Val, Board_test_reg, by = 'index')
        
        Board_Test_Val = Board_Test_Val %>%
            mutate(RF_Pred = if_else(is.na(Ranger_Pred) == T, 0, Ranger_Pred))
        
        board_test$RF_Pred = Board_Test_Val$RF_Pred
                
        RF_test_RMSE = sqrt(mean((board_test$board_count - board_test$RF_Pred)^{2}))
    }
    
    if(part == 'pre'){
        file_path_clf = paste(path, 'pre_lock_RF_Fern.txt', sep = '/')
        file_path_clf_conf_mat = paste(path, 'pre_Conf_Mat_RF_Fern.csv', sep = '/')
        
        file_path_reg = paste(path, 'pre_lock_RF_Reg.txt', sep = '/')
        file_path_RF_Chart = paste(path, 'pre_RF_Chart.csv', sep = '/')
        
        final_clf_model = paste(path, 'Pre_Random_Ferns_model.rds')
        final_reg_model = paste(path, 'Pre_Random_Forest_RANGER_model.rds')
        
        write.table(RF_Ferns, file_path_clf)
        write.csv(rf_random_conf_mat, file_path_clf_conf_mat)
        
        write.table(RF_Ranger, file_path_reg)
        write.csv(board_test, file_path_RF_Chart)
        
        saveRDS(rf_random, final_clf_model)
        saveRDS(rf_reg_ranger, final_reg_model)
        
    }
    else if(part == 'post'){
        
        file_path_clf = paste(path, 'post_lock_RF_Fern.txt', sep = '/')
        file_path_clf_conf_mat = paste(path, 'post_Conf_Mat_RF_Fern.csv', sep = '/')
        
        file_path_reg = paste(path, 'post_lock_RF_Reg.txt', sep = '/')
        file_path_RF_Chart = paste(path, 'post_RF_Chart.csv', sep = '/')
        
        final_clf_model = paste(path, 'Post_Random_Ferns_model.rds')
        final_reg_model = paste(path, 'Post_Random_Forest_RANGER_model.rds')
        
        write.table(RF_Ferns, file_path_clf)
        write.csv(rf_random_conf_mat, file_path_clf_conf_mat)
        
        write.table(RF_Ranger, file_path_reg)
        write.csv(board_test, file_path_RF_Chart)
        
        saveRDS(rf_random, final_clf_model)
        saveRDS(rf_reg_ranger, final_reg_model)
    }
    else{
        file_path_clf = paste(path, 'RF_Fern.txt', sep = '/')
        file_path_clf_conf_mat = paste(path, 'Conf_Mat_RF_Fern.csv', sep = '/')
        
        file_path_reg = paste(path, 'RF_Reg.txt', sep = '/')
        file_path_RF_Chart = paste(path, 'pre_RF_Chart.csv', sep = '/')
        
        final_clf_model = paste(path, 'Random_Ferns_model.rds')
        final_reg_model = paste(path, 'Random_Forest_RANGER_model.rds')
        
        write.table(RF_Ferns, file_path_clf)
        write.csv(rf_random_conf_mat, file_path_clf_conf_mat)
        
        write.table(RF_Ranger, file_path_reg)
        write.csv(board_test, file_path_RF_Chart)
        
        saveRDS(rf_random, final_clf_model)
        saveRDS(rf_reg_ranger, final_reg_model)
    }
    
    
    return('Done!')
}

In [3]:
Pre_lock = read_csv('data/jmartinez/Transit_Data/Pre_lock.csv')


[36m──[39m [1m[1mColumn specification[1m[22m [36m────────────────────────────────────────────────────────[39m
cols(
  .default = col_double(),
  scheduled_arrival_time = [34mcol_datetime(format = "")[39m,
  actual_arrival_time = [34mcol_datetime(format = "")[39m,
  direction_desc = [31mcol_character()[39m,
  service_period = [31mcol_character()[39m,
  date = [34mcol_date(format = "")[39m,
  scheduled_datetime = [34mcol_datetime(format = "")[39m,
  actual_arrival_datetime = [34mcol_datetime(format = "")[39m,
  trip_start_time = [34mcol_datetime(format = "")[39m,
  trip_date = [34mcol_date(format = "")[39m,
  service_kind = [31mcol_character()[39m
)
[36mℹ[39m Use [30m[47m[30m[47m`spec()`[47m[30m[49m[39m for the full column specifications.




In [4]:
Post_lock = read_csv('data/jmartinez/Transit_Data/Post_lock.csv')


[36m──[39m [1m[1mColumn specification[1m[22m [36m────────────────────────────────────────────────────────[39m
cols(
  .default = col_double(),
  scheduled_arrival_time = [34mcol_datetime(format = "")[39m,
  actual_arrival_time = [34mcol_datetime(format = "")[39m,
  direction_desc = [31mcol_character()[39m,
  service_period = [31mcol_character()[39m,
  date = [34mcol_date(format = "")[39m,
  scheduled_datetime = [34mcol_datetime(format = "")[39m,
  actual_arrival_datetime = [34mcol_datetime(format = "")[39m,
  trip_start_time = [34mcol_datetime(format = "")[39m,
  trip_date = [34mcol_date(format = "")[39m,
  service_kind = [31mcol_character()[39m
)
[36mℹ[39m Use [30m[47m[30m[47m`spec()`[47m[30m[49m[39m for the full column specifications.




In [5]:
Pre_lock$trip_id = as.character(Pre_lock$trip_id)
Pre_lock$stop_id = as.character(Pre_lock$stop_id)
Pre_lock$route_id = as.character(Pre_lock$route_id)
Pre_lock$direction_id = as.character(Pre_lock$direction_id)
Pre_lock$hour = factor(Pre_lock$hour)
Pre_lock$month = factor(Pre_lock$month)

In [6]:
Post_lock$trip_id = as.character(Post_lock$trip_id)
Post_lock$stop_id = as.character(Post_lock$stop_id)
Post_lock$route_id = as.character(Post_lock$route_id)
Post_lock$direction_id = as.character(Post_lock$direction_id)
Post_lock$hour = factor(Post_lock$hour)
Post_lock$month = factor(Post_lock$month)

In [25]:
bus_stops_r4_dir0_pre = Pre_lock %>%  
    filter(route_id == '4', direction_id == '0') %>%
    pull(stop_id)

bus_stops_r4_dir0_pre = unique(bus_stops_r4_dir0_pre)

In [47]:
which(bus_stops_r4_dir0_pre == '1882', arr.ind = T)

In [None]:
#

In [49]:
bus_stops_r4_dir0_pre

In [48]:
for(i in bus_stops_r4_dir0_pre[1:length(bus_stops_r4_dir0_pre)]){
    print(i)
    RF_Ferns_and_Ranger('4', '0', i, 'pre')
    print('Done!')
    print('_/|_/|_')
}

[1] NA


ERROR: Error: 'data/jmartinez/Data_for_RF_Models/Board_Counts/route_4/direction0/bus_stop_NA//pre_lock_train_data.csv' does not exist in current working directory ('/home/jmartinez').


### Post-lockdown

In [None]:
bus_stops_r4_dir0_post = Post_lock %>%  
    filter(route_id == '4', direction_id == '0') %>%
    pull(stop_id)

bus_stops_r4_dir0_post = unique(bus_stops_r4_dir0_post)
bus_stops_r4_dir0_post

In [None]:
which(bus_stops_r4_dir0_post == , arr.ind =T)

In [None]:
for(i in bus_stops_r4_dir0_post[1:length(bus_stops_r4_dir0_post)]){
    print(i)
    RF_Ferns_and_Ranger('4', '0', i, 'post')
    print('Done!')
    print('_/|_/|_')
}

In [222]:
bus_stops_r4_dir0_post = Post_lock %>%  
    filter(route_id == '4', direction_id == '1') %>%
    pull(stop_id)

bus_stops_r4_dir1_post = unique(bus_stops_r4_dir0_post)
bus_stops_r4_dir1_post

In [None]:
# 1883, 412, 996, 1878

In [229]:
which(bus_stops_r4_dir1_post == '1878', arr.ind =T)

In [None]:
for(i in bus_stops_r4_dir1_post[137:length(bus_stops_r4_dir1_post)]){
    print(i)
    RF_Ferns_and_Ranger('4', '1', i, 'post')
    print('Done!')
    print('_/|_/|_')
}

[1] "2148"



[36m──[39m [1m[1mColumn specification[1m[22m [36m────────────────────────────────────────────────────────[39m
cols(
  month = [32mcol_double()[39m,
  service_kind = [31mcol_character()[39m,
  hour = [32mcol_double()[39m,
  board_count = [32mcol_double()[39m,
  mean_temp = [32mcol_double()[39m,
  mean_precip = [32mcol_double()[39m,
  month_average_board_count = [32mcol_double()[39m,
  surrounding_board_count = [32mcol_double()[39m
)



[36m──[39m [1m[1mColumn specification[1m[22m [36m────────────────────────────────────────────────────────[39m
cols(
  month = [32mcol_double()[39m,
  service_kind = [31mcol_character()[39m,
  hour = [32mcol_double()[39m,
  board_count = [32mcol_double()[39m,
  mean_temp = [32mcol_double()[39m,
  mean_precip = [32mcol_double()[39m,
  month_average_board_count = [32mcol_double()[39m,
  surrounding_board_count = [32mcol_double()[39m
)




Random Ferns 

108 samples
  7 predictor
  2 classes: '0', '1' 

No pre-processing
Resampling: Cross-Validated (10 fold, repeated 3 times) 
Summary of sample sizes: 98, 97, 98, 96, 97, 97, ... 
Resampling results across tuning parameters:

  depth  Accuracy   Kappa    
   1     0.9143434  0.8209537
   2     0.9146465  0.8233835
   3     0.9213131  0.8368290
   5     0.9083333  0.8109977
   6     0.9055556  0.8027743
   7     0.9028283  0.7973770
   9     0.8776768  0.7519671
  10     0.8870707  0.7668673
  11     0.8716162  0.7398755
  13     0.8591919  0.7192164
  14     0.8531313  0.7057861
  15     0.8473232  0.6987384

Accuracy was used to select the optimal model using the largest value.
The final value used for the model was depth = 3.


## Route 1

### Direction 0

#### Pre-lockdown

In [7]:
bus_stops_r1_dir0_pre = Pre_lock %>%  
    filter(route_id == '1', direction_id == '0') %>%
    pull(stop_id)

In [8]:
bus_stops_r1_dir0_pre = unique(bus_stops_r1_dir0_pre)
bus_stops_r1_dir0_pre

In [130]:
which(bus_stops_r1_dir0_pre == '95', arr.ind = T)

In [61]:
for(i in bus_stops_r1_dir0_pre[31:length(bus_stops_r1_dir0_pre)]){
    print(i)
    RF_Ferns_and_Ranger('1', '0', i, 'pre')
    print('Done!')
    print('_/|_/|_')
}

[1] "44"


ERROR: Error: 'data/jmartinez/Data_for_RF_Models/Board_Counts/route_1/direction0/bus_stop_44//pre_lock_train_data.csv' does not exist in current working directory ('/home/jmartinez').


#### Post-lockdown

In [128]:
bus_stops_r1_dir0_post = Post_lock %>%  
    filter(route_id == '1', direction_id == '0') %>%
    pull(stop_id)

bus_stops_r1_dir0_post = unique(bus_stops_r1_dir0_post)
bus_stops_r1_dir0_post

In [None]:
# 

In [None]:
which(bus_stops_r1_dir0_post == '95', arr.ind = T)

In [129]:
for(i in bus_stops_r1_dir0_post[1:length(bus_stops_r1_dir0_post)]){
    print(i)
    RF_Ferns_and_Ranger('1', '0', i, 'post')
    print('Done!')
    print('_/|_/|_')
}

[1] "354"



[36m──[39m [1m[1mColumn specification[1m[22m [36m────────────────────────────────────────────────────────[39m
cols(
  month = [32mcol_double()[39m,
  service_kind = [31mcol_character()[39m,
  hour = [32mcol_double()[39m,
  board_count = [32mcol_double()[39m,
  mean_temp = [32mcol_double()[39m,
  mean_precip = [33mcol_logical()[39m,
  month_average_board_count = [32mcol_double()[39m,
  surrounding_board_count = [32mcol_double()[39m
)



[36m──[39m [1m[1mColumn specification[1m[22m [36m────────────────────────────────────────────────────────[39m
cols(
  month = [32mcol_double()[39m,
  service_kind = [31mcol_character()[39m,
  hour = [32mcol_double()[39m,
  board_count = [32mcol_double()[39m,
  mean_temp = [32mcol_double()[39m,
  mean_precip = [33mcol_logical()[39m,
  month_average_board_count = [32mcol_double()[39m,
  surrounding_board_count = [32mcol_double()[39m
)




[1] "Done!"
[1] "_/|_/|_"
[1] "505"



[36m──[39m [1m[1mColumn specification[1m[22m [36m────────────────────────────────────────────────────────[39m
cols(
  month = [32mcol_double()[39m,
  service_kind = [31mcol_character()[39m,
  hour = [32mcol_double()[39m,
  board_count = [32mcol_double()[39m,
  mean_temp = [32mcol_double()[39m,
  mean_precip = [32mcol_double()[39m,
  month_average_board_count = [32mcol_double()[39m,
  surrounding_board_count = [32mcol_double()[39m
)



[36m──[39m [1m[1mColumn specification[1m[22m [36m────────────────────────────────────────────────────────[39m
cols(
  month = [32mcol_double()[39m,
  service_kind = [31mcol_character()[39m,
  hour = [32mcol_double()[39m,
  board_count = [32mcol_double()[39m,
  mean_temp = [32mcol_double()[39m,
  mean_precip = [32mcol_double()[39m,
  month_average_board_count = [32mcol_double()[39m,
  surrounding_board_count = [32mcol_double()[39m
)




[1] "Done!"
[1] "_/|_/|_"
[1] "784"



[36m──[39m [1m[1mColumn specification[1m[22m [36m────────────────────────────────────────────────────────[39m
cols(
  month = [32mcol_double()[39m,
  service_kind = [31mcol_character()[39m,
  hour = [32mcol_double()[39m,
  board_count = [32mcol_double()[39m,
  mean_temp = [32mcol_double()[39m,
  mean_precip = [32mcol_double()[39m,
  month_average_board_count = [32mcol_double()[39m,
  surrounding_board_count = [32mcol_double()[39m
)



[36m──[39m [1m[1mColumn specification[1m[22m [36m────────────────────────────────────────────────────────[39m
cols(
  month = [32mcol_double()[39m,
  service_kind = [31mcol_character()[39m,
  hour = [32mcol_double()[39m,
  board_count = [32mcol_double()[39m,
  mean_temp = [32mcol_double()[39m,
  mean_precip = [32mcol_double()[39m,
  month_average_board_count = [32mcol_double()[39m,
  surrounding_board_count = [32mcol_double()[39m
)




[1] "Done!"
[1] "_/|_/|_"
[1] "283"



[36m──[39m [1m[1mColumn specification[1m[22m [36m────────────────────────────────────────────────────────[39m
cols(
  month = [32mcol_double()[39m,
  service_kind = [31mcol_character()[39m,
  hour = [32mcol_double()[39m,
  board_count = [32mcol_double()[39m,
  mean_temp = [33mcol_logical()[39m,
  mean_precip = [33mcol_logical()[39m,
  month_average_board_count = [32mcol_double()[39m,
  surrounding_board_count = [32mcol_double()[39m
)



[36m──[39m [1m[1mColumn specification[1m[22m [36m────────────────────────────────────────────────────────[39m
cols(
  month = [32mcol_double()[39m,
  service_kind = [31mcol_character()[39m,
  hour = [32mcol_double()[39m,
  board_count = [32mcol_double()[39m,
  mean_temp = [33mcol_logical()[39m,
  mean_precip = [33mcol_logical()[39m,
  month_average_board_count = [32mcol_double()[39m,
  surrounding_board_count = [32mcol_double()[39m
)




[1] "Done!"
[1] "_/|_/|_"
[1] "284"



[36m──[39m [1m[1mColumn specification[1m[22m [36m────────────────────────────────────────────────────────[39m
cols(
  month = [32mcol_double()[39m,
  service_kind = [31mcol_character()[39m,
  hour = [32mcol_double()[39m,
  board_count = [32mcol_double()[39m,
  mean_temp = [33mcol_logical()[39m,
  mean_precip = [32mcol_double()[39m,
  month_average_board_count = [32mcol_double()[39m,
  surrounding_board_count = [32mcol_double()[39m
)



[36m──[39m [1m[1mColumn specification[1m[22m [36m────────────────────────────────────────────────────────[39m
cols(
  month = [32mcol_double()[39m,
  service_kind = [31mcol_character()[39m,
  hour = [32mcol_double()[39m,
  board_count = [32mcol_double()[39m,
  mean_temp = [33mcol_logical()[39m,
  mean_precip = [32mcol_double()[39m,
  month_average_board_count = [32mcol_double()[39m,
  surrounding_board_count = [32mcol_double()[39m
)




[1] "Done!"
[1] "_/|_/|_"
[1] "285"



[36m──[39m [1m[1mColumn specification[1m[22m [36m────────────────────────────────────────────────────────[39m
cols(
  month = [32mcol_double()[39m,
  service_kind = [31mcol_character()[39m,
  hour = [32mcol_double()[39m,
  board_count = [32mcol_double()[39m,
  mean_temp = [32mcol_double()[39m,
  mean_precip = [32mcol_double()[39m,
  month_average_board_count = [32mcol_double()[39m,
  surrounding_board_count = [32mcol_double()[39m
)



[36m──[39m [1m[1mColumn specification[1m[22m [36m────────────────────────────────────────────────────────[39m
cols(
  month = [32mcol_double()[39m,
  service_kind = [31mcol_character()[39m,
  hour = [32mcol_double()[39m,
  board_count = [32mcol_double()[39m,
  mean_temp = [32mcol_double()[39m,
  mean_precip = [32mcol_double()[39m,
  month_average_board_count = [32mcol_double()[39m,
  surrounding_board_count = [32mcol_double()[39m
)




[1] "Done!"
[1] "_/|_/|_"
[1] "713"



[36m──[39m [1m[1mColumn specification[1m[22m [36m────────────────────────────────────────────────────────[39m
cols(
  month = [32mcol_double()[39m,
  service_kind = [31mcol_character()[39m,
  hour = [32mcol_double()[39m,
  board_count = [32mcol_double()[39m,
  mean_temp = [33mcol_logical()[39m,
  mean_precip = [33mcol_logical()[39m,
  month_average_board_count = [32mcol_double()[39m,
  surrounding_board_count = [32mcol_double()[39m
)



[36m──[39m [1m[1mColumn specification[1m[22m [36m────────────────────────────────────────────────────────[39m
cols(
  month = [32mcol_double()[39m,
  service_kind = [31mcol_character()[39m,
  hour = [32mcol_double()[39m,
  board_count = [32mcol_double()[39m,
  mean_temp = [33mcol_logical()[39m,
  mean_precip = [33mcol_logical()[39m,
  month_average_board_count = [32mcol_double()[39m,
  surrounding_board_count = [32mcol_double()[39m
)




[1] "Done!"
[1] "_/|_/|_"
[1] "286"



[36m──[39m [1m[1mColumn specification[1m[22m [36m────────────────────────────────────────────────────────[39m
cols(
  month = [32mcol_double()[39m,
  service_kind = [31mcol_character()[39m,
  hour = [32mcol_double()[39m,
  board_count = [32mcol_double()[39m,
  mean_temp = [32mcol_double()[39m,
  mean_precip = [32mcol_double()[39m,
  month_average_board_count = [32mcol_double()[39m,
  surrounding_board_count = [32mcol_double()[39m
)



[36m──[39m [1m[1mColumn specification[1m[22m [36m────────────────────────────────────────────────────────[39m
cols(
  month = [32mcol_double()[39m,
  service_kind = [31mcol_character()[39m,
  hour = [32mcol_double()[39m,
  board_count = [32mcol_double()[39m,
  mean_temp = [32mcol_double()[39m,
  mean_precip = [32mcol_double()[39m,
  month_average_board_count = [32mcol_double()[39m,
  surrounding_board_count = [32mcol_double()[39m
)




[1] "Done!"
[1] "_/|_/|_"
[1] "287"



[36m──[39m [1m[1mColumn specification[1m[22m [36m────────────────────────────────────────────────────────[39m
cols(
  month = [32mcol_double()[39m,
  service_kind = [31mcol_character()[39m,
  hour = [32mcol_double()[39m,
  board_count = [32mcol_double()[39m,
  mean_temp = [32mcol_double()[39m,
  mean_precip = [32mcol_double()[39m,
  month_average_board_count = [32mcol_double()[39m,
  surrounding_board_count = [32mcol_double()[39m
)



[36m──[39m [1m[1mColumn specification[1m[22m [36m────────────────────────────────────────────────────────[39m
cols(
  month = [32mcol_double()[39m,
  service_kind = [31mcol_character()[39m,
  hour = [32mcol_double()[39m,
  board_count = [32mcol_double()[39m,
  mean_temp = [32mcol_double()[39m,
  mean_precip = [32mcol_double()[39m,
  month_average_board_count = [32mcol_double()[39m,
  surrounding_board_count = [32mcol_double()[39m
)




[1] "Done!"
[1] "_/|_/|_"
[1] "1351"



[36m──[39m [1m[1mColumn specification[1m[22m [36m────────────────────────────────────────────────────────[39m
cols(
  month = [32mcol_double()[39m,
  service_kind = [31mcol_character()[39m,
  hour = [32mcol_double()[39m,
  board_count = [32mcol_double()[39m,
  mean_temp = [32mcol_double()[39m,
  mean_precip = [32mcol_double()[39m,
  month_average_board_count = [32mcol_double()[39m,
  surrounding_board_count = [32mcol_double()[39m
)



[36m──[39m [1m[1mColumn specification[1m[22m [36m────────────────────────────────────────────────────────[39m
cols(
  month = [32mcol_double()[39m,
  service_kind = [31mcol_character()[39m,
  hour = [32mcol_double()[39m,
  board_count = [32mcol_double()[39m,
  mean_temp = [32mcol_double()[39m,
  mean_precip = [32mcol_double()[39m,
  month_average_board_count = [32mcol_double()[39m,
  surrounding_board_count = [32mcol_double()[39m
)




Random Ferns 

268 samples
  7 predictor
  2 classes: '0', '1' 

No pre-processing
Resampling: Cross-Validated (10 fold, repeated 3 times) 
Summary of sample sizes: 242, 241, 241, 240, 241, 242, ... 
Resampling results across tuning parameters:

  depth  Accuracy   Kappa    
   1     0.9553453  0.9003136
   2     0.9641433  0.9196890
   3     0.9580586  0.9085310
   5     0.9726462  0.9376406
   6     0.9740673  0.9406036
   7     0.9727378  0.9377077
   9     0.9690781  0.9297450
  10     0.9702652  0.9316638
  11     0.9752069  0.9432325
  13     0.9690781  0.9300764
  14     0.9727378  0.9379038
  15     0.9590117  0.9068568

Accuracy was used to select the optimal model using the largest value.
The final value used for the model was depth = 11.
Random Forest 

191 samples
  7 predictor

No pre-processing
Resampling: Cross-Validated (10 fold, repeated 3 times) 
Summary of sample sizes: 171, 173, 172, 171, 172, 173, ... 
Resampling results across tuning parameters:

  min.node.size  


[36m──[39m [1m[1mColumn specification[1m[22m [36m────────────────────────────────────────────────────────[39m
cols(
  month = [32mcol_double()[39m,
  service_kind = [31mcol_character()[39m,
  hour = [32mcol_double()[39m,
  board_count = [32mcol_double()[39m,
  mean_temp = [32mcol_double()[39m,
  mean_precip = [32mcol_double()[39m,
  month_average_board_count = [32mcol_double()[39m,
  surrounding_board_count = [32mcol_double()[39m
)



[36m──[39m [1m[1mColumn specification[1m[22m [36m────────────────────────────────────────────────────────[39m
cols(
  month = [32mcol_double()[39m,
  service_kind = [31mcol_character()[39m,
  hour = [32mcol_double()[39m,
  board_count = [32mcol_double()[39m,
  mean_temp = [32mcol_double()[39m,
  mean_precip = [32mcol_double()[39m,
  month_average_board_count = [32mcol_double()[39m,
  surrounding_board_count = [32mcol_double()[39m
)




Random Ferns 

356 samples
  7 predictor
  2 classes: '0', '1' 

No pre-processing
Resampling: Cross-Validated (10 fold, repeated 3 times) 
Summary of sample sizes: 321, 320, 321, 320, 320, 320, ... 
Resampling results across tuning parameters:

  depth  Accuracy   Kappa    
   1     0.9878307  0.9753318
   2     0.9962434  0.9922323
   3     0.9933598  0.9864930
   5     0.9943651  0.9883484
   6     0.9971958  0.9941931
   7     0.9962434  0.9922323
   9     0.9971958  0.9941931
  10     0.9971958  0.9941931
  11     0.9943651  0.9883940
  13     0.9925132  0.9845965
  14     0.9924868  0.9845102
  15     0.9933862  0.9864852

Accuracy was used to select the optimal model using the largest value.
The final value used for the model was depth = 6.
Random Forest 

216 samples
  7 predictor

No pre-processing
Resampling: Cross-Validated (10 fold, repeated 3 times) 
Summary of sample sizes: 195, 196, 194, 194, 194, 194, ... 
Resampling results across tuning parameters:

  min.node.size  m


[36m──[39m [1m[1mColumn specification[1m[22m [36m────────────────────────────────────────────────────────[39m
cols(
  month = [32mcol_double()[39m,
  service_kind = [31mcol_character()[39m,
  hour = [32mcol_double()[39m,
  board_count = [32mcol_double()[39m,
  mean_temp = [32mcol_double()[39m,
  mean_precip = [32mcol_double()[39m,
  month_average_board_count = [32mcol_double()[39m,
  surrounding_board_count = [32mcol_double()[39m
)



[36m──[39m [1m[1mColumn specification[1m[22m [36m────────────────────────────────────────────────────────[39m
cols(
  month = [32mcol_double()[39m,
  service_kind = [31mcol_character()[39m,
  hour = [32mcol_double()[39m,
  board_count = [32mcol_double()[39m,
  mean_temp = [32mcol_double()[39m,
  mean_precip = [32mcol_double()[39m,
  month_average_board_count = [32mcol_double()[39m,
  surrounding_board_count = [32mcol_double()[39m
)




Random Ferns 

460 samples
  7 predictor
  2 classes: '0', '1' 

No pre-processing
Resampling: Cross-Validated (10 fold, repeated 3 times) 
Summary of sample sizes: 414, 414, 414, 413, 414, 415, ... 
Resampling results across tuning parameters:

  depth  Accuracy   Kappa    
   1     0.9471751  0.8783467
   2     0.9601710  0.9067122
   3     0.9522931  0.8881432
   5     0.9638740  0.9139065
   6     0.9588303  0.9050693
   7     0.9595409  0.9054647
   9     0.9501970  0.8837407
  10     0.9551300  0.8937793
  11     0.9631024  0.9126259
  13     0.9609446  0.9070427
  14     0.9667880  0.9202887
  15     0.9652763  0.9174411

Accuracy was used to select the optimal model using the largest value.
The final value used for the model was depth = 14.
Random Forest 

338 samples
  7 predictor

No pre-processing
Resampling: Cross-Validated (10 fold, repeated 3 times) 
Summary of sample sizes: 304, 304, 305, 304, 304, 304, ... 
Resampling results across tuning parameters:

  min.node.size  


[36m──[39m [1m[1mColumn specification[1m[22m [36m────────────────────────────────────────────────────────[39m
cols(
  month = [32mcol_double()[39m,
  service_kind = [31mcol_character()[39m,
  hour = [32mcol_double()[39m,
  board_count = [32mcol_double()[39m,
  mean_temp = [32mcol_double()[39m,
  mean_precip = [32mcol_double()[39m,
  month_average_board_count = [32mcol_double()[39m,
  surrounding_board_count = [32mcol_double()[39m
)



[36m──[39m [1m[1mColumn specification[1m[22m [36m────────────────────────────────────────────────────────[39m
cols(
  month = [32mcol_double()[39m,
  service_kind = [31mcol_character()[39m,
  hour = [32mcol_double()[39m,
  board_count = [32mcol_double()[39m,
  mean_temp = [32mcol_double()[39m,
  mean_precip = [32mcol_double()[39m,
  month_average_board_count = [32mcol_double()[39m,
  surrounding_board_count = [32mcol_double()[39m
)




Random Ferns 

267 samples
  7 predictor
  2 classes: '0', '1' 

No pre-processing
Resampling: Cross-Validated (10 fold, repeated 3 times) 
Summary of sample sizes: 241, 240, 240, 240, 240, 241, ... 
Resampling results across tuning parameters:

  depth  Accuracy   Kappa    
   1     0.9975309  0.9950413
   2     0.9974834  0.9949566
   3     0.9938272  0.9876440
   5     1.0000000  1.0000000
   6     0.9987654  0.9975207
   7     1.0000000  1.0000000
   9     0.9987654  0.9975207
  10     1.0000000  1.0000000
  11     1.0000000  1.0000000
  13     1.0000000  1.0000000
  14     1.0000000  1.0000000
  15     1.0000000  1.0000000

Accuracy was used to select the optimal model using the largest value.
The final value used for the model was depth = 5.
Random Forest 

149 samples
  7 predictor

No pre-processing
Resampling: Cross-Validated (10 fold, repeated 3 times) 
Summary of sample sizes: 135, 134, 133, 134, 134, 135, ... 
Resampling results across tuning parameters:

  min.node.size  m


[36m──[39m [1m[1mColumn specification[1m[22m [36m────────────────────────────────────────────────────────[39m
cols(
  month = [32mcol_double()[39m,
  service_kind = [31mcol_character()[39m,
  hour = [32mcol_double()[39m,
  board_count = [32mcol_double()[39m,
  mean_temp = [32mcol_double()[39m,
  mean_precip = [32mcol_double()[39m,
  month_average_board_count = [32mcol_double()[39m,
  surrounding_board_count = [32mcol_double()[39m
)



[36m──[39m [1m[1mColumn specification[1m[22m [36m────────────────────────────────────────────────────────[39m
cols(
  month = [32mcol_double()[39m,
  service_kind = [31mcol_character()[39m,
  hour = [32mcol_double()[39m,
  board_count = [32mcol_double()[39m,
  mean_temp = [32mcol_double()[39m,
  mean_precip = [32mcol_double()[39m,
  month_average_board_count = [32mcol_double()[39m,
  surrounding_board_count = [32mcol_double()[39m
)




Random Ferns 

112 samples
  7 predictor
  2 classes: '0', '1' 

No pre-processing
Resampling: Cross-Validated (10 fold, repeated 3 times) 
Summary of sample sizes: 102, 101, 101, 100, 101, 102, ... 
Resampling results across tuning parameters:

  depth  Accuracy   Kappa    
   1     0.8817677  0.6403904
   2     0.8698990  0.6292347
   3     0.8815152  0.6728024
   5     0.8757071  0.6630518
   6     0.8781818  0.6691631
   7     0.8787374  0.6684969
   9     0.8721212  0.6505151
  10     0.8696465  0.6589208
  11     0.8605051  0.6334940
  13     0.8691414  0.6543997
  14     0.8514141  0.6241040
  15     0.8541919  0.6285683

Accuracy was used to select the optimal model using the largest value.
The final value used for the model was depth = 1.


“There were missing values in resampled performance measures.”


Random Forest 

17 samples
 7 predictor

No pre-processing
Resampling: Cross-Validated (10 fold, repeated 3 times) 
Summary of sample sizes: 16, 15, 15, 15, 15, 16, ... 
Resampling results across tuning parameters:

  min.node.size  mtry  splitrule   RMSE      Rsquared  MAE     
   1              5    variance    1.196698    1       1.138130
   1             12    variance    1.244740    1       1.184169
   2              2    maxstat     1.250005    1       1.184802
   5             10    extratrees  1.182185    1       1.113727
   7              3    maxstat     1.241396    1       1.181399
   7              5    extratrees  1.178740    1       1.116092
   9              6    extratrees  1.181660    1       1.119412
   9              6    maxstat     1.226689    1       1.169363
  10             15    maxstat     1.223263    1       1.152708
  10             20    extratrees  1.203973    1       1.130847
  11             10    extratrees  1.192330    1       1.130582
  14            


[36m──[39m [1m[1mColumn specification[1m[22m [36m────────────────────────────────────────────────────────[39m
cols(
  month = [32mcol_double()[39m,
  service_kind = [31mcol_character()[39m,
  hour = [32mcol_double()[39m,
  board_count = [32mcol_double()[39m,
  mean_temp = [32mcol_double()[39m,
  mean_precip = [32mcol_double()[39m,
  month_average_board_count = [32mcol_double()[39m,
  surrounding_board_count = [32mcol_double()[39m
)



[36m──[39m [1m[1mColumn specification[1m[22m [36m────────────────────────────────────────────────────────[39m
cols(
  month = [32mcol_double()[39m,
  service_kind = [31mcol_character()[39m,
  hour = [32mcol_double()[39m,
  board_count = [32mcol_double()[39m,
  mean_temp = [32mcol_double()[39m,
  mean_precip = [32mcol_double()[39m,
  month_average_board_count = [32mcol_double()[39m,
  surrounding_board_count = [32mcol_double()[39m
)




Random Ferns 

368 samples
  7 predictor
  2 classes: '0', '1' 

No pre-processing
Resampling: Cross-Validated (10 fold, repeated 3 times) 
Summary of sample sizes: 332, 331, 331, 331, 332, 331, ... 
Resampling results across tuning parameters:

  depth  Accuracy   Kappa    
   1     0.9655656  0.9279953
   2     0.9728478  0.9427342
   3     0.9809810  0.9596206
   5     0.9827077  0.9634970
   6     0.9818819  0.9617392
   7     0.9872873  0.9730026
   9     0.9827828  0.9638702
  10     0.9818819  0.9614780
  11     0.9836837  0.9653668
  13     0.9827828  0.9635535
  14     0.9809560  0.9596994
  15     0.9809560  0.9597656

Accuracy was used to select the optimal model using the largest value.
The final value used for the model was depth = 7.
Random Forest 

239 samples
  7 predictor

No pre-processing
Resampling: Cross-Validated (10 fold, repeated 3 times) 
Summary of sample sizes: 216, 215, 214, 214, 216, 215, ... 
Resampling results across tuning parameters:

  min.node.size  m


[36m──[39m [1m[1mColumn specification[1m[22m [36m────────────────────────────────────────────────────────[39m
cols(
  month = [32mcol_double()[39m,
  service_kind = [31mcol_character()[39m,
  hour = [32mcol_double()[39m,
  board_count = [32mcol_double()[39m,
  mean_temp = [32mcol_double()[39m,
  mean_precip = [32mcol_double()[39m,
  month_average_board_count = [32mcol_double()[39m,
  surrounding_board_count = [32mcol_double()[39m
)



[36m──[39m [1m[1mColumn specification[1m[22m [36m────────────────────────────────────────────────────────[39m
cols(
  month = [32mcol_double()[39m,
  service_kind = [31mcol_character()[39m,
  hour = [32mcol_double()[39m,
  board_count = [32mcol_double()[39m,
  mean_temp = [32mcol_double()[39m,
  mean_precip = [32mcol_double()[39m,
  month_average_board_count = [32mcol_double()[39m,
  surrounding_board_count = [32mcol_double()[39m
)




Random Ferns 

192 samples
  7 predictor
  2 classes: '0', '1' 

No pre-processing
Resampling: Cross-Validated (10 fold, repeated 3 times) 
Summary of sample sizes: 173, 173, 173, 172, 174, 173, ... 
Resampling results across tuning parameters:

  depth  Accuracy   Kappa    
   1     0.9947368  0.9883436
   2     0.9982456  0.9961145
   3     0.9982456  0.9961145
   5     0.9982456  0.9961145
   6     0.9982456  0.9961145
   7     1.0000000  1.0000000
   9     1.0000000  1.0000000
  10     0.9982456  0.9961145
  11     1.0000000  1.0000000
  13     0.9964912  0.9922290
  14     0.9964912  0.9922290
  15     0.9982456  0.9961145

Accuracy was used to select the optimal model using the largest value.
The final value used for the model was depth = 7.


“There were missing values in resampled performance measures.”


Random Forest 

59 samples
 7 predictor

No pre-processing
Resampling: Cross-Validated (10 fold, repeated 3 times) 
Summary of sample sizes: 54, 52, 53, 53, 53, 53, ... 
Resampling results across tuning parameters:

  min.node.size  mtry  splitrule   RMSE       Rsquared   MAE      
   1             25    extratrees  1.0963021  0.1806110  0.7615951
   1             25    variance    1.0321278  0.2435002  0.7209607
   2             14    maxstat     0.9468905  0.2654887  0.7019651
   5             10    extratrees  1.0586606  0.1674809  0.7428739
   7              9    extratrees  1.0375448  0.1658823  0.7343457
   7             20    extratrees  1.0400380  0.2103841  0.7235160
   9              3    maxstat     0.9361788  0.1897288  0.7306895
  10              1    extratrees  0.9496572  0.1494597  0.7400274
  10             12    variance    0.9719652  0.2826806  0.7070069
  11              5    extratrees  1.0079235  0.1622745  0.7350705
  14              5    variance    0.9749417  0


[36m──[39m [1m[1mColumn specification[1m[22m [36m────────────────────────────────────────────────────────[39m
cols(
  month = [32mcol_double()[39m,
  service_kind = [31mcol_character()[39m,
  hour = [32mcol_double()[39m,
  board_count = [32mcol_double()[39m,
  mean_temp = [32mcol_double()[39m,
  mean_precip = [32mcol_double()[39m,
  month_average_board_count = [32mcol_double()[39m,
  surrounding_board_count = [32mcol_double()[39m
)



[36m──[39m [1m[1mColumn specification[1m[22m [36m────────────────────────────────────────────────────────[39m
cols(
  month = [32mcol_double()[39m,
  service_kind = [31mcol_character()[39m,
  hour = [32mcol_double()[39m,
  board_count = [32mcol_double()[39m,
  mean_temp = [32mcol_double()[39m,
  mean_precip = [32mcol_double()[39m,
  month_average_board_count = [32mcol_double()[39m,
  surrounding_board_count = [32mcol_double()[39m
)


“There were missing values in resampled performance measures.”


Random Ferns 

99 samples
 7 predictor
 2 classes: '0', '1' 

No pre-processing
Resampling: Cross-Validated (10 fold, repeated 3 times) 
Summary of sample sizes: 89, 89, 89, 89, 89, 89, ... 
Resampling results across tuning parameters:

  depth  Accuracy   Kappa    
   1     0.8329630  0.5236356
   2     0.8829630  0.5896292
   3     0.8525926  0.5260598
   5     0.8288889  0.4747718
   6     0.8422222  0.5035722
   7     0.8292593  0.4573430
   9     0.8085185  0.4373127
  10     0.8092593  0.4563710
  11     0.7859259  0.4192830
  13     0.7385185  0.3472245
  14     0.7422222  0.3381244
  15     0.6985185  0.2987738

Accuracy was used to select the optimal model using the largest value.
The final value used for the model was depth = 2.


“There were missing values in resampled performance measures.”


Random Forest 

9 samples
7 predictors

No pre-processing
Resampling: Cross-Validated (10 fold, repeated 3 times) 
Summary of sample sizes: 8, 8, 8, 8, 8, 8, ... 
Resampling results across tuning parameters:

  min.node.size  mtry  splitrule   RMSE       Rsquared  MAE      
  1              14    maxstat     0.6705283  NaN       0.6705283
  1              25    variance    0.6454568  NaN       0.6454568
  2               2    maxstat     0.6696101  NaN       0.6696101
  2               5    extratrees  0.6968892  NaN       0.6968892
  3              10    extratrees  0.7024523  NaN       0.7024523
  5               6    maxstat     0.6731229  NaN       0.6731229
  5              10    extratrees  0.6930272  NaN       0.6930272
  5              12    variance    0.6513823  NaN       0.6513823
  5              15    maxstat     0.6686271  NaN       0.6686271
  6               1    extratrees  0.6750577  NaN       0.6750577
  7               5    variance    0.6658893  NaN       0.6658893


[36m──[39m [1m[1mColumn specification[1m[22m [36m────────────────────────────────────────────────────────[39m
cols(
  month = [32mcol_double()[39m,
  service_kind = [31mcol_character()[39m,
  hour = [32mcol_double()[39m,
  board_count = [32mcol_double()[39m,
  mean_temp = [32mcol_double()[39m,
  mean_precip = [32mcol_double()[39m,
  month_average_board_count = [32mcol_double()[39m,
  surrounding_board_count = [32mcol_double()[39m
)



[36m──[39m [1m[1mColumn specification[1m[22m [36m────────────────────────────────────────────────────────[39m
cols(
  month = [32mcol_double()[39m,
  service_kind = [31mcol_character()[39m,
  hour = [32mcol_double()[39m,
  board_count = [32mcol_double()[39m,
  mean_temp = [32mcol_double()[39m,
  mean_precip = [32mcol_double()[39m,
  month_average_board_count = [32mcol_double()[39m,
  surrounding_board_count = [32mcol_double()[39m
)




Random Ferns 

134 samples
  7 predictor
  2 classes: '0', '1' 

No pre-processing
Resampling: Cross-Validated (10 fold, repeated 3 times) 
Summary of sample sizes: 121, 121, 120, 120, 121, 121, ... 
Resampling results across tuning parameters:

  depth  Accuracy   Kappa    
   1     0.9725275  0.9349523
   2     0.9776557  0.9466641
   3     0.9672161  0.9214915
   5     0.9701465  0.9258347
   6     0.9703297  0.9303951
   7     0.9652015  0.9155284
   9     0.9730769  0.9350927
  10     0.9554945  0.8963423
  11     0.9529304  0.8907810
  13     0.9503663  0.8842514
  14     0.9501832  0.8844337
  15     0.9355311  0.8546163

Accuracy was used to select the optimal model using the largest value.
The final value used for the model was depth = 2.


“There were missing values in resampled performance measures.”


Random Forest 

33 samples
 7 predictor

No pre-processing
Resampling: Cross-Validated (10 fold, repeated 3 times) 
Summary of sample sizes: 30, 30, 30, 29, 30, 30, ... 
Resampling results across tuning parameters:

  min.node.size  mtry  splitrule   RMSE       Rsquared   MAE      
   1             25    extratrees  0.6937884  0.3434851  0.5635246
   1             25    variance    0.6828642  0.3748051  0.5581353
   2             14    maxstat     0.6669974  0.4321174  0.5856588
   5             10    extratrees  0.6692489  0.4218806  0.5673358
   7              9    extratrees  0.6603098  0.4317292  0.5654806
   7             20    extratrees  0.6770484  0.4191223  0.5649417
   9              3    maxstat     0.6466976  0.4919353  0.5870031
  10              1    extratrees  0.6471112  0.5334943  0.5921784
  10             12    variance    0.6650313  0.3840045  0.5680391
  11              5    extratrees  0.6458565  0.4662199  0.5667442
  14              5    variance    0.6437573  0


[36m──[39m [1m[1mColumn specification[1m[22m [36m────────────────────────────────────────────────────────[39m
cols(
  month = [32mcol_double()[39m,
  service_kind = [31mcol_character()[39m,
  hour = [32mcol_double()[39m,
  board_count = [32mcol_double()[39m,
  mean_temp = [32mcol_double()[39m,
  mean_precip = [32mcol_double()[39m,
  month_average_board_count = [32mcol_double()[39m,
  surrounding_board_count = [32mcol_double()[39m
)



[36m──[39m [1m[1mColumn specification[1m[22m [36m────────────────────────────────────────────────────────[39m
cols(
  month = [32mcol_double()[39m,
  service_kind = [31mcol_character()[39m,
  hour = [32mcol_double()[39m,
  board_count = [32mcol_double()[39m,
  mean_temp = [32mcol_double()[39m,
  mean_precip = [32mcol_double()[39m,
  month_average_board_count = [32mcol_double()[39m,
  surrounding_board_count = [32mcol_double()[39m
)




Random Ferns 

203 samples
  7 predictor
  2 classes: '0', '1' 

No pre-processing
Resampling: Cross-Validated (10 fold, repeated 3 times) 
Summary of sample sizes: 183, 183, 183, 182, 183, 183, ... 
Resampling results across tuning parameters:

  depth  Accuracy  Kappa
   1     1         1    
   2     1         1    
   3     1         1    
   5     1         1    
   6     1         1    
   7     1         1    
   9     1         1    
  10     1         1    
  11     1         1    
  13     1         1    
  14     1         1    
  15     1         1    

Accuracy was used to select the optimal model using the largest value.
The final value used for the model was depth = 1.
Random Forest 

90 samples
 7 predictor

No pre-processing
Resampling: Cross-Validated (10 fold, repeated 3 times) 
Summary of sample sizes: 81, 81, 82, 80, 80, 82, ... 
Resampling results across tuning parameters:

  min.node.size  mtry  splitrule   RMSE       Rsquared   MAE      
   1             25    e


[36m──[39m [1m[1mColumn specification[1m[22m [36m────────────────────────────────────────────────────────[39m
cols(
  month = [32mcol_double()[39m,
  service_kind = [31mcol_character()[39m,
  hour = [32mcol_double()[39m,
  board_count = [32mcol_double()[39m,
  mean_temp = [32mcol_double()[39m,
  mean_precip = [32mcol_double()[39m,
  month_average_board_count = [32mcol_double()[39m,
  surrounding_board_count = [32mcol_double()[39m
)



[36m──[39m [1m[1mColumn specification[1m[22m [36m────────────────────────────────────────────────────────[39m
cols(
  month = [32mcol_double()[39m,
  service_kind = [31mcol_character()[39m,
  hour = [32mcol_double()[39m,
  board_count = [32mcol_double()[39m,
  mean_temp = [32mcol_double()[39m,
  mean_precip = [32mcol_double()[39m,
  month_average_board_count = [32mcol_double()[39m,
  surrounding_board_count = [32mcol_double()[39m
)


“There were missing values in resampled performance measures.”


Random Ferns 

100 samples
  7 predictor
  2 classes: '0', '1' 

No pre-processing
Resampling: Cross-Validated (10 fold, repeated 3 times) 
Summary of sample sizes: 90, 90, 90, 89, 90, 90, ... 
Resampling results across tuning parameters:

  depth  Accuracy   Kappa    
   1     0.7313805  0.3346099
   2     0.7629966  0.3902514
   3     0.7533670  0.3734383
   5     0.7313131  0.3246982
   6     0.7379125  0.3471621
   7     0.7051852  0.3063058
   9     0.6745118  0.2770419
  10     0.6641414  0.2741274
  11     0.6477778  0.2399438
  13     0.5914141  0.1947231
  14     0.5917172  0.1934879
  15     0.6011111  0.1994458

Accuracy was used to select the optimal model using the largest value.
The final value used for the model was depth = 2.


“There were missing values in resampled performance measures.”


Random Forest 

9 samples
7 predictors

No pre-processing
Resampling: Cross-Validated (10 fold, repeated 3 times) 
Summary of sample sizes: 8, 8, 8, 8, 8, 8, ... 
Resampling results across tuning parameters:

  min.node.size  mtry  splitrule   RMSE       Rsquared  MAE      
  1              14    maxstat     0.6667073  NaN       0.6667073
  1              25    variance    0.7451111  NaN       0.7451111
  2               2    maxstat     0.6153467  NaN       0.6153467
  2               5    extratrees  0.6497203  NaN       0.6497203
  3              10    extratrees  0.6544739  NaN       0.6544739
  5               6    maxstat     0.6481204  NaN       0.6481204
  5              10    extratrees  0.6563624  NaN       0.6563624
  5              12    variance    0.6996497  NaN       0.6996497
  5              15    maxstat     0.6769205  NaN       0.6769205
  6               1    extratrees  0.5959494  NaN       0.5959494
  7               5    variance    0.6493932  NaN       0.6493932


[36m──[39m [1m[1mColumn specification[1m[22m [36m────────────────────────────────────────────────────────[39m
cols(
  month = [32mcol_double()[39m,
  service_kind = [31mcol_character()[39m,
  hour = [32mcol_double()[39m,
  board_count = [32mcol_double()[39m,
  mean_temp = [32mcol_double()[39m,
  mean_precip = [32mcol_double()[39m,
  month_average_board_count = [32mcol_double()[39m,
  surrounding_board_count = [32mcol_double()[39m
)



[36m──[39m [1m[1mColumn specification[1m[22m [36m────────────────────────────────────────────────────────[39m
cols(
  month = [32mcol_double()[39m,
  service_kind = [31mcol_character()[39m,
  hour = [32mcol_double()[39m,
  board_count = [32mcol_double()[39m,
  mean_temp = [32mcol_double()[39m,
  mean_precip = [32mcol_double()[39m,
  month_average_board_count = [32mcol_double()[39m,
  surrounding_board_count = [32mcol_double()[39m
)




Random Ferns 

128 samples
  7 predictor
  2 classes: '0', '1' 

No pre-processing
Resampling: Cross-Validated (10 fold, repeated 3 times) 
Summary of sample sizes: 116, 114, 115, 115, 115, 116, ... 
Resampling results across tuning parameters:

  depth  Accuracy   Kappa    
   1     0.9253053  0.8027330
   2     0.8956044  0.7362080
   3     0.9043651  0.7504041
   5     0.8720085  0.6788808
   6     0.8700855  0.6735186
   7     0.8936203  0.7258716
   9     0.8461844  0.6265966
  10     0.8519231  0.6376832
  11     0.8515263  0.6324325
  13     0.8384921  0.6127918
  14     0.8303724  0.6027513
  15     0.8202991  0.5782469

Accuracy was used to select the optimal model using the largest value.
The final value used for the model was depth = 1.


“There were missing values in resampled performance measures.”


Random Forest 

22 samples
 7 predictor

No pre-processing
Resampling: Cross-Validated (10 fold, repeated 3 times) 
Summary of sample sizes: 20, 20, 20, 19, 20, 20, ... 
Resampling results across tuning parameters:

  min.node.size  mtry  splitrule   RMSE       Rsquared   MAE      
   1             25    extratrees  0.8633071  0.9695330  0.6553000
   1             25    variance    0.8460040  0.9765524  0.6307642
   2             14    maxstat     0.8764104  0.9826092  0.7006834
   5             10    extratrees  0.8145819  0.9755781  0.6341087
   7              9    extratrees  0.8097846  0.9759280  0.6391069
   7             20    extratrees  0.8198846  0.9671132  0.6341740
   9              3    maxstat     0.8773679  0.9928535  0.7434502
  10              1    extratrees  0.8823687  0.9856811  0.7644618
  10             12    variance    0.8347714  0.9880608  0.6513848
  11              5    extratrees  0.8242716  0.9773859  0.6707785
  14              5    variance    0.8350561  0


[36m──[39m [1m[1mColumn specification[1m[22m [36m────────────────────────────────────────────────────────[39m
cols(
  month = [32mcol_double()[39m,
  service_kind = [31mcol_character()[39m,
  hour = [32mcol_double()[39m,
  board_count = [32mcol_double()[39m,
  mean_temp = [32mcol_double()[39m,
  mean_precip = [32mcol_double()[39m,
  month_average_board_count = [32mcol_double()[39m,
  surrounding_board_count = [32mcol_double()[39m
)



[36m──[39m [1m[1mColumn specification[1m[22m [36m────────────────────────────────────────────────────────[39m
cols(
  month = [32mcol_double()[39m,
  service_kind = [31mcol_character()[39m,
  hour = [32mcol_double()[39m,
  board_count = [32mcol_double()[39m,
  mean_temp = [32mcol_double()[39m,
  mean_precip = [32mcol_double()[39m,
  month_average_board_count = [32mcol_double()[39m,
  surrounding_board_count = [32mcol_double()[39m
)


“There were missing values in resampled performance measures.”


Random Ferns 

100 samples
  7 predictor
  2 classes: '0', '1' 

No pre-processing
Resampling: Cross-Validated (10 fold, repeated 3 times) 
Summary of sample sizes: 90, 91, 90, 89, 91, 90, ... 
Resampling results across tuning parameters:

  depth  Accuracy   Kappa     
   1     0.7234007  0.29179984
   2     0.7478451  0.28072118
   3     0.7319865  0.25296925
   5     0.6892256  0.21572417
   6     0.6618519  0.17410407
   7     0.6117508  0.15509104
   9     0.5191919  0.09347755
  10     0.5385185  0.11597732
  11     0.4443434  0.06074259
  13     0.3857576  0.05203221
  14     0.3596296  0.04417716
  15     0.3530303  0.04146803

Accuracy was used to select the optimal model using the largest value.
The final value used for the model was depth = 2.


“You are trying to do regression and your outcome only has two possible values Are you trying to do classification? If so, use a 2 level factor as your outcome column.”
“There were missing values in resampled performance measures.”


Random Forest 

7 samples
7 predictors

No pre-processing
Resampling: Cross-Validated (10 fold, repeated 3 times) 
Summary of sample sizes: 6, 6, 6, 6, 6, 6, ... 
Resampling results across tuning parameters:

  min.node.size  mtry  splitrule   RMSE       Rsquared  MAE      
  1               5    maxstat     0.2778937  NaN       0.2778937
  1              10    variance    0.2688921  NaN       0.2688921
  2               7    maxstat     0.2751159  NaN       0.2751159
  2               9    maxstat     0.2675127  NaN       0.2675127
  2              10    variance    0.2682397  NaN       0.2682397
  3              14    variance    0.2683651  NaN       0.2683651
  3              21    extratrees  0.2612381  NaN       0.2612381
  3              25    extratrees  0.2615873  NaN       0.2615873
  5               2    variance    0.2849857  NaN       0.2849857
  5               5    extratrees  0.2800016  NaN       0.2800016
  5               9    extratrees  0.2787413  NaN       0.2787413


[36m──[39m [1m[1mColumn specification[1m[22m [36m────────────────────────────────────────────────────────[39m
cols(
  month = [32mcol_double()[39m,
  service_kind = [31mcol_character()[39m,
  hour = [32mcol_double()[39m,
  board_count = [32mcol_double()[39m,
  mean_temp = [32mcol_double()[39m,
  mean_precip = [32mcol_double()[39m,
  month_average_board_count = [32mcol_double()[39m,
  surrounding_board_count = [32mcol_double()[39m
)



[36m──[39m [1m[1mColumn specification[1m[22m [36m────────────────────────────────────────────────────────[39m
cols(
  month = [32mcol_double()[39m,
  service_kind = [31mcol_character()[39m,
  hour = [32mcol_double()[39m,
  board_count = [32mcol_double()[39m,
  mean_temp = [32mcol_double()[39m,
  mean_precip = [32mcol_double()[39m,
  month_average_board_count = [32mcol_double()[39m,
  surrounding_board_count = [32mcol_double()[39m
)


“There were missing values in resampled performance measures.”


Random Ferns 

103 samples
  7 predictor
  2 classes: '0', '1' 

No pre-processing
Resampling: Cross-Validated (10 fold, repeated 3 times) 
Summary of sample sizes: 94, 92, 93, 92, 93, 94, ... 
Resampling results across tuning parameters:

  depth  Accuracy   Kappa    
   1     0.6874747  0.2077037
   2     0.7046465  0.1961093
   3     0.6979125  0.2032655
   5     0.6848148  0.1818526
   6     0.6677778  0.1727172
   7     0.6492929  0.1677041
   9     0.6174747  0.1492179
  10     0.6096970  0.1513475
  11     0.5812795  0.1304818
  13     0.5615825  0.1244020
  14     0.5488552  0.1126583
  15     0.5236364  0.1004674

Accuracy was used to select the optimal model using the largest value.
The final value used for the model was depth = 2.


“You are trying to do regression and your outcome only has two possible values Are you trying to do classification? If so, use a 2 level factor as your outcome column.”
“There were missing values in resampled performance measures.”


Random Forest 

7 samples
7 predictors

No pre-processing
Resampling: Cross-Validated (10 fold, repeated 3 times) 
Summary of sample sizes: 6, 6, 6, 6, 6, 6, ... 
Resampling results across tuning parameters:

  min.node.size  mtry  splitrule   RMSE       Rsquared  MAE      
  1               5    maxstat     0.4506238  NaN       0.4506238
  1              10    variance    0.4142111  NaN       0.4142111
  2               7    maxstat     0.4401381  NaN       0.4401381
  2               9    maxstat     0.4465349  NaN       0.4465349
  2              10    variance    0.4153794  NaN       0.4153794
  3              14    variance    0.4000952  NaN       0.4000952
  3              21    extratrees  0.4333333  NaN       0.4333333
  3              25    extratrees  0.4340635  NaN       0.4340635
  5               2    variance    0.4628968  NaN       0.4628968
  5               5    extratrees  0.4593651  NaN       0.4593651
  5               9    extratrees  0.4582603  NaN       0.4582603


[36m──[39m [1m[1mColumn specification[1m[22m [36m────────────────────────────────────────────────────────[39m
cols(
  month = [32mcol_double()[39m,
  service_kind = [31mcol_character()[39m,
  hour = [32mcol_double()[39m,
  board_count = [32mcol_double()[39m,
  mean_temp = [32mcol_double()[39m,
  mean_precip = [32mcol_double()[39m,
  month_average_board_count = [32mcol_double()[39m,
  surrounding_board_count = [32mcol_double()[39m
)



[36m──[39m [1m[1mColumn specification[1m[22m [36m────────────────────────────────────────────────────────[39m
cols(
  month = [32mcol_double()[39m,
  service_kind = [31mcol_character()[39m,
  hour = [32mcol_double()[39m,
  board_count = [32mcol_double()[39m,
  mean_temp = [32mcol_double()[39m,
  mean_precip = [32mcol_double()[39m,
  month_average_board_count = [32mcol_double()[39m,
  surrounding_board_count = [32mcol_double()[39m
)




Random Ferns 

229 samples
  7 predictor
  2 classes: '0', '1' 

No pre-processing
Resampling: Cross-Validated (10 fold, repeated 3 times) 
Summary of sample sizes: 206, 206, 207, 206, 206, 207, ... 
Resampling results across tuning parameters:

  depth  Accuracy  Kappa
   1     1         1    
   2     1         1    
   3     1         1    
   5     1         1    
   6     1         1    
   7     1         1    
   9     1         1    
  10     1         1    
  11     1         1    
  13     1         1    
  14     1         1    
  15     1         1    

Accuracy was used to select the optimal model using the largest value.
The final value used for the model was depth = 1.
Random Forest 

83 samples
 7 predictor

No pre-processing
Resampling: Cross-Validated (10 fold, repeated 3 times) 
Summary of sample sizes: 76, 75, 74, 75, 75, 74, ... 
Resampling results across tuning parameters:

  min.node.size  mtry  splitrule   RMSE       Rsquared   MAE      
   1             25    e


[36m──[39m [1m[1mColumn specification[1m[22m [36m────────────────────────────────────────────────────────[39m
cols(
  month = [32mcol_double()[39m,
  service_kind = [31mcol_character()[39m,
  hour = [32mcol_double()[39m,
  board_count = [32mcol_double()[39m,
  mean_temp = [32mcol_double()[39m,
  mean_precip = [32mcol_double()[39m,
  month_average_board_count = [32mcol_double()[39m,
  surrounding_board_count = [32mcol_double()[39m
)



[36m──[39m [1m[1mColumn specification[1m[22m [36m────────────────────────────────────────────────────────[39m
cols(
  month = [32mcol_double()[39m,
  service_kind = [31mcol_character()[39m,
  hour = [32mcol_double()[39m,
  board_count = [32mcol_double()[39m,
  mean_temp = [32mcol_double()[39m,
  mean_precip = [32mcol_double()[39m,
  month_average_board_count = [32mcol_double()[39m,
  surrounding_board_count = [32mcol_double()[39m
)


“There were missing values in resampled performance measures.”


Random Ferns 

100 samples
  7 predictor
  2 classes: '0', '1' 

No pre-processing
Resampling: Cross-Validated (10 fold, repeated 3 times) 
Summary of sample sizes: 90, 90, 90, 89, 91, 90, ... 
Resampling results across tuning parameters:

  depth  Accuracy   Kappa    
   1     0.7441751  0.2628386
   2     0.7435017  0.2789350
   3     0.7271380  0.2367484
   5     0.7010101  0.2119653
   6     0.7130976  0.2514747
   7     0.6986532  0.2594945
   9     0.6819192  0.2413365
  10     0.6617845  0.2440425
  11     0.6525253  0.2287492
  13     0.6151178  0.1979920
  14     0.5947475  0.1762827
  15     0.6016498  0.1882010

Accuracy was used to select the optimal model using the largest value.
The final value used for the model was depth = 1.


“There were missing values in resampled performance measures.”


Random Forest 

8 samples
7 predictors

No pre-processing
Resampling: Cross-Validated (10 fold, repeated 3 times) 
Summary of sample sizes: 7, 7, 7, 7, 7, 7, ... 
Resampling results across tuning parameters:

  min.node.size  mtry  splitrule   RMSE       Rsquared  MAE      
  1               5    maxstat     0.9666837  NaN       0.9666837
  1              10    variance    0.9247169  NaN       0.9247169
  2               7    maxstat     0.9577312  NaN       0.9577312
  2               9    maxstat     0.9750200  NaN       0.9750200
  2              10    variance    0.9222097  NaN       0.9222097
  3              14    variance    0.9250692  NaN       0.9250692
  3              21    extratrees  0.9430972  NaN       0.9430972
  3              25    extratrees  0.9297361  NaN       0.9297361
  5               2    variance    0.9865375  NaN       0.9865375
  5               5    extratrees  0.9316502  NaN       0.9316502
  5               9    extratrees  0.9390284  NaN       0.9390284


[36m──[39m [1m[1mColumn specification[1m[22m [36m────────────────────────────────────────────────────────[39m
cols(
  month = [32mcol_double()[39m,
  service_kind = [31mcol_character()[39m,
  hour = [32mcol_double()[39m,
  board_count = [32mcol_double()[39m,
  mean_temp = [32mcol_double()[39m,
  mean_precip = [32mcol_double()[39m,
  month_average_board_count = [32mcol_double()[39m,
  surrounding_board_count = [32mcol_double()[39m
)



[36m──[39m [1m[1mColumn specification[1m[22m [36m────────────────────────────────────────────────────────[39m
cols(
  month = [32mcol_double()[39m,
  service_kind = [31mcol_character()[39m,
  hour = [32mcol_double()[39m,
  board_count = [32mcol_double()[39m,
  mean_temp = [32mcol_double()[39m,
  mean_precip = [32mcol_double()[39m,
  month_average_board_count = [32mcol_double()[39m,
  surrounding_board_count = [32mcol_double()[39m
)




Random Ferns 

88 samples
 7 predictor
 2 classes: '0', '1' 

No pre-processing
Resampling: Cross-Validated (10 fold, repeated 3 times) 
Summary of sample sizes: 80, 79, 80, 78, 79, 80, ... 
Resampling results across tuning parameters:

  depth  Accuracy    Kappa      
   1     0.21203704  0.002827443
   2     0.17666667  0.000000000
   3     0.14379630  0.000000000
   5     0.07759259  0.000000000
   6     0.05953704  0.000000000
   7     0.04750000  0.000000000
   9     0.03962963  0.000000000
  10     0.03222222  0.000000000
  11     0.03222222  0.000000000
  13     0.03222222  0.000000000
  14     0.03222222  0.000000000
  15     0.03222222  0.000000000

Accuracy was used to select the optimal model using the largest value.
The final value used for the model was depth = 1.


“You are trying to do regression and your outcome only has two possible values Are you trying to do classification? If so, use a 2 level factor as your outcome column.”
“There were missing values in resampled performance measures.”


Random Forest 

3 samples
7 predictors

No pre-processing
Resampling: Cross-Validated (10 fold, repeated 3 times) 
Summary of sample sizes: 2, 2, 2, 2, 2, 2, ... 
Resampling results across tuning parameters:

  min.node.size  mtry  splitrule   RMSE       Rsquared  MAE      
  1               2    variance    0.6628889  NaN       0.6628889
  1               5    extratrees  0.6492222  NaN       0.6492222
  1               5    maxstat     0.6668889  NaN       0.6668889
  1               9    extratrees  0.6477778  NaN       0.6477778
  1              10    variance    0.6653333  NaN       0.6653333
  2               5    extratrees  0.6704444  NaN       0.6704444
  2               7    maxstat     0.6658889  NaN       0.6658889
  2               9    maxstat     0.6661111  NaN       0.6661111
  2              10    variance    0.6623333  NaN       0.6623333
  2              25    maxstat     0.6667778  NaN       0.6667778
  3              14    maxstat     0.6711111  NaN       0.6711111


[36m──[39m [1m[1mColumn specification[1m[22m [36m────────────────────────────────────────────────────────[39m
cols(
  month = [32mcol_double()[39m,
  service_kind = [31mcol_character()[39m,
  hour = [32mcol_double()[39m,
  board_count = [32mcol_double()[39m,
  mean_temp = [32mcol_double()[39m,
  mean_precip = [32mcol_double()[39m,
  month_average_board_count = [32mcol_double()[39m,
  surrounding_board_count = [32mcol_double()[39m
)



[36m──[39m [1m[1mColumn specification[1m[22m [36m────────────────────────────────────────────────────────[39m
cols(
  month = [32mcol_double()[39m,
  service_kind = [31mcol_character()[39m,
  hour = [32mcol_double()[39m,
  board_count = [32mcol_double()[39m,
  mean_temp = [32mcol_double()[39m,
  mean_precip = [32mcol_double()[39m,
  month_average_board_count = [32mcol_double()[39m,
  surrounding_board_count = [32mcol_double()[39m
)




Random Ferns 

236 samples
  7 predictor
  2 classes: '0', '1' 

No pre-processing
Resampling: Cross-Validated (10 fold, repeated 3 times) 
Summary of sample sizes: 213, 212, 213, 212, 213, 213, ... 
Resampling results across tuning parameters:

  depth  Accuracy   Kappa    
   1     0.9250362  0.8222137
   2     0.9308841  0.8317483
   3     0.9250918  0.8202553
   5     0.9267222  0.8239003
   6     0.9267778  0.8237186
   7     0.9282174  0.8259415
   9     0.9309444  0.8336408
  10     0.9240556  0.8183887
  11     0.9239348  0.8152485
  13     0.9195266  0.8058298
  14     0.9182536  0.8044648
  15     0.9099758  0.7870153

Accuracy was used to select the optimal model using the largest value.
The final value used for the model was depth = 9.
Random Forest 

53 samples
 7 predictor

No pre-processing
Resampling: Cross-Validated (10 fold, repeated 3 times) 
Summary of sample sizes: 49, 47, 47, 48, 48, 47, ... 
Resampling results across tuning parameters:

  min.node.size  mtry  spl


[36m──[39m [1m[1mColumn specification[1m[22m [36m────────────────────────────────────────────────────────[39m
cols(
  month = [32mcol_double()[39m,
  service_kind = [31mcol_character()[39m,
  hour = [32mcol_double()[39m,
  board_count = [32mcol_double()[39m,
  mean_temp = [32mcol_double()[39m,
  mean_precip = [32mcol_double()[39m,
  month_average_board_count = [32mcol_double()[39m,
  surrounding_board_count = [32mcol_double()[39m
)



[36m──[39m [1m[1mColumn specification[1m[22m [36m────────────────────────────────────────────────────────[39m
cols(
  month = [32mcol_double()[39m,
  service_kind = [31mcol_character()[39m,
  hour = [32mcol_double()[39m,
  board_count = [32mcol_double()[39m,
  mean_temp = [32mcol_double()[39m,
  mean_precip = [32mcol_double()[39m,
  month_average_board_count = [32mcol_double()[39m,
  surrounding_board_count = [32mcol_double()[39m
)




Random Ferns 

193 samples
  7 predictor
  2 classes: '0', '1' 

No pre-processing
Resampling: Cross-Validated (10 fold, repeated 3 times) 
Summary of sample sizes: 174, 174, 174, 174, 174, 174, ... 
Resampling results across tuning parameters:

  depth  Accuracy   Kappa    
   1     0.9518324  0.8840792
   2     0.9519298  0.8834101
   3     0.9585867  0.8985140
   5     0.9546394  0.8907020
   6     0.9412768  0.8593804
   7     0.9498051  0.8760934
   9     0.9324366  0.8377247
  10     0.9413938  0.8598184
  11     0.9238499  0.8171414
  13     0.9100877  0.7945379
  14     0.9029532  0.7739053
  15     0.8906628  0.7516062

Accuracy was used to select the optimal model using the largest value.
The final value used for the model was depth = 3.


“There were missing values in resampled performance measures.”


Random Forest 

44 samples
 7 predictor

No pre-processing
Resampling: Cross-Validated (10 fold, repeated 3 times) 
Summary of sample sizes: 40, 40, 40, 39, 40, 40, ... 
Resampling results across tuning parameters:

  min.node.size  mtry  splitrule   RMSE       Rsquared   MAE      
   1             25    extratrees  1.3644888  0.2995922  1.0050088
   1             25    variance    1.3412926  0.2561793  0.9735577
   2             14    maxstat     1.0446010  0.2105315  0.8256427
   5             10    extratrees  1.2231017  0.2943209  0.9247631
   7              9    extratrees  1.1916355  0.2772164  0.9052714
   7             20    extratrees  1.2443799  0.2782616  0.9371139
   9              3    maxstat     0.9809154  0.1812394  0.7888799
  10              1    extratrees  0.9894688  0.2754269  0.7986987
  10             12    variance    1.1741154  0.2043843  0.8860983
  11              5    extratrees  1.1292743  0.2626160  0.8773116
  14              5    variance    1.0968977  0


[36m──[39m [1m[1mColumn specification[1m[22m [36m────────────────────────────────────────────────────────[39m
cols(
  month = [32mcol_double()[39m,
  service_kind = [31mcol_character()[39m,
  hour = [32mcol_double()[39m,
  board_count = [32mcol_double()[39m,
  mean_temp = [32mcol_double()[39m,
  mean_precip = [32mcol_double()[39m,
  month_average_board_count = [32mcol_double()[39m,
  surrounding_board_count = [32mcol_double()[39m
)



[36m──[39m [1m[1mColumn specification[1m[22m [36m────────────────────────────────────────────────────────[39m
cols(
  month = [32mcol_double()[39m,
  service_kind = [31mcol_character()[39m,
  hour = [32mcol_double()[39m,
  board_count = [32mcol_double()[39m,
  mean_temp = [32mcol_double()[39m,
  mean_precip = [32mcol_double()[39m,
  month_average_board_count = [32mcol_double()[39m,
  surrounding_board_count = [32mcol_double()[39m
)




Random Ferns 

154 samples
  7 predictor
  2 classes: '0', '1' 

No pre-processing
Resampling: Cross-Validated (10 fold, repeated 3 times) 
Summary of sample sizes: 139, 138, 138, 139, 138, 139, ... 
Resampling results across tuning parameters:

  depth  Accuracy   Kappa    
   1     0.9890079  0.9739419
   2     0.9869246  0.9720486
   3     0.9867857  0.9696326
   5     0.9786111  0.9532681
   6     0.9760913  0.9481105
   7     0.9784524  0.9520732
   9     0.9719246  0.9387033
  10     0.9717857  0.9369070
  11     0.9717857  0.9389720
  13     0.9674802  0.9296124
  14     0.9655357  0.9256311
  15     0.9631746  0.9188419

Accuracy was used to select the optimal model using the largest value.
The final value used for the model was depth = 1.
Random Forest 

47 samples
 7 predictor

No pre-processing
Resampling: Cross-Validated (10 fold, repeated 3 times) 
Summary of sample sizes: 43, 42, 42, 42, 43, 42, ... 
Resampling results across tuning parameters:

  min.node.size  mtry  spl


[36m──[39m [1m[1mColumn specification[1m[22m [36m────────────────────────────────────────────────────────[39m
cols(
  month = [32mcol_double()[39m,
  service_kind = [31mcol_character()[39m,
  hour = [32mcol_double()[39m,
  board_count = [32mcol_double()[39m,
  mean_temp = [32mcol_double()[39m,
  mean_precip = [32mcol_double()[39m,
  month_average_board_count = [32mcol_double()[39m,
  surrounding_board_count = [32mcol_double()[39m
)



[36m──[39m [1m[1mColumn specification[1m[22m [36m────────────────────────────────────────────────────────[39m
cols(
  month = [32mcol_double()[39m,
  service_kind = [31mcol_character()[39m,
  hour = [32mcol_double()[39m,
  board_count = [32mcol_double()[39m,
  mean_temp = [32mcol_double()[39m,
  mean_precip = [32mcol_double()[39m,
  month_average_board_count = [32mcol_double()[39m,
  surrounding_board_count = [32mcol_double()[39m
)




[1] "Done!"
[1] "_/|_/|_"


In [None]:
#### Post-lockdown 

In [131]:
bus_stops_r1_dir1_post = Post_lock %>%  
    filter(route_id == '1', direction_id == '1') %>%
    pull(stop_id)

In [132]:
bus_stops_r1_dir1_post = unique(bus_stops_r1_dir1_post)
bus_stops_r1_dir1_post

In [None]:
# c(95,72, 1892, 1582, 126, 127, 140, 144, 102, 690)

In [148]:
which(bus_stops_r1_dir1_post == '102', arr.ind = T)

In [149]:
for(i in bus_stops_r1_dir1_post[83:length(bus_stops_r1_dir1_post)]){
    print(i)
    RF_Ferns_and_Ranger('1', '1', i, 'post')
    print('Done!')
    print('_/|_/|_')
}

[1] "690"



[36m──[39m [1m[1mColumn specification[1m[22m [36m────────────────────────────────────────────────────────[39m
cols(
  month = [32mcol_double()[39m,
  service_kind = [31mcol_character()[39m,
  hour = [32mcol_double()[39m,
  board_count = [32mcol_double()[39m,
  mean_temp = [32mcol_double()[39m,
  mean_precip = [32mcol_double()[39m,
  month_average_board_count = [32mcol_double()[39m,
  surrounding_board_count = [32mcol_double()[39m
)



[36m──[39m [1m[1mColumn specification[1m[22m [36m────────────────────────────────────────────────────────[39m
cols(
  month = [32mcol_double()[39m,
  service_kind = [31mcol_character()[39m,
  hour = [32mcol_double()[39m,
  board_count = [32mcol_double()[39m,
  mean_temp = [32mcol_double()[39m,
  mean_precip = [32mcol_double()[39m,
  month_average_board_count = [32mcol_double()[39m,
  surrounding_board_count = [32mcol_double()[39m
)




[1] "Done!"
[1] "_/|_/|_"


# Route 9

## Direction 0

### Pre-lockdown

In [69]:
bus_stops_r9_dir0_pre = Pre_lock %>%  
    filter(route_id == '9', direction_id == '0') %>%
    pull(stop_id)

bus_stops_r9_dir0_pre = unique(bus_stops_r9_dir0_pre)
bus_stops_r9_dir0_pre

In [None]:
# c(354,283, 285, 713, 286, 287, 1087, 1710)

In [79]:
which(bus_stops_r9_dir0_pre == '1710', arr.ind = T)

In [80]:
for(i in bus_stops_r9_dir0_pre[79:length(bus_stops_r9_dir0_pre)]){
    print(i)
    RF_Ferns_and_Ranger('9', '0', i, 'pre')
    print('Done!')
    print('_/|_/|_')
}

[1] "1224"



[36m──[39m [1m[1mColumn specification[1m[22m [36m────────────────────────────────────────────────────────[39m
cols(
  month = [32mcol_double()[39m,
  service_kind = [31mcol_character()[39m,
  hour = [32mcol_double()[39m,
  board_count = [32mcol_double()[39m,
  mean_temp = [32mcol_double()[39m,
  mean_precip = [32mcol_double()[39m,
  month_average_board_count = [32mcol_double()[39m,
  surrounding_board_count = [32mcol_double()[39m
)



[36m──[39m [1m[1mColumn specification[1m[22m [36m────────────────────────────────────────────────────────[39m
cols(
  month = [32mcol_double()[39m,
  service_kind = [31mcol_character()[39m,
  hour = [32mcol_double()[39m,
  board_count = [32mcol_double()[39m,
  mean_temp = [32mcol_double()[39m,
  mean_precip = [32mcol_double()[39m,
  month_average_board_count = [32mcol_double()[39m,
  surrounding_board_count = [32mcol_double()[39m
)




Random Ferns 

1311 samples
   7 predictor
   2 classes: '0', '1' 

No pre-processing
Resampling: Cross-Validated (10 fold, repeated 3 times) 
Summary of sample sizes: 1180, 1180, 1180, 1179, 1180, 1181, ... 
Resampling results across tuning parameters:

  depth  Accuracy  Kappa
   1     1         1    
   2     1         1    
   3     1         1    
   5     1         1    
   6     1         1    
   7     1         1    
   9     1         1    
  10     1         1    
  11     1         1    
  13     1         1    
  14     1         1    
  15     1         1    

Accuracy was used to select the optimal model using the largest value.
The final value used for the model was depth = 1.
Random Forest 

508 samples
  7 predictor

No pre-processing
Resampling: Cross-Validated (10 fold, repeated 3 times) 
Summary of sample sizes: 458, 457, 457, 457, 457, 457, ... 
Resampling results across tuning parameters:

  min.node.size  mtry  splitrule   RMSE       Rsquared    MAE      
   1  

### Post-lockdown

In [216]:
bus_stops_r9_dir0_post = Post_lock %>%  
    filter(route_id == '9', direction_id == '0') %>%
    pull(stop_id)

bus_stops_r9_dir0_post = unique(bus_stops_r9_dir0_post)
bus_stops_r9_dir0_post

In [217]:
# c(1087, 1088, 1089, 1096, 1104, 1110, 1122, 1129, 1559, 1710)

In [220]:
which(bus_stops_r9_dir0_post == '1710', arr.ind = T)

In [219]:
for(i in bus_stops_r9_dir0_post[77:length(bus_stops_r9_dir0_post)]){
    print(i)
    RF_Ferns_and_Ranger('9', '0', i, 'post')
    print('Done!')
    print('_/|_/|_')
}

[1] "1136"



[36m──[39m [1m[1mColumn specification[1m[22m [36m────────────────────────────────────────────────────────[39m
cols(
  month = [32mcol_double()[39m,
  service_kind = [31mcol_character()[39m,
  hour = [32mcol_double()[39m,
  board_count = [32mcol_double()[39m,
  mean_temp = [32mcol_double()[39m,
  mean_precip = [32mcol_double()[39m,
  month_average_board_count = [32mcol_double()[39m,
  surrounding_board_count = [32mcol_double()[39m
)



[36m──[39m [1m[1mColumn specification[1m[22m [36m────────────────────────────────────────────────────────[39m
cols(
  month = [32mcol_double()[39m,
  service_kind = [31mcol_character()[39m,
  hour = [32mcol_double()[39m,
  board_count = [32mcol_double()[39m,
  mean_temp = [32mcol_double()[39m,
  mean_precip = [32mcol_double()[39m,
  month_average_board_count = [32mcol_double()[39m,
  surrounding_board_count = [32mcol_double()[39m
)




Random Ferns 

168 samples
  7 predictor
  2 classes: '0', '1' 

No pre-processing
Resampling: Cross-Validated (10 fold, repeated 3 times) 
Summary of sample sizes: 152, 151, 152, 150, 151, 152, ... 
Resampling results across tuning parameters:

  depth  Accuracy   Kappa    
   1     0.9566721  0.9023890
   2     0.9740877  0.9407916
   3     0.9685594  0.9266079
   5     0.9584014  0.9063984
   6     0.9606073  0.9145374
   7     0.9743328  0.9405420
   9     0.9623230  0.9119522
  10     0.9548339  0.8962510
  11     0.9403867  0.8644759
  13     0.9362064  0.8528141
  14     0.9326389  0.8450435
  15     0.9164760  0.8131112

Accuracy was used to select the optimal model using the largest value.
The final value used for the model was depth = 7.


“There were missing values in resampled performance measures.”


Random Forest 

44 samples
 7 predictor

No pre-processing
Resampling: Cross-Validated (10 fold, repeated 3 times) 
Summary of sample sizes: 41, 39, 39, 40, 40, 39, ... 
Resampling results across tuning parameters:

  min.node.size  mtry  splitrule   RMSE       Rsquared   MAE      
   1             14    extratrees  1.0826619  0.2805947  0.9113604
   1             15    maxstat     0.9781982  0.2471572  0.8405644
   2              5    variance    1.0384960  0.2763110  0.8737324
   5              6    extratrees  1.0404080  0.2965105  0.8871659
   7              6    maxstat     0.9511712  0.2547429  0.8178960
   7              9    maxstat     0.9621994  0.2570325  0.8263608
   9             10    extratrees  1.0327561  0.2796636  0.8778915
  10              1    extratrees  0.9463656  0.3040321  0.8129537
  10              3    maxstat     0.9422135  0.2465485  0.8105105
  11              5    maxstat     0.9451559  0.2670973  0.8136582
  14              2    extratrees  0.9620750  0


[36m──[39m [1m[1mColumn specification[1m[22m [36m────────────────────────────────────────────────────────[39m
cols(
  month = [32mcol_double()[39m,
  service_kind = [31mcol_character()[39m,
  hour = [32mcol_double()[39m,
  board_count = [32mcol_double()[39m,
  mean_temp = [33mcol_logical()[39m,
  mean_precip = [32mcol_double()[39m,
  month_average_board_count = [32mcol_double()[39m,
  surrounding_board_count = [32mcol_double()[39m
)



[36m──[39m [1m[1mColumn specification[1m[22m [36m────────────────────────────────────────────────────────[39m
cols(
  month = [32mcol_double()[39m,
  service_kind = [31mcol_character()[39m,
  hour = [32mcol_double()[39m,
  board_count = [32mcol_double()[39m,
  mean_temp = [33mcol_logical()[39m,
  mean_precip = [32mcol_double()[39m,
  month_average_board_count = [32mcol_double()[39m,
  surrounding_board_count = [32mcol_double()[39m
)




ERROR: Error in na.fail.default(structure(list(y_clf_train = structure(c(1L, : missing values in object


## Direction 1

### Pre-lockdown

In [95]:
bus_stops_r9_dir1_pre = Pre_lock %>%  
    filter(route_id == '9', direction_id == '1') %>%
    pull(stop_id)

bus_stops_r9_dir1_pre = unique(bus_stops_r9_dir1_pre)
bus_stops_r9_dir1_pre

In [None]:
# c(169, 170, 171, 172, 505, 690)

In [101]:
which(bus_stops_r9_dir1_pre == '171', arr.ind = T)

In [106]:
for(i in bus_stops_r9_dir1_pre[76:length(bus_stops_r9_dir1_pre)]){
    print(i)
    RF_Ferns_and_Ranger('9', '1', i, 'pre')
    print('Done!')
    print('_/|_/|_')
}

[1] "690"



[36m──[39m [1m[1mColumn specification[1m[22m [36m────────────────────────────────────────────────────────[39m
cols(
  month = [32mcol_double()[39m,
  service_kind = [31mcol_character()[39m,
  hour = [32mcol_double()[39m,
  board_count = [32mcol_double()[39m,
  mean_temp = [33mcol_logical()[39m,
  mean_precip = [32mcol_double()[39m,
  month_average_board_count = [32mcol_double()[39m,
  surrounding_board_count = [32mcol_double()[39m
)



[36m──[39m [1m[1mColumn specification[1m[22m [36m────────────────────────────────────────────────────────[39m
cols(
  month = [32mcol_double()[39m,
  service_kind = [31mcol_character()[39m,
  hour = [32mcol_double()[39m,
  board_count = [32mcol_double()[39m,
  mean_temp = [33mcol_logical()[39m,
  mean_precip = [32mcol_double()[39m,
  month_average_board_count = [32mcol_double()[39m,
  surrounding_board_count = [32mcol_double()[39m
)




ERROR: Error in na.fail.default(structure(list(y_clf_train = structure(c(1L, : missing values in object


### Post-lockdown

In [172]:
bus_stops_r9_dir1_post = Post_lock %>%  
    filter(route_id == '9', direction_id == '1') %>%
    pull(stop_id)

bus_stops_r9_dir1_post = unique(bus_stops_r9_dir1_post)
bus_stops_r9_dir1_post

In [None]:
# c(1710, 1143, 1156, 1157, 1189, 690, 1155, 1156, 1157, 1189, 1159, 690)

In [182]:
which(bus_stops_r9_dir1_post == '690', arr.ind = T)

In [183]:
for(i in bus_stops_r9_dir1_post[77:length(bus_stops_r9_dir1_post)]){
    print(i)
    RF_Ferns_and_Ranger('9', '1', i, 'post')
    print('Done!')
    print('_/|_/|_')
}

[1] NA


ERROR: Error: 'data/jmartinez/Data_for_RF_Models/Board_Counts/route_9/direction1/bus_stop_NA//post_lock_train_data.csv' does not exist in current working directory ('/home/jmartinez').


In [192]:
RF_Ferns_and_Ranger('9', '0', '1091', 'post')


[36m──[39m [1m[1mColumn specification[1m[22m [36m────────────────────────────────────────────────────────[39m
cols(
  month = [32mcol_double()[39m,
  service_kind = [31mcol_character()[39m,
  hour = [32mcol_double()[39m,
  board_count = [32mcol_double()[39m,
  mean_temp = [32mcol_double()[39m,
  mean_precip = [32mcol_double()[39m,
  month_average_board_count = [32mcol_double()[39m,
  surrounding_board_count = [32mcol_double()[39m
)



[36m──[39m [1m[1mColumn specification[1m[22m [36m────────────────────────────────────────────────────────[39m
cols(
  month = [32mcol_double()[39m,
  service_kind = [31mcol_character()[39m,
  hour = [32mcol_double()[39m,
  board_count = [32mcol_double()[39m,
  mean_temp = [32mcol_double()[39m,
  mean_precip = [32mcol_double()[39m,
  month_average_board_count = [32mcol_double()[39m,
  surrounding_board_count = [32mcol_double()[39m
)




Random Ferns 

88 samples
 7 predictor
 2 classes: '0', '1' 

No pre-processing
Resampling: Cross-Validated (10 fold, repeated 3 times) 
Summary of sample sizes: 79, 79, 80, 78, 80, 80, ... 
Resampling results across tuning parameters:

  depth  Accuracy   Kappa     
   1     0.6129630  0.16971491
   2     0.6264815  0.18166878
   3     0.6070370  0.16449112
   5     0.5797222  0.15321028
   6     0.5512963  0.12307301
   7     0.5220370  0.10031851
   9     0.4419444  0.07596244
  10     0.4262037  0.06873596
  11     0.3895370  0.05568498
  13     0.3087963  0.03789853
  14     0.2811111  0.03368468
  15     0.2550926  0.02701383

Accuracy was used to select the optimal model using the largest value.
The final value used for the model was depth = 2.


“There were missing values in resampled performance measures.”


Random Forest 

6 samples
7 predictors

No pre-processing
Resampling: Cross-Validated (10 fold, repeated 3 times) 
Summary of sample sizes: 5, 5, 5, 5, 5, 5, ... 
Resampling results across tuning parameters:

  min.node.size  mtry  splitrule   RMSE       Rsquared  MAE      
  1               5    maxstat     0.8538463  NaN       0.8538463
  1               7    maxstat     0.8623630  NaN       0.8623630
  2               9    extratrees  0.8769370  NaN       0.8769370
  2               9    maxstat     0.8634241  NaN       0.8634241
  2              12    variance    0.8102259  NaN       0.8102259
  2              20    extratrees  0.8614556  NaN       0.8614556
  3               5    variance    0.8206037  NaN       0.8206037
  3              14    extratrees  0.8592370  NaN       0.8592370
  3              21    extratrees  0.8695667  NaN       0.8695667
  5               2    extratrees  0.8065333  NaN       0.8065333
  5              10    variance    0.8003556  NaN       0.8003556