From 653bde3f5f8324a1980902bf0c3b15677dd9d30f Mon Sep 17 00:00:00 2001 From: schalkdaniel Date: Sat, 20 Jan 2018 15:26:42 +0100 Subject: [PATCH 1/2] add some parameter stuff to compboost --- src/baselearner_track.cpp | 90 +++++++++++++++++++++++++++++++-- src/baselearner_track.h | 10 +++- src/compboost.cpp | 30 +++++------ src/compboost.h | 7 +-- src/compboost_modules.cpp | 28 +++++++++- tests/testthat/test_compboost.R | 42 +++++++++++++-- tutorials/compboost_class.R | 6 +++ tutorials/compboost_vs_mboost.R | 13 +++++ 8 files changed, 197 insertions(+), 29 deletions(-) diff --git a/src/baselearner_track.cpp b/src/baselearner_track.cpp index ad46a441..86f9657e 100644 --- a/src/baselearner_track.cpp +++ b/src/baselearner_track.cpp @@ -45,10 +45,11 @@ namespace blearnertrack // Just an empty constructor: BaselearnerTrack::BaselearnerTrack () {}; +BaselearnerTrack::BaselearnerTrack (double learning_rate) : learning_rate ( learning_rate ) {}; + // Insert a baselearner to the vector. We also want to add up the parameter // in there to get an estimator in the end: -void BaselearnerTrack::InsertBaselearner (blearner::Baselearner* blearner, - double learning_rate) +void BaselearnerTrack::InsertBaselearner (blearner::Baselearner* blearner) { // Insert new baselearner: blearner_vector.push_back(blearner); @@ -103,10 +104,93 @@ void BaselearnerTrack::ClearBaselearnerVector () blearner_vector.clear(); } +// Get estimated parameter for specific iteration: +std::map BaselearnerTrack::GetEstimatedParameterForIteration (unsigned int k) +{ + // Create new parameter map: + std::map my_new_parameter_map; + + if (k <= blearner_vector.size()) { + + for (unsigned int i = 0; i < k; i++) { + std::string insert_id = blearner_vector[i]->GetDataIdentifier() + ": " + blearner_vector[i]->GetBaselearnerType(); + + // Check if the baselearner is the first one. 
If so, the parameter + // has to be instantiated with a zero matrix: + std::map::iterator it = my_new_parameter_map.find(insert_id); + + // Prune parameter by multiplying it with the learning rate: + arma::mat parameter_temp = learning_rate * blearner_vector[i]->GetParameter(); + + // Check if this is the first parameter entry: + if (it == my_new_parameter_map.end()) { + + // If this is the first entry, initialize it with zeros: + arma::mat init_parameter(parameter_temp.n_rows, parameter_temp.n_cols, arma::fill::zeros); + my_new_parameter_map.insert(std::pair(insert_id, init_parameter)); + + } + + // Accumulating parameter. If there is a nan, then this will be ignored and + // the non nan entries are added up: + // arma::mat parameter_insert = parameter_temp + my_parameter_map.find(blearner->GetBaselearnerType())->second; + // my_parameter_map.insert(std::pair(blearner->GetBaselearnerType(), parameter_insert)); + my_new_parameter_map[ insert_id ] = parameter_temp + my_new_parameter_map.find(insert_id)->second; + } + } + return my_new_parameter_map; +} + +// Create parameter matrix: +std::pair, arma::mat> BaselearnerTrack::GetParameterMatrix () +{ + // Instantiate list to iterate: + std::map my_new_parameter_map = my_parameter_map; + + unsigned int cols = 0; + + // Set all parameter to zero in new map: + for (auto& it : my_new_parameter_map) { + arma::mat init_parameter (it.second.n_rows, it.second.n_cols, arma::fill::zeros); + my_new_parameter_map[ it.first ] = init_parameter; + + cols += it.second.n_cols; + } + + // Initialize matrix: + arma::mat parameters (blearner_vector.size(), cols, arma::fill::zeros); + + for (unsigned int i = 0; i < blearner_vector.size(); i++) { + std::string insert_id = blearner_vector[i]->GetDataIdentifier() + ": " + blearner_vector[i]->GetBaselearnerType(); + + // Prune parameter by multiplying it with the learning rate: + arma::mat parameter_temp = learning_rate * blearner_vector[i]->GetParameter(); + + // Accumulating parameter. 
If there is a nan, then this will be ignored and + // the non nan entries are added up: + my_new_parameter_map[ insert_id ] = parameter_temp + my_new_parameter_map.find(insert_id)->second; + + arma::mat param_insert; + + for (auto& it : my_new_parameter_map) { + param_insert = arma::join_rows(param_insert, it.second); + } + parameters.row(i) = param_insert; + } + std::pair, arma::mat> out_pair; + + for (auto& it : my_new_parameter_map) { + out_pair.first.push_back(it.first); + } + out_pair.second = parameters; + + return out_pair; +} + // Destructor: BaselearnerTrack::~BaselearnerTrack () { - std::cout << "Call BaselearnerTrack Destructor" << std::endl; + // std::cout << "Call BaselearnerTrack Destructor" << std::endl; for (unsigned int i = 0; i< blearner_vector.size(); i++) { delete blearner_vector[i]; diff --git a/src/baselearner_track.h b/src/baselearner_track.h index 5289fb1f..1fa0d082 100644 --- a/src/baselearner_track.h +++ b/src/baselearner_track.h @@ -64,12 +64,15 @@ class BaselearnerTrack // iteration: std::map my_parameter_map; + double learning_rate; + public: BaselearnerTrack (); + BaselearnerTrack (double); // Insert a baselearner into vector and update parameter: - void InsertBaselearner (blearner::Baselearner*, double); + void InsertBaselearner (blearner::Baselearner*); // Return the vector of baselearner: std::vector GetBaselearnerVector (); @@ -80,8 +83,11 @@ class BaselearnerTrack // Clear the vector without deleting the data in the factory: void ClearBaselearnerVector (); + // Estimate parameter for specific iteration: + std::map GetEstimatedParameterForIteration (unsigned int); + // Returns a matrix of parameters for every iteration: - // arma::mat GetParameterMatrix (); + std::pair, arma::mat> GetParameterMatrix (); // Destructor: ~BaselearnerTrack (); diff --git a/src/compboost.cpp b/src/compboost.cpp index 73af1c2a..021f6a45 100644 --- a/src/compboost.cpp +++ b/src/compboost.cpp @@ -59,7 +59,10 @@ Compboost::Compboost (arma::vec response, 
double learning_rate, used_optimizer ( used_optimizer ), used_loss ( used_loss ), used_baselearner_list ( used_baselearner_list ), - used_logger ( used_logger ) {} + used_logger ( used_logger ) +{ + blearner_track = blearnertrack::BaselearnerTrack(learning_rate); +} // --------------------------------------------------------------------------- # // Member functions: @@ -106,7 +109,7 @@ void Compboost::TrainCompboost (bool trace) // std::cout << "<> Cast integer k to string for baselearner identifier" << std::endl; // Insert new baselearner to vector of selected baselearner: - blearner_track.InsertBaselearner(selected_blearner, learning_rate); + blearner_track.InsertBaselearner(selected_blearner); // std::cout << "<> Insert new baselearner to vector of selected baselearner" << std::endl; // Update model (prediction) and shrink by learning rate: @@ -170,20 +173,15 @@ std::vector Compboost::GetSelectedBaselearner () return selected_blearner; } -// arma::vec Compboost::PredictEnsemble () -// { -// arma::vec prediction(response.size()); -// prediction.fill(initialization); -// -// std::cout << "Initialize the b0 model with: " << initialization << std::endl; -// std::cout << "Rows = " << prediction.n_rows << " Cols = " << prediction.n_cols << std::endl; -// -// for (std::vector::iterator it = blearner_track->GetBaselearnerVector().begin(); it != blearner_track->GetBaselearnerVector().end(); ++it) { -// std::cout << "Now iterating over baselearner!" 
<< std::endl; -// prediction += (*it)->predict(); -// } -// return prediction; -// } +std::map Compboost::GetParameterOfIteration (unsigned int k) +{ + return blearner_track.GetEstimatedParameterForIteration(k); +} + +std::pair, arma::mat> Compboost::GetParameterMatrix () +{ + return blearner_track.GetParameterMatrix(); +} // Destructor: Compboost::~Compboost () diff --git a/src/compboost.h b/src/compboost.h index 11b20f0d..5a9e69fa 100644 --- a/src/compboost.h +++ b/src/compboost.h @@ -73,7 +73,7 @@ class Compboost bool stop_if_all_stopper_fulfilled; // Pieces to run the algorithm: - blearnertrack::BaselearnerTrack blearner_track = blearnertrack::BaselearnerTrack(); + blearnertrack::BaselearnerTrack blearner_track; optimizer::Optimizer* used_optimizer; loss::Loss* used_loss; blearnerlist::BaselearnerList used_baselearner_list; @@ -93,8 +93,9 @@ class Compboost std::map GetParameter (); std::vector GetSelectedBaselearner (); - // arma::vec PredictEnsemble (); - // arma::vec PredictEnsemble (arma::mat &); + std::map GetParameterOfIteration (unsigned int); + + std::pair, arma::mat> GetParameterMatrix (); // Destructor: ~Compboost (); diff --git a/src/compboost_modules.cpp b/src/compboost_modules.cpp index f544b83f..7744434a 100644 --- a/src/compboost_modules.cpp +++ b/src/compboost_modules.cpp @@ -591,8 +591,8 @@ class CompboostWrapper Rcpp::List getLoggerData () { return Rcpp::List::create( - Rcpp::Named("logger_names") = used_logger->GetLoggerData().first, - Rcpp::Named("logger_data") = used_logger->GetLoggerData().second + Rcpp::Named("logger.names") = used_logger->GetLoggerData().first, + Rcpp::Named("logger.data") = used_logger->GetLoggerData().second ); } @@ -608,6 +608,28 @@ class CompboostWrapper return out; } + Rcpp::List getEstimatedParameterOfIteration (unsigned int k) + { + std::map parameter = obj->GetParameterOfIteration(k); + + Rcpp::List out; + + for (auto &it : parameter) { + out[it.first] = it.second; + } + return out; + } + + Rcpp::List 
getParameterMatrix () + { + std::pair, arma::mat> out_pair = obj->GetParameterMatrix(); + + return Rcpp::List::create( + Rcpp::Named("parameter.names") = out_pair.first, + Rcpp::Named("parameter.matrix") = out_pair.second + ); + } + // Destructor: ~CompboostWrapper () { @@ -643,6 +665,8 @@ RCPP_MODULE (compboost_module) .method("getSelectedBaselearner", &CompboostWrapper::getSelectedBaselearner, "Get vector of selected baselearner") .method("getLoggerData", &CompboostWrapper::getLoggerData, "Get data of the used logger") .method("getEstimatedParameter", &CompboostWrapper::getEstimatedParameter, "Get the estimated paraemter") + .method("getEstimatedParameterOfIteration", &CompboostWrapper::getEstimatedParameterOfIteration, "Get the estimated parameter for iteration k < iter.max") + .method("getParameterMatrix", &CompboostWrapper::getParameterMatrix, "Get matrix of all estimated parameter in each iteration") ; } diff --git a/tests/testthat/test_compboost.R b/tests/testthat/test_compboost.R index 45929760..3320793a 100644 --- a/tests/testthat/test_compboost.R +++ b/tests/testthat/test_compboost.R @@ -122,9 +122,45 @@ test_that("compboost does the same as mboost", { unname(unlist(cboost$getEstimatedParameter())) ) - expect_equal(dim(cboost$getLoggerData()$logger_data), c(500, 2)) - expect_equal(cboost$getLoggerData()$logger_data[, 1], 1:500) - expect_equal(length(cboost$getLoggerData()$logger_data[, 2]), 500) + expect_equal(dim(cboost$getLoggerData()$logger.data), c(500, 2)) + expect_equal(cboost$getLoggerData()$logger.data[, 1], 1:500) + expect_equal(length(cboost$getLoggerData()$logger.data[, 2]), 500) + # Check if parameter getter of smaller iteration works: + suppressWarnings({ + mod.reduced = mboost( + formula = mpg ~ bols(hp, intercept = FALSE) + + bols(wt, intercept = FALSE) + + bols(hp2, intercept = FALSE), + data = df, + control = boost_control(mstop = 200, nu = learning.rate) + ) + }) + + expect_equal( + unname( + unlist( + mod.reduced$coef()[ + order( 
unlist( + lapply(names(unlist(mod.reduced$coef()[1:3])), function (x) { + strsplit(x, "[.]")[[1]][2] + }) + ) + ) + ] + ) + ), + unname(unlist(cboost$getEstimatedParameterOfIteration(200))) + ) + + idx = sample(1:500, 3) + matrix.compare = matrix(NA_real_, nrow = 3, ncol = 3) + + for (i in seq_along(idx)) { + matrix.compare[i, ] = unname(unlist(cboost$getEstimatedParameterOfIteration(idx[i]))) + } + + expect_equal(cboost$getParameterMatrix()$parameter.matrix[idx, ], matrix.compare) }) diff --git a/tutorials/compboost_class.R b/tutorials/compboost_class.R index 951a9d93..94c0c9b4 100644 --- a/tutorials/compboost_class.R +++ b/tutorials/compboost_class.R @@ -93,5 +93,11 @@ cboost$getSelectedBaselearner() # Get estimated parameter: cboost$getEstimatedParameter() +# Get estimated parameter of iteration 200: +cboost$getEstimatedParameterOfIteration(200) + # Get logger data: cboost$getLoggerData() + +# Get parameter matrix: +param.matrix = cboost$getParameterMatrix() diff --git a/tutorials/compboost_vs_mboost.R b/tutorials/compboost_vs_mboost.R index b1b36d2d..4601ebce 100644 --- a/tutorials/compboost_vs_mboost.R +++ b/tutorials/compboost_vs_mboost.R @@ -152,3 +152,16 @@ p = profvis::profvis({ print(p) +# Check if parameter of smaller iteration works: +# ---------------------------------------------- + +mod.reduced = mboost( + formula = mpg ~ bols(hp, intercept = FALSE) + + bols(wt, intercept = FALSE) + + bols(hp2, intercept = FALSE), + data = df, + control = boost_control(mstop = 200, nu = learning.rate) +) + +mod.reduced$coef() +cboost$getEstimatedParameterOfIteration(200) From 06e41efdd09864ace189bedbc1ae94d0d4f947a9 Mon Sep 17 00:00:00 2001 From: schalkdaniel Date: Sat, 20 Jan 2018 15:38:12 +0100 Subject: [PATCH 2/2] update test: too small indices produce < 3 parameters > 0, which crashes the test --- tests/testthat/test_compboost.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/testthat/test_compboost.R b/tests/testthat/test_compboost.R index 
3320793a..b29895f0 100644 --- a/tests/testthat/test_compboost.R +++ b/tests/testthat/test_compboost.R @@ -154,7 +154,7 @@ test_that("compboost does the same as mboost", { unname(unlist(cboost$getEstimatedParameterOfIteration(200))) ) - idx = sample(1:500, 3) + idx = 2:4 * 120 matrix.compare = matrix(NA_real_, nrow = 3, ncol = 3) for (i in seq_along(idx)) {