From 3e338fb640434c423fff27e95a30c9ac57d3c97b Mon Sep 17 00:00:00 2001 From: schalkdaniel Date: Sun, 21 Jan 2018 20:55:35 +0100 Subject: [PATCH] add predict functions and update tests --- Readme.md | 4 +-- src/compboost.cpp | 56 +++++++++++++++++++++++++++++++++ src/compboost.h | 3 +- src/compboost_modules.cpp | 32 +++++++++++++++++++ tests/testthat/test_compboost.R | 20 ++++++++++++ tutorials/compboost_class.R | 9 ++++++ tutorials/compboost_vs_mboost.R | 30 +++++++++++++++--- 7 files changed, 146 insertions(+), 8 deletions(-) diff --git a/Readme.md b/Readme.md index 7dc485ae..a0c972d2 100644 --- a/Readme.md +++ b/Readme.md @@ -31,8 +31,8 @@ devtools::install_github("schalkdaniel/compboost") - [ ] Prediction: - [x] General predict function on trian data - [ ] Predict function for iteration `k < iter.max` - - [ ] Prediction on newdata - - [ ] Prediction on newdata for iteration `k < iter.max` + - [x] Prediction on newdata + - [x] Prediction on newdata for iteration `k < iter.max` - [ ] Tests: - [ ] Iterate over tests (they are notd coded very well) diff --git a/src/compboost.cpp b/src/compboost.cpp index 021f6a45..81ea83cb 100644 --- a/src/compboost.cpp +++ b/src/compboost.cpp @@ -183,6 +183,62 @@ std::pair, arma::mat> Compboost::GetParameterMatrix () return blearner_track.GetParameterMatrix(); } +arma::vec Compboost::Predict (std::map data_map) +{ + // std::cout << "Get into Compboost::Predict" << std::endl; + + std::map parameter_map = blearner_track.GetParameterMap(); + + arma::vec pred(data_map.begin()->second.n_rows); + pred.fill(initialization); + + // std::cout << "initialize pred vec" << std::endl; + + for (auto& it : parameter_map) { + + std::string sel_factory = it.first; + + // std::cout << "Fatory id of parameter map: " << sel_factory << std::endl; + + blearnerfactory::BaselearnerFactory* sel_factory_obj = used_baselearner_list.GetMap().find(sel_factory)->second; + + // std::cout << "Data of selected factory: " << sel_factory_obj->GetDataIdentifier() << std::endl; + + arma::mat data_trafo = sel_factory_obj->InstantiateData((data_map.find(sel_factory_obj->GetDataIdentifier())->second)); + pred += data_trafo * it.second; + + } + return pred; +} + +arma::vec Compboost::PredictionOfIteration (std::map data_map, unsigned int k) +{ + // std::cout << "Get into Compboost::Predict" << std::endl; + + std::map parameter_map = blearner_track.GetEstimatedParameterForIteration(k); + + arma::vec pred(data_map.begin()->second.n_rows); + pred.fill(initialization); + + // std::cout << "initialize pred vec" << std::endl; + + for (auto& it : parameter_map) { + + std::string sel_factory = it.first; + + // std::cout << "Fatory id of parameter map: " << sel_factory << std::endl; + + blearnerfactory::BaselearnerFactory* sel_factory_obj = used_baselearner_list.GetMap().find(sel_factory)->second; + + // std::cout << "Data of selected factory: " << sel_factory_obj->GetDataIdentifier() << std::endl; + + arma::mat data_trafo = sel_factory_obj->InstantiateData((data_map.find(sel_factory_obj->GetDataIdentifier())->second)); + pred += data_trafo * it.second; + + } + return pred; +} + // Destructor: Compboost::~Compboost () { diff --git a/src/compboost.h b/src/compboost.h index d5d3a9d1..db4a6e2e 100644 --- a/src/compboost.h +++ b/src/compboost.h @@ -97,7 +97,8 @@ class Compboost std::pair, arma::mat> GetParameterMatrix (); - // arma::mat Predict (std::map); + arma::vec Predict (std::map); + arma::vec PredictionOfIteration (std::map, unsigned int); // Destructor: ~Compboost (); diff --git a/src/compboost_modules.cpp b/src/compboost_modules.cpp index c8d4657a..6cc816fb 100644 --- a/src/compboost_modules.cpp +++ b/src/compboost_modules.cpp @@ -790,6 +790,36 @@ class CompboostWrapper ); } + arma::vec predict (Rcpp::List input_data) + { + std::map data_map; + + // Create data map: + for (unsigned int i = 0; i < input_data.size(); i++) { + + std::vector names = input_data.names(); + arma::mat temp = Rcpp::as(input_data[i]); + data_map[ names[i] ] = temp; + + } + return obj->Predict(data_map); + } + + arma::vec predictionOfIteration (Rcpp::List input_data, unsigned int k) + { + std::map data_map; + + // Create data map: + for (unsigned int i = 0; i < input_data.size(); i++) { + + std::vector names = input_data.names(); + arma::mat temp = Rcpp::as(input_data[i]); + data_map[ names[i] ] = temp; + + } + return obj->PredictionOfIteration(data_map, k); + } + // Destructor: ~CompboostWrapper () { @@ -827,6 +857,8 @@ RCPP_MODULE (compboost_module) .method("getEstimatedParameter", &CompboostWrapper::getEstimatedParameter, "Get the estimated paraemter") .method("getEstimatedParameterOfIteration", &CompboostWrapper::getEstimatedParameterOfIteration, "Get the estimated parameter for iteration k < iter.max") .method("getParameterMatrix", &CompboostWrapper::getParameterMatrix, "Get matrix of all estimated parameter in each iteration") + .method("predict", &CompboostWrapper::predict, "Predict newdata") + .method("predictionOfIteration", &CompboostWrapper::predictionOfIteration, "Predict newdata for iteration k < iter.max") ; } diff --git a/tests/testthat/test_compboost.R b/tests/testthat/test_compboost.R index 4c077c6f..f334f3db 100644 --- a/tests/testthat/test_compboost.R +++ b/tests/testthat/test_compboost.R @@ -16,6 +16,20 @@ test_that("compboost does the same as mboost", { y = df[["mpg"]] + eval.hp = runif(10) + eval.wt = runif(10) + + eval.data = list( + "hp" = as.matrix(eval.hp), + "wt" = as.matrix(eval.wt) + ) + + eval.df = data.frame( + hp = eval.hp, + wt = eval.wt, + hp2 = eval.hp^2 + ) + # Hyperparameter for the algorithm: learning.rate = 0.05 iter.max = 500 @@ -163,5 +177,11 @@ test_that("compboost does the same as mboost", { expect_equal(cboost$getParameterMatrix()$parameter.matrix[idx, ], matrix.compare) + # Test if prediction works: + # -------------------------- + + expect_equal(cboost$predict(eval.data), predict(mod, eval.df)) + expect_equal(cboost$predictionOfIteration(eval.data, 200), predict(mod.reduced, eval.df)) + }) diff --git a/tutorials/compboost_class.R b/tutorials/compboost_class.R index 94c0c9b4..3b458415 100644 --- a/tutorials/compboost_class.R +++ b/tutorials/compboost_class.R @@ -23,6 +23,11 @@ X.wt = as.matrix(df[["wt"]], ncol = 1) y = df[["mpg"]] +eval.data = list( + "hp" = as.matrix(mtcars$hp), + "wt" = as.matrix(mtcars$wt) +) + # Hyperparameter for the algorithm: learning.rate = 0.05 iter.max = 500 @@ -101,3 +106,7 @@ cboost$getLoggerData() # Get parameter matrix: param.matrix = cboost$getParameterMatrix() + +# Predict for "new data": +cboost$predict(eval.data) +cboost$predictionOfIteration(eval.data, 200) diff --git a/tutorials/compboost_vs_mboost.R b/tutorials/compboost_vs_mboost.R index 4601ebce..04d1886e 100644 --- a/tutorials/compboost_vs_mboost.R +++ b/tutorials/compboost_vs_mboost.R @@ -13,12 +13,26 @@ df = mtcars # Create new variable to check the polynomial baselearner with degree 2: df$hp2 = df[["hp"]]^2 -# Data for compboost: +# Data for compboost, wt with intercept: X.hp = as.matrix(df[["hp"]], ncol = 1) -X.wt = as.matrix(df[["wt"]], ncol = 1) +X.wt = cbind(1, df[["wt"]]) y = df[["mpg"]] +eval.hp = runif(10) +eval.wt = runif(10) + +eval.data = list( + "hp" = as.matrix(eval.hp), + "wt" = cbind(1, eval.wt) +) + +eval.df = data.frame( + hp = eval.hp, + wt = eval.wt, + hp2 = eval.hp^2 +) + # Hyperparameter for the algorithm: learning.rate = 0.05 iter.max = 500 @@ -91,7 +105,7 @@ library(mboost) mod = mboost( formula = mpg ~ bols(hp, intercept = FALSE) + - bols(wt, intercept = FALSE) + + bols(wt) + bols(hp2, intercept = FALSE), data = df, control = boost_control(mstop = iter.max, nu = learning.rate) @@ -135,7 +149,7 @@ microbenchmark::microbenchmark( data = df, control = boost_control(mstop = iter.max, nu = learning.rate) ), - times = 10L + times = 100L ) # Profiling to compare used memory: @@ -157,7 +171,7 @@ print(p) mod.reduced = mboost( formula = mpg ~ bols(hp, intercept = FALSE) + - bols(wt, intercept = FALSE) + + bols(wt) + bols(hp2, intercept = FALSE), data = df, control = boost_control(mstop = 200, nu = learning.rate) @@ -165,3 +179,9 @@ mod.reduced = mboost( mod.reduced$coef() cboost$getEstimatedParameterOfIteration(200) + + + +predict(mod.reduced, eval.df) +cboost$predictionOfIteration(eval.data, 200) +