From 3e338fb640434c423fff27e95a30c9ac57d3c97b Mon Sep 17 00:00:00 2001
From: schalkdaniel <d.schalk@mail.de>
Date: Sun, 21 Jan 2018 20:55:35 +0100
Subject: [PATCH] add predict functions and update tests

---
 Readme.md                       |  4 +--
 src/compboost.cpp               | 56 +++++++++++++++++++++++++++++++++
 src/compboost.h                 |  3 +-
 src/compboost_modules.cpp       | 32 +++++++++++++++++++
 tests/testthat/test_compboost.R | 20 ++++++++++++
 tutorials/compboost_class.R     |  9 ++++++
 tutorials/compboost_vs_mboost.R | 30 +++++++++++++++---
 7 files changed, 146 insertions(+), 8 deletions(-)

diff --git a/Readme.md b/Readme.md
index 7dc485ae..a0c972d2 100644
--- a/Readme.md
+++ b/Readme.md
@@ -31,8 +31,8 @@ devtools::install_github("schalkdaniel/compboost")
     - [ ] Prediction:
         - [x] General predict function on trian data
         - [ ] Predict function for iteration `k < iter.max`
-        - [ ] Prediction on newdata
-        - [ ] Prediction on newdata for iteration `k < iter.max`
+        - [x] Prediction on newdata
+        - [x] Prediction on newdata for iteration `k < iter.max`
         
 - [ ] Tests:
     - [ ] Iterate over tests (they are notd coded very well)
diff --git a/src/compboost.cpp b/src/compboost.cpp
index 021f6a45..81ea83cb 100644
--- a/src/compboost.cpp
+++ b/src/compboost.cpp
@@ -183,6 +183,62 @@ std::pair<std::vector<std::string>, arma::mat> Compboost::GetParameterMatrix ()
   return blearner_track.GetParameterMatrix();
 }
 
+// Predict on new data using the parameters returned by GetParameterMap().
+// `data_map` maps a data identifier to the matrix of new observations.
+// Returns `initialization` plus the summed baselearner contributions, one
+// entry per row of the first matrix in `data_map`. An unknown factory id or
+// a missing data identifier throws std::out_of_range (was: end() deref).
+arma::vec Compboost::Predict (std::map<std::string, arma::mat> data_map)
+{
+  std::map<std::string, arma::mat> parameter_map = blearner_track.GetParameterMap();
+
+  // pred is sized from the first entry; every other entry needs as many rows
+  // for the additions below to conform. An empty map yields zero rows:
+  arma::vec pred(data_map.empty() ? 0 : data_map.begin()->second.n_rows);
+  pred.fill(initialization);
+
+  for (auto& it : parameter_map) {
+
+    // The key of the parameter map is the id of the factory; .at() throws
+    // instead of dereferencing end() when the id is not registered:
+    blearnerfactory::BaselearnerFactory* sel_factory_obj = used_baselearner_list.GetMap().at(it.first);
+
+    // InstantiateData presumably applies the factory's data transformation:
+    arma::mat data_trafo = sel_factory_obj->InstantiateData(data_map.at(sel_factory_obj->GetDataIdentifier()));
+    pred += data_trafo * it.second;
+
+  }
+  return pred;
+}
+
+// Predict on new data with the parameters of iteration `k`, as returned by
+// GetEstimatedParameterForIteration(k). `data_map` maps a data identifier to
+// the matrix of new observations. Returns `initialization` plus the summed
+// baselearner contributions, one entry per row of the first matrix. Unknown
+// factory ids or data identifiers throw std::out_of_range (was: end() deref).
+arma::vec Compboost::PredictionOfIteration (std::map<std::string, arma::mat> data_map, unsigned int k)
+{
+  std::map<std::string, arma::mat> parameter_map = blearner_track.GetEstimatedParameterForIteration(k);
+
+  // pred is sized from the first entry; every other entry needs as many rows
+  // for the additions below to conform. An empty map yields zero rows:
+  arma::vec pred(data_map.empty() ? 0 : data_map.begin()->second.n_rows);
+  pred.fill(initialization);
+
+  for (auto& it : parameter_map) {
+
+    // The key of the parameter map is the id of the factory; .at() throws
+    // instead of dereferencing end() when the id is not registered:
+    blearnerfactory::BaselearnerFactory* sel_factory_obj = used_baselearner_list.GetMap().at(it.first);
+
+    // InstantiateData presumably applies the factory's data transformation:
+    arma::mat data_trafo = sel_factory_obj->InstantiateData(data_map.at(sel_factory_obj->GetDataIdentifier()));
+    pred += data_trafo * it.second;
+
+  }
+  return pred;
+}
+
 // Destructor:
 Compboost::~Compboost ()
 {
diff --git a/src/compboost.h b/src/compboost.h
index d5d3a9d1..db4a6e2e 100644
--- a/src/compboost.h
+++ b/src/compboost.h
@@ -97,7 +97,8 @@ class Compboost
     
     std::pair<std::vector<std::string>, arma::mat> GetParameterMatrix ();
     
-    // arma::mat Predict (std::map<std::string, arma::mat>);
+    arma::vec Predict (std::map<std::string, arma::mat>); // predict on new data, map keys are data identifiers
+    arma::vec PredictionOfIteration (std::map<std::string, arma::mat>, unsigned int); // same, with the parameters of the given iteration
     
     // Destructor:
     ~Compboost ();
diff --git a/src/compboost_modules.cpp b/src/compboost_modules.cpp
index c8d4657a..6cc816fb 100644
--- a/src/compboost_modules.cpp
+++ b/src/compboost_modules.cpp
@@ -790,6 +790,36 @@ class CompboostWrapper
       );
   }
   
+  // Predict on new data passed from R. `input_data` has to be a named list
+  // of matrices convertible to arma::mat; each name becomes a data map key.
+  arma::vec predict (Rcpp::List input_data)
+  {
+    if (input_data.size() == 0) { Rcpp::stop("input_data must be a non-empty named list of matrices"); }
+
+    // Convert the names once up front instead of once per list element:
+    std::vector<std::string> names = input_data.names();
+    std::map<std::string, arma::mat> data_map;
+    for (unsigned int i = 0; i < names.size(); i++) {
+      data_map[ names[i] ] = Rcpp::as<arma::mat>(input_data[i]);
+    }
+    return obj->Predict(data_map);
+  }
+  
+  // Predict on new data passed from R with the parameters of iteration `k`.
+  // `input_data` has to be a named list of matrices (see Rcpp::as<arma::mat>).
+  arma::vec predictionOfIteration (Rcpp::List input_data, unsigned int k)
+  {
+    if (input_data.size() == 0) { Rcpp::stop("input_data must be a non-empty named list of matrices"); }
+
+    // Convert the names once up front instead of once per list element:
+    std::vector<std::string> names = input_data.names();
+    std::map<std::string, arma::mat> data_map;
+    for (unsigned int i = 0; i < names.size(); i++) {
+      data_map[ names[i] ] = Rcpp::as<arma::mat>(input_data[i]);
+    }
+    return obj->PredictionOfIteration(data_map, k);
+  }
+  
   // Destructor:
   ~CompboostWrapper ()
   {
@@ -827,6 +857,8 @@ RCPP_MODULE (compboost_module)
     .method("getEstimatedParameter", &CompboostWrapper::getEstimatedParameter, "Get the estimated paraemter")
     .method("getEstimatedParameterOfIteration", &CompboostWrapper::getEstimatedParameterOfIteration, "Get the estimated parameter for iteration k < iter.max")
     .method("getParameterMatrix", &CompboostWrapper::getParameterMatrix, "Get matrix of all estimated parameter in each iteration")
+    .method("predict", &CompboostWrapper::predict, "Predict newdata")
+    .method("predictionOfIteration", &CompboostWrapper::predictionOfIteration, "Predict newdata for iteration k < iter.max")
   ;
 }
 
diff --git a/tests/testthat/test_compboost.R b/tests/testthat/test_compboost.R
index 4c077c6f..f334f3db 100644
--- a/tests/testthat/test_compboost.R
+++ b/tests/testthat/test_compboost.R
@@ -16,6 +16,20 @@ test_that("compboost does the same as mboost", {
   
   y = df[["mpg"]]
   
+  eval.hp = runif(10)
+  eval.wt = runif(10)
+  
+  eval.data = list(
+    "hp" = as.matrix(eval.hp),
+    "wt" = as.matrix(eval.wt)
+  )
+  
+  eval.df = data.frame(
+    hp  = eval.hp,
+    wt  = eval.wt,
+    hp2 = eval.hp^2
+  )
+  
   # Hyperparameter for the algorithm:
   learning.rate = 0.05
   iter.max = 500
@@ -163,5 +177,11 @@ test_that("compboost does the same as mboost", {
 
   expect_equal(cboost$getParameterMatrix()$parameter.matrix[idx, ], matrix.compare)
 
+  # Test if prediction works:
+  # --------------------------
+  
+  expect_equal(cboost$predict(eval.data), predict(mod, eval.df))
+  expect_equal(cboost$predictionOfIteration(eval.data, 200), predict(mod.reduced, eval.df))
+  
 })
 
diff --git a/tutorials/compboost_class.R b/tutorials/compboost_class.R
index 94c0c9b4..3b458415 100644
--- a/tutorials/compboost_class.R
+++ b/tutorials/compboost_class.R
@@ -23,6 +23,11 @@ X.wt = as.matrix(df[["wt"]], ncol = 1)
 
 y = df[["mpg"]]
 
+# Data to predict on, as a list of one-column matrices named by feature.
+# The values are taken from mtcars itself, hence only "new" in quotes:
+eval.data = list("hp" = as.matrix(mtcars$hp), "wt" = as.matrix(mtcars$wt))
+
+
 # Hyperparameter for the algorithm:
 learning.rate = 0.05
 iter.max = 500
@@ -101,3 +106,7 @@ cboost$getLoggerData()
 
 # Get parameter matrix:
 param.matrix = cboost$getParameterMatrix()
+
+# Predict for "new data" (all iterations, then only the first 200):
+cboost$predict(eval.data)
+cboost$predictionOfIteration(eval.data, 200)
diff --git a/tutorials/compboost_vs_mboost.R b/tutorials/compboost_vs_mboost.R
index 4601ebce..04d1886e 100644
--- a/tutorials/compboost_vs_mboost.R
+++ b/tutorials/compboost_vs_mboost.R
@@ -13,12 +13,26 @@ df = mtcars
 # Create new variable to check the polynomial baselearner with degree 2:
 df$hp2 = df[["hp"]]^2
 
-# Data for compboost:
+# Data for compboost, wt with intercept:
 X.hp = as.matrix(df[["hp"]], ncol = 1)
-X.wt = as.matrix(df[["wt"]], ncol = 1)
+X.wt = cbind(1, df[["wt"]])
 
 y = df[["mpg"]]
 
+eval.hp = runif(10)
+eval.wt = runif(10)
+# Evaluation data for compboost; wt gets an explicit column of ones:
+eval.data = list(
+  "hp" = as.matrix(eval.hp),
+  "wt" = cbind(1, eval.wt)
+)
+# The same values as data frame (plus hp2 = hp^2) for predict() of mboost:
+eval.df = data.frame(
+  hp  = eval.hp,
+  wt  = eval.wt,
+  hp2 = eval.hp^2
+)
+
 # Hyperparameter for the algorithm:
 learning.rate = 0.05
 iter.max = 500
@@ -91,7 +105,7 @@ library(mboost)
 
 mod = mboost(
   formula = mpg ~ bols(hp, intercept = FALSE) + 
-    bols(wt, intercept = FALSE) +
+    bols(wt) +
     bols(hp2, intercept = FALSE), 
   data    = df, 
   control = boost_control(mstop = iter.max, nu = learning.rate)
@@ -135,7 +149,7 @@ microbenchmark::microbenchmark(
     data    = df, 
     control = boost_control(mstop = iter.max, nu = learning.rate)
   ),
-  times = 10L
+  times = 100L
 )
 
 # Profiling to compare used memory:
@@ -157,7 +171,7 @@ print(p)
 
 mod.reduced = mboost(
   formula = mpg ~ bols(hp, intercept = FALSE) + 
-    bols(wt, intercept = FALSE) +
+    bols(wt) +
     bols(hp2, intercept = FALSE), 
   data    = df, 
   control = boost_control(mstop = 200, nu = learning.rate)
@@ -165,3 +179,9 @@ mod.reduced = mboost(
 
 mod.reduced$coef()
 cboost$getEstimatedParameterOfIteration(200)
+
+# Compare the predictions after 200 iterations on the evaluation data,
+# first from mboost, then from compboost:
+predict(mod.reduced, eval.df)
+cboost$predictionOfIteration(eval.data, 200)
+