Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add some parameter stuff to compboost #99

Merged
merged 2 commits into from Jan 20, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
90 changes: 87 additions & 3 deletions src/baselearner_track.cpp
Expand Up @@ -45,10 +45,11 @@ namespace blearnertrack
// Default constructor. The learning rate is given a definite value here,
// because GetEstimatedParameterForIteration() and GetParameterMatrix() read
// it and a default constructed track would otherwise hold an indeterminate
// double.
// NOTE(review): 1.0 (no shrinkage) is an assumed neutral default - confirm.
BaselearnerTrack::BaselearnerTrack () : learning_rate ( 1.0 ) {}

// Constructor storing the learning rate which is multiplied onto the
// parameter of every tracked baselearner when parameters are accumulated:
BaselearnerTrack::BaselearnerTrack (double learning_rate) : learning_rate ( learning_rate ) {}

// Insert a baselearner to the vector. We also want to add up the parameter
// in there to get an estimator in the end:
void BaselearnerTrack::InsertBaselearner (blearner::Baselearner* blearner,
double learning_rate)
void BaselearnerTrack::InsertBaselearner (blearner::Baselearner* blearner)
{
// Insert new baselearner:
blearner_vector.push_back(blearner);
Expand Down Expand Up @@ -103,10 +104,93 @@ void BaselearnerTrack::ClearBaselearnerVector ()
blearner_vector.clear();
}

// Get estimated parameter for specific iteration:
std::map<std::string, arma::mat> BaselearnerTrack::GetEstimatedParameterForIteration (unsigned int k)
{
// Create new parameter map:
std::map<std::string, arma::mat> my_new_parameter_map;

if (k <= blearner_vector.size()) {

for (unsigned int i = 0; i < k; i++) {
std::string insert_id = blearner_vector[i]->GetDataIdentifier() + ": " + blearner_vector[i]->GetBaselearnerType();

// Check if the baselearner is the first one. If so, the parameter
// has to be instantiated with a zero matrix:
std::map<std::string, arma::mat>::iterator it = my_new_parameter_map.find(insert_id);

// Prune parameter by multiplying it with the learning rate:
arma::mat parameter_temp = learning_rate * blearner_vector[i]->GetParameter();

// Check if this is the first parameter entry:
if (it == my_new_parameter_map.end()) {

// If this is the first entry, initialize it with zeros:
arma::mat init_parameter(parameter_temp.n_rows, parameter_temp.n_cols, arma::fill::zeros);
my_new_parameter_map.insert(std::pair<std::string, arma::mat>(insert_id, init_parameter));

}

// Accumulating parameter. If there is a nan, then this will be ignored and
// the non nan entries are added up:
// arma::mat parameter_insert = parameter_temp + my_parameter_map.find(blearner->GetBaselearnerType())->second;
// my_parameter_map.insert(std::pair<std::string, arma::mat>(blearner->GetBaselearnerType(), parameter_insert));
my_new_parameter_map[ insert_id ] = parameter_temp + my_new_parameter_map.find(insert_id)->second;
}
}
return my_new_parameter_map;
}

// Create parameter matrix:
std::pair<std::vector<std::string>, arma::mat> BaselearnerTrack::GetParameterMatrix ()
{
// Instantiate list to iterate:
std::map<std::string, arma::mat> my_new_parameter_map = my_parameter_map;

unsigned int cols = 0;

// Set all parameter to zero in new map:
for (auto& it : my_new_parameter_map) {
arma::mat init_parameter (it.second.n_rows, it.second.n_cols, arma::fill::zeros);
my_new_parameter_map[ it.first ] = init_parameter;

cols += it.second.n_cols;
}

// Initialize matrix:
arma::mat parameters (blearner_vector.size(), cols, arma::fill::zeros);

for (unsigned int i = 0; i < blearner_vector.size(); i++) {
std::string insert_id = blearner_vector[i]->GetDataIdentifier() + ": " + blearner_vector[i]->GetBaselearnerType();

// Prune parameter by multiplying it with the learning rate:
arma::mat parameter_temp = learning_rate * blearner_vector[i]->GetParameter();

// Accumulating parameter. If there is a nan, then this will be ignored and
// the non nan entries are added up:
my_new_parameter_map[ insert_id ] = parameter_temp + my_new_parameter_map.find(insert_id)->second;

arma::mat param_insert;

for (auto& it : my_new_parameter_map) {
param_insert = arma::join_rows(param_insert, it.second);
}
parameters.row(i) = param_insert;
}
std::pair<std::vector<std::string>, arma::mat> out_pair;

for (auto& it : my_new_parameter_map) {
out_pair.first.push_back(it.first);
}
out_pair.second = parameters;

return out_pair;
}

// Destructor:
BaselearnerTrack::~BaselearnerTrack ()
{
std::cout << "Call BaselearnerTrack Destructor" << std::endl;
// std::cout << "Call BaselearnerTrack Destructor" << std::endl;
for (unsigned int i = 0; i< blearner_vector.size(); i++)
{
delete blearner_vector[i];
Expand Down
10 changes: 8 additions & 2 deletions src/baselearner_track.h
Expand Up @@ -64,12 +64,15 @@ class BaselearnerTrack
// iteration:
std::map<std::string, arma::mat> my_parameter_map;

double learning_rate;

public:

BaselearnerTrack ();
BaselearnerTrack (double);

// Insert a baselearner into vector and update parameter:
void InsertBaselearner (blearner::Baselearner*, double);
void InsertBaselearner (blearner::Baselearner*);

// Return the vector of baselearner:
std::vector<blearner::Baselearner*> GetBaselearnerVector ();
Expand All @@ -80,8 +83,11 @@ class BaselearnerTrack
// Clear the vector without deleting the data in the factory:
void ClearBaselearnerVector ();

// Estimate parameter for specific iteration:
std::map<std::string, arma::mat> GetEstimatedParameterForIteration (unsigned int);

// Returns a matrix of parameters for every iteration:
// arma::mat GetParameterMatrix ();
std::pair<std::vector<std::string>, arma::mat> GetParameterMatrix ();

// Destructor:
~BaselearnerTrack ();
Expand Down
30 changes: 14 additions & 16 deletions src/compboost.cpp
Expand Up @@ -59,7 +59,10 @@ Compboost::Compboost (arma::vec response, double learning_rate,
used_optimizer ( used_optimizer ),
used_loss ( used_loss ),
used_baselearner_list ( used_baselearner_list ),
used_logger ( used_logger ) {}
used_logger ( used_logger )
{
blearner_track = blearnertrack::BaselearnerTrack(learning_rate);
}

// --------------------------------------------------------------------------- #
// Member functions:
Expand Down Expand Up @@ -106,7 +109,7 @@ void Compboost::TrainCompboost (bool trace)
// std::cout << "<<Compboost>> Cast integer k to string for baselearner identifier" << std::endl;

// Insert new baselearner to vector of selected baselearner:
blearner_track.InsertBaselearner(selected_blearner, learning_rate);
blearner_track.InsertBaselearner(selected_blearner);
// std::cout << "<<Compboost>> Insert new baselearner to vector of selected baselearner" << std::endl;

// Update model (prediction) and shrink by learning rate:
Expand Down Expand Up @@ -170,20 +173,15 @@ std::vector<std::string> Compboost::GetSelectedBaselearner ()
return selected_blearner;
}

// arma::vec Compboost::PredictEnsemble ()
// {
// arma::vec prediction(response.size());
// prediction.fill(initialization);
//
// std::cout << "Initialize the b0 model with: " << initialization << std::endl;
// std::cout << "Rows = " << prediction.n_rows << " Cols = " << prediction.n_cols << std::endl;
//
// for (std::vector<blearner::Baselearner*>::iterator it = blearner_track->GetBaselearnerVector().begin(); it != blearner_track->GetBaselearnerVector().end(); ++it) {
// std::cout << "Now iterating over baselearner!" << std::endl;
// prediction += (*it)->predict();
// }
// return prediction;
// }
// Delegate to the baselearner track and hand back its parameter map for the
// requested iteration k:
std::map<std::string, arma::mat> Compboost::GetParameterOfIteration (unsigned int k)
{
  std::map<std::string, arma::mat> parameter_of_iteration = blearner_track.GetEstimatedParameterForIteration(k);

  return parameter_of_iteration;
}

// Delegate to the baselearner track and hand back its pair of parameter
// names and parameter matrix:
std::pair<std::vector<std::string>, arma::mat> Compboost::GetParameterMatrix ()
{
  std::pair<std::vector<std::string>, arma::mat> names_and_matrix = blearner_track.GetParameterMatrix();

  return names_and_matrix;
}

// Destructor:
Compboost::~Compboost ()
Expand Down
7 changes: 4 additions & 3 deletions src/compboost.h
Expand Up @@ -73,7 +73,7 @@ class Compboost
bool stop_if_all_stopper_fulfilled;

// Pieces to run the algorithm:
blearnertrack::BaselearnerTrack blearner_track = blearnertrack::BaselearnerTrack();
blearnertrack::BaselearnerTrack blearner_track;
optimizer::Optimizer* used_optimizer;
loss::Loss* used_loss;
blearnerlist::BaselearnerList used_baselearner_list;
Expand All @@ -93,8 +93,9 @@ class Compboost
std::map<std::string, arma::mat> GetParameter ();
std::vector<std::string> GetSelectedBaselearner ();

// arma::vec PredictEnsemble ();
// arma::vec PredictEnsemble (arma::mat &);
std::map<std::string, arma::mat> GetParameterOfIteration (unsigned int);

std::pair<std::vector<std::string>, arma::mat> GetParameterMatrix ();

// Destructor:
~Compboost ();
Expand Down
28 changes: 26 additions & 2 deletions src/compboost_modules.cpp
Expand Up @@ -591,8 +591,8 @@ class CompboostWrapper
Rcpp::List getLoggerData ()
{
return Rcpp::List::create(
Rcpp::Named("logger_names") = used_logger->GetLoggerData().first,
Rcpp::Named("logger_data") = used_logger->GetLoggerData().second
Rcpp::Named("logger.names") = used_logger->GetLoggerData().first,
Rcpp::Named("logger.data") = used_logger->GetLoggerData().second
);
}

Expand All @@ -608,6 +608,28 @@ class CompboostWrapper
return out;
}

// Expose the parameter map of iteration k as R list. Every map entry becomes
// one list element which is named by the map key:
Rcpp::List getEstimatedParameterOfIteration (unsigned int k)
{
  Rcpp::List out;

  const std::map<std::string, arma::mat> parameter = obj->GetParameterOfIteration(k);

  typedef std::map<std::string, arma::mat>::const_iterator entry_iterator;

  for (entry_iterator entry = parameter.begin(); entry != parameter.end(); ++entry) {
    out[entry->first] = entry->second;
  }

  return out;
}

// Expose the parameter matrix as R list with the two elements
// "parameter.names" and "parameter.matrix":
Rcpp::List getParameterMatrix ()
{
  const std::pair<std::vector<std::string>, arma::mat> names_and_matrix = obj->GetParameterMatrix();

  const std::vector<std::string>& parameter_names = names_and_matrix.first;
  const arma::mat& parameter_matrix = names_and_matrix.second;

  return Rcpp::List::create(
    Rcpp::Named("parameter.names")  = parameter_names,
    Rcpp::Named("parameter.matrix") = parameter_matrix
  );
}

// Destructor:
~CompboostWrapper ()
{
Expand Down Expand Up @@ -643,6 +665,8 @@ RCPP_MODULE (compboost_module)
.method("getSelectedBaselearner", &CompboostWrapper::getSelectedBaselearner, "Get vector of selected baselearner")
.method("getLoggerData", &CompboostWrapper::getLoggerData, "Get data of the used logger")
.method("getEstimatedParameter", &CompboostWrapper::getEstimatedParameter, "Get the estimated paraemter")
.method("getEstimatedParameterOfIteration", &CompboostWrapper::getEstimatedParameterOfIteration, "Get the estimated parameter for iteration k < iter.max")
.method("getParameterMatrix", &CompboostWrapper::getParameterMatrix, "Get matrix of all estimated parameter in each iteration")
;
}

42 changes: 39 additions & 3 deletions tests/testthat/test_compboost.R
Expand Up @@ -122,9 +122,45 @@ test_that("compboost does the same as mboost", {
unname(unlist(cboost$getEstimatedParameter()))
)

expect_equal(dim(cboost$getLoggerData()$logger_data), c(500, 2))
expect_equal(cboost$getLoggerData()$logger_data[, 1], 1:500)
expect_equal(length(cboost$getLoggerData()$logger_data[, 2]), 500)
expect_equal(dim(cboost$getLoggerData()$logger.data), c(500, 2))
expect_equal(cboost$getLoggerData()$logger.data[, 1], 1:500)
expect_equal(length(cboost$getLoggerData()$logger.data[, 2]), 500)

# Check if the parameter getter for a smaller iteration works:
suppressWarnings({
mod.reduced = mboost(
formula = mpg ~ bols(hp, intercept = FALSE) +
bols(wt, intercept = FALSE) +
bols(hp2, intercept = FALSE),
data = df,
control = boost_control(mstop = 200, nu = learning.rate)
)
})

expect_equal(
unname(
unlist(
mod.reduced$coef()[
order(
unlist(
lapply(names(unlist(mod.reduced$coef()[1:3])), function (x) {
strsplit(x, "[.]")[[1]][2]
})
)
)
]
)
),
unname(unlist(cboost$getEstimatedParameterOfIteration(200)))
)

idx = 2:4 * 120
matrix.compare = matrix(NA_real_, nrow = 3, ncol = 3)

for (i in seq_along(idx)) {
matrix.compare[i, ] = unname(unlist(cboost$getEstimatedParameterOfIteration(idx[i])))
}

expect_equal(cboost$getParameterMatrix()$parameter.matrix[idx, ], matrix.compare)
})

6 changes: 6 additions & 0 deletions tutorials/compboost_class.R
Expand Up @@ -93,5 +93,11 @@ cboost$getSelectedBaselearner()
# Get estimated parameter:
cboost$getEstimatedParameter()

# Get estimated parameter after 200 iterations:
cboost$getEstimatedParameterOfIteration(200)

# Get logger data:
cboost$getLoggerData()

# Get parameter matrix:
param.matrix = cboost$getParameterMatrix()
13 changes: 13 additions & 0 deletions tutorials/compboost_vs_mboost.R
Expand Up @@ -152,3 +152,16 @@ p = profvis::profvis({

print(p)

# Check if parameter of smaller iteration works:
# ----------------------------------------------

mod.reduced = mboost(
formula = mpg ~ bols(hp, intercept = FALSE) +
bols(wt, intercept = FALSE) +
bols(hp2, intercept = FALSE),
data = df,
control = boost_control(mstop = 200, nu = learning.rate)
)

mod.reduced$coef()
cboost$getEstimatedParameterOfIteration(200)