From 5aa89b7d1b3f59de03bb98b0f5ea90ea114f12d9 Mon Sep 17 00:00:00 2001
From: Saurabh7
Date: Thu, 2 Jun 2016 10:55:25 +0530
Subject: [PATCH] Update LARS

---
 .../regression/LeastAngleRegression.cpp       | 178 +++++++++---------
 src/shogun/regression/LeastAngleRegression.h  |  25 ++-
 tests/unit/regression/lars_unittest.cc        | 114 +++++++++++
 3 files changed, 218 insertions(+), 99 deletions(-)

diff --git a/src/shogun/regression/LeastAngleRegression.cpp b/src/shogun/regression/LeastAngleRegression.cpp
index 07e568ff0e8..539fba7d097 100644
--- a/src/shogun/regression/LeastAngleRegression.cpp
+++ b/src/shogun/regression/LeastAngleRegression.cpp
@@ -21,7 +21,9 @@
 #include
 #include
 #include
+#include <shogun/mathematics/eigen3.h>
 
+using namespace Eigen;
 using namespace shogun;
 using namespace std;
 
@@ -80,6 +82,8 @@ CLeastAngleRegression::CLeastAngleRegression(bool lasso) :
 	CLinearMachine(), m_lasso(lasso),
 	m_max_nonz(0), m_max_l1_norm(0)
 {
+	m_epsilon = CMath::MACHINE_EPSILON;
+	SG_ADD(&m_epsilon, "epsilon", "Epsilon for early stopping", MS_AVAILABLE);
 	SG_ADD(&m_max_nonz, "max_nonz", "Max number of non-zero variables", MS_AVAILABLE);
 	SG_ADD(&m_max_l1_norm, "max_l1_norm", "Max l1-norm of estimator", MS_AVAILABLE);
 }
@@ -127,25 +131,26 @@ bool CLeastAngleRegression::train_machine(CFeatures* data)
 	fill(m_is_active.begin(), m_is_active.end(), false);
 
 	SGVector<float64_t> y = ((CRegressionLabels*) m_labels)->get_labels();
+	Map<VectorXd> map_y(y.vector, y.size());
 	SGMatrix<float64_t> Xorig = feats->get_feature_matrix();
 
 	// transpose(X) is more convenient to work with since we care
 	// about features here. After transpose, each row will be a data
 	// point while each column corresponds to a feature
-	SGMatrix<float64_t> X(n_vec, n_fea, true);
-	for (int32_t i=0; i < n_vec; ++i)
-	{
-		for (int32_t j=0; j < n_fea; ++j)
-			X(i,j) = Xorig(j,i);
-	}
+	SGMatrix<float64_t> X(n_vec, n_fea);
+	Map<MatrixXd> map_Xr(Xorig.matrix, n_fea, n_vec);
+	Map<MatrixXd> map_X(X.matrix, n_vec, n_fea);
+	map_X = map_Xr.transpose();
+
+	SGMatrix<float64_t> X_active(n_vec, n_fea);
 
 	// beta is the estimator
 	vector<float64_t> beta = make_vector(n_fea, 0);
 
 	vector<float64_t> Xy = make_vector(n_fea, 0);
+	Map<VectorXd> map_Xy(&Xy[0], n_fea);
 	// Xy = X' * y
-	cblas_dgemv(CblasColMajor, CblasTrans, n_vec, n_fea, 1, X.matrix, n_vec,
-		y.vector, 1, 0, &Xy[0], 1);
+	map_Xy = map_Xr*map_y;
 
 	// mu is the prediction
 	vector<float64_t> mu = make_vector(n_vec, 0);
@@ -172,13 +177,14 @@ bool CLeastAngleRegression::train_machine(CFeatures* data)
 	// main loop
 	//========================================
 	int32_t nloop=0;
-	while (m_num_active < max_active_allowed && max_corr > CMath::MACHINE_EPSILON && !stop_cond)
+	while (m_num_active < max_active_allowed && max_corr/n_vec > m_epsilon && !stop_cond)
 	{
 		// corr = X' * (y-mu) = - X'*mu + Xy
-		copy(Xy.begin(), Xy.end(), corr.begin());
-		cblas_dgemv(CblasColMajor, CblasTrans, n_vec, n_fea, -1,
-			X.matrix, n_vec, &mu[0], 1, 1, &corr[0], 1);
-
+		Map<VectorXd> map_corr(&corr[0], n_fea);
+		Map<VectorXd> map_mu(&mu[0], n_vec);
+
+		map_corr = map_Xy - (map_Xr*map_mu);
+
 		// corr_sign = sign(corr)
 		for (uint32_t i=0; i < corr.size(); ++i)
 			corr_sign[i] = CMath::sign(corr[i]);
@@ -189,63 +195,67 @@ bool CLeastAngleRegression::train_machine(CFeatures* data)
 		if (!lasso_cond)
 		{
 			// update Cholesky factorization matrix
-			R=cholesky_insert(X, R, i_max_corr);
+			if (m_num_active == 0)
+			{
+				// R isn't allocated yet
+				R=SGMatrix<float64_t>(1,1);
+				float64_t diag_k = map_X.col(i_max_corr).dot(map_X.col(i_max_corr));
+				R(0,0) = CMath::sqrt(diag_k);
+			}
+			else
+				R=cholesky_insert(X, X_active, R, i_max_corr, m_num_active);
 			activate_variable(i_max_corr);
 		}
 
-		// corr_sign_a = corr_sign[m_active_set]
-		vector<float64_t> corr_sign_a(m_num_active);
+		// Active variables
+		Map<MatrixXd> map_Xa(X_active.matrix, n_vec, m_num_active);
+		if (!lasso_cond)
+			map_Xa.col(m_num_active-1)=map_X.col(i_max_corr);
+
+		SGVector<float64_t> corr_sign_a(m_num_active);
 		for (int32_t i=0; i < m_num_active; ++i)
 			corr_sign_a[i] = corr_sign[m_active_set[i]];
 
-		// GA1 = R\(R'\corr_sign_a)
-		vector<float64_t> GA1(corr_sign_a);
-		cblas_dtrsm(CblasColMajor, CblasLeft, CblasUpper, CblasTrans, CblasNonUnit,
-			m_num_active, 1, 1, R.matrix, m_num_active, &GA1[0], m_num_active);
-		cblas_dtrsm(CblasColMajor, CblasLeft, CblasUpper, CblasNoTrans, CblasNonUnit,
-			m_num_active, 1, 1, R.matrix, m_num_active, &GA1[0], m_num_active);
+		// GA1 = R\(R'\corr_sign_a)
+		Map<VectorXd> map_corr_sign_a(corr_sign_a.vector, corr_sign_a.size());
+		Map<MatrixXd> map_R(R.matrix, R.num_rows, R.num_cols);
+		VectorXd solve = map_R.transpose().triangularView<Lower>().solve(map_corr_sign_a);
+		VectorXd GA1 = map_R.triangularView<Upper>().solve(solve);
 
 		// AA = 1/sqrt(GA1' * corr_sign_a)
-		float64_t AA = cblas_ddot(m_num_active, &GA1[0], 1, &corr_sign_a[0], 1);
+		float64_t AA = GA1.dot(map_corr_sign_a);
 		AA = 1/CMath::sqrt(AA);
 
-		// wA = AA*GA1
-		vector<float64_t> wA(GA1);
-		for (int32_t i=0; i < m_num_active; ++i)
-			wA[i] *= AA;
+		VectorXd wA = AA*GA1;
 
 		// equiangular direction (unit vector)
 		vector<float64_t> u = make_vector(n_vec, 0);
+		Map<VectorXd> map_u(&u[0], n_vec);
 
-		// u = X[:,m_active_set] * wA
-		for (int32_t i=0; i < m_num_active; ++i)
-		{
-			// u += wA[i] * X[:,m_active_set[i]]
-			cblas_daxpy(n_vec, wA[i],
-				X.get_column_vector(m_active_set[i]), 1, &u[0], 1);
-		}
+		map_u = map_Xa*wA;
 
-		// step size
 		float64_t gamma = max_corr / AA;
 		if (m_num_active < n_fea)
 		{
+			#pragma omp parallel for shared(gamma)
 			for (int32_t i=0; i < n_fea; ++i)
 			{
 				if (m_is_active[i])
 					continue;
 
 				// correlation between X[:,i] and u
-				float64_t dir_corr = cblas_ddot(n_vec,
-					X.get_column_vector(i), 1, &u[0], 1);
+				float64_t dir_corr = map_u.dot(map_X.col(i));
 
 				float64_t tmp1 = (max_corr-corr[i])/(AA-dir_corr);
 				float64_t tmp2 = (max_corr+corr[i])/(AA+dir_corr);
+				#pragma omp critical
+				{
 				if (tmp1 > CMath::MACHINE_EPSILON && tmp1 < gamma)
 					gamma = tmp1;
 				if (tmp2 > CMath::MACHINE_EPSILON && tmp2 < gamma)
 					gamma = tmp2;
+				}
 			}
 		}
-
 		int32_t i_kick=-1;
 		int32_t i_change=i_max_corr;
 		if (m_lasso)
@@ -256,7 +266,7 @@ bool CLeastAngleRegression::train_machine(CFeatures* data)
 			for (int32_t i=0; i < m_num_active; ++i)
 			{
-				float64_t tmp = -beta[m_active_set[i]] / wA[i];
+				float64_t tmp = -beta[m_active_set[i]] / wA(i);
 				if (tmp > CMath::MACHINE_EPSILON && tmp < lasso_bound)
 				{
 					lasso_bound = tmp;
@@ -273,12 +283,11 @@ bool CLeastAngleRegression::train_machine(CFeatures* data)
 		}
 
 		// update prediction: mu = mu + gamma * u
-		cblas_daxpy(n_vec, gamma, &u[0], 1, &mu[0], 1);
+		map_mu += gamma*map_u;
 
 		// update estimator
 		for (int32_t i=0; i < m_num_active; ++i)
-			beta[m_active_set[i]] += gamma * wA[i];
-
+			beta[m_active_set[i]] += gamma * wA(i);
 		// early stopping on max l1-norm
 		if (m_max_l1_norm > 0)
 		{
@@ -301,11 +310,16 @@ bool CLeastAngleRegression::train_machine(CFeatures* data)
 		// if lasso cond, drop the variable from active set
 		if (lasso_cond)
 		{
-			beta[i_change] = 0; // ensure it be zero
-
-			// update Cholesky factorization
+			beta[i_change] = 0;
 			R=cholesky_delete(R, i_kick);
 			deactivate_variable(i_kick);
+
+			// Remove the dropped column from the active set matrix
+			int32_t numRows = map_Xa.rows();
+			int32_t numCols = map_Xa.cols()-1;
+			if (i_kick < numCols)
+				map_Xa.block(0, i_kick, numRows, numCols-i_kick) =
+					map_Xa.block(0, i_kick+1, numRows, numCols-i_kick).eval();
 		}
 
 		nloop++;
@@ -318,8 +332,9 @@ bool CLeastAngleRegression::train_machine(CFeatures* data)
 
 		// early stopping with max number of non-zero variables
 		if (m_max_nonz > 0 && m_num_active >= m_max_nonz)
 			stop_cond = true;
+		SG_DEBUG("Added: %d, dropped: %d, active set size: %d, max_corr: %.17f\n", i_max_corr, i_kick, m_num_active, max_corr);
 
-	} // main loop
+	}
 
 	// assign default estimator
 	w.vlen = n_fea;
 	set_w(w);
 
 	return true;
 }
 
-SGMatrix<float64_t> CLeastAngleRegression::cholesky_insert(
-	SGMatrix<float64_t> X, SGMatrix<float64_t> R, int32_t i_max_corr)
+SGMatrix<float64_t> CLeastAngleRegression::cholesky_insert(const SGMatrix<float64_t>& X,
+	const SGMatrix<float64_t>& X_active, SGMatrix<float64_t>& R, int32_t i_max_corr, int32_t num_active)
 {
-	// diag_k = X[:,i_max_corr]' * X[:,i_max_corr]
-	float64_t diag_k = cblas_ddot(X.num_rows, X.get_column_vector(i_max_corr), 1,
-		X.get_column_vector(i_max_corr), 1);
-
-	if (m_num_active == 0)
-	{ // R isn't allocated yet
-		SGMatrix<float64_t> nR(1,1);
-		nR(0,0) = CMath::sqrt(diag_k);
-		return nR;
-	}
-	else
-	{
-		// col_k is the k-th column of (X'X)
-		vector<float64_t> col_k(m_num_active);
-		for (int32_t i=0; i < m_num_active; ++i)
-		{
-			// col_k[i] = X[:,i_max_corr]' * X[:,m_active_set[i]]
-			col_k[i] = cblas_ddot(X.num_rows, X.get_column_vector(i_max_corr), 1,
-				X.get_column_vector(m_active_set[i]), 1);
-		}
-
-		// R' * R_k = (X' * X)_k = col_k, solving to get R_k
-		vector<float64_t> R_k(col_k);
-		cblas_dtrsm(CblasColMajor, CblasLeft, CblasUpper, CblasTrans, CblasNonUnit, m_num_active, 1,
-			1, R.matrix, m_num_active, &R_k[0], m_num_active);
-
-		float64_t R_kk = CMath::sqrt(diag_k -
-			cblas_ddot(m_num_active, &R_k[0], 1, &R_k[0], 1));
-
-		// new_R = [R R_k; zeros(...) R_kk]
-		SGMatrix<float64_t> nR(m_num_active+1, m_num_active+1);
-		for (int32_t i=0; i < m_num_active; ++i)
-			for (int32_t j=0; j < m_num_active; ++j)
-				nR(i,j) = R(i,j);
-		for (int32_t i=0; i < m_num_active; ++i)
-			nR(i, m_num_active) = R_k[i];
-		for (int32_t i=0; i < m_num_active; ++i)
-			nR(m_num_active, i) = 0;
-		nR(m_num_active, m_num_active) = R_kk;
-
-		return nR;
-	}
-
+	Map<MatrixXd> map_X(X.matrix, X.num_rows, X.num_cols);
+	Map<MatrixXd> map_X_active(X_active.matrix, X.num_rows, num_active);
+	float64_t diag_k = map_X.col(i_max_corr).dot(map_X.col(i_max_corr));
+
+	// col_k is the k-th column of (X'X)
+	Map<VectorXd> map_i_max(X.get_column_vector(i_max_corr), X.num_rows);
+	VectorXd R_k = map_X_active.transpose()*map_i_max;
+	Map<MatrixXd> map_R(R.matrix, R.num_rows, R.num_cols);
+
+	// R' * R_k = (X' * X)_k = col_k, solving to get R_k
+	map_R.transpose().triangularView<Lower>().solveInPlace(R_k);
+	float64_t R_kk = CMath::sqrt(diag_k - R_k.dot(R_k));
+
+	// R_new = [R R_k; zeros(1, num_active) R_kk]
+	SGMatrix<float64_t> R_new(num_active+1, num_active+1);
+	Map<MatrixXd> map_R_new(R_new.matrix, R_new.num_rows, R_new.num_cols);
+
+	map_R_new.block(0, 0, num_active, num_active) = map_R;
+	memcpy(R_new.matrix+num_active*(num_active+1), R_k.data(), sizeof(float64_t)*(num_active));
+	map_R_new.row(num_active).setZero();
+	map_R_new(num_active, num_active) = R_kk;
+	return R_new;
 }
 
-SGMatrix<float64_t> CLeastAngleRegression::cholesky_delete(SGMatrix<float64_t> R, int32_t i_kick)
+SGMatrix<float64_t> CLeastAngleRegression::cholesky_delete(SGMatrix<float64_t>& R, int32_t i_kick)
 {
 	if (i_kick != m_num_active-1)
 	{
diff --git a/src/shogun/regression/LeastAngleRegression.h b/src/shogun/regression/LeastAngleRegression.h
index 54d6cc7a297..722c64ebe52 100644
--- a/src/shogun/regression/LeastAngleRegression.h
+++ b/src/shogun/regression/LeastAngleRegression.h
@@ -168,12 +168,29 @@ class CLeastAngleRegression: public CLinearMachine
 		return CT_LARS;
 	}
 
+	/** Set epsilon used for early stopping */
+	void set_epsilon(float64_t epsilon)
+	{
+		m_epsilon = epsilon;
+	}
+
+	/** Get epsilon used for early stopping */
+	float64_t get_epsilon()
+	{
+		return m_epsilon;
+	}
+
 	/** @return object name */
 	virtual const char* get_name() const { return "LeastAngleRegression"; }
 
 protected:
 	virtual bool train_machine(CFeatures* data=NULL);
 
+	SGMatrix<float64_t> cholesky_insert(const SGMatrix<float64_t>& X, const SGMatrix<float64_t>& X_active,
+		SGMatrix<float64_t>& R, int32_t i_max_corr, int32_t num_active);
+
+	SGMatrix<float64_t> cholesky_delete(SGMatrix<float64_t>& R, int32_t i_kick);
+
 private:
 	void activate_variable(int32_t v)
 	{
@@ -187,11 +204,7 @@ class CLeastAngleRegression: public CLinearMachine
 		m_is_active[m_active_set[v_idx]] = false;
 		m_active_set.erase(m_active_set.begin() + v_idx);
 	}
-
-	SGMatrix<float64_t> cholesky_insert(SGMatrix<float64_t> X, SGMatrix<float64_t> R, int32_t i_max_corr);
-	SGMatrix<float64_t> cholesky_delete(SGMatrix<float64_t> R, int32_t i_kick);
-
-
+
 	bool m_lasso; //!< enable lasso modification
 
 	int32_t m_max_nonz; //!< max number of non-zero variables for early stopping
@@ -199,10 +212,10 @@ class CLeastAngleRegression: public CLinearMachine
 
 	std::vector<SGVector<float64_t> > m_beta_path;
 	std::vector<int32_t> m_beta_idx;
-
 	std::vector<int32_t> m_active_set;
 	std::vector<bool> m_is_active;
 	int32_t m_num_active;
+	float64_t m_epsilon;
 
 }; // class LARS
 
 } // namespace shogun
diff --git a/tests/unit/regression/lars_unittest.cc b/tests/unit/regression/lars_unittest.cc
index 6b117cab4ae..3292df08c3d 100644
--- a/tests/unit/regression/lars_unittest.cc
+++ b/tests/unit/regression/lars_unittest.cc
@@ -33,7 +33,12 @@
 #include
 #include
 #include
+#include <shogun/preprocessor/PruneVarSubMean.h>
+#include <shogun/preprocessor/NormOne.h>
+#include <shogun/mathematics/eigen3.h>
+#include <shogun/mathematics/linalg/linalg.h>
 
+using namespace Eigen;
 using namespace shogun;
 
 #ifdef HAVE_LAPACK
@@ -228,4 +233,113 @@ TEST(LeastAngleRegression, lars_N_LessThan_D)
 	SG_UNREF(features);
 	SG_UNREF(labels);
 }
+
+TEST(LeastAngleRegression, cholesky_insert)
+{
+	class lars_helper: public CLeastAngleRegression
+	{
+		public:
+			SGMatrix<float64_t> cholesky_insert_helper(const SGMatrix<float64_t>& X,
+				const SGMatrix<float64_t>& X_active, SGMatrix<float64_t>& R, int32_t i, int32_t n)
+			{
+				return CLeastAngleRegression::cholesky_insert(X, X_active, R, i, n);
+			}
+	};
+
+	int32_t num_feats=5, num_vec=6;
+	SGMatrix<float64_t> R(num_feats, num_feats);
+	SGMatrix<float64_t> mat(num_vec, num_feats-1);
+	SGMatrix<float64_t> matnew(num_vec, num_feats);
+
+	SGVector<float64_t> vec(num_vec);
+	vec.random(0.0,1.0);
+	Map<VectorXd> map_vec(vec.vector, vec.size());
+
+	// Fill the old matrix with random data; matnew is the same data
+	// with vec appended as an extra column
+	for (index_t i=0; i<num_feats-1; i++)
+	{
+		for (index_t j=0; j<num_vec; j++)
+		{
+			mat(j,i)=CMath::random(0.0,1.0);
+			matnew(j,i)=mat(j,i);
+		}
+	}
+	for (index_t j=0; j<num_vec; j++)
+		matnew(j,num_feats-1)=vec[j];
+
+	Map<MatrixXd> mat_old(mat.matrix, num_vec, num_feats-1);
+	Map<MatrixXd> mat_new(matnew.matrix, num_vec, num_feats);
+	Map<MatrixXd> map_R(R.matrix, num_feats, num_feats);
+
+	MatrixXd XX=mat_old.transpose()*mat_old;
+
+	// Compute matrix R which has to be updated
+	SGMatrix<float64_t> R_old=linalg::cholesky(XX, false);
+
+	// Update cholesky decomposition matrix R
+	lars_helper lars = lars_helper();
+	SGMatrix<float64_t> R_new = lars.cholesky_insert_helper(matnew, mat, R_old, 4, 4);
+	Map<MatrixXd> map_R_new(R_new.matrix, R_new.num_rows, R_new.num_cols);
+
+	// Compute true cholesky decomposition
+	MatrixXd XX_new=mat_new.transpose()*mat_new;
+	SGMatrix<float64_t> R_true=linalg::cholesky(XX_new, false);
+
+	Map<MatrixXd> map_R_true(R_true.matrix, num_feats, num_feats);
+	EXPECT_NEAR((map_R_true - map_R_new).norm(), 0.0, 1E-12);
+}
+
+TEST(LeastAngleRegression, ols_equivalence)
+{
+	int32_t n_feat=25, n_vec=100;
+	SGMatrix<float64_t> data(n_feat, n_vec);
+	for (index_t i=0; i<n_vec; i++)
+	{
+		for (index_t j=0; j<n_feat; j++)
+			data(j,i)=CMath::random(0.0,1.0);
+	}
+
+	SGVector<float64_t> lab=SGVector<float64_t>(n_vec);
+	lab.random(0.0,1.0);
+
+	// Center the labels
+	float64_t mean=linalg::mean(lab);
+	for (index_t i=0; i<n_vec; i++)
+		lab[i]-=mean;
+
+	CDenseFeatures<float64_t>* features=new CDenseFeatures<float64_t>(data);
+	SG_REF(features);
+
+	CPruneVarSubMean* proc1=new CPruneVarSubMean();
+	CNormOne* proc2=new CNormOne();
+	proc1->init(features);
+	proc1->apply_to_feature_matrix(features);
+	proc2->init(features);
+	proc2->apply_to_feature_matrix(features);
+
+	CRegressionLabels* labels=new CRegressionLabels(lab);
+	SG_REF(labels);
+
+	CLeastAngleRegression* lars=new CLeastAngleRegression(false);
+	lars->set_labels((CLabels*) labels);
+	lars->train(features);
+
+	// Full LAR model
+	SGVector<float64_t> w=lars->get_w();
+	Map<VectorXd> map_w(w.vector, w.size());
+
+	SGMatrix<float64_t> mat=features->get_feature_matrix();
+	Map<MatrixXd> feat(mat.matrix, mat.num_rows, mat.num_cols);
+	Map<VectorXd> l(lab.vector, lab.size());
+
+	// OLS
+	VectorXd solve=feat.transpose().colPivHouseholderQr().solve(l);
+
+	// Check if full LAR model is equivalent to OLS
+	EXPECT_EQ(w.size(), n_feat);
+	EXPECT_NEAR((map_w - solve).norm(), 0.0, 1E-12);
+
+	SG_UNREF(proc1);
+	SG_UNREF(proc2);
+	SG_UNREF(lars);
+	SG_UNREF(features);
+	SG_UNREF(labels);
+}
+
 #endif
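
Note for reviewers (not part of the patch): the sketch below shows how the early-stopping
tolerance added here is meant to be used. It only exercises API that appears in the diff
above (CLeastAngleRegression, set_epsilon, set_labels, train, get_w); the function name
lars_with_epsilon and the concrete tolerance value 1e-8 are illustrative only.

    #include <shogun/regression/LeastAngleRegression.h>
    #include <shogun/features/DenseFeatures.h>
    #include <shogun/labels/RegressionLabels.h>

    using namespace shogun;

    void lars_with_epsilon(SGMatrix<float64_t> data, SGVector<float64_t> lab)
    {
        CDenseFeatures<float64_t>* features=new CDenseFeatures<float64_t>(data);
        CRegressionLabels* labels=new CRegressionLabels(lab);
        SG_REF(features);
        SG_REF(labels);

        // false: plain LARS, without the LASSO modification
        CLeastAngleRegression* lars=new CLeastAngleRegression(false);
        lars->set_labels((CLabels*) labels);

        // New in this patch: the main loop stops once max_corr/n_vec drops below
        // this tolerance (default: CMath::MACHINE_EPSILON, set in the constructor).
        // 1e-8 is an arbitrary example value.
        lars->set_epsilon(1e-8);

        lars->train(features);
        SGVector<float64_t> w=lars->get_w();

        SG_UNREF(lars);
        SG_UNREF(features);
        SG_UNREF(labels);
    }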