diff --git a/src/shogun/metric/LMNN.cpp b/src/shogun/metric/LMNN.cpp
index 011b5e92955..39590e56b26 100644
--- a/src/shogun/metric/LMNN.cpp
+++ b/src/shogun/metric/LMNN.cpp
@@ -8,14 +8,10 @@
 #include
 #include
+#include
 #include

-// useful shorthand to perform operations with Eigen matrices
-// trace of the product of two matrices computed fast using trace(A*B)=sum(A.*B')
-#define TRACE(A,B) (((A).array()*(B).transpose().array()).sum())
-
 using namespace shogun;
-using namespace Eigen;

 CLMNN::CLMNN()
 {
@@ -66,15 +62,14 @@ void CLMNN::train(SGMatrix<float64_t> init_transform)
     CMulticlassLabels* y = CLabelsFactory::to_multiclass(m_labels);
     SG_DEBUG("%d input vectors with %d dimensions.\n", x->get_num_vectors(), x->get_num_features());

-    // Use Eigen matrix for the linear transform L. The Mahalanobis distance is L^T*L
-    MatrixXd L = Map<MatrixXd>(init_transform.matrix, init_transform.num_rows,
-            init_transform.num_cols);
+    auto& L = init_transform;
     // Compute target or genuine neighbours
     SG_DEBUG("Finding target nearest neighbors.\n")
     SGMatrix<index_t> target_nn = CLMNNImpl::find_target_nn(x, y, m_k);
     // Initialize (sub-)gradient
     SG_DEBUG("Summing outer products for (sub-)gradient initialization.\n")
-    MatrixXd gradient = (1-m_regularization)*CLMNNImpl::sum_outer_products(x, target_nn);
+    auto gradient = CLMNNImpl::sum_outer_products(x, target_nn);
+    linalg::scale(gradient, gradient, 1 - m_regularization);
     // Value of the objective function at every iteration
     SGVector<float64_t> obj(m_maxiter);
     // The step size is modified depending on how the objective changes, leave the
@@ -109,8 +104,13 @@ void CLMNN::train(SGMatrix<float64_t> init_transform)

         // Compute the objective, trace of Mahalanobis distance matrix (L squared) times the gradient
         // plus the number of current impostors to account for the margin
+        // trace of the product of two matrices computed fast using
+        // trace(A*B)=sum(A.*B'); both factors here are symmetric
         SG_DEBUG("Computing objective.\n")
-        obj[iter] = TRACE(L.transpose()*L,gradient) + m_regularization*cur_impostors.size();
+        obj[iter] = m_regularization * cur_impostors.size();
+        obj[iter] += linalg::sum(
+            linalg::element_prod(
+                linalg::matrix_prod(L, L, true, false), gradient));

         // Correct step size
         CLMNNImpl::correct_stepsize(stepsize, obj, iter);
@@ -139,7 +139,8 @@ void CLMNN::train(SGMatrix<float64_t> init_transform)

     // Store the transformation found in the class attribute
     int32_t nfeats = x->get_num_features();
-    float64_t* cloned_data = SGMatrix<float64_t>::clone_matrix(L.data(), nfeats, nfeats);
+    float64_t* cloned_data =
+        SGMatrix<float64_t>::clone_matrix(L.matrix, nfeats, nfeats);
     m_linear_transform = SGMatrix<float64_t>(cloned_data, nfeats, nfeats);

     SG_DEBUG("Leaving CLMNN::train().\n")
@@ -153,23 +154,11 @@ SGMatrix<float64_t> CLMNN::get_linear_transform() const
 CCustomMahalanobisDistance* CLMNN::get_distance() const
 {
     // Compute Mahalanobis distance matrix M = L^T*L
-
-    // Put the linear transform L in Eigen to perform the matrix multiplication
-    // L is not copied to another region of memory
-    Map<MatrixXd> map_linear_transform(m_linear_transform.matrix,
-            m_linear_transform.num_rows, m_linear_transform.num_cols);
-    // TODO exploit that M is symmetric
-    MatrixXd M = map_linear_transform.transpose()*map_linear_transform;
-    // TODO avoid copying
-    SGMatrix<float64_t> mahalanobis_matrix(M.rows(), M.cols());
-    for (index_t i = 0; i < M.rows(); i++)
-        for (index_t j = 0; j < M.cols(); j++)
-            mahalanobis_matrix(i,j) = M(i,j);
+    auto M = linalg::matrix_prod(
+        m_linear_transform, m_linear_transform, true, false);

     // Create custom Mahalanobis distance with matrix M associated with the training features
-
-    CCustomMahalanobisDistance* distance =
-            new CCustomMahalanobisDistance(m_features, m_features, mahalanobis_matrix);
+    auto distance = new CCustomMahalanobisDistance(m_features, m_features, M);
     SG_REF(distance)

     return distance;
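Note on the objective computation above: the removed TRACE macro relied on the identity trace(A*B) = sum(A .* B'), which replaces an O(n^3) matrix product by an O(n^2) element-wise sum; since both L^T*L and the gradient are symmetric here, the transpose can be dropped. The following is a standalone sketch of that identity in plain C++ (small hard-coded matrices, not the Shogun API):

    #include <cstdio>

    int main()
    {
        const int n = 2;
        double A[n][n] = {{1, 2}, {3, 4}}; // arbitrary test values
        double B[n][n] = {{5, 6}, {7, 8}};

        // direct way: form C = A*B, then sum the diagonal -- O(n^3)
        double C[n][n] = {};
        for (int i = 0; i < n; ++i)
            for (int j = 0; j < n; ++j)
                for (int l = 0; l < n; ++l)
                    C[i][j] += A[i][l] * B[l][j];
        double trace = 0;
        for (int i = 0; i < n; ++i)
            trace += C[i][i];

        // shortcut: sum(A .* B'), i.e. A(i,j)*B(j,i) summed over i,j -- O(n^2)
        double fast = 0;
        for (int i = 0; i < n; ++i)
            for (int j = 0; j < n; ++j)
                fast += A[i][j] * B[j][i];

        std::printf("trace(A*B)=%g  sum(A.*B')=%g\n", trace, fast); // both 69
        return 0;
    }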
diff --git a/src/shogun/metric/LMNNImpl.cpp b/src/shogun/metric/LMNNImpl.cpp
index 7d18814a8f9..9770eb01d2d 100644
--- a/src/shogun/metric/LMNNImpl.cpp
+++ b/src/shogun/metric/LMNNImpl.cpp
@@ -6,20 +6,13 @@

 #include
-
+#include
+#include
 #include
-#include
 #include
-
-#include
-
-/// useful shorthands to perform operations with Eigen matrices
-
-// column-wise sum of the squared elements of a matrix
-#define SUMSQCOLS(A) ((A).array().square().colwise().sum())
+#include

 using namespace shogun;
-using namespace Eigen;

 CImpostorNode::CImpostorNode(index_t ex, index_t tar, index_t imp)
 : example(ex), target(tar), impostor(imp)
@@ -95,11 +88,11 @@ SGMatrix<index_t> CLMNNImpl::find_target_nn(CDenseFeatures<float64_t>* x,
             }
         }

-        MatrixXd slice_mat(d, slice_size);
+        SGMatrix<float64_t> slice_mat(d, slice_size);
         for (int32_t j = 0; j < slice_size; ++j)
-            slice_mat.col(j) = Map<VectorXd>(x->get_feature_vector(idxsmap[j]).vector, d);
+            slice_mat.set_column(j, x->get_feature_vector(idxsmap[j]));

-        features_slice->set_feature_matrix(SGMatrix<float64_t>(slice_mat.data(), d, slice_size, false));
+        features_slice->set_feature_matrix(slice_mat);

         //FIXME the labels are not actually necessary to get the nearest neighbors, the
         //features suffice. The labels are needed when we want to classify.
@@ -131,49 +124,46 @@ SGMatrix<index_t> CLMNNImpl::find_target_nn(CDenseFeatures<float64_t>* x,
     return target_neighbors;
 }

-MatrixXd CLMNNImpl::sum_outer_products(CDenseFeatures<float64_t>* x, const SGMatrix<index_t> target_nn)
+SGMatrix<float64_t> CLMNNImpl::sum_outer_products(
+    CDenseFeatures<float64_t>* x, const SGMatrix<index_t>& target_nn)
 {
     // get the number of features
     int32_t d = x->get_num_features();
     // initialize the sum of outer products (sop)
-    MatrixXd sop(d,d);
-    sop.setZero();
-    // map the feature matrix (each column is a feature vector) to an Eigen matrix
-    Map<MatrixXd> X(x->get_feature_matrix().matrix, d, x->get_num_vectors());
+    SGMatrix<float64_t> sop(d, d);
+
+    auto X = x->get_feature_matrix();

     // sum the outer products stored in C using the indices specified in target_nn
     for (index_t i = 0; i < target_nn.num_cols; ++i)
     {
         for (index_t j = 0; j < target_nn.num_rows; ++j)
         {
-            VectorXd dx = X.col(i) - X.col(target_nn(j,i));
-            sop += dx*dx.transpose();
+            auto dx = linalg::add(
+                X.get_column(i), X.get_column(target_nn(j, i)), 1.0, -1.0);
+            linalg::dger(1.0, dx, dx, sop);
         }
     }

     return sop;
 }

-ImpostorsSetType CLMNNImpl::find_impostors(CDenseFeatures<float64_t>* x,
-        CMulticlassLabels* y, const MatrixXd& L, const SGMatrix<index_t> target_nn,
-        const uint32_t iter, const uint32_t correction)
+ImpostorsSetType CLMNNImpl::find_impostors(
+    CDenseFeatures<float64_t>* x, CMulticlassLabels* y,
+    const SGMatrix<float64_t>& L, const SGMatrix<index_t>& target_nn,
+    const uint32_t iter, const uint32_t correction)
 {
     SG_SDEBUG("Entering CLMNNImpl::find_impostors().\n")

-    // get the number of examples from data
-    int32_t n = x->get_num_vectors();
-    // get the number of features
-    int32_t d = x->get_num_features();
     // get the number of neighbors
     int32_t k = target_nn.num_rows;

-    // map the feature matrix (each column is a feature vector) to an Eigen matrix
-    Map<MatrixXd> X(x->get_feature_matrix().matrix, d, n);
+    auto X = x->get_feature_matrix();
     // transform the feature vectors
-    MatrixXd LX = L*X;
+    auto LX = linalg::matrix_prod(L, X);

     // compute square distances plus margin from examples to target neighbors
-    MatrixXd sqdists = CLMNNImpl::compute_sqdists(LX,target_nn);
+    auto sqdists =
+        CLMNNImpl::compute_sqdists(LX, target_nn);

     // initialize impostors set
     ImpostorsSetType N;
@@ -185,12 +175,14 @@ ImpostorsSetType CLMNNImpl::find_impostors(CDenseFeatures<float64_t>* x,
             "impostors set must be greater than 0\n")

     if ((iter % correction)==0)
     {
-        Nexact = CLMNNImpl::find_impostors_exact(LX, sqdists, y, target_nn, k);
+        Nexact = CLMNNImpl::find_impostors_exact(
+            SGMatrix<float64_t>(LX), sqdists, y, target_nn, k);
         N = Nexact;
     }
     else
     {
-        N = CLMNNImpl::find_impostors_approx(LX, sqdists, Nexact, target_nn);
+        N = CLMNNImpl::find_impostors_approx(
+            SGMatrix<float64_t>(LX), sqdists, Nexact, target_nn);
     }

     SG_SDEBUG("Leaving CLMNNImpl::find_impostors().\n")
@@ -198,58 +190,67 @@ ImpostorsSetType CLMNNImpl::find_impostors(CDenseFeatures<float64_t>* x,
     return N;
 }

-void CLMNNImpl::update_gradient(CDenseFeatures<float64_t>* x, MatrixXd& G,
-        const ImpostorsSetType& Nc, const ImpostorsSetType& Np, float64_t regularization)
+void CLMNNImpl::update_gradient(
+    CDenseFeatures<float64_t>* x, SGMatrix<float64_t>& G,
+    const ImpostorsSetType& Nc, const ImpostorsSetType& Np,
+    float64_t regularization)
 {
     // compute the difference sets
     ImpostorsSetType Np_Nc, Nc_Np;
     set_difference(Np.begin(), Np.end(), Nc.begin(), Nc.end(), inserter(Np_Nc, Np_Nc.begin()));
     set_difference(Nc.begin(), Nc.end(), Np.begin(), Np.end(), inserter(Nc_Np, Nc_Np.begin()));

-    // map the feature matrix (each column is a feature vector) to an Eigen matrix
-    Map<MatrixXd> X(x->get_feature_matrix().matrix, x->get_num_features(), x->get_num_vectors());
+    auto X = x->get_feature_matrix();

     // remove the gradient contributions of the impostors that were in the previous
     // set but disappeared in the current
     for (ImpostorsSetType::iterator it = Np_Nc.begin(); it != Np_Nc.end(); ++it)
     {
-        VectorXd dx1 = X.col(it->example) - X.col(it->target);
-        VectorXd dx2 = X.col(it->example) - X.col(it->impostor);
-        G -= regularization*(dx1*dx1.transpose() - dx2*dx2.transpose());
+        // G -= regularization*(dx1*dx1' - dx2*dx2');
+        auto dx1 = linalg::add(
+            X.get_column(it->example), X.get_column(it->target), 1.0, -1.0);
+        auto dx2 = linalg::add(
+            X.get_column(it->example), X.get_column(it->impostor), 1.0, -1.0);
+        linalg::dger(-regularization, dx1, dx1, G);
+        linalg::dger(regularization, dx2, dx2, G);
     }

     // add the gradient contributions of the new impostors
     for (ImpostorsSetType::iterator it = Nc_Np.begin(); it != Nc_Np.end(); ++it)
     {
-        VectorXd dx1 = X.col(it->example) - X.col(it->target);
-        VectorXd dx2 = X.col(it->example) - X.col(it->impostor);
-        G += regularization*(dx1*dx1.transpose() - dx2*dx2.transpose());
+        // G += regularization*(dx1*dx1' - dx2*dx2');
+        auto dx1 = linalg::add(
+            X.get_column(it->example), X.get_column(it->target), 1.0, -1.0);
+        auto dx2 = linalg::add(
+            X.get_column(it->example), X.get_column(it->impostor), 1.0, -1.0);
+        linalg::dger(regularization, dx1, dx1, G);
+        linalg::dger(-regularization, dx2, dx2, G);
     }
 }

-void CLMNNImpl::gradient_step(MatrixXd& L, const MatrixXd& G, float64_t stepsize, bool diagonal)
+void CLMNNImpl::gradient_step(
+    SGMatrix<float64_t>& L, const SGMatrix<float64_t>& G, float64_t stepsize,
+    bool diagonal)
 {
     if (diagonal)
     {
         // compute M as the square of L
-        MatrixXd M = L.transpose()*L;
+        auto M = linalg::matrix_prod(L, L, true, false);
         // do step in M along the gradient direction
-        M -= stepsize*G;
+        linalg::add(M, G, M, 1.0, -stepsize);
         // keep only the elements in the diagonal of M
-        VectorXd m = M.diagonal();
-
-        VectorXd zero;
-        zero.resize(m.size());
-        zero.setZero();
+        auto m = M.get_diagonal_vector();
+        for (auto i : range(m.vlen))
+            m[i] = m[i] > 0 ?
+                CMath::sqrt(m[i]) : 0.0;

         // return to representation in L
-        VectorXd l = m.array().max(zero.array()).array().sqrt();
-        L = l.asDiagonal();
+        SGMatrix<float64_t>::create_diagonal_matrix(L.matrix, m.vector, m.vlen);
     }
     else
     {
         // do step in L along the gradient direction (no need to project M then)
-        L -= stepsize*(2*L*G);
+        // L -= stepsize*(2*L*G)
+        linalg::dgemm(-2.0 * stepsize, L, G, false, false, 1.0, L);
     }
 }

@@ -331,25 +332,23 @@ SGMatrix<float64_t> CLMNNImpl::compute_pca_transform(CDenseFeatures<float64_t>*
     return pca_transform;
 }

-MatrixXd CLMNNImpl::compute_sqdists(MatrixXd& LX, const SGMatrix<index_t> target_nn)
+SGMatrix<float64_t> CLMNNImpl::compute_sqdists(
+    const SGMatrix<float64_t>& LX, const SGMatrix<index_t>& target_nn)
 {
     // get the number of examples
-    ASSERT(LX.cols()==target_nn.num_cols)
-    int32_t n = LX.cols();
-    // get the number of features
-    int32_t d = LX.rows();
+    ASSERT(LX.num_cols == target_nn.num_cols)
+    int32_t n = LX.num_cols;
     // get the number of neighbors
     int32_t k = target_nn.num_rows;

     /// compute square distances to target neighbors plus margin

     // create Shogun features from LX to later apply subset
-    SGMatrix<float64_t> lx_mat(LX.data(), d, n, false);
-    CDenseFeatures<float64_t>* lx = new CDenseFeatures<float64_t>(lx_mat);
+    CDenseFeatures<float64_t>* lx = new CDenseFeatures<float64_t>(LX);

     // initialize distances
-    MatrixXd sqdists(k,n);
-    sqdists.setZero();
+    SGMatrix<float64_t> sqdists(k, n);
+
     for (int32_t i = 0; i < k; ++i)
     {
         //FIXME avoid copying the rows of target_nn and access them directly. Maybe
@@ -359,7 +358,11 @@ MatrixXd CLMNNImpl::compute_sqdists(MatrixXd& LX, const SGMatrix<index_t> target
         lx->add_subset(subset_vec);
         // after the subset, there are still n columns, i.e. the subset is used to
         // modify the order of the columns in x according to the target neighbors
-        sqdists.row(i) = SUMSQCOLS(LX - Map<MatrixXd>(lx->get_feature_matrix().matrix, d, n)) + 1;
+        auto diff = linalg::add(LX, lx->get_feature_matrix(), 1.0, -1.0);
+        auto sum = linalg::colwise_sum(linalg::element_prod(diff, diff));
+        for (int j = 0; j < sum.vlen; j++)
+            sqdists(i, j) = sum[j] + 1;
+
         lx->remove_subset();
     }

@@ -369,21 +372,17 @@ MatrixXd CLMNNImpl::compute_sqdists(MatrixXd& LX, const SGMatrix<index_t> target
     return sqdists;
 }

-ImpostorsSetType CLMNNImpl::find_impostors_exact(MatrixXd& LX, const MatrixXd& sqdists,
-        CMulticlassLabels* y, const SGMatrix<index_t> target_nn, int32_t k)
+ImpostorsSetType CLMNNImpl::find_impostors_exact(
+    const SGMatrix<float64_t>& LX, const SGMatrix<float64_t>& sqdists,
+    CMulticlassLabels* y, const SGMatrix<index_t>& target_nn, int32_t k)
 {
     SG_SDEBUG("Entering CLMNNImpl::find_impostors_exact().\n")

     // initialize empty impostors set
     ImpostorsSetType N = ImpostorsSetType();

-    // get the number of examples from data
-    int32_t n = LX.cols();
-    // get the number of features
-    int32_t d = LX.rows();
     // create Shogun features from LX to later apply subset
-    SGMatrix<float64_t> lx_mat(LX.data(), d, n, false);
-    CDenseFeatures<float64_t>* lx = new CDenseFeatures<float64_t>(lx_mat);
+    CDenseFeatures<float64_t>* lx = new CDenseFeatures<float64_t>(LX);

     // get a vector with unique label values
     SGVector<float64_t> unique = y->get_unique_labels();
@@ -429,8 +428,9 @@ ImpostorsSetType CLMNNImpl::find_impostors_exact(MatrixXd& LX, const MatrixXd& s
     return N;
 }

-ImpostorsSetType CLMNNImpl::find_impostors_approx(MatrixXd& LX, const MatrixXd& sqdists,
-        const ImpostorsSetType& Nexact, const SGMatrix<index_t> target_nn)
+ImpostorsSetType CLMNNImpl::find_impostors_approx(
+    const SGMatrix<float64_t>& LX, const SGMatrix<float64_t>& sqdists,
+    const ImpostorsSetType& Nexact, const SGMatrix<index_t>& target_nn)
 {
     SG_SDEBUG("Entering CLMNNImpl::find_impostors_approx().\n")

@@ -462,20 +462,16 @@ ImpostorsSetType CLMNNImpl::find_impostors_approx(MatrixXd& LX, const MatrixXd&
     return N;
 }

-SGVector<float64_t> CLMNNImpl::compute_impostors_sqdists(MatrixXd& LX, const ImpostorsSetType& Nexact)
+SGVector<float64_t> CLMNNImpl::compute_impostors_sqdists(
+    const SGMatrix<float64_t>& LX, const ImpostorsSetType& Nexact)
 {
-    // get the number of examples
-    int32_t n = LX.cols();
-    // get the number of features
-    int32_t d = LX.rows();
     // get the number of impostors
     size_t num_impostors = Nexact.size();

     /// compute square distances to impostors

     // create Shogun features from LX and distance
-    SGMatrix<float64_t> lx_mat(LX.data(), d, n, false);
-    CDenseFeatures<float64_t>* lx = new CDenseFeatures<float64_t>(lx_mat);
+    CDenseFeatures<float64_t>* lx = new CDenseFeatures<float64_t>(LX);
     CEuclideanDistance* euclidean = new CEuclideanDistance(lx,lx);
     euclidean->set_disable_sqrt(true);
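Note on update_gradient above: each Eigen expression of the form G ± regularization*(dx1*dx1' - dx2*dx2') becomes a pair of BLAS-style rank-1 updates (ger: A += alpha*x*y'). Below is a minimal standalone sketch of such an update in plain C++, column-major like SGMatrix; names and values are illustrative only, not the Shogun API:

    #include <cstdio>
    #include <vector>

    // A += alpha * x * y^T; A is n x n in column-major storage.
    static void ger(double alpha, const std::vector<double>& x,
                    const std::vector<double>& y, std::vector<double>& A, int n)
    {
        for (int j = 0; j < n; ++j)     // column of A
            for (int i = 0; i < n; ++i) // row of A
                A[j * n + i] += alpha * x[i] * y[j];
    }

    int main()
    {
        const int n = 2;
        std::vector<double> G(n * n, 0.0);     // gradient accumulator
        std::vector<double> dx1 = {1.0, -2.0}; // e.g. x_example - x_target
        std::vector<double> dx2 = {0.5, 1.0};  // e.g. x_example - x_impostor
        double mu = 0.5;                       // regularization weight

        // G += mu*(dx1*dx1' - dx2*dx2'), done as two rank-1 updates
        ger(mu, dx1, dx1, G, n);
        ger(-mu, dx2, dx2, G, n);

        for (int i = 0; i < n; ++i)
            std::printf("%g %g\n", G[0 * n + i], G[1 * n + i]);
        return 0;
    }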
diff --git a/src/shogun/metric/LMNNImpl.h b/src/shogun/metric/LMNNImpl.h
index 6a1b2d7480a..706ee6e69c6 100644
--- a/src/shogun/metric/LMNNImpl.h
+++ b/src/shogun/metric/LMNNImpl.h
@@ -15,7 +15,6 @@
 #include
 #include
 #include
-#include
 #include
 #include

@@ -88,16 +87,25 @@ class CLMNNImpl
         static SGMatrix<index_t> find_target_nn(CDenseFeatures<float64_t>* x,
                 CMulticlassLabels* y, int32_t k);

         /** sum the outer products indicated by target_nn */
-        static Eigen::MatrixXd sum_outer_products(CDenseFeatures<float64_t>* x, const SGMatrix<index_t> target_nn);
+        static SGMatrix<float64_t> sum_outer_products(
+            CDenseFeatures<float64_t>* x, const SGMatrix<index_t>& target_nn);

         /** find the impostors that remain after applying the transformation L */
-        static ImpostorsSetType find_impostors(CDenseFeatures<float64_t>* x, CMulticlassLabels* y, const Eigen::MatrixXd& L, const SGMatrix<index_t> target_nn, const uint32_t iter, const uint32_t correction);
+        static ImpostorsSetType find_impostors(
+            CDenseFeatures<float64_t>* x, CMulticlassLabels* y,
+            const SGMatrix<float64_t>& L, const SGMatrix<index_t>& target_nn,
+            const uint32_t iter, const uint32_t correction);

         /** update the gradient using the last transition in the impostors sets */
-        static void update_gradient(CDenseFeatures<float64_t>* x, Eigen::MatrixXd& G, const ImpostorsSetType& Nc, const ImpostorsSetType& Np, float64_t mu);
+        static void update_gradient(
+            CDenseFeatures<float64_t>* x, SGMatrix<float64_t>& G,
+            const ImpostorsSetType& Nc, const ImpostorsSetType& Np,
+            float64_t mu);

         /** take gradient step and project onto positive semi-definite cone if necessary */
-        static void gradient_step(Eigen::MatrixXd& L, const Eigen::MatrixXd& G, float64_t stepsize, bool diagonal);
+        static void gradient_step(
+            SGMatrix<float64_t>& L, const SGMatrix<float64_t>& G,
+            float64_t stepsize, bool diagonal);

         /** correct step size depending on the last fluctuation of the objective */
         static void correct_stepsize(float64_t& stepsize, const SGVector<float64_t> obj, const uint32_t iter);

@@ -118,19 +126,26 @@ class CLMNNImpl
          * compute squared distances plus margin between each example and its target neighbors
          * in the transformed feature space
          */
-        static Eigen::MatrixXd compute_sqdists(Eigen::MatrixXd& L, const SGMatrix<index_t> target_nn);
+        static SGMatrix<float64_t> compute_sqdists(
+            const SGMatrix<float64_t>& L, const SGMatrix<index_t>& target_nn);

         /**
          * compute squared distances between examples and impostors in the given impostors set
          * Nexact
          */
-        static SGVector<float64_t> compute_impostors_sqdists(Eigen::MatrixXd& L, const ImpostorsSetType& Nexact);
+        static SGVector<float64_t> compute_impostors_sqdists(
+            const SGMatrix<float64_t>& L, const ImpostorsSetType& Nexact);

         /** find impostors; variant computing the impostors exactly, using all the data */
-        static ImpostorsSetType find_impostors_exact(Eigen::MatrixXd& LX, const Eigen::MatrixXd& sqdists, CMulticlassLabels* y, const SGMatrix<index_t> target_nn, int32_t k);
+        static ImpostorsSetType find_impostors_exact(
+            const SGMatrix<float64_t>& LX, const SGMatrix<float64_t>& sqdists,
+            CMulticlassLabels* y, const SGMatrix<index_t>& target_nn,
+            int32_t k);

         /** find impostors; approximate variant, using the last exact set of impostors */
-        static ImpostorsSetType find_impostors_approx(Eigen::MatrixXd& LX, const Eigen::MatrixXd& sqdists, const ImpostorsSetType& Nexact, const SGMatrix<index_t> target_nn);
+        static ImpostorsSetType find_impostors_approx(
+            const SGMatrix<float64_t>& LX, const SGMatrix<float64_t>& sqdists,
+            const ImpostorsSetType& Nexact, const SGMatrix<index_t>& target_nn);

         /** get the indices of the examples whose label is equal to yi */
         static std::vector<index_t> get_examples_label(CMulticlassLabels* y, float64_t yi);
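Note on the diagonal branch of gradient_step: when L is constrained to be diagonal, the step is taken on M = L^T*L, the diagonal of M is clamped at zero (the projection onto the positive semi-definite cone), and L is recovered by an element-wise square root. A standalone sketch of that computation in plain C++ (made-up values, not the Shogun API):

    #include <cmath>
    #include <cstdio>

    int main()
    {
        const int d = 3;
        double l[d] = {1.0, 0.5, 2.0};   // diagonal of L
        double g[d] = {0.2, 0.4, -0.1};  // diagonal of the (sub-)gradient G
        double stepsize = 0.5;

        for (int i = 0; i < d; ++i)
        {
            // diagonal of M = L^T*L is l[i]^2; step along -stepsize*g
            double m = l[i] * l[i] - stepsize * g[i];
            // clamp at zero (PSD projection) and return to L
            l[i] = m > 0 ? std::sqrt(m) : 0.0;
        }

        for (int i = 0; i < d; ++i)
            std::printf("L(%d,%d) = %g\n", i, i, l[i]);
        return 0;
    }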
diff --git a/tests/unit/metric/LMNNImpl_unittest.cc b/tests/unit/metric/LMNNImpl_unittest.cc
index 61caec5e1ff..644991c41eb 100644
--- a/tests/unit/metric/LMNNImpl_unittest.cc
+++ b/tests/unit/metric/LMNNImpl_unittest.cc
@@ -8,9 +8,10 @@
  */

 #include
-#include
 #include
 #include
+#include
+#include

 using namespace shogun;

@@ -91,11 +92,11 @@ TEST(LMNNImpl,sum_outer_products)
     target_nn(0,3)=2;

     // compute the sum of outer products of vector differences given by target_nn
-    Eigen::MatrixXd sop=CLMNNImpl::sum_outer_products(features,target_nn);
+    auto sop = CLMNNImpl::sum_outer_products(features, target_nn);

     // check output dimensions
-    EXPECT_EQ(sop.rows(), d);
-    EXPECT_EQ(sop.cols(), d);
+    EXPECT_EQ(sop.num_rows, d);
+    EXPECT_EQ(sop.num_cols, d);

     // check output contents
     EXPECT_EQ(sop(0,0), 8);
     EXPECT_EQ(sop(0,1), 0);
@@ -109,8 +110,8 @@ TEST(LMNNImpl,sum_outer_products)
     sop=CLMNNImpl::sum_outer_products(features,target_nn);

     // check output dimensions
-    EXPECT_EQ(sop.rows(), d);
-    EXPECT_EQ(sop.cols(), d);
+    EXPECT_EQ(sop.num_rows, d);
+    EXPECT_EQ(sop.num_cols, d);

     // check output contents
     for (int32_t i=0; i(0,0));

     // check output dimensions
-    EXPECT_EQ(sop.rows(), d);
-    EXPECT_EQ(sop.cols(), d);
+    EXPECT_EQ(sop.num_rows, d);
+    EXPECT_EQ(sop.num_cols, d);

     // check output contents
     for (int32_t i=0; i target_nn=CLMNNImpl::find_target_nn(features,labels,k);

     // find impostors with exact search (force exact search by setting correction=1)
-    ImpostorsSetType impostors=CLMNNImpl::find_impostors(features,labels,
-            Eigen::MatrixXd::Identity(d,d),target_nn,0,1);
+    SGMatrix<float64_t> L(d, d);
+    linalg::identity(L);
+    ImpostorsSetType impostors =
+        CLMNNImpl::find_impostors(features, labels, L, target_nn, 0, 1);

     // impostors ground truth computed externally
     index_t impostors_arr[] = {0,1,2, 0,1,3, 2,3,0, 2,3,1, 3,2,0, 3,2,1};
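Note on the find_impostors test: with L set to the identity, LX equals X, so the exact impostor search operates on plain squared Euclidean distances plus the unit margin added in compute_sqdists. A standalone sketch of that distance-plus-margin computation (plain C++, hypothetical column-major data, not the Shogun API):

    #include <cstdio>

    int main()
    {
        const int d = 2, n = 3;
        // LX: transformed features, one example per column (column-major)
        double LX[d * n] = {0.0, 0.0,  1.0, 0.0,  0.0, 2.0};

        // squared distance plus margin between column a and column b
        auto sqdist_margin = [&](int a, int b) {
            double s = 0;
            for (int i = 0; i < d; ++i)
            {
                double diff = LX[a * d + i] - LX[b * d + i];
                s += diff * diff;
            }
            return s + 1; // the "+1" is the margin used in compute_sqdists
        };

        std::printf("d(0,1)^2 + 1 = %g\n", sqdist_margin(0, 1)); // 2
        std::printf("d(0,2)^2 + 1 = %g\n", sqdist_margin(0, 2)); // 5
        return 0;
    }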