Skip to content

Commit

Permalink
Merge pull request #3042 from Saurabh7/eucl_dist_dot
Browse files Browse the repository at this point in the history
add dot products and precomputed norms to CEuclideanDistance
  • Loading branch information
karlnapf committed Mar 10, 2016
2 parents 2167fd5 + fa97054 commit 45d1c4e
Show file tree
Hide file tree
Showing 3 changed files with 105 additions and 3 deletions.
53 changes: 50 additions & 3 deletions src/shogun/distance/EuclideanDistance.cpp
Expand Up @@ -11,6 +11,7 @@
#include <shogun/lib/common.h>
#include <shogun/io/SGIO.h>
#include <shogun/distance/EuclideanDistance.h>
#include <shogun/mathematics/linalg/linalg.h>

using namespace shogun;

Expand Down Expand Up @@ -54,9 +55,19 @@ float64_t CEuclideanDistance::compute(int32_t idx_a, int32_t idx_b)
get_feature_vector(idx_b, blen, bfree);
ASSERT(alen==blen)

for (int32_t i=0; i<alen; i++)
result+=CMath::sq(avec[i] - bvec[i]);
result+=CMath::dot(avec, bvec, alen);
result*=-2;

if(m_rhs_squared_norms.vector)
result+=m_rhs_squared_norms[idx_b];
else
result+=CMath::dot(bvec, bvec, alen);

if(m_lhs_squared_norms.vector)
result+=m_lhs_squared_norms[idx_a];
else
result+=CMath::dot(avec, avec, alen);

((CDenseFeatures<float64_t>*) lhs)->free_feature_vector(avec, idx_a, afree);
((CDenseFeatures<float64_t>*) rhs)->free_feature_vector(bvec, idx_b, bfree);

Expand All @@ -66,11 +77,47 @@ float64_t CEuclideanDistance::compute(int32_t idx_a, int32_t idx_b)
return CMath::sqrt(result);
}

void CEuclideanDistance::precompute_rhs_squared_norms()
{
SGVector<float64_t>rhs_sq=SGVector<float64_t>(rhs->get_num_vectors());

for(int32_t idx_i =0; idx_i<rhs->get_num_vectors(); idx_i++)
{
SGVector<float64_t> tempvec=((CDenseFeatures<float64_t>*) rhs)->get_feature_vector(idx_i);
rhs_sq[idx_i]=linalg::dot(tempvec, tempvec);
((CDenseFeatures<float64_t>*) rhs)->free_feature_vector(tempvec, idx_i);
}

m_rhs_squared_norms=rhs_sq;
}

void CEuclideanDistance::precompute_lhs_squared_norms()
{
SGVector<float64_t>lhs_sq=SGVector<float64_t>(lhs->get_num_vectors());

for(int32_t idx_i=0; idx_i<lhs->get_num_vectors(); idx_i++)
{
SGVector<float64_t> tempvec=((CDenseFeatures<float64_t>*) lhs)->get_feature_vector(idx_i);
lhs_sq[idx_i]=linalg::dot(tempvec, tempvec);
((CDenseFeatures<float64_t>*) lhs)->free_feature_vector(tempvec, idx_i);
}

m_lhs_squared_norms=lhs_sq;
}

void CEuclideanDistance::reset_squared_norms()
{
m_lhs_squared_norms=SGVector<float64_t>();
m_rhs_squared_norms=SGVector<float64_t>();
}

// Puts the object into its default state (sqrt enabled, no cached squared
// norms) and registers the members with the parameter framework.
void CEuclideanDistance::init()
{
	// start without any precomputed norms
	reset_squared_norms();
	disable_sqrt=false;

	// registration order is kept as is: disable_sqrt, rhs norms, lhs norms
	m_parameters->add(&disable_sqrt, "disable_sqrt",
			"If sqrt shall not be applied.");
	m_parameters->add(&m_rhs_squared_norms, "m_rhs_squared_norms",
			"squared norms from features of right hand side");
	m_parameters->add(&m_lhs_squared_norms, "m_lhs_squared_norms",
			"squared norms from features of left hand side");
}

float64_t CEuclideanDistance::distance_upper_bounded(int32_t idx_a, int32_t idx_b, float64_t upper_bound)
Expand Down
33 changes: 33 additions & 0 deletions src/shogun/distance/EuclideanDistance.h
Expand Up @@ -37,6 +37,13 @@ namespace shogun
* \f[\displaystyle
* d({\bf x},{\bf x'})= \sum_{i=0}^{n}|{\bf x_i}-{\bf x'_i}|^2
* \f]
*
* The distance is computed as:
* \f[\displaystyle
* \sqrt{{\bf x}\cdot {\bf x} - 2{\bf x}\cdot {\bf x'} + {\bf x'}\cdot {\bf x'}}
* \f]
*
* Squared norms for left hand side and right hand side features can be precomputed.
* WARNING : Make sure to reset squared norms using reset_squared_norms() when features
* or feature matrix are changed.
*
* @see CMinkowskiMetric
* @see <a href="http://en.wikipedia.org/wiki/Distance#Distance_in_Euclidean_space">
Expand Down Expand Up @@ -112,6 +119,26 @@ class CEuclideanDistance: public CRealDistance
* @return distance value or upper_bound
*/
virtual float64_t distance_upper_bounded(int32_t idx_a, int32_t idx_b, float64_t upper_bound);

/**
* Precompute squared norms from features of right hand side
* WARNING : Make sure to reset squared norms using reset_squared_norms()
* when features or feature matrix are changed.
*/
virtual void precompute_rhs_squared_norms();

/**
* Precompute squared norms from features of left hand side
* WARNING : Make sure to reset squared norms using reset_squared_norms()
* when features or feature matrix are changed.
*/
virtual void precompute_lhs_squared_norms();

/**
* Reset squared norms for features of both sides.
* Squared norms should be reset whenever features or feature matrix are changed.
*/
virtual void reset_squared_norms();

protected:
/// compute kernel function for features a and b
Expand All @@ -125,6 +152,12 @@ class CEuclideanDistance: public CRealDistance
protected:
/** if application of sqrt on matrix computation is disabled */
bool disable_sqrt;

/** squared norms from features of right hand side */
SGVector<float64_t> m_rhs_squared_norms;

/** squared norms from features of left hand side */
SGVector<float64_t> m_lhs_squared_norms;
};

} // namespace shogun
Expand Down
22 changes: 22 additions & 0 deletions tests/unit/distance/EuclideanDistance_unittest.cc
Expand Up @@ -63,6 +63,28 @@ TEST(EuclideanDistance,distance)
SG_UNREF(euclidean); // the features are unref-ed here as well
}

// Verifies that squared Euclidean distances are unchanged when the squared
// norms of both sides are precomputed instead of being evaluated per call.
TEST(EuclideanDistance, distance_precomputed_norms)
{
	CDenseFeatures<float64_t>* features_lhs=create_lhs();
	CDenseFeatures<float64_t>* features_rhs=create_rhs();

	// put features into distance object to compute squared Euclidean distances
	CEuclideanDistance* euclidean=new CEuclideanDistance(features_lhs,features_rhs);
	euclidean->set_disable_sqrt(true);
	euclidean->precompute_lhs_squared_norms();
	euclidean->precompute_rhs_squared_norms();

	// check distances computed one by one; the values come from the expansion
	// x.x - 2 x.x' + x'.x', so compare as floating point rather than with
	// exact equality against integer literals
	EXPECT_DOUBLE_EQ(euclidean->distance(0,0), 2.0);
	EXPECT_DOUBLE_EQ(euclidean->distance(0,1), 2.0);
	EXPECT_DOUBLE_EQ(euclidean->distance(1,0), 5.0);
	EXPECT_DOUBLE_EQ(euclidean->distance(1,1), 5.0);

	// release memory
	SG_UNREF(euclidean); // the features are unref-ed here as well
}


TEST(EuclideanDistance,get_distance_matrix)
{
init_shogun_with_defaults();
Expand Down

0 comments on commit 45d1c4e

Please sign in to comment.