Skip to content

Commit

Permalink
Modified unit test for Locally Linear Embedding, so that it allows 'm…
Browse files Browse the repository at this point in the history
…isses' of neighbors before and after the dimensionality reduction.
  • Loading branch information
vladislav-horbatiuk committed Apr 21, 2013
1 parent eb32fa6 commit 4786c8c
Showing 1 changed file with 37 additions and 9 deletions.
46 changes: 37 additions & 9 deletions tests/unit/converter/LocallyLinearEmbedding_unittest.cc
@@ -1,4 +1,5 @@
#include <vector> #include <vector>
#include <set>
#include <algorithm> /* heap operations, std::sort */ #include <algorithm> /* heap operations, std::sort */
#include <iostream> #include <iostream>


Expand Down Expand Up @@ -26,26 +27,29 @@ struct heap_comparator
} }
} comparator; } comparator;


std::vector<index_t> get_neighbors_indices(CDistance* distance_object, index_t feature_vector_index, index_t n_neighbors); std::set<index_t> get_neighbors_indices(CDistance* distance_object, index_t feature_vector_index, index_t n_neighbors);

void check_similarity_of_sets(const std::set<index_t>& first_set,const std::set<index_t>& second_set, float min_similarity_level);


TEST(LocallyLinearEmbeddingTest,neighbors_preserving) TEST(LocallyLinearEmbeddingTest,neighbors_preserving)
{ {
const index_t n_samples = 100; const index_t n_samples = 100;
const index_t n_gaussians = 1; const index_t n_gaussians = 1;
const index_t n_dimensions = 4; const index_t n_dimensions = 4;
const index_t n_target_dimensions = 3; const index_t n_target_dimensions = 3;
const index_t n_neighbors = 30; const index_t n_neighbors = 40;
const float required_similarity_level = 0.5; /*hope we will get rid of this*/
CDenseFeatures<float64_t>* high_dimensional_features = CDenseFeatures<float64_t>* high_dimensional_features =
new CDenseFeatures<float64_t>(CDataGenerator::generate_gaussians(n_samples, n_gaussians, n_dimensions)); new CDenseFeatures<float64_t>(CDataGenerator::generate_gaussians(n_samples, n_gaussians, n_dimensions));


CDistance* high_dimensional_dist = CDistance* high_dimensional_dist =
new CEuclideanDistance(high_dimensional_features, high_dimensional_features); new CEuclideanDistance(high_dimensional_features, high_dimensional_features);


std::vector<std::vector<index_t> > neighbors_for_vectors; std::vector<std::set<index_t> > high_dimensional_neighbors_for_vectors;
/* Find n_neighbors nearest eighbours for each vector */ /* Find n_neighbors nearest eighbours for each vector */
for (index_t i=0; i<n_samples; ++i) for (index_t i=0; i<n_samples; ++i)
{ {
neighbors_for_vectors.push_back(get_neighbors_indices(high_dimensional_dist, i, n_neighbors)); high_dimensional_neighbors_for_vectors.push_back(get_neighbors_indices(high_dimensional_dist, i, n_neighbors));
} }


CLocallyLinearEmbedding* lleEmbedder = CLocallyLinearEmbedding* lleEmbedder =
Expand All @@ -66,15 +70,16 @@ TEST(LocallyLinearEmbeddingTest,neighbors_preserving)


for (index_t i=0; i<n_samples; ++i) for (index_t i=0; i<n_samples; ++i)
{ {
ASSERT_EQ(neighbors_for_vectors[i], get_neighbors_indices(low_dimensional_dist, i, n_neighbors)); std::set<index_t> low_dimensional_neighbors = get_neighbors_indices(low_dimensional_dist, i, n_neighbors);
check_similarity_of_sets(high_dimensional_neighbors_for_vectors[i], low_dimensional_neighbors, required_similarity_level);
} }


SG_UNREF(lleEmbedder); SG_UNREF(lleEmbedder);
SG_UNREF(high_dimensional_dist); SG_UNREF(high_dimensional_dist);
SG_UNREF(low_dimensional_dist); SG_UNREF(low_dimensional_dist);
} }


std::vector<index_t> get_neighbors_indices(CDistance* distance_object, index_t feature_vector_index, index_t n_neighbors) std::set<index_t> get_neighbors_indices(CDistance* distance_object, index_t feature_vector_index, index_t n_neighbors)
{ {
index_t n_vectors = distance_object->get_num_vec_lhs(); index_t n_vectors = distance_object->get_num_vec_lhs();
EXPECT_EQ(n_vectors, distance_object->get_num_vec_rhs()); EXPECT_EQ(n_vectors, distance_object->get_num_vec_rhs());
Expand All @@ -96,15 +101,38 @@ std::vector<index_t> get_neighbors_indices(CDistance* distance_object, index_t f


/* Heapify, and then extract n_neighbors nearest neighbors*/ /* Heapify, and then extract n_neighbors nearest neighbors*/
std::make_heap(distances_and_indices.begin(), distances_and_indices.end(), comparator); std::make_heap(distances_and_indices.begin(), distances_and_indices.end(), comparator);
std::vector<index_t> neighbors_for_current_vector; std::set<index_t> neighbors_for_current_vector;
for (index_t j = 0; j < n_neighbors; ++j) for (index_t j = 0; j < n_neighbors; ++j)
{ {
neighbors_for_current_vector.push_back(distances_and_indices[0].neighbor_index); neighbors_for_current_vector.insert(distances_and_indices[0].neighbor_index);
std::pop_heap(distances_and_indices.begin(), distances_and_indices.end(), comparator); std::pop_heap(distances_and_indices.begin(), distances_and_indices.end(), comparator);
distances_and_indices.pop_back(); distances_and_indices.pop_back();
} }
std::sort(neighbors_for_current_vector.begin(), neighbors_for_current_vector.end());
return neighbors_for_current_vector; return neighbors_for_current_vector;
} }


void check_similarity_of_sets(const std::set<index_t>& first_set,const std::set<index_t>& second_set, float min_similarity_level)
{
index_t total_elements_count = first_set.size();
ASSERT_EQ(total_elements_count, second_set.size()) << "Can not compare sets of different size.";
ASSERT_LE(min_similarity_level, 1.0) << "Similarity level can not be greater than 1.";
ASSERT_GE(min_similarity_level, 0) << "Similarity level can not be less than 0.";
if (min_similarity_level == 0)
/*Nothing to do*/
return;
index_t similar_elements_count = 0;
std::set<index_t>::iterator first_iter = first_set.begin(), second_iter = second_set.begin();
while (first_iter != first_set.end() && second_iter != second_set.end())
{
if (*first_iter < *second_iter)
++first_iter;
else if (*second_iter < *first_iter)
++second_iter;
else
{
++similar_elements_count; ++first_iter; ++second_iter;
}
}
EXPECT_GE((float) similar_elements_count /(float) total_elements_count, min_similarity_level)<<"#similarElements/#total < minimal similarity level.";
}
#endif #endif

0 comments on commit 4786c8c

Please sign in to comment.