[WIP] drop obtain_from and use either ctors or .as<>
vigsterkr committed Feb 6, 2018
1 parent 56ae5e1 commit ba6b0c5
Showing 17 changed files with 139 additions and 133 deletions.
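The pattern of the change, condensed: call sites that previously went through the static helper CDenseFeatures<ST>::obtain_from_generic() (or a raw C-style cast) now downcast through the new CFeatures::as<T>() member, and obtain_from_dot() is replaced by a CDenseFeatures constructor taking a CDotFeatures*. A minimal before/after sketch of the call-site migration; the variable names are illustrative rather than copied from any one file:

    // before: static helper that asserted the feature class and C-cast the pointer
    CDenseFeatures<float64_t>* dense =
        CDenseFeatures<float64_t>::obtain_from_generic(distance->get_lhs());

    // after: chained, runtime-checked cast via the new CFeatures::as<T>()
    CDenseFeatures<float64_t>* dense =
        distance->get_lhs()->as<CDenseFeatures<float64_t>>();
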
@@ -33,7 +33,7 @@ int main(int argc, char** argv)
CFeatures* neg = gen_n->get_streamed_features(num_pos);
CFeatures* pos = gen_p->get_streamed_features(num_neg);
CDenseFeatures<float64_t>* train_feats =
CDenseFeatures<float64_t>::obtain_from_generic(neg->create_merged_copy(pos));
neg->create_merged_copy(pos)->as<CDenseFeatures<float64_t>>();

SGVector<float64_t> tl(num_neg+num_pos);
tl.set_const(1);
2 changes: 1 addition & 1 deletion examples/undocumented/libshogun/minibatchKMeans.cpp
@@ -44,7 +44,7 @@ int main(int argc, char **argv)
for (index_t i=0; i<result->get_num_labels(); ++i)
SG_SPRINT("cluster index of vector %i: %f\n", i, result->get_label(i));

CDenseFeatures<float64_t>* centers=CDenseFeatures<float64_t>::obtain_from_generic(distance->get_lhs());
CDenseFeatures<float64_t>* centers=distance->get_lhs()->as<CDenseFeatures<float64_t>>();
SGMatrix<float64_t> centers_matrix=centers->get_feature_matrix();
centers_matrix.display_matrix(centers_matrix.matrix,
centers_matrix.num_rows, centers_matrix.num_cols, "learnt centers using Lloyd's KMeans");
8 changes: 4 additions & 4 deletions src/shogun/clustering/KMeans.cpp
@@ -1,8 +1,8 @@
/*
* This software is distributed under BSD 3-clause license (see LICENSE file).
*
* Authors: Heiko Strathmann, Soeren Sonnenburg, Saurabh Mahindre,
* Sergey Lisitsyn, Evan Shelhamer, Soumyajit De, Fernando Iglesias,
* Authors: Heiko Strathmann, Soeren Sonnenburg, Saurabh Mahindre,
* Sergey Lisitsyn, Evan Shelhamer, Soumyajit De, Fernando Iglesias,
* Björn Esser, parijat
*/

@@ -39,8 +39,8 @@ CKMeans::~CKMeans()

void CKMeans::Lloyd_KMeans(SGMatrix<float64_t> centers, int32_t num_centers)
{
CDenseFeatures<float64_t>* lhs=
CDenseFeatures<float64_t>::obtain_from_generic(distance->get_lhs());
CDenseFeatures<float64_t>* lhs =
distance->get_lhs()->as<CDenseFeatures<float64_t>>();

int32_t lhs_size=lhs->get_num_vectors();
int32_t dim=lhs->get_num_features();
32 changes: 16 additions & 16 deletions src/shogun/clustering/KMeansBase.cpp
@@ -46,7 +46,7 @@ CKMeansBase::~CKMeansBase()

void CKMeansBase::set_initial_centers(SGMatrix<float64_t> centers)
{
CDenseFeatures<float64_t>* lhs=((CDenseFeatures<float64_t>*) distance->get_lhs());
CDenseFeatures<float64_t>* lhs=distance->get_lhs()->as<CDenseFeatures<float64_t>>();
dimensions=lhs->get_num_features();
REQUIRE(centers.num_cols == k,
"Expected %d initial cluster centers, got %d", k, centers.num_cols);
@@ -60,9 +60,9 @@ void CKMeansBase::set_random_centers()
{
mus.zero();
CDenseFeatures<float64_t>* lhs=
CDenseFeatures<float64_t>::obtain_from_generic(distance->get_lhs());
distance->get_lhs()->as<CDenseFeatures<float64_t>>();
int32_t lhs_size=lhs->get_num_vectors();

SGVector<int32_t> temp=SGVector<int32_t>(lhs_size);
SGVector<int32_t>::range_fill_vector(temp, lhs_size, 0);
CMath::permute(temp);
@@ -134,12 +134,12 @@ void CKMeansBase::initialize_training(CFeatures* data)
{
REQUIRE(distance, "Distance is not provided")
REQUIRE(distance->get_feature_type()==F_DREAL, "Distance's features type (%d) should be of type REAL (%d)")

if (data)
distance->init(data, data);

CDenseFeatures<float64_t>* lhs=
CDenseFeatures<float64_t>::obtain_from_generic(distance->get_lhs());
distance->get_lhs()->as<CDenseFeatures<float64_t>>();

REQUIRE(lhs, "Lhs features of distance not provided");
int32_t lhs_size=lhs->get_num_vectors();
@@ -225,7 +225,7 @@ SGMatrix<float64_t> CKMeansBase::get_cluster_centers()
return SGMatrix<float64_t>();

CDenseFeatures<float64_t>* lhs=
(CDenseFeatures<float64_t>*)distance->get_lhs();
distance->get_lhs()->as<CDenseFeatures<float64_t>>();
SGMatrix<float64_t> centers=lhs->get_feature_matrix();
SG_UNREF(lhs);
return centers;
@@ -261,17 +261,17 @@ void CKMeansBase::store_model_features()
SGMatrix<float64_t> CKMeansBase::kmeanspp()
{
int32_t lhs_size;
CDenseFeatures<float64_t>* lhs=(CDenseFeatures<float64_t>*)distance->get_lhs();
CDenseFeatures<float64_t>* lhs=distance->get_lhs()->as<CDenseFeatures<float64_t>>();
lhs_size=lhs->get_num_vectors();

SGMatrix<float64_t> centers=SGMatrix<float64_t>(dimensions, k);
centers.zero();
SGVector<float64_t> min_dist=SGVector<float64_t>(lhs_size);
min_dist.zero();

/* First center is chosen at random */
int32_t mu=CMath::random((int32_t) 0, lhs_size-1);
SGVector<float64_t> mu_first=lhs->get_feature_vector(mu);
SGVector<float64_t> mu_first=lhs->get_feature_vector(mu);
for(int32_t j=0; j<dimensions; j++)
centers(j, 0)=mu_first[j];

@@ -292,21 +292,21 @@ SGMatrix<float64_t> CKMeansBase::kmeanspp()

/* Choose centers with weighted probability */
for(int32_t i=1; i<k; i++)
{
int32_t best_center=0;
{
int32_t best_center=0;
float64_t best_sum=-1.0;
SGVector<float64_t> best_min_dist=SGVector<float64_t>(lhs_size);

/* local tries for best center */
for(int32_t trial=0; trial<n_rands; trial++)
{
float64_t temp_sum=0.0;
float64_t temp_sum=0.0;
float64_t temp_dist=0.0;
SGVector<float64_t> temp_min_dist=SGVector<float64_t>(lhs_size);
int32_t new_center=0;
SGVector<float64_t> temp_min_dist=SGVector<float64_t>(lhs_size);
int32_t new_center=0;
float64_t prob=CMath::random(0.0, 1.0);
prob=prob*sum;

for(int32_t j=0; j<lhs_size; j++)
{
temp_sum+=min_dist[j];
@@ -339,7 +339,7 @@ SGMatrix<float64_t> CKMeansBase::kmeanspp()
best_center=new_center;
}
}

SGVector<float64_t> vec=lhs->get_feature_vector(best_center);
for(int32_t j=0; j<dimensions; j++)
centers(j, i)=vec[j];
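Two of the call sites above (get_cluster_centers() and kmeanspp()) previously used a plain C-style cast rather than obtain_from_generic(), so they additionally gain a runtime type check from this change. A small sketch of the difference, assuming a distance whose lhs is not dense real features (hypothetical setup, not from the diff):

    // old: silently reinterprets whatever get_lhs() returns
    CDenseFeatures<float64_t>* lhs =
        (CDenseFeatures<float64_t>*)distance->get_lhs();

    // new: as<>() compares typeid(*this) with the requested type and raises
    // SG_SERROR on mismatch instead of handing back a bogus pointer
    CDenseFeatures<float64_t>* lhs =
        distance->get_lhs()->as<CDenseFeatures<float64_t>>();
    SG_UNREF(lhs); // get_lhs() still returns a ref-counted object, as in get_cluster_centers()
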
2 changes: 1 addition & 1 deletion src/shogun/clustering/KMeansMiniBatch.cpp
@@ -75,7 +75,7 @@ void CKMeansMiniBatch::minibatch_KMeans()
"number of iterations not set to positive value. Current iterations %d \n", minib_iter);

CDenseFeatures<float64_t>* lhs=
CDenseFeatures<float64_t>::obtain_from_generic(distance->get_lhs());
distance->get_lhs()->as<CDenseFeatures<float64_t>>();
CDenseFeatures<float64_t>* rhs_mus=new CDenseFeatures<float64_t>(mus);
CFeatures* rhs_cache=distance->replace_rhs(rhs_mus);
int32_t XSize=lhs->get_num_vectors();
2 changes: 1 addition & 1 deletion src/shogun/distributions/KernelDensity.cpp
@@ -57,7 +57,7 @@ CKernelDensity::~CKernelDensity()
bool CKernelDensity::train(CFeatures* data)
{
REQUIRE(data,"Data not supplied\n")
CDenseFeatures<float64_t>* dense_data=CDenseFeatures<float64_t>::obtain_from_generic(data);
CDenseFeatures<float64_t>* dense_data=data->as<CDenseFeatures<float64_t>>();

SG_UNREF(tree);
switch (m_eval)
4 changes: 2 additions & 2 deletions src/shogun/features/CombinedDotFeatures.h
@@ -1,8 +1,8 @@
/*
* This software is distributed under BSD 3-clause license (see LICENSE file).
*
* Authors: Soeren Sonnenburg, Sergey Lisitsyn, Heiko Strathmann,
* Vladislav Horbatiuk, Evgeniy Andreev, Yuyu Zhang, Evan Shelhamer,
* Authors: Soeren Sonnenburg, Sergey Lisitsyn, Heiko Strathmann,
* Vladislav Horbatiuk, Evgeniy Andreev, Yuyu Zhang, Evan Shelhamer,
* Björn Esser, Evangelos Anagnostopoulos
*/

64 changes: 26 additions & 38 deletions src/shogun/features/DenseFeatures.cpp
@@ -1,10 +1,10 @@
/*
* This software is distributed under BSD 3-clause license (see LICENSE file).
*
* Authors: Soeren Sonnenburg, Saurabh Mahindre, Soumyajit De, Heiko Strathmann,
* Sergey Lisitsyn, Sanuj Sharma, Chiyuan Zhang, Viktor Gal,
* Michele Mazzoni, Vladislav Horbatiuk, Kevin Hughes, Weijie Lin,
* Fernando Iglesias, Björn Esser, Evgeniy Andreev,
* Authors: Soeren Sonnenburg, Saurabh Mahindre, Soumyajit De, Heiko Strathmann,
* Sergey Lisitsyn, Sanuj Sharma, Chiyuan Zhang, Viktor Gal,
* Michele Mazzoni, Vladislav Horbatiuk, Kevin Hughes, Weijie Lin,
* Fernando Iglesias, Björn Esser, Evgeniy Andreev,
* Christopher Goldsworthy
*/

@@ -61,6 +61,28 @@ template<class ST> CDenseFeatures<ST>::CDenseFeatures(CFile* loader) :
load(loader);
}

template<class ST> CDenseFeatures<ST>::CDenseFeatures(CDotFeatures* features) :
CDotFeatures()
{
init();

auto num_feat = features->get_dim_feature_space();
auto num_vec = features->get_num_vectors();

ASSERT(num_feat>0 && num_vec>0)
feature_matrix = SGMatrix<ST>(num_feat, num_vec);
for (auto i = 0; i < num_vec; i++)
{
SGVector<float64_t> v = features->get_computed_dot_feature_vector(i);
ASSERT(num_feat==v.vlen)

for (auto j = 0; j < num_feat; j++)
feature_matrix.matrix[i * int64_t(num_feat) + j] = (ST) v.vector[j];
}
num_features = num_feat;
num_vectors = num_vec;
}

template<class ST> CFeatures* CDenseFeatures<ST>::duplicate() const
{
return new CDenseFeatures<ST>(*this);
@@ -366,30 +366,6 @@ template<class ST> ST* CDenseFeatures<ST>::get_transposed(int32_t &num_feat, int
return fm;
}

template<class ST> void CDenseFeatures<ST>::obtain_from_dot(CDotFeatures* df)
{
m_subset_stack->remove_all_subsets();

int32_t num_feat = df->get_dim_feature_space();
int32_t num_vec = df->get_num_vectors();

ASSERT(num_feat>0 && num_vec>0)

free_feature_matrix();
feature_matrix = SGMatrix<ST>(num_feat, num_vec);

for (int32_t i = 0; i < num_vec; i++)
{
SGVector<float64_t> v = df->get_computed_dot_feature_vector(i);
ASSERT(num_feat==v.vlen)

for (int32_t j = 0; j < num_feat; j++)
feature_matrix.matrix[i * int64_t(num_feat) + j] = (ST) v.vector[j];
}
num_features = num_feat;
num_vectors = num_vec;
}

template<class ST> bool CDenseFeatures<ST>::apply_preprocessor(bool force_preprocessing)
{
if (m_subset_stack->has_subsets())
@@ -468,8 +466,6 @@ template<class ST> void CDenseFeatures<ST>::initialize_cache()
}
}

template<class ST> EFeatureClass CDenseFeatures<ST>::get_feature_class() const { return C_DENSE; }

template<class ST> bool CDenseFeatures<ST>::reshape(int32_t p_num_features, int32_t p_num_vectors)
{
if (m_subset_stack->has_subsets())
@@ -1082,14 +1078,6 @@ void CDenseFeatures<ST>::save(CFile* writer)
feature_matrix.save(writer);
}

template< class ST > CDenseFeatures< ST >* CDenseFeatures< ST >::obtain_from_generic(CFeatures* const base_features)
{
REQUIRE(base_features->get_feature_class() == C_DENSE,
"base_features must be of dynamic type CDenseFeatures\n")

return (CDenseFeatures< ST >*) base_features;
}

template class CDenseFeatures<bool>;
template class CDenseFeatures<char>;
template class CDenseFeatures<int8_t>;
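The removed obtain_from_dot() is superseded by the new CDenseFeatures(CDotFeatures*) constructor added above, which densifies the input by copying each computed dot-feature vector into a fresh matrix. A hedged usage sketch; dot_feats is a placeholder for any CDotFeatures implementation and is not taken from the diff:

    // any CDotFeatures implementation, e.g. sparse or hashed features
    CDotFeatures* dot_feats = /* ... */;

    // old: mutate an existing instance (this also removed any active subsets)
    // CDenseFeatures<float64_t>* dense = new CDenseFeatures<float64_t>();
    // dense->obtain_from_dot(dot_feats);

    // new: one-step construction of an independent dense copy
    auto* dense = new CDenseFeatures<float64_t>(dot_feats);
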
23 changes: 8 additions & 15 deletions src/shogun/features/DenseFeatures.h
@@ -1,8 +1,8 @@
/*
* This software is distributed under BSD 3-clause license (see LICENSE file).
*
* Authors: Soumyajit De, Heiko Strathmann, Saurabh Mahindre, Soeren Sonnenburg,
* Vladislav Horbatiuk, Yuyu Zhang, Kevin Hughes, Evgeniy Andreev,
* Authors: Soumyajit De, Heiko Strathmann, Saurabh Mahindre, Soeren Sonnenburg,
* Vladislav Horbatiuk, Yuyu Zhang, Kevin Hughes, Evgeniy Andreev,
* Thoralf Klein, Fernando Iglesias, Björn Esser, Sergey Lisitsyn
*/

@@ -87,6 +87,12 @@ template<class ST> class CDenseFeatures: public CDotFeatures
*/
CDenseFeatures(ST* src, int32_t num_feat, int32_t num_vec);

/** constructor from DotFeatures
*
* @param features DotFeatures object
*/
CDenseFeatures(CDotFeatures* features);

/** constructor loading features from file
*
* @param loader File object via which to load data
@@ -255,14 +261,6 @@ template<class ST> class CDenseFeatures: public CDotFeatures
*/
ST* get_transposed(int32_t &num_feat, int32_t &num_vec);

/** obtain dense features from other dotfeatures
*
* removes any subset before
*
* @param df dotfeatures to obtain features from
*/
void obtain_from_dot(CDotFeatures* df);

/** apply preprocessor
*
* applies preprocessors to ALL features (subset removed before and
@@ -495,11 +493,6 @@ template<class ST> class CDenseFeatures: public CDotFeatures
*/
CFeatures* create_merged_copy(CFeatures* other);

/** helper method used to specialize a base class instance
*
*/
static CDenseFeatures* obtain_from_generic(CFeatures* const base_features);

#ifndef SWIG // SWIG should skip this part
virtual CFeatures* shallow_subset_copy();
#endif
42 changes: 35 additions & 7 deletions src/shogun/features/Features.h
@@ -1,8 +1,8 @@
/*
* This software is distributed under BSD 3-clause license (see LICENSE file).
*
* Authors: Heiko Strathmann, Soeren Sonnenburg, Sergey Lisitsyn,
* Saurabh Mahindre, Evgeniy Andreev, Wu Lin, Vladislav Horbatiuk,
* Authors: Heiko Strathmann, Soeren Sonnenburg, Sergey Lisitsyn,
* Saurabh Mahindre, Evgeniy Andreev, Wu Lin, Vladislav Horbatiuk,
* Yuyu Zhang, Björn Esser, Soumyajit De
*/

@@ -335,19 +335,47 @@ class CFeatures : public CSGObject
*/
virtual bool support_compatible_class() const {return false;}

/** Given a class in right hand side, does this class support compatible computation?
/** Given a class in right hand side, does this class support compatible computation?
*
* for example, is this->dot(rhs_prt) valid,
* where rhs_prt is the class in right hand side
* where rhs_prt is the class in right hand side
*
* @param rhs the class in right hand side
* @param rhs the class in right hand side
* @return whether this class supports compatible computation
*/
virtual bool get_feature_class_compatibility (EFeatureClass rhs) const;

#ifndef SWIG // SWIG should skip this part
virtual CFeatures* shallow_subset_copy()
{
/**
* Utility method to specialize the feature to the required type.
*
* @param f CFeatures base type
* @return The requested type if casting was successful.
*/
template<class T> static T* as(CFeatures* f)
{
REQUIRE(f, "Features is null!\n");
return f->as<T>();
}

/**
* Utility method to specialize the feature to the required type.
*
* @param f CFeatures base type
* @return The requested type if casting was successful.
*/
template<class T> T* as()
{
if (typeid(T) != typeid(*this))
{
SG_SERROR("Provided features (%s) is not of requested type %s!\n",
typeid(*this).name(), typeid(T).name());
}
return (T*)this;
}

virtual CFeatures* shallow_subset_copy()
{
SG_SNOTIMPLEMENTED;
return NULL;
}
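As defined in Features.h above, as<T>() is an exact-type check: it compares typeid(*this) against typeid(T) and emits SG_SERROR on a mismatch, otherwise returning the same pointer cast to T*; the static overload additionally rejects null input. Note the check is typeid equality rather than a dynamic_cast, so requesting a base class of the object's dynamic type would also fail. An illustrative sketch; the feature objects and some_matrix are placeholders, not code from the diff:

    CFeatures* f = new CDenseFeatures<float64_t>(some_matrix);

    // succeeds: dynamic type matches the requested specialization exactly
    auto* dense = f->as<CDenseFeatures<float64_t>>();

    // fails at runtime with SG_SERROR: typeid(*f) is not CStringFeatures<char>
    auto* strings = f->as<CStringFeatures<char>>();

    // static convenience overload; REQUIREs a non-null pointer before casting
    auto* dense2 = CFeatures::as<CDenseFeatures<float64_t>>(f);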
