Skip to content

Commit

Permalink
Drop apply_preprocessor and preproc states in features
Browse files Browse the repository at this point in the history
  • Loading branch information
vinx13 authored and vigsterkr committed May 31, 2018
1 parent 336ab52 commit 74d8dba
Show file tree
Hide file tree
Showing 12 changed files with 10 additions and 213 deletions.
Expand Up @@ -57,8 +57,8 @@ def features_string_char_compressed (fname):
# load compressed data and uncompress via preprocessor
f2=StringCharFeatures(RAWBYTE);
f2.load_compressed("tmp/foo_lzo.str", False)
f2.add_preprocessor(DecompressCharString(LZO))
f2.apply_preprocessor()
preproc = DecompressCharString(LZO)
f2 = preproc.transform(f2)
#print("lzo strings", f2.get_features())
#print

Expand Down
42 changes: 0 additions & 42 deletions src/shogun/features/DenseFeatures.cpp
Expand Up @@ -388,48 +388,6 @@ template<class ST> ST* CDenseFeatures<ST>::get_transposed(int32_t &num_feat, int
return fm;
}

/** Apply the registered preprocessors to the dense feature matrix.
 *
 * Preprocessors that are already flagged as applied are skipped unless
 * force_preprocessing is set. A preprocessor is flagged as applied only
 * after it returned a non-NULL matrix, so a failed preprocessor is retried
 * on the next call instead of being silently skipped.
 *
 * An error is reported through SG_ERROR if a subset is set, if there is no
 * feature matrix, or if no preprocessors are registered.
 *
 * @param force_preprocessing if true, preprocessors already flagged as
 * applied are applied again
 * @return true if every selected preprocessor succeeded, false otherwise
 */
template<class ST> bool CDenseFeatures<ST>::apply_preprocessor(bool force_preprocessing)
{
	if (m_subset_stack->has_subsets())
		SG_ERROR("A subset is set, cannot call apply_preproc\n")

	SG_DEBUG("force: %d\n", force_preprocessing)

	if (feature_matrix.matrix && get_num_preprocessors())
	{
		for (int32_t i = 0; i < get_num_preprocessors(); i++)
		{
			if ((!is_preprocessed(i) || force_preprocessing))
			{
				CDensePreprocessor<ST>* p =
					(CDensePreprocessor<ST>*) get_preprocessor(i);
				SG_INFO("preprocessing using preproc %s\n", p->get_name())

				// Evaluate the result before releasing the reference obtained
				// from get_preprocessor().
				const bool succeeded =
					p->apply_to_feature_matrix(this).matrix != NULL;
				SG_UNREF(p);

				if (!succeeded)
					return false;

				// Only record the preprocessor as applied once it succeeded.
				set_preprocessed(i);
			}
		}

		return true;
	}
	else
	{
		if (!feature_matrix.matrix)
			SG_ERROR("no feature matrix\n")

		if (!get_num_preprocessors())
			SG_ERROR("no preprocessors available\n")

		return false;
	}
}

template<class ST> int32_t CDenseFeatures<ST>::get_num_vectors() const
{
return m_subset_stack->has_subsets() ? m_subset_stack->get_size() : num_vectors;
Expand Down
12 changes: 0 additions & 12 deletions src/shogun/features/DenseFeatures.h
Expand Up @@ -261,18 +261,6 @@ template<class ST> class CDenseFeatures: public CDotFeatures
*/
ST* get_transposed(int32_t &num_feat, int32_t &num_vec);

/** apply preprocessor
 *
 * applies the registered preprocessors to the feature matrix;
 * preprocessors already marked as applied are skipped unless forced
 *
 * not possible with subset (an error is reported if a subset is set)
 *
 * @param force_preprocessing if preprocessing shall be forced, i.e.
 * preprocessors already marked as applied are applied again
 * @return if applying was successful
 */
virtual bool apply_preprocessor(bool force_preprocessing = false);

/** get number of feature vectors
*
* @return number of feature vectors
Expand Down
42 changes: 2 additions & 40 deletions src/shogun/features/Features.cpp
Expand Up @@ -28,13 +28,10 @@ CFeatures::CFeatures(const CFeatures& orig)
{
init();

// Call to init creates new preproc and preprocessed arrays.
// Call to init creates new preproc arrays.
SG_UNREF(preproc);
SG_UNREF(preprocessed);
preproc = orig.preproc;
preprocessed = orig.preprocessed;
SG_REF(preproc);
SG_REF(preprocessed);
}

CFeatures::CFeatures(CFile* loader)
Expand All @@ -51,7 +48,6 @@ CFeatures::~CFeatures()
clean_preprocessors();
SG_UNREF(m_subset_stack);
SG_UNREF(preproc);
SG_UNREF(preprocessed);
}

void CFeatures::init()
Expand All @@ -61,8 +57,6 @@ void CFeatures::init()

SG_ADD((CSGObject**) &preproc, "preproc", "Array of preprocessors.",
MS_NOT_AVAILABLE);
SG_ADD((CSGObject**) &preprocessed, "preprocessed", "Array of preprocessed.",
MS_NOT_AVAILABLE);

SG_ADD((CSGObject**)&m_subset_stack, "subset_stack", "Stack of subsets",
MS_NOT_AVAILABLE);
Expand All @@ -73,17 +67,14 @@ void CFeatures::init()
properties = FP_NONE;
cache_size = 0;
preproc = new CDynamicObjectArray();
preprocessed = new CDynamicArray<bool>();
SG_REF(preproc);
SG_REF(preprocessed);
}

/** Register a preprocessor with these features.
 *
 * The preprocessor is appended to the preproc array and a matching
 * `false` ("not applied") entry is appended to the preprocessed array,
 * so both arrays grow by one element per call.
 *
 * @param p preprocessor to add; asserted to be non-NULL
 */
void CFeatures::add_preprocessor(CPreprocessor* p)
{
ASSERT(p)

preproc->push_back(p);
preprocessed->push_back(false);
}

CPreprocessor* CFeatures::get_preprocessor(int32_t num) const
Expand All @@ -96,31 +87,16 @@ CPreprocessor* CFeatures::get_preprocessor(int32_t num) const
return NULL;
}

/** Count the registered preprocessors whose applied flag is set.
 *
 * @return number of entries in the preprocessed array that are true,
 * considering one entry per registered preprocessor
 */
int32_t CFeatures::get_num_preprocessed() const
{
	const int32_t total=preproc->get_num_elements();
	int32_t applied_count=0;

	for (int32_t idx=0; idx<total; ++idx)
		applied_count+=(*preprocessed)[idx] ? 1 : 0;

	return applied_count;
}

/** Drop all registered preprocessors.
 *
 * Calls reset_array() on both the preprocessor array and the array of
 * applied flags, keeping the two in step.
 */
void CFeatures::clean_preprocessors()
{
preproc->reset_array();
preprocessed->reset_array();
}

/** Remove the preprocessor at the given index together with its applied flag.
 *
 * Indices outside [0, number of preprocessors) are ignored.
 *
 * @param num index of the preprocessor to remove
 */
void CFeatures::del_preprocessor(int32_t num)
{
	// Nothing to do for an out-of-range index.
	if (num<0 || num>=preproc->get_num_elements())
		return;

	preproc->delete_element(num);
	preprocessed->delete_element(num);
}

Expand All @@ -130,24 +106,10 @@ void CFeatures::list_preprocessors()

for (int32_t i=0; i<num_preproc; i++)
{
SG_INFO("preproc[%d]=%s applied=%s\n",i,
preproc->get_element(i)->get_name(),
preprocessed->get_element(i) ? "true" : "false");
SG_INFO("preproc[%d]=%s\n", i, preproc->get_element(i)->get_name());
}
}

/** Flag the preprocessor at index num as applied.
 *
 * @param num index into the preprocessed array; asserted to lie in
 * [0, number of entries)
 */
void CFeatures::set_preprocessed(int32_t num)
{
// Validate the index before writing the flag.
ASSERT(num<preprocessed->get_num_elements() && num>=0);
(*preprocessed)[num]=true;
}

/** Query whether the preprocessor at index num is flagged as applied.
 *
 * @param num index into the preprocessed array; asserted to lie in
 * [0, number of entries)
 * @return the stored applied flag for that index
 */
bool CFeatures::is_preprocessed(int32_t num) const
{
// Validate the index before reading the flag.
ASSERT(num<preprocessed->get_num_elements() && num>=0);
return (*preprocessed)[num];
}

int32_t CFeatures::get_num_preprocessors() const
{
return preproc->get_num_elements();
Expand Down
21 changes: 0 additions & 21 deletions src/shogun/features/Features.h
Expand Up @@ -140,24 +140,6 @@ class CFeatures : public CSGObject
*/
CPreprocessor* get_preprocessor(int32_t num) const;

/** set applied flag for preprocessor
 *
 * @param num index of preprocessor in list; must be a valid index
 * (checked by assertion)
 */
void set_preprocessed(int32_t num);

/** get whether specified preprocessor was already applied
 *
 * @param num index of preprocessor in list; must be a valid index
 * (checked by assertion)
 * @return true if the applied flag of that preprocessor is set
 */
bool is_preprocessed(int32_t num) const;

/** get the number of applied preprocessors
 *
 * counts the registered preprocessors whose applied flag is set
 *
 * @return number of applied preprocessors
 */
int32_t get_num_preprocessed() const;

/** get number of preprocessors
*
* @return number of preprocessors
Expand Down Expand Up @@ -375,9 +357,6 @@ class CFeatures : public CSGObject
/** list of preprocessors */
CDynamicObjectArray* preproc;

/** i'th entry is true if features were already preprocessed with preproc i */
CDynamicArray<bool>* preprocessed;

protected:
/** subset used for index transformations */
CSubsetStack* m_subset_stack;
Expand Down
32 changes: 0 additions & 32 deletions src/shogun/features/SparseFeatures.cpp
Expand Up @@ -301,38 +301,6 @@ template<class ST> void CSparseFeatures<ST>::set_full_feature_matrix(SGMatrix<ST
sparse_feature_matrix.from_dense(full);
}

/** Apply the registered preprocessors to the sparse feature matrix.
 *
 * Preprocessors that are already flagged as applied are skipped unless
 * force_preprocessing is set. A preprocessor is flagged as applied only
 * after it returned a non-NULL result, so a failed preprocessor is retried
 * on the next call instead of being silently skipped.
 *
 * If there is no sparse feature matrix or no preprocessor is registered, a
 * warning is issued and nothing is applied.
 *
 * @param force_preprocessing if true, preprocessors already flagged as
 * applied are applied again
 * @return true if every selected preprocessor succeeded, false otherwise
 */
template<class ST> bool CSparseFeatures<ST>::apply_preprocessor(bool force_preprocessing)
{
	SG_INFO("force: %d\n", force_preprocessing)

	if (sparse_feature_matrix.sparse_matrix && get_num_preprocessors())
	{
		for (int32_t i=0; i<get_num_preprocessors(); i++)
		{
			if (!is_preprocessed(i) || force_preprocessing)
			{
				CSparsePreprocessor<ST>* p = (CSparsePreprocessor<ST>*) get_preprocessor(i);
				SG_INFO("preprocessing using preproc %s\n", p->get_name())

				// Evaluate the result before releasing the reference obtained
				// from get_preprocessor().
				const bool succeeded =
					!(p->apply_to_sparse_feature_matrix(this) == NULL);
				SG_UNREF(p);

				if (!succeeded)
					return false;

				// Only record the preprocessor as applied once it succeeded.
				set_preprocessed(i);
			}
		}
		return true;
	}
	else
	{
		SG_WARNING("no sparse feature matrix available or features already preprocessed - skipping.\n")
		return false;
	}
}

template<class ST> int32_t CSparseFeatures<ST>::get_num_vectors() const
{
return m_subset_stack->has_subsets() ? m_subset_stack->get_size() : sparse_feature_matrix.num_vectors;
Expand Down
9 changes: 0 additions & 9 deletions src/shogun/features/SparseFeatures.h
Expand Up @@ -243,15 +243,6 @@ template <class ST> class CSparseFeatures : public CDotFeatures
*/
virtual void set_full_feature_matrix(SGMatrix<ST> full);

/** apply preprocessor
 *
 * possible with subset
 *
 * @param force_preprocessing if preprocessing shall be forced, i.e.
 * preprocessors already marked as applied are applied again
 * @return if applying was successful
 */
virtual bool apply_preprocessor(bool force_preprocessing=false);

/** get number of feature vectors, possibly of subset
*
* @return number of feature vectors
Expand Down
24 changes: 0 additions & 24 deletions src/shogun/features/StringFeatures.cpp
Expand Up @@ -1222,30 +1222,6 @@ template<class ST> bool CStringFeatures<ST>::save_compressed(char* dest, E_COMPR
return true;
}

/** Apply the registered preprocessors to the string features.
 *
 * Preprocessors that are already flagged as applied are skipped unless
 * force_preprocessing is set. A preprocessor is flagged as applied only
 * after apply_to_string_features() reported success, so a failed
 * preprocessor is retried on the next call instead of being silently
 * skipped.
 *
 * @param force_preprocessing if true, preprocessors already flagged as
 * applied are applied again
 * @return true if every selected preprocessor succeeded (also when none is
 * registered), false as soon as one fails
 */
template<class ST> bool CStringFeatures<ST>::apply_preprocessor(bool force_preprocessing)
{
	SG_DEBUG("force: %d\n", force_preprocessing)

	for (int32_t i=0; i<get_num_preprocessors(); i++)
	{
		if ( (!is_preprocessed(i) || force_preprocessing) )
		{
			CStringPreprocessor<ST>* p=(CStringPreprocessor<ST>*) get_preprocessor(i);
			SG_INFO("preprocessing using preproc %s\n", p->get_name())

			// Evaluate the result before releasing the reference obtained
			// from get_preprocessor().
			const bool succeeded=!(!p->apply_to_string_features(this));
			SG_UNREF(p);

			if (!succeeded)
				return false;

			// Only record the preprocessor as applied once it succeeded.
			set_preprocessed(i);
		}
	}
	return true;
}

template<class ST> int32_t CStringFeatures<ST>::obtain_by_sliding_window(int32_t window_size, int32_t step_size, int32_t skip)
{
if (m_subset_stack->has_subsets())
Expand Down
7 changes: 0 additions & 7 deletions src/shogun/features/StringFeatures.h
Expand Up @@ -488,13 +488,6 @@ template <class ST> class CStringFeatures : public CFeatures
*/
virtual bool save_compressed(char* dest, E_COMPRESSION_TYPE compression, int level);

/** apply preprocessor
 *
 * @param force_preprocessing if preprocessing shall be forced, i.e.
 * preprocessors already marked as applied are applied again
 * @return if applying was successful
 */
virtual bool apply_preprocessor(bool force_preprocessing=false);

/** slides a window of size window_size over the current single string
* step_size is the amount by which the window is shifted.
* creates (string_len-window_size)/step_size many feature obj
Expand Down
10 changes: 0 additions & 10 deletions src/shogun/kernel/string/CommWordStringKernel.cpp
Expand Up @@ -156,16 +156,6 @@ float64_t CCommWordStringKernel::compute_helper(
else
bvec=NULL;
}
else
{
if ( (l->get_num_preprocessors() != l->get_num_preprocessed()) ||
(r->get_num_preprocessors() != r->get_num_preprocessed()))
{
SG_ERROR("not all preprocessors have been applied to training (%d/%d)"
" or test (%d/%d) data\n", l->get_num_preprocessed(), l->get_num_preprocessors(),
r->get_num_preprocessed(), r->get_num_preprocessors());
}
}

float64_t result=0;

Expand Down
10 changes: 0 additions & 10 deletions src/shogun/kernel/string/WeightedCommWordStringKernel.cpp
Expand Up @@ -124,16 +124,6 @@ float64_t CWeightedCommWordStringKernel::compute_helper(
else
bvec=NULL;
}
else
{
if ( (l->get_num_preprocessors() != l->get_num_preprocessed()) ||
(r->get_num_preprocessors() != r->get_num_preprocessed()))
{
SG_ERROR("not all preprocessors have been applied to training (%d/%d)"
" or test (%d/%d) data\n", l->get_num_preprocessed(), l->get_num_preprocessors(),
r->get_num_preprocessed(), r->get_num_preprocessors());
}
}

float64_t result=0;
uint8_t mask=0;
Expand Down
10 changes: 6 additions & 4 deletions tests/unit/classifier/GaussianProcessClassification_unittest.cc
Expand Up @@ -595,11 +595,13 @@ TEST_F(GaussianProcessClassification, apply_preprocessor_and_binary)
CRescaleFeatures* preproc=new CRescaleFeatures();
preproc->fit(features_train);

features_train->add_preprocessor(preproc);
features_train->apply_preprocessor();
features_train =
preproc->transform(features_train)->as<CDenseFeatures<float64_t>>();
SG_REF(features_train)

features_test->add_preprocessor(preproc);
features_test->apply_preprocessor();
features_test =
preproc->transform(features_test)->as<CDenseFeatures<float64_t>>();
SG_REF(features_test);

// logit likelihood
CLogitLikelihood* likelihood=new CLogitLikelihood();
Expand Down

0 comments on commit 74d8dba

Please sign in to comment.