Skip to content

Commit

Permalink
Add meta example features-char-string
Browse files Browse the repository at this point in the history
  • Loading branch information
avramidis committed Jan 18, 2019
1 parent e3e8cd1 commit 71a7f3e
Show file tree
Hide file tree
Showing 15 changed files with 233 additions and 28 deletions.
5 changes: 3 additions & 2 deletions examples/meta/generator/targets/cpp.json
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@
"Type": {
"RealFeatures": "DenseFeatures<float64_t>",
"RealSubsetFeatures": "DenseSubsetFeatures<float64_t>",
"StringCharFeatures": "CStringFeatures<char>",
"StringCharFeatures": "StringFeatures<char>",
"Default": "$typeName",
"bool": "bool",
"string": "char*",
Expand Down Expand Up @@ -96,7 +96,8 @@
"get_int_vector": "$object->get<SGVector<int32_t>>($arguments)",
"get_real": "$object->get<float64_t>($arguments)",
"get_real_vector": "$object->get<SGVector<float64_t>>($arguments)",
"get_real_matrix": "$object->get<SGMatrix<float64_t>>($arguments)"
"get_real_matrix": "$object->get<SGMatrix<float64_t>>($arguments)",
"get_char_vector": "$object->get<SGStringList<char>>($arguments)"
},
"StaticCall": "C$typeName::$method($arguments)",
"GlobalCall": "$method($arguments)",
Expand Down
3 changes: 2 additions & 1 deletion examples/meta/generator/targets/python.json
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,8 @@
"get_int_vector": "$object.get($arguments)",
"get_real": "$object.get($arguments)",
"get_real_vector": "$object.get($arguments)",
"get_real_matrix": "$object.get($arguments)"
"get_real_matrix": "$object.get($arguments)",
"get_char_vector": "$object.get($arguments)"
},
"StaticCall": "$typeName.$method($arguments)",
"GlobalCall": "$method($arguments)",
Expand Down
2 changes: 0 additions & 2 deletions examples/meta/generator/translate.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,11 +90,9 @@ def getSGTypeToStoreMethodName(sgType):
return "real_matrix"
elif sgType=="FloatMatrix":
return "float_matrix"

else:
raise RuntimeError("Given Shogun type \"%s\" cannot be translated to meta type", sgType)


def getVarsToStore(program):
""" Extracts all variables in program that should be stored """
varsToStore = []
Expand Down
11 changes: 11 additions & 0 deletions examples/meta/src/features/string_char.sg
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
File words = csv_file("../../data/words.dat")

#![create_features]
Features f = string_features(words, enum EAlphabet.RAWBYTE)
#![create_features]

#![output stat]
int max_string_length = f.get_int("max_string_length")
int number_of_strings = f.get_int("get_num_vectors")
CharVector features = f.get_char_vector("get_features")
#![output stat]
53 changes: 42 additions & 11 deletions src/shogun/features/StringFeatures.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -78,19 +78,22 @@ template<class ST> CStringFeatures<ST>::CStringFeatures(CAlphabet* alpha)
}

template<class ST> CStringFeatures<ST>::CStringFeatures(const CStringFeatures & orig)
: CFeatures(orig), num_vectors(orig.num_vectors),
single_string(orig.single_string),
length_of_single_string(orig.length_of_single_string),
max_string_length(orig.max_string_length),
num_symbols(orig.num_symbols),
original_num_symbols(orig.original_num_symbols),
order(orig.order), preprocess_on_get(false),
feature_cache(NULL)
: CFeatures(orig)
{
init();

ASSERT(orig.single_string == NULL) //not implemented

num_vectors = orig.num_vectors;
single_string = orig.single_string;
length_of_single_string = orig.length_of_single_string;
max_string_length = orig.max_string_length;
num_symbols = orig.num_symbols;
original_num_symbols = orig.original_num_symbols;
order = orig.order;
preprocess_on_get = false;
feature_cache = NULL;

alphabet=orig.alphabet;
SG_REF(alphabet);

Expand Down Expand Up @@ -216,7 +219,7 @@ template<class ST> EFeatureClass CStringFeatures<ST>::get_feature_class() const

template<class ST> EFeatureType CStringFeatures<ST>::get_feature_type() const { return F_UNKNOWN; }

template<class ST> CAlphabet* CStringFeatures<ST>::get_alphabet()
template<class ST> CAlphabet* CStringFeatures<ST>::get_alphabet() const
{
SG_REF(alphabet);
return alphabet;
Expand Down Expand Up @@ -1019,15 +1022,15 @@ template<class ST> bool CStringFeatures<ST>::append_features(SGString<ST>* p_fea
return false;
}

template<class ST> SGStringList<ST> CStringFeatures<ST>::get_features()
template<class ST> SGStringList<ST> CStringFeatures<ST>::get_features() const
{
SGStringList<ST> sl(NULL,0,0,false);

sl.strings=get_features(sl.num_strings, sl.max_string_length);
return sl;
}

template<class ST> SGString<ST>* CStringFeatures<ST>::get_features(int32_t& num_str, int32_t& max_str_len)
template<class ST> SGString<ST>* CStringFeatures<ST>::get_features(int32_t& num_str, int32_t& max_str_len) const
{
if (m_subset_stack->has_subsets())
SG_ERROR("get features() is not possible on subset")
Expand Down Expand Up @@ -1731,6 +1734,8 @@ template<class ST> void CStringFeatures<ST>::init()

m_parameters->add_vector(&symbol_mask_table, &symbol_mask_table_len, "mask_table", "Symbol mask table - using in higher order mapping");
watch_param("mask_table", &symbol_mask_table, &symbol_mask_table_len);
watch_method("get_num_vectors", &CStringFeatures::get_num_vectors);
watch_method("get_features", &CStringFeatures::get_features);
}

/** get feature type the char feature can deal with
Expand Down Expand Up @@ -2081,6 +2086,32 @@ bool CStringFeatures<ST>::obtain_from_char_features(CStringFeatures<CT>* sf, int
return true;
}

/** Create a copy of this feature object.
 *
 * The copy is produced by the CStringFeatures copy constructor.
 *
 * @return pointer to the newly allocated copy
 */
template <class ST>
CStringFeatures<ST>* CStringFeatures<ST>::clone() const
{
	auto* copy = new CStringFeatures<ST>(*this);
	return copy;
}

/** Compare this feature object with another one.
 *
 * Two objects are considered equal when they use the same alphabet type,
 * hold the same number of vectors and every pair of feature vectors at the
 * same index compares equal.
 *
 * @param other CStringFeatures to compare with
 * @return true if alphabet type, vector count and all feature vectors match,
 * false otherwise
 */
template <class ST>
bool CStringFeatures<ST>::equals(CStringFeatures<ST>& other)
{
	// Read the alphabet members directly: get_alphabet() SG_REFs the
	// alphabet, and that reference would have to be released again.
	if (alphabet->get_alphabet() != other.alphabet->get_alphabet())
		return false;

	// Use the same count for the size check and the loop bound.
	const int32_t n_vectors = this->get_num_vectors();
	if (other.get_num_vectors() != n_vectors)
		return false;

	for (int32_t line = 0; line < n_vectors; line++)
	{
		SGVector<ST> fv = get_feature_vector(line);
		SGVector<ST> fv_other = other.get_feature_vector(line);

		// Bail out only on a mismatch; returning unconditionally here would
		// compare nothing but the first vector.
		if (!fv.equals(fv_other))
			return false;
	}

	return true;
}

template class CStringFeatures<bool>;
template class CStringFeatures<char>;
template class CStringFeatures<int8_t>;
Expand Down
20 changes: 17 additions & 3 deletions src/shogun/features/StringFeatures.h
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
#include <shogun/features/Features.h>
#include <shogun/features/Alphabet.h>
#include <shogun/lib/SGString.h>
#include <shogun/lib/SGStringList.h>

namespace shogun
{
Expand Down Expand Up @@ -157,7 +158,7 @@ template <class ST> class CStringFeatures : public CFeatures
*
* @return alphabet
*/
CAlphabet* get_alphabet();
CAlphabet* get_alphabet() const;

/** duplicate feature object
*
Expand Down Expand Up @@ -428,7 +429,7 @@ template <class ST> class CStringFeatures : public CFeatures
/** get_features
* @return features
*/
SGStringList<ST> get_features();
SGStringList<ST> get_features() const;

/** get_features
*
Expand All @@ -438,7 +439,7 @@ template <class ST> class CStringFeatures : public CFeatures
* @param max_str_len maximal string length (returned)
* @return string features
*/
virtual SGString<ST>* get_features(int32_t& num_str, int32_t& max_str_len);
virtual SGString<ST>* get_features(int32_t& num_str, int32_t& max_str_len) const;

/** copy_features
*
Expand Down Expand Up @@ -642,6 +643,19 @@ template <class ST> class CStringFeatures : public CFeatures
/** post method when subset is changed */
virtual void subset_changed_post();

#ifndef SWIG // SWIG should skip this part
// /** Clone for CStringFeatures */
CStringFeatures<ST>* clone() const;

/** Equals method to CStringFeatures
* @param other CStringFeatures to compare with
* @return false if alphabets and feature vectors are different,
* true otherwise
*/
bool equals(CStringFeatures<ST>& other);

#endif // SWIG // SWIG should skip this part

protected:
/** compute feature vector for sample num
* if target is set the vector is written to target
Expand Down
27 changes: 27 additions & 0 deletions src/shogun/features/StringFileFeatures.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ template <class ST> CStringFileFeatures<ST>::CStringFileFeatures() : CStringFeat
/** Construct string features backed by a memory mapped file.
 *
 * @param fname name of the file to map
 * @param alpha alphabet passed on to the CStringFeatures base constructor
 */
template <class ST> CStringFileFeatures<ST>::CStringFileFeatures(const char* fname, EAlphabet alpha)
: CStringFeatures<ST>(alpha)
{
// NOTE(review): only the pointer is stored here, the characters are not
// copied - presumably the caller has to keep fname alive for as long as
// file_name is read (e.g. by get_file_name() or clone()); confirm.
file_name = fname;
file = new CMemoryMappedFile<ST>(fname);
// The mapped file has to exist before the meta information is fetched.
fetch_meta_info_from_file();
}
Expand Down Expand Up @@ -120,6 +121,32 @@ template <class ST> void CStringFileFeatures<ST>::fetch_meta_info_from_file(int3
CStringFeatures<ST>::features=SG_REALLOC(SGString<ST>, CStringFeatures<ST>::features, buffer_size, CStringFeatures<ST>::num_vectors);
}

/** Create a new CStringFileFeatures object for the same file and alphabet.
 *
 * If the alphabet type is NONE a default-constructed object is returned,
 * otherwise the object is re-created from this object's file name and
 * alphabet type.
 *
 * @return newly allocated object; SG_REF has already been applied to it
 */
template<class ST>
CStringFileFeatures<ST>* CStringFileFeatures<ST>::clone()
{
	// get_alphabet() hands out an SG_REF'ed pointer, so release it again once
	// the alphabet type has been read.
	CAlphabet* alpha = this->get_alphabet();
	const auto alpha_type = alpha->get_alphabet();
	SG_UNREF(alpha);

	CStringFileFeatures<ST>* result;

	if (alpha_type==EAlphabet::NONE)
	{
		result = new CStringFileFeatures<ST>();
	}
	else
	{
		// Take the file name from this object; `result` is not initialised
		// yet at this point and must not be dereferenced.
		result = new CStringFileFeatures<ST>(this->file_name, alpha_type);
	}
	SG_REF(result);
	return result;
}

/** Compare this object with another CStringFileFeatures object.
 *
 * Only the alphabet types are compared.
 * NOTE(review): the header documentation also mentions the file name, which
 * is not compared here - confirm which behaviour is intended.
 *
 * @param other CStringFileFeatures to compare with
 * @return true if both objects use the same alphabet type, false otherwise
 */
template <class ST>
bool CStringFileFeatures<ST>::equals(CStringFileFeatures<ST>& other)
{
	// get_alphabet() returns an SG_REF'ed pointer; release both references
	// after reading the alphabet types so that they are not leaked.
	CAlphabet* alpha = this->get_alphabet();
	CAlphabet* alpha_other = other.get_alphabet();

	const bool same_alphabet =
		alpha->get_alphabet()==alpha_other->get_alphabet();

	SG_UNREF(alpha);
	SG_UNREF(alpha_other);

	return same_alphabet;
}

template class CStringFileFeatures<bool>;
template class CStringFileFeatures<char>;
template class CStringFileFeatures<int8_t>;
Expand Down
22 changes: 22 additions & 0 deletions src/shogun/features/StringFileFeatures.h
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,25 @@ template <class ST> class CStringFileFeatures : public CStringFeatures<ST>
*/
virtual const char* get_name() const { return "StringFileFeatures"; }

/** Returns the file name to be read.
*
* @return name of the file
*/
const char* get_file_name() const { return file_name; };

#ifndef SWIG // SWIG should skip this part
// /** Clone for CStringFileFeatures */
CStringFileFeatures<ST>* clone();

/** Equals method to CStringFileFeatures
* @param other CStringFileFeatures to compare with
* @return false if alphabets and file name are different,
* true otherwise
*/
bool equals(CStringFileFeatures<ST>& other);

#endif // SWIG // SWIG should skip this part

protected:
/** get next line from file
*
Expand Down Expand Up @@ -89,6 +108,9 @@ template <class ST> class CStringFileFeatures : public CStringFeatures<ST>
protected:
/** memory mapped file*/
CMemoryMappedFile<ST>* file;

/** file name*/
const char* file_name;
};
}
#endif // _CSTRINGFILEFEATURES__H__
21 changes: 18 additions & 3 deletions src/shogun/lib/SGStringList.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -93,12 +93,27 @@ SGStringList<T> SGStringList<T>::clone() const
{
SGStringList<T> result(*this);

auto strings = SG_MALLOC(SGString<T>, num_strings);
auto strs = SG_MALLOC(SGString<T>, num_strings);

for (auto i : range(num_strings))
strings[i] = this->strings[i].clone();
strs[i] = this->strings[i].clone();

return SGStringList<T>(strings, num_strings, max_string_length);
return SGStringList<T>(strs, num_strings, max_string_length);
}

/** Compare this string list with another one.
 *
 * @param other SGStringList to compare with
 * @return true if both lists have the same number of strings, the same
 * maximum string length and every pair of strings at the same index compares
 * equal, false otherwise
 */
template <class T>
bool SGStringList<T>::equals(const SGStringList<T>& other) const
{
	if (this->num_strings!=other.num_strings)
		return false;

	if (this->max_string_length!=other.max_string_length)
		return false;

	for (auto i : range(num_strings))
	{
		// equals() is called on a copy of the element, as in the original
		// expression, because this method is const.
		SGString<T> current = this->strings[i];

		// Return early only on a mismatch so that all strings get compared,
		// not just the first one.
		if (!current.equals(other.strings[i]))
			return false;
	}

	return true;
}

template class SGStringList<bool>;
Expand Down
8 changes: 8 additions & 0 deletions src/shogun/lib/SGStringList.h
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,14 @@ template <class T> class SGStringList : public SGReferencedData
*/
SGStringList<T> clone() const;


/** Equals method to SGStringList
* @param other SGStringList to compare with
* @return true if both lists have the same number of strings, the same
* maximum string length and equal strings, false otherwise
*/
bool equals(const SGStringList<T>& other) const;

protected:

/** copy data */
Expand Down
6 changes: 3 additions & 3 deletions src/shogun/util/factory.h
Original file line number Diff line number Diff line change
Expand Up @@ -96,16 +96,16 @@ namespace shogun
}

CFeatures* string_features(
CFile* file, EAlphabet alpha = DNA,
EPrimitiveType primitive_type = PT_CHAR)
CFile* file, machine_int_t alphabet_type = DNA,
machine_int_t primitive_type = PT_CHAR)
{
REQUIRE(file, "No file provided.\n");
CFeatures* result = nullptr;

switch (primitive_type)
{
case PT_CHAR:
result = new CStringFeatures<char>(file, alpha);
result = new CStringFeatures<char>(file, static_cast<EAlphabet>(alphabet_type));
break;
default:
SG_SNOTIMPLEMENTED
Expand Down
1 change: 0 additions & 1 deletion tests/unit/base/SGObjectAll_unittest.cc
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@
#include <shogun/base/range.h>
#include <shogun/base/some.h>
#include <shogun/io/SerializableAsciiFile.h>

using namespace shogun;

// to have a type for non-template SGObject classes
Expand Down
18 changes: 16 additions & 2 deletions tests/unit/features/StringFeatures_unittest.cc
Original file line number Diff line number Diff line change
Expand Up @@ -81,9 +81,11 @@ TEST(StringFeaturesTest,copy_subset)
TEST(StringFeaturesTest,clone)
{
SGStringList<char> strings = generateRandomStringData();

CStringFeatures<char>* f=new CStringFeatures<char>(strings, ALPHANUM);
CStringFeatures<char>* f_clone = (CStringFeatures<char> *) f->clone();
CStringFeatures<char>* f_clone = (CStringFeatures<char>*)f->clone();

EXPECT_EQ(f->get_num_vectors(), f_clone->get_num_vectors());
EXPECT_EQ(f->get_alphabet()->get_alphabet(), f_clone->get_alphabet()->get_alphabet());

for (index_t i=0; i<f->get_num_vectors(); ++i)
{
Expand All @@ -104,3 +106,15 @@ TEST(StringFeaturesTest,clone)
SG_UNREF(f);
SG_UNREF(f_clone);
}

// A clone of a StringFeatures object must compare equal to its source.
TEST(StringFeaturesTest,equals)
{
	SGStringList<char> data = generateRandomStringData();

	auto* features = new CStringFeatures<char>(data, ALPHANUM);
	auto* features_copy = (CStringFeatures<char>*)features->clone();

	EXPECT_TRUE(features->equals(*features_copy));

	SG_UNREF(features);
	SG_UNREF(features_copy);
}

0 comments on commit 71a7f3e

Please sign in to comment.