Skip to content

Commit

Permalink
Added meta example features-char-string
Browse files Browse the repository at this point in the history
  • Loading branch information
avramidis authored and vigsterkr committed Feb 19, 2019
1 parent e570460 commit dad4536
Show file tree
Hide file tree
Showing 26 changed files with 127 additions and 65 deletions.
2 changes: 1 addition & 1 deletion doc/readme/INTERFACES.md
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ This needs `shogun.oct` to be visible, which is either in `path/to/build/src/int

Running an example:

python path/to/octave_example.py
octave path/to/octave_example.m

### Ruby
This needs `shogun.rb` to be visible, which is either in `path/to/build/src/interfaces/ruby_modular/` or in something similar to `path/to/shogun-install/lib/x86_64-linux-gnu/site_ruby`
Expand Down
3 changes: 2 additions & 1 deletion examples/meta/generator/parse.py
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,8 @@ def parse(self, programString, filePath=None):
'ShortRealMatrix': 'SHOGUNSGTYPE',
'RealMatrix': 'SHOGUNSGTYPE',
'LongRealMatrix': 'SHOGUNSGTYPE',
'ComplexMatrix': 'SHOGUNSGTYPE'
'ComplexMatrix': 'SHOGUNSGTYPE',
'StringCharList':'SHOGUNSGTYPE'
}

t_INTLITERAL = "-?[0-9]+"
Expand Down
9 changes: 7 additions & 2 deletions examples/meta/generator/targets/cpp.json
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,8 @@
"Type": {
"RealFeatures": "DenseFeatures<float64_t>",
"RealSubsetFeatures": "DenseSubsetFeatures<float64_t>",
"StringCharFeatures": "CStringFeatures<char>",
"StringCharFeatures": "StringFeatures<char>",
"StringCharList": "SGStringList<char>",
"Default": "$typeName",
"bool": "bool",
"string": "char*",
Expand Down Expand Up @@ -96,7 +97,11 @@
"get_int_vector": "$object->get<SGVector<int32_t>>($arguments)",
"get_real": "$object->get<float64_t>($arguments)",
"get_real_vector": "$object->get<SGVector<float64_t>>($arguments)",
"get_real_matrix": "$object->get<SGMatrix<float64_t>>($arguments)"
"get_real_matrix": "$object->get<SGMatrix<float64_t>>($arguments)",
"get_int_string_list": "$object->get<SGStringList<int32_t>>($arguments)",
"get_bool_string_list": "$object->get<SGStringList<bool>>($arguments)",
"get_char_string_list": "$object->get<SGStringList<char>>($arguments)",
"get_real_string_list": "$object->get<SGStringList<float64_t>>($arguments)"
},
"StaticCall": "C$typeName::$method($arguments)",
"GlobalCall": "$method($arguments)",
Expand Down
3 changes: 2 additions & 1 deletion examples/meta/generator/targets/csharp.json
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,8 @@
"ShortRealMatrix": "float[,]",
"RealMatrix": "double[,]",
"LongRealMatrix": "double[,]",
"ComplexMatrix": "Complex[,]"
"ComplexMatrix": "Complex[,]",
"StringCharList": "String[]"
},
"Expr": {
"StringLiteral": "\"$literal\"",
Expand Down
6 changes: 4 additions & 2 deletions examples/meta/generator/targets/java.json
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,8 @@
"IncludeEnums": true,
"IncludeGlobalFunctions": false,
"DependencyListElement": "import org.shogun.$typeName;",
"DependencyListSeparator": "\n"
"DependencyListSeparator": "\n",
"ExcludeImport": ["StringCharList"]
},
"Statement": "$statement;\n",
"Comment": "//$comment\n",
Expand Down Expand Up @@ -68,7 +69,8 @@
"ShortRealMatrix": "FloatMatrix",
"RealMatrix": "DoubleMatrix",
"LongRealMatrix": "DoubleMatrix",
"ComplexMatrix": "DoubleMatrix"
"ComplexMatrix": "DoubleMatrix",
"StringCharList": "String[]"
},
"Expr": {
"StringLiteral": "\"$literal\"",
Expand Down
2 changes: 1 addition & 1 deletion examples/meta/generator/targets/octave.json
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
{
"Program": "shogun\n\n$program",
"Program": "shogun;\n\n$program",
"Statement": "$statement;\n",
"Comment": "%$comment\n",
"Init": {
Expand Down
6 changes: 5 additions & 1 deletion examples/meta/generator/targets/python.json
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,11 @@
"get_int_vector": "$object.get($arguments)",
"get_real": "$object.get($arguments)",
"get_real_vector": "$object.get($arguments)",
"get_real_matrix": "$object.get($arguments)"
"get_real_matrix": "$object.get($arguments)",
"get_bool_string_list": "$object.get($arguments)",
"get_char_string_list": "$object.get($arguments)",
"get_real_string_list": "$object.get($arguments)",
"get_int_string_list": "$object.get($arguments)"
},
"StaticCall": "$typeName.$method($arguments)",
"GlobalCall": "$method($arguments$kwargs)",
Expand Down
4 changes: 4 additions & 0 deletions examples/meta/generator/translate.py
Original file line number Diff line number Diff line change
Expand Up @@ -341,6 +341,10 @@ def dependenciesString(self, allClasses, interfacedClasses, enums,
if self.targetDict["Dependencies"].get("IncludeGlobalFunctions"):
dependencies = dependencies.union(globalFunctions)

if "ExcludeImport" in self.targetDict["Dependencies"]:
for item in self.targetDict["Dependencies"].get("ExcludeImport"):
dependencies.discard(item)

dependencies = list(dependencies)

translations = list(set(map(self.translateDependencyElement, dependencies)))
Expand Down
11 changes: 11 additions & 0 deletions examples/meta/src/features/string_char.sg
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
# Meta example: build char string features from a data file and query them.
# Read the input strings from words.dat.
File words = csv_file("../../data/words.dat")

# Create string features from the file, using the RAWBYTE alphabet.
#![create_features]
Features f = string_features(words, enum EAlphabet.RAWBYTE)
#![create_features]

# Query the feature object through its generic getters: two integer values
# ("max_string_length", "num_vectors") and the stored strings ("string_list").
#![output stat]
int max_string_length = f.get_int("max_string_length")
int number_of_strings = f.get_int("num_vectors")
StringCharList string_list = f.get_char_string_list("string_list")
#![output stat]
2 changes: 1 addition & 1 deletion examples/undocumented/python/features_string_char.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ def features_string_char (strings):
f.set_feature_vector(array(['t','e','s','t']), 0)

#print("strings", f.get_features())
return f.get_features(), f
return f.get_string_list(), f

if __name__=='__main__':
print('StringCharFeatures')
Expand Down
2 changes: 1 addition & 1 deletion examples/undocumented/python/features_string_file.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ def features_string_file (directory, fname):
#or load fasta file
#f.load_fasta('fasta.fa')
#print(f.get_features())
return f.get_features(), f
return f.get_string_list(), f

if __name__=='__main__':
print('StringWordFeatures')
Expand Down
2 changes: 1 addition & 1 deletion examples/undocumented/python/features_string_ulong.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ def features_string_ulong (start=0,order=2,gap=0,rev=False):
uf.set_feature_vector(array([1,2,3,4,5], dtype=uint64), 0)


return uf.get_features(),uf.get_feature_vector(2), uf.get_num_vectors()
return uf.get_string_list(),uf.get_feature_vector(2), uf.get_num_vectors()

if __name__=='__main__':
print('simple_longint')
Expand Down
2 changes: 1 addition & 1 deletion examples/undocumented/python/features_string_word.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ def features_string_word (strings, start, order, gap, rev):
wf.set_feature_vector(array([1,2,3,4,5], dtype=uint16), 0)

#print("strings", wf.get_features())
return wf.get_features(), wf
return wf.get_string_list(), wf

if __name__=='__main__':
print('StringWordFeatures')
Expand Down
4 changes: 1 addition & 3 deletions src/interfaces/csharp/swig_typemaps.i
Original file line number Diff line number Diff line change
Expand Up @@ -229,9 +229,7 @@ TYPEMAP_SGMATRIX(float64_t, double, double)
for (i = 0; i < rows; i++) {
sg_memcpy(res, str[i].string, str[i].slen * sizeof(SGTYPE));
res = res + cols;
SG_FREE(str[i].string);
}
SG_FREE(str);
$result = res;
}

Expand Down Expand Up @@ -341,7 +339,7 @@ TYPEMAP_STRINGFEATURES(float64_t, double, double)

string[] result = new string[size];
for (int i = 0; i < size; i++) {
result[i] = Marshal.PtrToStringAnsi(ptrarray[i + 1]);
result[i] = Marshal.PtrToStringAnsi(ptrarray[i + 1]);
}

Marshal.FreeCoTaskMem(ranks[0]);
Expand Down
8 changes: 1 addition & 7 deletions src/interfaces/java/swig_typemaps.i
Original file line number Diff line number Diff line change
Expand Up @@ -693,11 +693,7 @@ TYPEMAP_SGMATRIX(float64_t, double, Double, jdouble, "toDoubleArray", "()[[D", "
JCALL4(Set##JAVATYPE##ArrayRegion, jenv, jarr, 0, str[i].slen, arr);
JCALL3(SetObjectArrayElement, jenv, res, i, jarr);
JCALL1(DeleteLocalRef, jenv, jarr);

This comment has been minimized.

Copy link
@karlnapf

karlnapf Feb 26, 2019

Member

Just seeing this. This second line introduces a memory leak. Same goes here: if modifying/adding low level code, it needs to be tested at least locally, in this case with a memory leak checker.
I’ll fix it in my string PR

SG_FREE(str[i].string);
SG_FREE(arr);
}
SG_FREE(str);
$result = res;
}

Expand All @@ -718,7 +714,7 @@ TYPEMAP_STRINGFEATURES(int64_t, int, Int, jint, "Int[][]", "[[I")
TYPEMAP_STRINGFEATURES(uint64_t, long, Long, jlong, "Long[][]", "[[J")
TYPEMAP_STRINGFEATURES(long long, long, Long, jlong, "Long[][]", "[[J")
TYPEMAP_STRINGFEATURES(float32_t, float, Float, jfloat, "Float[][]", "[[F")
TYPEMAP_STRINGFEATURES(float64_t, double, Double, jdouble, "Doulbe[][]", "[[D")
TYPEMAP_STRINGFEATURES(float64_t, double, Double, jdouble, "Double[][]", "[[D")

#undef TYPEMAP_STRINGFEATURES

Expand Down Expand Up @@ -777,9 +773,7 @@ TYPEMAP_STRINGFEATURES(float64_t, double, Double, jdouble, "Doulbe[][]", "[[D")
jstring jstr = (jstring)JCALL1(NewStringUTF, jenv, (char *)str[i].string);
JCALL3(SetObjectArrayElement, jenv, res, i, jstr);
JCALL1(DeleteLocalRef, jenv, jstr);
SG_FREE(str[i].string);
}
SG_FREE(str);
$result = res;
}

Expand Down
43 changes: 33 additions & 10 deletions src/interfaces/octave/swig_typemaps.i
Original file line number Diff line number Diff line change
Expand Up @@ -368,21 +368,44 @@ TYPEMAP_STRINGFEATURES_IN(is_matrix_type() && arg.is_uint16_type, uint16NDArray,

/* output typemap for CStringFeatures */
%define TYPEMAP_STRINGFEATURES_OUT(type,typecode)
%typemap(out) shogun::SGStringList<char>
{
shogun::SGString<char>* str = $1.strings;
int32_t i, num_strings = $1.num_strings;

Cell c(num_strings, 1);

for (i = 0; i < num_strings; i++) {
c(i)=std::string(str[i].string);
}

$result = c;
}

%typemap(out) shogun::SGStringList<type>
{
/* TODO STRING OUT TYPEMAPS */
shogun::SGString<type>* str = $1.strings;
int32_t i, num_strings = $1.num_strings;

ColumnVector c(dim_vector(num_strings, 1));

for (i = 0; i < num_strings; i++) {
c(i)=*str[i].string;

This comment has been minimized.

Copy link
@karlnapf

karlnapf Feb 25, 2019

Member

This code has never worked for non-char strings (the result only contained the first element of each string). I just put a fix in my last PR (not yet merged). Generally, code this deep in the lib needs to be explicitly tested before it gets merged. At the very least, run it locally to see the output if there is no test.

This comment has been minimized.

Copy link
@avramidis

avramidis Feb 26, 2019

Author Contributor

I only tested it for chars. I thought that we would scrap the non-char ones because of #4423 (comment).
I should have addressed this comment before, though.

This comment has been minimized.

Copy link
@karlnapf

karlnapf Feb 26, 2019

Member

Sure, we don’t need to add them all if they are not used in examples for now. But below you explicitly instantiate the non-working, untested typemap for all those non-char types. That’s quite different from not adding them in the first place.
Well, never mind; next time we can be a bit more careful. It is a good opportunity to generally clean this area up a bit.

This comment has been minimized.

Copy link
@avramidis

avramidis Feb 26, 2019

Author Contributor

Yes, I agree. I am not saying it was your fault. I am saying it was mine that I didn't dig more.

}

$result = c;
}
%enddef

TYPEMAP_STRINGFEATURES_OUT(char, charMatrix)
TYPEMAP_STRINGFEATURES_OUT(uint8_t, uint8NDArray)
TYPEMAP_STRINGFEATURES_OUT(int16_t, int16NDArray)
TYPEMAP_STRINGFEATURES_OUT(uint16_t, uint16NDArray)
TYPEMAP_STRINGFEATURES_OUT(int32_t, int32NDArray)
TYPEMAP_STRINGFEATURES_OUT(uint32_t, uint32NDArray)
TYPEMAP_STRINGFEATURES_OUT(int64_t, int64NDArray)
TYPEMAP_STRINGFEATURES_OUT(uint64_t, uint64NDArray)
TYPEMAP_STRINGFEATURES_OUT(float64_t, Matrix)
TYPEMAP_STRINGFEATURES_OUT(char, Cell)
TYPEMAP_STRINGFEATURES_OUT(uint8_t, ColumnVector)
TYPEMAP_STRINGFEATURES_OUT(int16_t, ColumnVector)
TYPEMAP_STRINGFEATURES_OUT(uint16_t, ColumnVector)
TYPEMAP_STRINGFEATURES_OUT(int32_t, ColumnVector)
TYPEMAP_STRINGFEATURES_OUT(uint32_t, ColumnVector)
TYPEMAP_STRINGFEATURES_OUT(int64_t, ColumnVector)
TYPEMAP_STRINGFEATURES_OUT(uint64_t, ColumnVector)
TYPEMAP_STRINGFEATURES_OUT(float64_t, ColumnVector)

#undef TYPEMAP_STRINGFEATURES_OUT

Expand Down
5 changes: 4 additions & 1 deletion src/interfaces/python/swig_typemaps.i
Original file line number Diff line number Diff line change
Expand Up @@ -1350,7 +1350,10 @@ _GETTERS = ["get",
"get_int",
"get_real_matrix",
"get_real_vector",
"get_int_vector"
"get_int_vector",
"get_bool_string_list",
"get_char_string_list",
"get_int_string_list"
]

_FACTORIES = ["distance",
Expand Down
3 changes: 3 additions & 0 deletions src/interfaces/swig/shogun.i
Original file line number Diff line number Diff line change
Expand Up @@ -232,6 +232,9 @@ namespace shogun
%template(get_real) CSGObject::get<float64_t, void>;
%template(get_int) CSGObject::get<int32_t, void>;
%template(get_real_matrix) CSGObject::get<SGMatrix<float64_t>, void>;
%template(get_bool_string_list) CSGObject::get<SGStringList<bool>, void>;
%template(get_char_string_list) CSGObject::get<SGStringList<char>, void>;
%template(get_int_string_list) CSGObject::get<SGStringList<int32_t>, void>;

#ifndef SWIGJAVA
%template(get_real_vector) CSGObject::get<SGVector<float64_t>, void>;
Expand Down
8 changes: 5 additions & 3 deletions src/shogun/features/StringFeatures.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -180,7 +180,7 @@ template<class ST> EFeatureClass CStringFeatures<ST>::get_feature_class() const

template<class ST> EFeatureType CStringFeatures<ST>::get_feature_type() const { return F_UNKNOWN; }

template<class ST> CAlphabet* CStringFeatures<ST>::get_alphabet()
template<class ST> CAlphabet* CStringFeatures<ST>::get_alphabet() const
{
SG_REF(alphabet);
return alphabet;
Expand Down Expand Up @@ -983,15 +983,15 @@ template<class ST> bool CStringFeatures<ST>::append_features(SGString<ST>* p_fea
return false;
}

template<class ST> SGStringList<ST> CStringFeatures<ST>::get_features()
template<class ST> SGStringList<ST> CStringFeatures<ST>::get_string_list() const
{
SGStringList<ST> sl(NULL,0,0,false);

sl.strings=get_features(sl.num_strings, sl.max_string_length);
return sl;
}

template<class ST> SGString<ST>* CStringFeatures<ST>::get_features(int32_t& num_str, int32_t& max_str_len)
template<class ST> SGString<ST>* CStringFeatures<ST>::get_features(int32_t& num_str, int32_t& max_str_len) const
{
if (m_subset_stack->has_subsets())
SG_ERROR("get features() is not possible on subset")
Expand Down Expand Up @@ -1695,6 +1695,8 @@ template<class ST> void CStringFeatures<ST>::init()

m_parameters->add_vector(&symbol_mask_table, &symbol_mask_table_len, "mask_table", "Symbol mask table - using in higher order mapping");
watch_param("mask_table", &symbol_mask_table, &symbol_mask_table_len);
watch_method("num_vectors", &CStringFeatures::get_num_vectors);
watch_method("string_list", &CStringFeatures::get_string_list);
}

/** get feature type the char feature can deal with
Expand Down
10 changes: 5 additions & 5 deletions src/shogun/features/StringFeatures.h
Original file line number Diff line number Diff line change
Expand Up @@ -151,7 +151,7 @@ template <class ST> class CStringFeatures : public CFeatures
*
* @return alphabet
*/
CAlphabet* get_alphabet();
CAlphabet* get_alphabet() const;

/** duplicate feature object
*
Expand Down Expand Up @@ -419,10 +419,10 @@ template <class ST> class CStringFeatures : public CFeatures
bool append_features(SGString<ST>* p_features, int32_t p_num_vectors,
int32_t p_max_string_length);

/** get_features
* @return features
/** get_string_list
* @return string_list
*/
SGStringList<ST> get_features();
SGStringList<ST> get_string_list() const;

/** get_features
*
Expand All @@ -432,7 +432,7 @@ template <class ST> class CStringFeatures : public CFeatures
* @param max_str_len maximal string length (returned)
* @return string features
*/
virtual SGString<ST>* get_features(int32_t& num_str, int32_t& max_str_len);
virtual SGString<ST>* get_features(int32_t& num_str, int32_t& max_str_len) const;

/** copy_features
*
Expand Down
18 changes: 18 additions & 0 deletions src/shogun/lib/SGStringList.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,24 @@ SGStringList<T> SGStringList<T>::clone() const
return SGStringList<T>(strings, num_strings, max_string_length);
}

/** Compare this string list with another one.
 *
 * The lists are considered equal when their string counts match, their
 * maximum string lengths match, and each string compares equal (through the
 * element's own equals()) to the string at the same index in the other list.
 *
 * @param other string list to compare with
 * @return true if all of the above hold, false otherwise
 */
template <class T>
bool SGStringList<T>::equals(const SGStringList<T>& other) const
{
	// Cheap metadata comparison first; any mismatch settles the answer.
	const bool same_layout =
	    this->num_strings == other.num_strings &&
	    this->max_string_length == other.max_string_length;
	if (!same_layout)
		return false;

	// Walk both lists in lockstep and stop at the first differing string.
	for (decltype(this->num_strings) idx = 0; idx < this->num_strings; ++idx)
	{
		const bool same_item = this->strings[idx].equals(other.strings[idx]);
		if (!same_item)
			return false;
	}

	return true;
}

template class SGStringList<bool>;
template class SGStringList<char>;
template class SGStringList<int8_t>;
Expand Down
8 changes: 8 additions & 0 deletions src/shogun/lib/SGStringList.h
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,14 @@ template <class T> class SGStringList : public SGReferencedData
*/
SGStringList<T> clone() const;


/** Equals method
* @param other SGStringList to compare with
* @return false iff the number of strings, the maximum string length or
* any of the string items are different, true otherwise
*/
bool equals(const SGStringList<T>& other) const;

protected:

/** copy data */
Expand Down
2 changes: 1 addition & 1 deletion src/shogun/preprocessor/StringPreprocessor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -110,7 +110,7 @@ namespace shogun
string_features = new CStringFeatures<ST>(*string_features);
}

auto string_list = string_features->get_features();
auto string_list = string_features->get_string_list();

apply_to_string_list(string_list);

Expand Down

0 comments on commit dad4536

Please sign in to comment.