Skip to content

Commit

Permalink
WIP add uint16 (word) string list to example/test
Browse files Browse the repository at this point in the history
Tests in/outgoing typemaps for non-char SGStringList

* fix outgoing typemap in octave, now cell array of row vectors, previously only contained first character of each string
* fix (and clean up) outgoing typemap in java, previously raised ArrayStoreException due to incorrect inner array JNI type
  • Loading branch information
karlnapf committed Feb 26, 2019
1 parent 07f55bb commit 8e1f41a
Show file tree
Hide file tree
Showing 14 changed files with 95 additions and 86 deletions.
2 changes: 1 addition & 1 deletion data
3 changes: 2 additions & 1 deletion examples/meta/generator/parse.py
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,8 @@ def parse(self, programString, filePath=None):
'RealMatrix': 'SHOGUNSGTYPE',
'LongRealMatrix': 'SHOGUNSGTYPE',
'ComplexMatrix': 'SHOGUNSGTYPE',
'StringCharList':'SHOGUNSGTYPE'
'StringCharList':'SHOGUNSGTYPE',
'StringWordList':'SHOGUNSGTYPE'
}

t_INTLITERAL = "-?[0-9]+"
Expand Down
4 changes: 2 additions & 2 deletions examples/meta/generator/targets/cpp.json
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@
"RealSubsetFeatures": "DenseSubsetFeatures<float64_t>",
"StringCharFeatures": "StringFeatures<char>",
"StringCharList": "SGStringList<char>",
"StringWordList": "SGStringList<uint16_t>",
"Default": "$typeName",
"bool": "bool",
"string": "char*",
Expand Down Expand Up @@ -99,9 +100,8 @@
"get_real_vector": "$object->get<SGVector<float64_t>>($arguments)",
"get_real_matrix": "$object->get<SGMatrix<float64_t>>($arguments)",
"get_int_string_list": "$object->get<SGStringList<int32_t>>($arguments)",
"get_bool_string_list": "$object->get<SGStringList<bool>>($arguments)",
"get_char_string_list": "$object->get<SGStringList<char>>($arguments)",
"get_real_string_list": "$object->get<SGStringList<float64_t>>($arguments)"
"get_word_string_list": "$object->get<SGStringList<uint16_t>>($arguments)"
},
"StaticCall": "C$typeName::$method($arguments)",
"GlobalCall": "$method($arguments)",
Expand Down
5 changes: 3 additions & 2 deletions examples/meta/generator/targets/java.json
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
"IncludeGlobalFunctions": false,
"DependencyListElement": "import org.shogun.$typeName;",
"DependencyListSeparator": "\n",
"ExcludeImport": ["StringCharList"]
"ExcludeImport": ["StringCharList", "StringWordList"]
},
"Statement": "$statement;\n",
"Comment": "//$comment\n",
Expand Down Expand Up @@ -70,7 +70,8 @@
"RealMatrix": "DoubleMatrix",
"LongRealMatrix": "DoubleMatrix",
"ComplexMatrix": "DoubleMatrix",
"StringCharList": "String[]"
"StringCharList": "String[]",
"StringWordList": "int[][]"
},
"Expr": {
"StringLiteral": "\"$literal\"",
Expand Down
4 changes: 1 addition & 3 deletions examples/meta/generator/targets/python.json
Original file line number Diff line number Diff line change
Expand Up @@ -62,10 +62,8 @@
"get_real": "$object.get($arguments)",
"get_real_vector": "$object.get($arguments)",
"get_real_matrix": "$object.get($arguments)",
"get_bool_string_list": "$object.get($arguments)",
"get_char_string_list": "$object.get($arguments)",
"get_real_string_list": "$object.get($arguments)",
"get_int_string_list": "$object.get($arguments)"
"get_word_string_list": "$object.get($arguments)"
},
"StaticCall": "$typeName.$method($arguments)",
"GlobalCall": "$method($arguments$kwargs)",
Expand Down
5 changes: 3 additions & 2 deletions examples/meta/generator/translate.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,7 @@ def getBasicTypesToStore():
def getSGTypesToStore():
""" Returns all SG* types which will be serialized """
return ("RealVector","RealMatrix","FloatVector","FloatMatrix",
"StringCharList")
"StringCharList", "StringWordList")

def getSGTypeToStoreMethodName(sgType):
""" Translates given SG* type into meta language type """
Expand All @@ -93,7 +93,8 @@ def getSGTypeToStoreMethodName(sgType):
return "float_matrix"
elif sgType=="StringCharList":
return "string_char_list"

elif sgType=="StringWordList":
return "string_word_list"
else:
raise RuntimeError("Given Shogun type \"%s\" cannot be translated to meta type", sgType)

Expand Down
12 changes: 12 additions & 0 deletions examples/meta/src/features/string.sg
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# Meta example that reads string lists back out of string features, once for
# char-based features and once for uint16 (word) based features.

# Load the raw strings from file and wrap them as char string features.
File words = csv_file("../../data/words.dat")
Features f_char = string_features(words, enum EAlphabet.RAWBYTE)
# Read scalar properties and the char string list back through the getters.
int max_string_length = f_char.get_int("max_string_length")
int number_of_strings = f_char.get_int("num_vectors")
StringCharList char_list = f_char.get_char_string_list("string_list")

# Arguments passed to the char-to-word conversion below.
# NOTE(review): the exact meaning of start/p_order/gap/reverse is defined by
# the string_features factory, not by this script - confirm there.
int start = 0
int p_order = 2
int gap = 0
bool reverse = False
# Derive uint16 features from the char features and fetch their string list.
Features f_word = string_features(f_char, start, p_order, gap, reverse, enum EPrimitiveType.PT_UINT16)
StringWordList word_list = f_word.get_word_string_list("string_list")
11 changes: 0 additions & 11 deletions examples/meta/src/features/string_char.sg

This file was deleted.

71 changes: 38 additions & 33 deletions src/interfaces/java/swig_typemaps.i
Original file line number Diff line number Diff line change
Expand Up @@ -631,7 +631,7 @@ TYPEMAP_SGMATRIX(float64_t, double, Double, jdouble, "toDoubleArray", "()[[D", "
#endif

/* input/output typemap for CStringFeatures */
%define TYPEMAP_STRINGFEATURES(SGTYPE, JTYPE, JAVATYPE, JNITYPE, JNIDESC, CLASSDESC)
%define TYPEMAP_STRINGFEATURES(SGTYPE, JTYPE, JAVATYPE, JNITYPE, CLASSDESC)

%typemap(jni) shogun::SGStringList<SGTYPE> %{jobjectArray%}
%typemap(jtype) shogun::SGStringList<SGTYPE> %{JTYPE[][]%}
Expand Down Expand Up @@ -672,29 +672,34 @@ TYPEMAP_SGMATRIX(float64_t, double, Double, jdouble, "toDoubleArray", "()[[D", "
}

%typemap(out) shogun::SGStringList<SGTYPE> {
shogun::SGString<SGTYPE>* str = $1.strings;
int32_t i, j, num = $1.num_strings;
jclass cls;
jobjectArray res;

cls = JCALL1(FindClass, jenv, CLASSDESC);
res = JCALL3(NewObjectArray, jenv, num, cls, NULL);

for (i = 0; i < num; i++) {
SGTYPE* data = SG_MALLOC(SGTYPE, str[i].slen);
sg_memcpy(data, str[i].string, str[i].slen * sizeof(SGTYPE));

##JNITYPE##Array jarr = (##JNITYPE##Array)JCALL1(New##JAVATYPE##Array, jenv, str[i].slen);

JNITYPE* arr = SG_MALLOC(JNITYPE, str[i].slen);
for (j = 0; j < str[i].slen; j++) {
arr[j] = (JNITYPE)data[j];
shogun::SGString<SGTYPE>* string_array = $1.strings;
int32_t num_strings = $1.num_strings;

// class for inner array (the individual strings)
jclass cls = JCALL1(FindClass, jenv, CLASSDESC);

// create outer array of inner array types
jobjectArray outer = JCALL3(NewObjectArray, jenv, num_strings, cls, NULL);

for (auto i : range(num_strings)) {
auto string = string_array[i];
auto slen = string.slen;

##JNITYPE##Array inner = JCALL1(New##JAVATYPE##Array, jenv, slen);

// convert to java type and pass to inner array
JNITYPE* inner_data = SG_MALLOC(JNITYPE, slen);
for (auto j : range(slen))
{
inner_data[j] = (JNITYPE)string.string[j];
}
JCALL4(Set##JAVATYPE##ArrayRegion, jenv, jarr, 0, str[i].slen, arr);
JCALL3(SetObjectArrayElement, jenv, res, i, jarr);
JCALL1(DeleteLocalRef, jenv, jarr);
JCALL4(Set##JAVATYPE##ArrayRegion, jenv, inner, 0, slen, inner_data);

// place inner into outer array
JCALL3(SetObjectArrayElement, jenv, outer, i, inner);
JCALL1(DeleteLocalRef, jenv, inner);
}
$result = res;
$result = outer;
}

%typemap(javain) shogun::SGStringList<SGTYPE> "$javainput"
Expand All @@ -704,17 +709,17 @@ TYPEMAP_SGMATRIX(float64_t, double, Double, jdouble, "toDoubleArray", "()[[D", "

%enddef

TYPEMAP_STRINGFEATURES(bool, boolean, Boolean, jboolean, "Boolen[][]", "[[Z")
TYPEMAP_STRINGFEATURES(uint8_t, byte, Byte, jbyte, "Byte[][]", "[[S")
TYPEMAP_STRINGFEATURES(int16_t, short, Short, jshort, "Short[][]", "[[S")
TYPEMAP_STRINGFEATURES(uint16_t, int, Int, jint, "Int[][]", "[[I")
TYPEMAP_STRINGFEATURES(int32_t, int, Int, jint, "Int[][]", "[[I")
TYPEMAP_STRINGFEATURES(uint32_t, long, Long, jlong, "Long[][]", "[[J")
TYPEMAP_STRINGFEATURES(int64_t, int, Int, jint, "Int[][]", "[[I")
TYPEMAP_STRINGFEATURES(uint64_t, long, Long, jlong, "Long[][]", "[[J")
TYPEMAP_STRINGFEATURES(long long, long, Long, jlong, "Long[][]", "[[J")
TYPEMAP_STRINGFEATURES(float32_t, float, Float, jfloat, "Float[][]", "[[F")
TYPEMAP_STRINGFEATURES(float64_t, double, Double, jdouble, "Double[][]", "[[D")
/* Instantiate the SGStringList typemaps for each supported element type.
 *
 * Arguments, in order:
 *   SGTYPE    - element type of the shogun::SGStringList
 *   JTYPE     - Java primitive type used on the Java side (JTYPE[][])
 *   JAVATYPE  - name spliced into the JNI New<JAVATYPE>Array and
 *               Set<JAVATYPE>ArrayRegion calls
 *   JNITYPE   - JNI element type of the inner (per-string) array
 *   CLASSDESC - JNI descriptor passed to FindClass to obtain the class of
 *               the inner array, used as element class of the outer array
 *
 * CLASSDESC has to describe an array of JNITYPE: the inner arrays are stored
 * into the outer object array, and a mismatching element class makes that
 * store fail. uint8_t therefore uses "[B" (byte[]), not "[S" (short[]).
 *
 * NOTE(review): int64_t is mapped to Java int / jint, so 64-bit values are
 * narrowed by the element-wise cast - confirm this is intended.
 */
TYPEMAP_STRINGFEATURES(bool, boolean, Boolean, jboolean, "[Z")
TYPEMAP_STRINGFEATURES(uint8_t, byte, Byte, jbyte, "[B")
TYPEMAP_STRINGFEATURES(int16_t, short, Short, jshort, "[S")
TYPEMAP_STRINGFEATURES(uint16_t, int, Int, jint, "[I")
TYPEMAP_STRINGFEATURES(int32_t, int, Int, jint, "[I")
TYPEMAP_STRINGFEATURES(uint32_t, long, Long, jlong, "[J")
TYPEMAP_STRINGFEATURES(int64_t, int, Int, jint, "[I")
TYPEMAP_STRINGFEATURES(uint64_t, long, Long, jlong, "[J")
TYPEMAP_STRINGFEATURES(long long, long, Long, jlong, "[J")
TYPEMAP_STRINGFEATURES(float32_t, float, Float, jfloat, "[F")
TYPEMAP_STRINGFEATURES(float64_t, double, Double, jdouble, "[D")

#undef TYPEMAP_STRINGFEATURES

Expand Down
45 changes: 23 additions & 22 deletions src/interfaces/octave/swig_typemaps.i
Original file line number Diff line number Diff line change
Expand Up @@ -367,7 +367,6 @@ TYPEMAP_STRINGFEATURES_IN(is_matrix_type() && arg.is_uint16_type, uint16NDArray,
#undef TYPEMAP_STRINGFEATURES_IN

/* output typemap for CStringFeatures */
%define TYPEMAP_STRINGFEATURES_OUT(type,typecode)
%typemap(out) shogun::SGStringList<char>
{
shogun::SGString<char>* str = $1.strings;
Expand All @@ -381,32 +380,34 @@ TYPEMAP_STRINGFEATURES_IN(is_matrix_type() && arg.is_uint16_type, uint16NDArray,

$result = c;
}

%typemap(out) shogun::SGStringList<type>
%define TYPEMAP_STRINGFEATURES_OUT(oct_type, sg_type)
%typemap(out) shogun::SGStringList<sg_type>
{
shogun::SGString<type>* str = $1.strings;
int32_t i, num_strings = $1.num_strings;

ColumnVector c(dim_vector(num_strings, 1));

for (i = 0; i < num_strings; i++) {
c(i)=*str[i].string;
}
SGString<sg_type>* strings = $1.strings;
int32_t num_strings = $1.num_strings;
int32_t max_string_length = $1.max_string_length;

Cell c(num_strings, 1);

for (auto i : range(num_strings))
{
auto len = strings[i].slen;
dim_vector vdims = dim_vector::alloc(2);
vdims(0) = 1;
vdims(1) = len;
auto vec=oct_type(vdims);

for (auto j : range(len))
vec(j) = strings[i].string[j];

c(i) = vec;
}

$result = c;
$result = c;
}
%enddef

TYPEMAP_STRINGFEATURES_OUT(char, Cell)
TYPEMAP_STRINGFEATURES_OUT(uint8_t, ColumnVector)
TYPEMAP_STRINGFEATURES_OUT(int16_t, ColumnVector)
TYPEMAP_STRINGFEATURES_OUT(uint16_t, ColumnVector)
TYPEMAP_STRINGFEATURES_OUT(int32_t, ColumnVector)
TYPEMAP_STRINGFEATURES_OUT(uint32_t, ColumnVector)
TYPEMAP_STRINGFEATURES_OUT(int64_t, ColumnVector)
TYPEMAP_STRINGFEATURES_OUT(uint64_t, ColumnVector)
TYPEMAP_STRINGFEATURES_OUT(float64_t, ColumnVector)

TYPEMAP_STRINGFEATURES_OUT(uint16NDArray, uint16_t)
#undef TYPEMAP_STRINGFEATURES_OUT

/* input typemap for Sparse Features */
Expand Down
3 changes: 1 addition & 2 deletions src/interfaces/python/swig_typemaps.i
Original file line number Diff line number Diff line change
Expand Up @@ -1351,9 +1351,8 @@ _GETTERS = ["get",
"get_real_matrix",
"get_real_vector",
"get_int_vector",
"get_bool_string_list",
"get_char_string_list",
"get_int_string_list"
"get_word_string_list"
]

_FACTORIES = ["distance",
Expand Down
1 change: 1 addition & 0 deletions src/interfaces/swig/Library.i
Original file line number Diff line number Diff line change
Expand Up @@ -437,6 +437,7 @@ namespace shogun
%template(append_element_int) CDynamicObjectArray::append_element<int32_t, int32_t>;
#endif
%template(append_element_string_char_list) CDynamicObjectArray::append_element_string_list<SGStringList<char>, SGStringList<char>>;
%template(append_element_string_word_list) CDynamicObjectArray::append_element_string_list<SGStringList<uint16_t>, SGStringList<uint16_t>>;
}
%include <shogun/lib/IndexBlock.h>
%include <shogun/lib/IndexBlockRelation.h>
Expand Down
3 changes: 1 addition & 2 deletions src/interfaces/swig/shogun.i
Original file line number Diff line number Diff line change
Expand Up @@ -232,9 +232,8 @@ namespace shogun
%template(get_real) CSGObject::get<float64_t, void>;
%template(get_int) CSGObject::get<int32_t, void>;
%template(get_real_matrix) CSGObject::get<SGMatrix<float64_t>, void>;
%template(get_bool_string_list) CSGObject::get<SGStringList<bool>, void>;
%template(get_char_string_list) CSGObject::get<SGStringList<char>, void>;
%template(get_int_string_list) CSGObject::get<SGStringList<int32_t>, void>;
%template(get_word_string_list) CSGObject::get<SGStringList<uint16_t>, void>;

#ifndef SWIGJAVA
%template(get_real_vector) CSGObject::get<SGVector<float64_t>, void>;
Expand Down
12 changes: 7 additions & 5 deletions src/shogun/util/factory.h
Original file line number Diff line number Diff line change
Expand Up @@ -154,17 +154,19 @@ namespace shogun
*/
CFeatures* string_features(
CFeatures* features, int32_t start, int32_t p_order, int32_t gap,
bool rev, EPrimitiveType primitive_type)
bool rev, machine_int_t primitive_type)
{

REQUIRE_E(features, std::invalid_argument, "No features provided.\n");
REQUIRE_E(
features->get_feature_class() == C_STRING &&
features->get_feature_type() == F_CHAR,
std::invalid_argument, "Only StringCharFeatures are supported, "
"provided feature class (%d), feature type "
"(%d).\n",
features->get_feature_class(), features->get_feature_type());
std::invalid_argument, "Given features must be char-based StringFeatures, "
"provided (%s) have feature class (%d), feature type "
"(%d) and class name.\n",
features->get_name(),
features->get_feature_class(),
features->get_feature_type());

auto string_features = features->as<CStringFeatures<char>>();

Expand Down

0 comments on commit 8e1f41a

Please sign in to comment.