Skip to content

Commit

Permalink
Merge pull request #8 from jubatus/remove-regexp-dependency
Browse files Browse the repository at this point in the history
Users can remove regexp dependency at compile time
  • Loading branch information
gwtnb committed Jun 17, 2014
2 parents b2c8106 + 80b02a0 commit 4ec84ae
Show file tree
Hide file tree
Showing 12 changed files with 187 additions and 39 deletions.
2 changes: 2 additions & 0 deletions jubatus/core/fv_converter/converter_config_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ namespace jubatus {
namespace core {
namespace fv_converter {

#if defined(HAVE_RE2) || defined(HAVE_ONIGURUMA)
TEST(converter_config, config) {
try {
std::ifstream ifs("./test_input/config.json");
Expand Down Expand Up @@ -77,6 +78,7 @@ TEST(converter_config, config) {
throw;
}
}
#endif

TEST(converter_config, hash) {
converter_config config;
Expand Down
2 changes: 2 additions & 0 deletions jubatus/core/fv_converter/datum_to_fv_converter_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -310,6 +310,7 @@ TEST(datum_to_fv_converter, register_string_filter) {
EXPECT_EQ(1u, feature.size());
}

#if defined(HAVE_RE2) || defined(HAVE_ONIGURUMA)
conv.register_string_filter(shared_ptr<key_matcher>(new match_all()),
shared_ptr<string_filter>(new regexp_filter("<[^>]*>", "")),
"_filtered");
Expand All @@ -320,6 +321,7 @@ TEST(datum_to_fv_converter, register_string_filter) {
EXPECT_EQ(2u, feature.size());
EXPECT_EQ("/text_filtered$aaa@str#bin/bin", feature[1].first);
}
#endif
}

TEST(datum_to_fv_converter, register_num_filter) {
Expand Down
2 changes: 2 additions & 0 deletions jubatus/core/fv_converter/key_matcher_factory_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,9 @@ TEST(fv_converter, key_matcher_factory) {
ASSERT_FALSE(m_t(f.create_matcher("hogehoge"))->match("hogefuga"));
ASSERT_TRUE(m_t(f.create_matcher("hogehoge"))->match("hogehoge"));

#if defined(HAVE_RE2) || defined(HAVE_ONIGURUMA)
ASSERT_TRUE(m_t(f.create_matcher("/.*/hoge/"))->match("fuga/hoge"));
#endif
}

} // namespace fv_converter
Expand Down
41 changes: 41 additions & 0 deletions jubatus/core/fv_converter/null_regexp_filter.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
// Jubatus: Online machine learning framework for distributed environment
// Copyright (C) 2014 Preferred Infrastructure and Nippon Telegraph and Telephone Corporation.
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License version 2.1 as published by the Free Software Foundation.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License along with this library; if not, write to the Free Software
// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA

#ifndef JUBATUS_CORE_FV_CONVERTER_NULL_REGEXP_FILTER_HPP_
#define JUBATUS_CORE_FV_CONVERTER_NULL_REGEXP_FILTER_HPP_

#include "string_filter.hpp"
#include "exception.hpp"

namespace jubatus {
namespace core {
namespace fv_converter {

// Stand-in for regexp_filter that is selected by regexp_filter.hpp when
// neither HAVE_RE2 nor HAVE_ONIGURUMA is defined. It keeps code that names
// regexp_filter compiling, but refuses to be constructed.
class regexp_filter : public string_filter {
 public:
  // Unconditionally throws the converter_exception built below (passed
  // through JUBATUS_EXCEPTION). Both arguments are ignored.
  regexp_filter(const std::string& regexp, const std::string& replace) {
    throw JUBATUS_EXCEPTION(converter_exception("regexp support is disabled"));
  }

  // Does nothing: `input` is not read and `output` is left unmodified.
  // The constructor above always throws, so this is not expected to be
  // reached on an instance created through it.
  void filter(const std::string& input, std::string& output) const {
  }
};

} // namespace fv_converter
} // namespace core
} // namespace jubatus

#endif // JUBATUS_CORE_FV_CONVERTER_NULL_REGEXP_FILTER_HPP_
43 changes: 43 additions & 0 deletions jubatus/core/fv_converter/null_regexp_match.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
// Jubatus: Online machine learning framework for distributed environment
// Copyright (C) 2011 Preferred Infrastructure and Nippon Telegraph and Telephone Corporation.
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License version 2.1 as published by the Free Software Foundation.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License along with this library; if not, write to the Free Software
// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA

#ifndef JUBATUS_CORE_FV_CONVERTER_NULL_REGEXP_MATCH_HPP_
#define JUBATUS_CORE_FV_CONVERTER_NULL_REGEXP_MATCH_HPP_

#include "key_matcher.hpp"
#include "exception.hpp"

namespace jubatus {
namespace core {
namespace fv_converter {

// Stand-in for regexp_match that is selected by regexp_match.hpp when
// neither HAVE_RE2 nor HAVE_ONIGURUMA is defined. It keeps code that names
// regexp_match compiling, but refuses to be constructed.
class regexp_match : public key_matcher {
 public:
  // Unconditionally throws the converter_exception built below (passed
  // through JUBATUS_EXCEPTION). The pattern argument is ignored.
  explicit regexp_match(const std::string& regexp) {
    throw JUBATUS_EXCEPTION(converter_exception("regexp support is disabled"));
  }

  // Reports "no match" for every key; `key` is not inspected.
  // The constructor above always throws, so this is not expected to be
  // reached on an instance created through it.
  bool match(const std::string& key) { return false; }
};

} // namespace fv_converter
} // namespace core
} // namespace jubatus

#endif // JUBATUS_CORE_FV_CONVERTER_NULL_REGEXP_MATCH_HPP_
43 changes: 43 additions & 0 deletions jubatus/core/fv_converter/null_regexp_splitter.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
// Jubatus: Online machine learning framework for distributed environment
// Copyright (C) 2012 Preferred Infrastructure and Nippon Telegraph and Telephone Corporation.
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License version 2.1 as published by the Free Software Foundation.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License along with this library; if not, write to the Free Software
// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA

#ifndef JUBATUS_CORE_FV_CONVERTER_NULL_REGEXP_SPLITTER_HPP_
#define JUBATUS_CORE_FV_CONVERTER_NULL_REGEXP_SPLITTER_HPP_

#include "word_splitter.hpp"
#include "exception.hpp"

namespace jubatus {
namespace core {
namespace fv_converter {

// Stand-in for regexp_splitter that is selected by regexp_splitter.hpp when
// neither HAVE_RE2 nor HAVE_ONIGURUMA is defined. It keeps code that names
// regexp_splitter compiling, but refuses to be constructed.
class regexp_splitter : public word_splitter {
 public:
  // Unconditionally throws the converter_exception built below (passed
  // through JUBATUS_EXCEPTION). Both arguments are ignored.
  regexp_splitter(const std::string& regexp, int group) {
    throw JUBATUS_EXCEPTION(
        converter_exception("regexp support is disabled"));
  }

  // Does nothing: `str` is not read and `bounds` is left unmodified.
  // The constructor above always throws, so this is not expected to be
  // reached on an instance created through it.
  // (No trailing ';' after the body: it is redundant and is reported by
  // -Wextra-semi / cpplint.)
  void split(
      const std::string& str,
      std::vector<std::pair<size_t, size_t> >& bounds) const {}
};

} // namespace fv_converter
} // namespace core
} // namespace jubatus

#endif // JUBATUS_CORE_FV_CONVERTER_NULL_REGEXP_SPLITTER_HPP_
6 changes: 4 additions & 2 deletions jubatus/core/fv_converter/regexp_filter.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,10 +17,12 @@
#ifndef JUBATUS_CORE_FV_CONVERTER_REGEXP_FILTER_HPP_
#define JUBATUS_CORE_FV_CONVERTER_REGEXP_FILTER_HPP_

#ifndef HAVE_RE2
#if defined HAVE_RE2
#include "re2_filter.hpp"
#elif defined HAVE_ONIGURUMA
#include "onig_filter.hpp"
#else
#include "re2_filter.hpp"
#include "null_regexp_filter.hpp"
#endif

#endif // JUBATUS_CORE_FV_CONVERTER_REGEXP_FILTER_HPP_
6 changes: 4 additions & 2 deletions jubatus/core/fv_converter/regexp_match.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,10 +17,12 @@
#ifndef JUBATUS_CORE_FV_CONVERTER_REGEXP_MATCH_HPP_
#define JUBATUS_CORE_FV_CONVERTER_REGEXP_MATCH_HPP_

#ifndef HAVE_RE2
#if defined HAVE_RE2
#include "re2_match.hpp"
#elif defined HAVE_ONIGURUMA
#include "onig_match.hpp"
#else
#include "re2_match.hpp"
#include "null_regexp_match.hpp"
#endif

#endif // JUBATUS_CORE_FV_CONVERTER_REGEXP_MATCH_HPP_
6 changes: 4 additions & 2 deletions jubatus/core/fv_converter/regexp_splitter.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,10 +17,12 @@
#ifndef JUBATUS_CORE_FV_CONVERTER_REGEXP_SPLITTER_HPP_
#define JUBATUS_CORE_FV_CONVERTER_REGEXP_SPLITTER_HPP_

#ifndef HAVE_RE2
#if defined HAVE_RE2
#include "re2_splitter.hpp"
#elif defined HAVE_ONIGURUMA
#include "onig_splitter.hpp"
#else
#include "re2_splitter.hpp"
#include "null_regexp_splitter.hpp"
#endif

#endif // JUBATUS_CORE_FV_CONVERTER_REGEXP_SPLITTER_HPP_
2 changes: 2 additions & 0 deletions jubatus/core/fv_converter/string_feature_factory_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ TEST(string_feature_factory, ngram) {
jubatus::util::lang::shared_ptr<string_feature> s(f.create("ngram", param));
}

#if defined(HAVE_RE2) || defined(HAVE_ONIGURUMA)
TEST(string_feature_factory, regexp) {
string_feature_factory f;
std::map<std::string, std::string> param;
Expand All @@ -69,6 +70,7 @@ TEST(string_feature_factory, regexp) {
param.erase("group");
jubatus::util::lang::shared_ptr<string_feature>(f.create("regexp", param));
}
#endif

} // namespace fv_converter
} // namespace core
Expand Down
2 changes: 2 additions & 0 deletions jubatus/core/fv_converter/string_filter_factory_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ namespace jubatus {
namespace core {
namespace fv_converter {

#if defined(HAVE_RE2) || defined(HAVE_ONIGURUMA)
TEST(string_filter_factory, trivial) {
string_filter_factory f;
std::map<std::string, std::string> p;
Expand All @@ -34,6 +35,7 @@ TEST(string_filter_factory, trivial) {
jubatus::util::lang::shared_ptr<string_filter> filter(f.create("regexp", p));
EXPECT_TRUE(typeid(*filter) == typeid(regexp_filter));
}
#endif

TEST(string_filter_factory, unknown) {
string_filter_factory f;
Expand Down
71 changes: 38 additions & 33 deletions jubatus/core/fv_converter/wscript
Original file line number Diff line number Diff line change
@@ -1,17 +1,22 @@
import Options

def options(opt):
opt.add_option('--enable-re2',
action='store_true',
default=False,
help='do use re2 instead of oniguruma')
opt.add_option('--regexp-library',
default='oniguruma',
help='use regexp library: oniguruma (default), re2 or none')

def configure(conf):
if Options.options.enable_re2:
regexp_lib = Options.options.regexp_library
if regexp_lib == 're2':
conf.check_cxx(lib = 're2', define_name = 'HAVE_RE2',
errmsg = 'not found')
elif regexp_lib == 'oniguruma':
conf.check_cxx(lib = 'onig', define_name = 'HAVE_ONIGURUMA',
errmsg = 'not found')
elif regexp_lib == 'none':
pass
else:
conf.check_cxx(lib = 'onig', define_name = 'HAVE_ONIGURUMA')
conf.fatal('unsupported regexp library specified: ' + regexp_lib)

def make_test(bld, use, src):
bld.program(
Expand Down Expand Up @@ -74,30 +79,7 @@ def build(bld):
'type.hpp',
'factory.hpp',
'util.hpp',
]
use = ['jubatus_util', 'MSGPACK']

if bld.env.HAVE_RE2:
source.append('re2_match.cpp')
source.append('re2_filter.cpp')
source.append('re2_splitter.cpp')
use.append('RE2')
else:
source.append('onig_match.cpp')
source.append('onig_filter.cpp')
source.append('onig_splitter.cpp')
use.append('ONIG')

bld.core_sources.extend(bld.add_prefix(source))
bld.core_headers.extend(bld.add_prefix(headers))
bld.core_use.extend(use)

n = bld.path.get_bld().make_node('test_input')
n.mkdir()
bld(rule = 'cp ${SRC} ${TGT}',
source = bld.path.ant_glob('test_input/*'),
target = n)

]
test_source = [
'json_converter_test.cpp',
'msgpack_converter_test.cpp',
Expand All @@ -119,12 +101,35 @@ def build(bld):
'keyword_weights_test.cpp',
'feature_hasher_test.cpp',
'except_match_test.cpp',
'mixable_weight_manager_test.cpp',
]
regex_test_source = [
'regexp_match_test.cpp',
'regexp_filter_test.cpp',
'regexp_splitter_test.cpp',
'mixable_weight_manager_test.cpp',
]
]

use = ['jubatus_util', 'MSGPACK']

if bld.env.HAVE_RE2:
source += ['re2_match.cpp', 're2_filter.cpp', 're2_splitter.cpp' ]
test_source += regex_test_source
use += ['RE2']
elif bld.env.HAVE_ONIGURUMA:
source += ['onig_match.cpp', 'onig_filter.cpp', 'onig_splitter.cpp']
test_source += regex_test_source
use += ['ONIG']

bld.core_sources.extend(bld.add_prefix(source))
bld.core_headers.extend(bld.add_prefix(headers))
bld.core_use.extend(use)

n = bld.path.get_bld().make_node('test_input')
n.mkdir()
bld(rule = 'cp ${SRC} ${TGT}',
source = bld.path.ant_glob('test_input/*'),
target = n)

test_use = ['jubatus_util', 'jubatus_core']

make_tests(bld, test_use, test_source)

0 comments on commit 4ec84ae

Please sign in to comment.