Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Users can remove regexp dependency at compile time #8

Merged
merged 2 commits into from
Jun 17, 2014
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
2 changes: 2 additions & 0 deletions jubatus/core/fv_converter/converter_config_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ namespace jubatus {
namespace core {
namespace fv_converter {

#if defined(HAVE_RE2) || defined(HAVE_ONIGURUMA)
TEST(converter_config, config) {
try {
std::ifstream ifs("./test_input/config.json");
Expand Down Expand Up @@ -77,6 +78,7 @@ TEST(converter_config, config) {
throw;
}
}
#endif

TEST(converter_config, hash) {
converter_config config;
Expand Down
2 changes: 2 additions & 0 deletions jubatus/core/fv_converter/datum_to_fv_converter_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -310,6 +310,7 @@ TEST(datum_to_fv_converter, register_string_filter) {
EXPECT_EQ(1u, feature.size());
}

#if defined(HAVE_RE2) || defined(HAVE_ONIGURUMA)
conv.register_string_filter(shared_ptr<key_matcher>(new match_all()),
shared_ptr<string_filter>(new regexp_filter("<[^>]*>", "")),
"_filtered");
Expand All @@ -320,6 +321,7 @@ TEST(datum_to_fv_converter, register_string_filter) {
EXPECT_EQ(2u, feature.size());
EXPECT_EQ("/text_filtered$aaa@str#bin/bin", feature[1].first);
}
#endif
}

TEST(datum_to_fv_converter, register_num_filter) {
Expand Down
2 changes: 2 additions & 0 deletions jubatus/core/fv_converter/key_matcher_factory_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,9 @@ TEST(fv_converter, key_matcher_factory) {
ASSERT_FALSE(m_t(f.create_matcher("hogehoge"))->match("hogefuga"));
ASSERT_TRUE(m_t(f.create_matcher("hogehoge"))->match("hogehoge"));

#if defined(HAVE_RE2) || defined(HAVE_ONIGURUMA)
ASSERT_TRUE(m_t(f.create_matcher("/.*/hoge/"))->match("fuga/hoge"));
#endif
}

} // namespace fv_converter
Expand Down
41 changes: 41 additions & 0 deletions jubatus/core/fv_converter/null_regexp_filter.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
// Jubatus: Online machine learning framework for distributed environment
// Copyright (C) 2014 Preferred Infrastructure and Nippon Telegraph and Telephone Corporation.
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License version 2.1 as published by the Free Software Foundation.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License along with this library; if not, write to the Free Software
// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA

#ifndef JUBATUS_CORE_FV_CONVERTER_NULL_REGEXP_FILTER_HPP_
#define JUBATUS_CORE_FV_CONVERTER_NULL_REGEXP_FILTER_HPP_

#include "string_filter.hpp"
#include "exception.hpp"

namespace jubatus {
namespace core {
namespace fv_converter {

// Placeholder regexp_filter for builds where regexp support is disabled.
// It keeps the regexp_filter name available, but an instance can never be
// obtained because the constructor always throws.
class regexp_filter : public string_filter {
 public:
  // Throws converter_exception unconditionally; both arguments are ignored.
  regexp_filter(
      const std::string& /* regexp */,
      const std::string& /* replace */) {
    throw JUBATUS_EXCEPTION(
        converter_exception("regexp support is disabled"));
  }

  // Intentionally a no-op: `output` is left untouched.
  void filter(
      const std::string& /* input */,
      std::string& /* output */) const {
  }
};

} // namespace fv_converter
} // namespace core
} // namespace jubatus

#endif // JUBATUS_CORE_FV_CONVERTER_NULL_REGEXP_FILTER_HPP_
43 changes: 43 additions & 0 deletions jubatus/core/fv_converter/null_regexp_match.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
// Jubatus: Online machine learning framework for distributed environment
// Copyright (C) 2011 Preferred Infrastructure and Nippon Telegraph and Telephone Corporation.
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License version 2.1 as published by the Free Software Foundation.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License along with this library; if not, write to the Free Software
// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA

#ifndef JUBATUS_CORE_FV_CONVERTER_NULL_REGEXP_MATCH_HPP_
#define JUBATUS_CORE_FV_CONVERTER_NULL_REGEXP_MATCH_HPP_

#include "key_matcher.hpp"
#include "exception.hpp"

namespace jubatus {
namespace core {
namespace fv_converter {

// Placeholder regexp_match for builds where regexp support is disabled.
// Construction always fails with converter_exception.
class regexp_match : public key_matcher {
 public:
  // Throws converter_exception unconditionally; the pattern is ignored.
  explicit regexp_match(const std::string& /* regexp */) {
    throw JUBATUS_EXCEPTION(
        converter_exception("regexp support is disabled"));
  }

  // Reports "no match" for every key.
  bool match(const std::string& /* key */) {
    return false;
  }
};

} // namespace fv_converter
} // namespace core
} // namespace jubatus

#endif // JUBATUS_CORE_FV_CONVERTER_NULL_REGEXP_MATCH_HPP_
43 changes: 43 additions & 0 deletions jubatus/core/fv_converter/null_regexp_splitter.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
// Jubatus: Online machine learning framework for distributed environment
// Copyright (C) 2012 Preferred Infrastructure and Nippon Telegraph and Telephone Corporation.
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License version 2.1 as published by the Free Software Foundation.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License along with this library; if not, write to the Free Software
// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA

#ifndef JUBATUS_CORE_FV_CONVERTER_NULL_REGEXP_SPLITTER_HPP_
#define JUBATUS_CORE_FV_CONVERTER_NULL_REGEXP_SPLITTER_HPP_

#include "word_splitter.hpp"
#include "exception.hpp"

namespace jubatus {
namespace core {
namespace fv_converter {

// Placeholder regexp_splitter for builds where regexp support is disabled.
// Construction always fails with converter_exception.
class regexp_splitter : public word_splitter {
 public:
  // Throws converter_exception unconditionally; both arguments are ignored.
  regexp_splitter(const std::string& /* regexp */, int /* group */) {
    throw JUBATUS_EXCEPTION(
        converter_exception("regexp support is disabled"));
  }

  // Intentionally a no-op: `bounds` is left untouched.
  void split(
      const std::string& /* str */,
      std::vector<std::pair<size_t, size_t> >& /* bounds */) const {
  }
};

} // namespace fv_converter
} // namespace core
} // namespace jubatus

#endif // JUBATUS_CORE_FV_CONVERTER_NULL_REGEXP_SPLITTER_HPP_
6 changes: 4 additions & 2 deletions jubatus/core/fv_converter/regexp_filter.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,10 +17,12 @@
#ifndef JUBATUS_CORE_FV_CONVERTER_REGEXP_FILTER_HPP_
#define JUBATUS_CORE_FV_CONVERTER_REGEXP_FILTER_HPP_

#ifndef HAVE_RE2
#if defined HAVE_RE2
#include "re2_filter.hpp"
#elif defined HAVE_ONIGURUMA
#include "onig_filter.hpp"
#else
#include "re2_filter.hpp"
#include "null_regexp_filter.hpp"
#endif

#endif // JUBATUS_CORE_FV_CONVERTER_REGEXP_FILTER_HPP_
6 changes: 4 additions & 2 deletions jubatus/core/fv_converter/regexp_match.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,10 +17,12 @@
#ifndef JUBATUS_CORE_FV_CONVERTER_REGEXP_MATCH_HPP_
#define JUBATUS_CORE_FV_CONVERTER_REGEXP_MATCH_HPP_

#ifndef HAVE_RE2
#if defined HAVE_RE2
#include "re2_match.hpp"
#elif defined HAVE_ONIGURUMA
#include "onig_match.hpp"
#else
#include "re2_match.hpp"
#include "null_regexp_match.hpp"
#endif

#endif // JUBATUS_CORE_FV_CONVERTER_REGEXP_MATCH_HPP_
6 changes: 4 additions & 2 deletions jubatus/core/fv_converter/regexp_splitter.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,10 +17,12 @@
#ifndef JUBATUS_CORE_FV_CONVERTER_REGEXP_SPLITTER_HPP_
#define JUBATUS_CORE_FV_CONVERTER_REGEXP_SPLITTER_HPP_

#ifndef HAVE_RE2
#if defined HAVE_RE2
#include "re2_splitter.hpp"
#elif defined HAVE_ONIGURUMA
#include "onig_splitter.hpp"
#else
#include "re2_splitter.hpp"
#include "null_regexp_splitter.hpp"
#endif

#endif // JUBATUS_CORE_FV_CONVERTER_REGEXP_SPLITTER_HPP_
2 changes: 2 additions & 0 deletions jubatus/core/fv_converter/string_feature_factory_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ TEST(string_feature_factory, ngram) {
jubatus::util::lang::shared_ptr<string_feature> s(f.create("ngram", param));
}

#if defined(HAVE_RE2) || defined(HAVE_ONIGURUMA)
TEST(string_feature_factory, regexp) {
string_feature_factory f;
std::map<std::string, std::string> param;
Expand All @@ -69,6 +70,7 @@ TEST(string_feature_factory, regexp) {
param.erase("group");
jubatus::util::lang::shared_ptr<string_feature>(f.create("regexp", param));
}
#endif

} // namespace fv_converter
} // namespace core
Expand Down
2 changes: 2 additions & 0 deletions jubatus/core/fv_converter/string_filter_factory_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ namespace jubatus {
namespace core {
namespace fv_converter {

#if defined(HAVE_RE2) || defined(HAVE_ONIGURUMA)
TEST(string_filter_factory, trivial) {
string_filter_factory f;
std::map<std::string, std::string> p;
Expand All @@ -34,6 +35,7 @@ TEST(string_filter_factory, trivial) {
jubatus::util::lang::shared_ptr<string_filter> filter(f.create("regexp", p));
EXPECT_TRUE(typeid(*filter) == typeid(regexp_filter));
}
#endif

TEST(string_filter_factory, unknown) {
string_filter_factory f;
Expand Down
75 changes: 42 additions & 33 deletions jubatus/core/fv_converter/wscript
Original file line number Diff line number Diff line change
@@ -1,17 +1,22 @@
import Options

def options(opt):
opt.add_option('--enable-re2',
action='store_true',
default=False,
help='do use re2 instead of oniguruma')
opt.add_option('--regexp-library',
default='oniguruma',
help='use regexp library: oniguruma (default), re2 or none')

def configure(conf):
if Options.options.enable_re2:
regexp_lib = Options.options.regexp_library
if regexp_lib == 're2':
conf.check_cxx(lib = 're2', define_name = 'HAVE_RE2',
errmsg = 'not found')
elif regexp_lib == 'oniguruma':
conf.check_cxx(lib = 'onig', define_name = 'HAVE_ONIGURUMA',
errmsg = 'not found')
elif regexp_lib == 'none':
pass
else:
conf.check_cxx(lib = 'onig', define_name = 'HAVE_ONIGURUMA')
conf.fatal('unsupported regexp library specified: ' + regexp_lib)

def make_test(bld, use, src):
bld.program(
Expand Down Expand Up @@ -74,30 +79,7 @@ def build(bld):
'type.hpp',
'factory.hpp',
'util.hpp',
]
use = ['jubatus_util', 'MSGPACK']

if bld.env.HAVE_RE2:
source.append('re2_match.cpp')
source.append('re2_filter.cpp')
source.append('re2_splitter.cpp')
use.append('RE2')
else:
source.append('onig_match.cpp')
source.append('onig_filter.cpp')
source.append('onig_splitter.cpp')
use.append('ONIG')

bld.core_sources.extend(bld.add_prefix(source))
bld.core_headers.extend(bld.add_prefix(headers))
bld.core_use.extend(use)

n = bld.path.get_bld().make_node('test_input')
n.mkdir()
bld(rule = 'cp ${SRC} ${TGT}',
source = bld.path.ant_glob('test_input/*'),
target = n)

]
test_source = [
'json_converter_test.cpp',
'msgpack_converter_test.cpp',
Expand All @@ -119,12 +101,39 @@ def build(bld):
'keyword_weights_test.cpp',
'feature_hasher_test.cpp',
'except_match_test.cpp',
'mixable_weight_manager_test.cpp',
]
regex_test_source = [
'regexp_match_test.cpp',
'regexp_filter_test.cpp',
'regexp_splitter_test.cpp',
'mixable_weight_manager_test.cpp',
]
]

use = ['jubatus_util', 'MSGPACK']

if bld.env.HAVE_RE2:
source.append('re2_match.cpp')
source.append('re2_filter.cpp')
source.append('re2_splitter.cpp')
# Use extend, not append: append would insert regex_test_source as one nested
# list element instead of adding each regexp test file name to test_source.
test_source.extend(regex_test_source)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Replace append with +=.

use.append('RE2')
elif bld.env.HAVE_ONIGURUMA:
source.append('onig_match.cpp')
source.append('onig_filter.cpp')
source.append('onig_splitter.cpp')
# Use extend, not append: append would insert regex_test_source as one nested
# list element instead of adding each regexp test file name to test_source.
test_source.extend(regex_test_source)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Replace append with +=.

use.append('ONIG')

bld.core_sources.extend(bld.add_prefix(source))
bld.core_headers.extend(bld.add_prefix(headers))
bld.core_use.extend(use)

n = bld.path.get_bld().make_node('test_input')
n.mkdir()
bld(rule = 'cp ${SRC} ${TGT}',
source = bld.path.ant_glob('test_input/*'),
target = n)

test_use = ['jubatus_util', 'jubatus_core']

make_tests(bld, test_use, test_source)