Permalink
Browse files

Merge branch 'release/0.2.2'

  • Loading branch information...
2 parents 0a420b9 + da21b73 commit e47de9701ee8d7f5a7f71c6b61ccdb98d7c40cb7 @kuenishi kuenishi committed Apr 6, 2012
Showing with 3,449 additions and 2,278 deletions.
  1. +1 −0 .gitignore
  2. +72 −0 README.rst
  3. +2 −1 src/classifier/classifier_factory.cpp
  4. +10 −0 src/classifier/classifier_test.cpp
  5. +0 −271 src/classifier/hs_classifier.cpp
  6. +11 −6 src/common/exception.hpp
  7. +136 −0 src/common/mprpc/async_client.cpp
  8. +97 −0 src/common/mprpc/async_client.hpp
  9. +194 −0 src/common/mprpc/rpc_client.cpp
  10. +169 −0 src/common/mprpc/rpc_client.hpp
  11. +118 −0 src/common/mprpc/rpc_client_test.cpp
  12. +23 −0 src/common/mprpc/wscript
  13. +53 −0 src/common/unordered_map.hpp
  14. +6 −4 src/common/wscript
  15. +61 −0 src/framework/aggregators.hpp
  16. +27 −14 src/framework/jubatus_serv.cpp
  17. +6 −11 src/framework/jubatus_serv.hpp
  18. +0 −2 src/framework/keeper.cpp
  19. +169 −59 src/framework/keeper.hpp
  20. +20 −0 src/framework/server_util.cpp
  21. +3 −2 src/framework/wscript
  22. +20 −1 src/fv_converter/counter.hpp
  23. +15 −0 src/fv_converter/counter_test.cpp
  24. +31 −1 src/fv_converter/datum_to_fv_converter_test.cpp
  25. +74 −0 src/fv_converter/keyword_weights.cpp
  26. +72 −0 src/fv_converter/keyword_weights.hpp
  27. +57 −0 src/fv_converter/keyword_weights_test.cpp
  28. +11 −18 src/fv_converter/weight_manager.cpp
  29. +47 −5 src/fv_converter/weight_manager.hpp
  30. +46 −13 src/fv_converter/weight_manager_test.cpp
  31. +3 −1 src/fv_converter/wscript
  32. +0 −158 src/recommender/recommender_builder.cpp
  33. +0 −66 src/recommender/recommender_builder.hpp
  34. +1 −1 src/recommender/recommender_factory.cpp
  35. +2 −1 src/regression/regression_factory.cpp
  36. +2 −1 src/regression/regression_factory_test.cpp
  37. +0 −91 src/server/classifier.hpp
  38. +42 −12 src/server/classifier.idl
  39. +14 −14 src/server/classifier_client.hpp
  40. +26 −32 src/server/classifier_impl.cpp
  41. +11 −14 src/server/classifier_keeper.cpp
  42. +29 −9 src/server/classifier_serv.cpp
  43. +7 −3 src/server/classifier_serv.hpp
  44. +5 −5 src/server/classifier_server.hpp
  45. +54 −15 src/server/classifier_test.cpp
  46. +6 −6 src/server/classifier_types.hpp
  47. +57 −0 src/server/mixable_weight_manager.hpp
  48. +0 −102 src/server/recommender.hpp
  49. +69 −19 src/server/recommender.idl
  50. +36 −28 src/server/recommender_client.hpp
  51. +46 −46 src/server/recommender_impl.cpp
  52. +21 −21 src/server/recommender_keeper.cpp
  53. +56 −12 src/server/recommender_serv.cpp
  54. +12 −8 src/server/recommender_serv.hpp
  55. +15 −13 src/server/recommender_server.hpp
  56. +3 −3 src/server/recommender_test.cpp
  57. +6 −6 src/server/recommender_types.hpp
  58. +0 −78 src/server/regression.hpp
  59. +40 −10 src/server/regression.idl
  60. +10 −10 src/server/regression_client.hpp
  61. +26 −32 src/server/regression_impl.cpp
  62. +12 −14 src/server/regression_keeper.cpp
  63. +14 −8 src/server/regression_serv.cpp
  64. +3 −2 src/server/regression_serv.hpp
  65. +5 −5 src/server/regression_server.hpp
  66. +2 −2 src/server/regression_test.cpp
  67. +6 −6 src/server/regression_types.hpp
  68. +0 −25 src/server/stat.hpp
  69. +49 −13 src/server/stat.idl
  70. +26 −22 src/server/stat_client.hpp
  71. +37 −40 src/server/stat_impl.cpp
  72. +17 −18 src/server/stat_keeper.cpp
  73. +11 −8 src/server/stat_serv.cpp
  74. +8 −7 src/server/stat_serv.hpp
  75. +12 −11 src/server/stat_server.hpp
  76. +7 −7 src/server/stat_test.cpp
  77. +4 −3 src/server/wscript
  78. +79 −0 tools/generate_clients.py
  79. +4 −6 tools/generator/.gitignore
  80. +17 −12 tools/generator/OMakefile
  81. +41 −14 tools/generator/README
  82. +294 −74 tools/generator/generator.ml
  83. +0 −108 tools/generator/idl_template.ml
  84. +71 −0 tools/generator/jdl_lexer.mll
  85. +193 −0 tools/generator/jdl_parser.mly
  86. +84 −0 tools/generator/jubatus_idl.ml
  87. +0 −91 tools/generator/keeper_template.ml
  88. +0 −71 tools/generator/lexer.mll
  89. +0 −133 tools/generator/main.ml
  90. +0 −145 tools/generator/parser.mly
  91. +0 −94 tools/generator/server_template.ml
  92. +59 −0 tools/generator/small.idl
  93. +28 −110 tools/generator/stree.ml
  94. +82 −1 tools/generator/util.ml
  95. +127 −0 tools/generator/validator.ml
  96. +7 −3 wscript
View
@@ -12,3 +12,4 @@ Makefile
cscope.*
callgrind.*
.unittest-gtest
+*.tar.gz
View
@@ -9,3 +9,75 @@ LICENSE
=======
LGPL 2.1
+
+Update history
+==============
+
+Release 0.2.2 2012/4/6
+----------------------
+
+Improvements
+
+- Simpler interfaces at classifier, regression and recommender
+
+ - Clients are *NOT COMPATIBLE* with previous releases
+
+- Now mix works concurrently in multiple threads (except tf-idf counting)
+- Asynchronous RPC to multiple servers at once
+- Add --version option
+- Interface description language changed from C++-like to Annotated MessagePack-IDL
+- Minor error-handling improvements
+- A bit more tested than previous releases
+
+Bugfix
+
+ - #30, #29, #22
+
+Release 0.2.1 2012/3/13
+-----------------------
+
+Bugfix release: #28
+
+Release 0.2.0 2012/2/16
+-----------------------
+
+New Features
+
+- recommender
+
+ - support fast similar item search, real-time update, distributed data management
+ - inverted index : exact result, fast search
+ - locality sensitive hash : approximate result, fast search, small working space
+
+- regression
+
+ - online SVR using passive aggressive algorithm
+ - as fast as current classifier
+
+- stat
+
+ - a Key(string)-Value(queue<double>) store
+ - O(1) cost of getting sum, standard deviation, max, min, statistic moments for each queue
+
+- server framework
+
+ - less-tightly coupled distributed processing framework with each ML implementation
+ - idl & code generator - makes it easy to write your own jubatus system
+ - removed public release of client libraries (so easy to generate!)
+ - multiple mix - multiple data objects can be mixed in one jubatus system
+
+Bugfix
+
+ - duplicate key entry in fv_converter breaks the parameter
+
+Release 0.1.1 2011/11/15
+------------------------
+
+Bugfix release
+
+Release 0.1.0 2011/10/26
+------------------------
+
+Hello Jubatus!
+
+First release: including classifier, and mix operation
@@ -17,6 +17,7 @@
#include "classifier.hpp"
#include "classifier_factory.hpp"
+#include "../common/exception.hpp"
using namespace std;
@@ -38,7 +39,7 @@ classifier_base* classifier_factory::create_classifier(const std::string& name,
} else if (name == "NHERD"){
return static_cast<classifier_base*>(new NHERD(storage));
} else {
- return NULL;
+ throw unsupported_method(name);
}
}
@@ -24,6 +24,7 @@
#include "classifier_factory.hpp"
#include "classifier.hpp"
#include "../storage/local_storage.hpp"
+#include "../common/exception.hpp"
#include "classifier_test_util.hpp"
using namespace std;
@@ -139,4 +140,13 @@ void InitClassifiers(vector<classifier_base*>& classifiers){
}
}
+
+TEST(classifier_factory, exception){
+ local_storage * p = new local_storage;
+ ASSERT_THROW(classifier_factory::create_classifier("pa", p), unsupported_method);
+ ASSERT_THROW(classifier_factory::create_classifier("", p), unsupported_method);
+ ASSERT_THROW(classifier_factory::create_classifier("saitama", p), unsupported_method);
+ delete p;
+}
+
}
@@ -1,271 +0,0 @@
-// Jubatus: Online machine learning framework for distributed environment
-// Copyright (C) 2011 Preferred Infrastracture and Nippon Telegraph and Telephone Corporation.
-//
-// This library is free software; you can redistribute it and/or
-// modify it under the terms of the GNU Lesser General Public
-// License as published by the Free Software Foundation; either
-// version 2.1 of the License, or (at your option) any later version.
-//
-// This library is distributed in the hope that it will be useful,
-// but WITHOUT ANY WARRANTY; without even the implied warranty of
-// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-// Lesser General Public License for more details.
-//
-// You should have received a copy of the GNU Lesser General Public
-// License along with this library; if not, write to the Free Software
-// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
-
-#include <fstream>
-#include "hs_classifier.hpp"
-
-using namespace std;
-
-namespace hs{
-
-HSClassifier::HSClassifier(){
-}
-
-HSClassifier::~HSClassifier(){
-}
-
-void HSClassifier::Clear(){
- feature2id_.clear();
- class2id_.clear();
- vector<string>().swap(id2class_);
- vector<vector<float> >().swap(weights_);
-}
-
-void HSClassifier::SaveKey2ID(const key2id_t& key2id,
- ofstream& ofs) const{
- uint64_t key_num = key2id.size();
- ofs.write((const char*)& key_num, sizeof(key_num) * 1);
-
- for (key2id_t::const_iterator it = key2id.begin();
- it != key2id.end(); ++it){
- const string& key = it->first;
- uint64_t len = key.size();
- ofs.write((const char*) &len, sizeof(len) * 1);
- ofs.write((const char*) &key[0], sizeof(key[0]) * len);
- uint64_t val = it->second;
- ofs.write((const char*) &val, sizeof(val));
- }
-}
-
-void HSClassifier::LoadKey2ID(key2id_t& key2id,
- ifstream& ifs) {
- uint64_t key_num = 0;
- ifs.read((char*)&key_num, sizeof(key_num) * 1);
-
- for (uint64_t i = 0; i < key_num; ++i){
- uint64_t len = 0;
- ifs.read((char*) &len, sizeof(len) * 1);
- string key;
- key.resize(len);
- ifs.read((char*) &key[0], sizeof(key[0]) * len);
- uint64_t val = 0;
- ifs.read((char*) &val, sizeof(val));
- key2id[key] = val;
- }
-}
-
-
-int HSClassifier::Save(const string& filename) const{
- ofstream ofs(filename.c_str());
- if (!ofs){
- return -1;
- }
- SaveKey2ID(feature2id_, ofs);
- SaveKey2ID(class2id_, ofs);
-
- for (size_t i = 0; i < weights_.size(); ++i){
- const vector<float>& v = weights_[i];
- ofs.write((const char*)&v[0], sizeof(v[0]) * v.size());
- }
-
- if (!ofs){
- return -1;
- }
-
- return 0;
-}
-
-int HSClassifier::Load(const string& filename) {
- Clear();
- ifstream ifs(filename.c_str());
- if (!ifs){
- return -1;
- }
- LoadKey2ID(feature2id_, ifs);
- LoadKey2ID(class2id_, ifs);
-
- id2class_.resize(class2id_.size());
- for (key2id_t::const_iterator it = class2id_.begin(); it != class2id_.end(); ++it){
- id2class_[it->second] = it->first;
- }
- weights_.resize(class2id_.size(), vector<float>(feature2id_.size()));
-
- if (!ifs) return -1;
-
- return 0;
-}
-
-void HSClassifier::Train(const key_str_t& input,
- const string& output){
- vector<uint64_t> features;
- for (key_str_t::const_iterator it = input.begin(); it != input.end(); ++it){
- ExtractFeatureConst(it->first, it->second, features);
- }
- uint64_t target_class = GetClassID(output);
- vector<float> scores;
- CalcScores(features, scores);
- uint64_t max_score_class = GetMaxScoreID(scores);
- if (target_class == max_score_class) {
- return; // no update
- }
-
- features.clear();
- for (key_str_t::const_iterator it = input.begin(); it != input.end(); ++it){
- ExtractFeature(it->first, it->second, features);
- }
- Update(features, target_class, max_score_class);
-}
-
-void HSClassifier::Update(const std::vector<uint64_t>& features,
- const uint64_t target_class, const uint64_t max_score_class) {
- for (size_t i = 0; i < features.size(); ++i){
- uint64_t id = features[i];
- weights_[target_class][id] += 1;
- weights_[max_score_class][id] -= 1;
- }
-}
-
-uint64_t HSClassifier::GetMaxScoreID(const vector<float>& scores) const {
- if (scores.size() == 0) return 0;
- float max_score = scores[0];
- uint64_t max_id = 0;
- for (size_t i = 1; i < scores.size(); ++i){
- if (scores[i] > max_score) {
- max_score = scores[i];
- max_id = i;
- }
- }
- return max_id;
-}
-
-void HSClassifier::CalcScores(const vector<uint64_t>& features, vector<float>& scores) const{
- size_t class_num = id2class_.size();
- scores.resize(class_num);
- fill(scores.begin(), scores.end(), 0.f);
- for (size_t i = 0; i < features.size(); ++i){
- uint64_t id = features[i];
- for (size_t j = 0; j < class_num; ++j){
- scores[j] += weights_[j][id];
- }
- }
-}
-
-void HSClassifier::ExtractFeatureConst(const std::string& field, const std::string& value,
- vector<uint64_t>& features) const{
- uint64_t total_id = GetIDConst(field + "/" + value);
- if (total_id != NOTFOUND){
- features.push_back(total_id);
- }
-
- // UTF-8 bigram feature
- string cur;
- string prev;
- bool first = true;
- for (size_t i = 0; ; ++i){
- if (first ||
- (i != value.size() && (value[i] & 0xC0) == 0x80)){
- cur += value[i];
- first = false;
- continue;
- }
- uint64_t term_id = GetIDConst(field + "/" + prev + cur);
- if (term_id != NOTFOUND){
- features.push_back(term_id);
- }
- if (i == value.size()) break;
- prev = cur;
- cur = value[i];
- }
-}
-
-void HSClassifier::ExtractFeature(const std::string& field, const std::string& value,
- vector<uint64_t>& features) {
- features.push_back(GetID(field + "/" + value));
-
- // UTF-8 bigram feature
- string cur;
- string prev;
- bool first = true;
- for (size_t i = 0; ; ++i){
- if (first ||
- (i != value.size() && (value[i] & 0xC0) == 0x80)){
- cur += value[i];
- first = false;
- continue;
- }
- features.push_back(GetID(field + "/" + prev + cur));
-
- if (i == value.size()) break;
- prev = cur;
- cur = value[i];
- }
-}
-
-
-
-key_double_t HSClassifier::Classify(const key_str_t& input) const {
- vector<uint64_t> features;
- for (key_str_t::const_iterator it = input.begin(); it != input.end(); ++it){
- ExtractFeatureConst(it->first, it->second, features);
- }
- vector<float> scores;
- CalcScores(features, scores);
-
- key_double_t class2score;
- for (size_t i = 0; i < scores.size(); ++i){
- class2score[id2class_[i]] = scores[i];
- }
- return class2score;
-}
-
-uint64_t HSClassifier::GetID(const string& key){
- key2id_t::const_iterator it = feature2id_.find(key);
- if (it != feature2id_.end()){
- return it->second;
- }
- uint64_t new_id = static_cast<uint64_t>(feature2id_.size());
- feature2id_[key] = new_id;
- for (size_t i = 0; i < weights_.size(); ++i){
- weights_[i].resize(new_id+1);
- }
- return new_id;
-}
-
-uint64_t HSClassifier::GetIDConst(const string& key) const{
- key2id_t::const_iterator it = feature2id_.find(key);
- if (it != feature2id_.end()){
- return it->second;
- } else {
- return NOTFOUND;
- }
-}
-
-uint64_t HSClassifier::GetClassID(const string& output){
- key2id_t::const_iterator it = class2id_.find(output);
- if (it != class2id_.end()){
- return it->second;
- }
- uint64_t new_id = static_cast<uint64_t>(class2id_.size());
- class2id_[output] = new_id;
- id2class_.push_back(output);
-
- weights_.resize(new_id+1);
- weights_[new_id].resize(feature2id_.size());
- return new_id;
-}
-
-}
Oops, something went wrong.

0 comments on commit e47de97

Please sign in to comment.