Skip to content

Commit

Permalink
Merge remote-tracking branch 'origin/master' into develop
Browse files Browse the repository at this point in the history
  • Loading branch information
kmaehashi committed Feb 21, 2014
2 parents 411e49f + a1c7056 commit a0a0256
Show file tree
Hide file tree
Showing 14 changed files with 272 additions and 70 deletions.
153 changes: 153 additions & 0 deletions check_idl.py
@@ -0,0 +1,153 @@
#!/usr/bin/env python

import argparse
import json
import glob
import os
import re
import sys

# Building blocks shared by the IDL and RST patterns below.
# `ident` is a run of lowercase letters, digits, `_`, `<`, `>`, `,` and
# spaces; the character class therefore also admits generic-looking type
# spellings such as `map<string, string>`.
ident = r'[a-z0-9_<>, ]+'
# One numbered argument: `<index>: <ident> <ident>`.
arg = r'[0-9]+: *%s +%s' % (ident, ident)

# IDL side.
# `message NAME {`, optionally with a parenthesised suffix after the name;
# group 1 is the message name.
idl_message_pattern = re.compile(r'message ([a-z0-9_]+)(\(.*\))? {')
# A numbered member line; group 1 is the `<index>: <ident> <ident>` text.
idl_member_pattern = re.compile(r' *([0-9]+: %s +%s)' % (ident, ident))
# A method line; group 1 is the signature `<ident> <ident>(<args>)`.
# A trailing `#...` comment is allowed after the signature.
idl_method_pattern = re.compile(r'\s*(%s +%s\( *(%s( *, *%s)*)? *\))\s*(#.*)?'
% (ident, ident, arg, arg))

# RST side (`mpidl` directives). All patterns are applied with `.match`,
# so they are anchored at the start of the line.
# NOTE(review): the member and method patterns begin with exactly one
# literal space as seen here; directives indented by more than one space
# would not match. Confirm against the upstream file in case whitespace was
# collapsed when this text was extracted.
rst_message_pattern = re.compile(r'\.\. mpidl:message:: ([0-9a-z_]+)')
rst_member_pattern = re.compile(r' \.\. mpidl:member:: ([0-9]+: %s %s)' % (ident, ident))
# NOTE(review): not referenced by any function visible in this file.
rst_service_pattern = re.compile(r'\.\. mpidl:service:: ([0-9a-z_]+)')
rst_method_pattern = re.compile(r' \.\. mpidl:method:: (%s +%s\( *(%s( *, *%s)*)? *\))'
% (ident, ident, arg, arg))

def find_idl_message(lines):
    """Collect the message definitions found in IDL source lines.

    Scanning starts a message at a line matching ``idl_message_pattern``
    and ends it at the first later line containing ``}``.  Every line in
    between (including the closing one) that matches
    ``idl_member_pattern`` contributes one member.

    Returns a list of ``{"name": ..., "members": [...]}`` dicts in the
    order the messages were closed.  A message that is never closed is
    not reported.
    """
    found = []
    current = None  # name of the message being scanned, if any
    fields = []
    for text in lines:
        if not current:
            # Outside a message: only a message header is of interest.
            header = idl_message_pattern.match(text)
            if header:
                current = header.group(1)
            continue

        member = idl_member_pattern.match(text)
        if member:
            fields.append(member.group(1))
        if '}' in text:
            found.append({"name": current, "members": fields})
            current, fields = None, []
    return found

def find_idl_service(lines):
    """Collect the method signatures declared in IDL source lines.

    A line matching ``idl_method_pattern`` contributes its signature
    (group 1) unless the line immediately before it contains
    ``@internal``.  The first ``bool clear()`` entry, if present, is
    removed from the result.

    Returns the signatures as a list, in file order.
    """
    signatures = []
    skip_next = False  # True when the previous line carried '@internal'
    for text in lines:
        hit = idl_method_pattern.match(text)
        if hit and not skip_next:
            signatures.append(hit.group(1))
        # The flag always reflects only the line that was just processed.
        skip_next = '@internal' in text

    # clear() API is not documented in rst now.
    if 'bool clear()' in signatures:
        signatures.remove('bool clear()')

    return signatures

def read_idl(path):
    """Read the IDL file at *path* and summarise its API.

    Returns a dict with two keys: ``'messages'`` (the result of
    ``find_idl_message``) and ``'services'`` (the result of
    ``find_idl_service``, sorted).
    """
    with open(path) as idl_file:
        content = idl_file.readlines()
    message_list = find_idl_message(content)
    method_list = sorted(find_idl_service(content))
    return {'messages': message_list, 'services': method_list}

def find_rst_message(lines):
    """Collect the ``mpidl:message`` definitions found in RST lines.

    A message starts at a line matching ``rst_message_pattern``.  While a
    message is open, lines matching ``rst_member_pattern`` contribute
    members.  The message is closed by the next non-blank line that does
    not start with a space, or by the end of the input.

    Returns a list of ``{'name': ..., 'members': [...]}`` dicts in the
    order the messages were closed.
    """
    messages = []
    message = None
    members = []
    for line in lines:
        if message:
            m = rst_member_pattern.match(line)
            if m:
                members.append(m.group(1))
            # Any non-blank line starting in column 0 ends the directive body.
            if line.strip() != '' and not line.startswith(' '):
                messages.append({'name': message, 'members': members})
                message = None
                members = []

        # Checked on every line: the line that closed one message may be
        # the directive that opens the next.
        m = rst_message_pattern.match(line)
        if m:
            message = m.group(1)

    # A message that runs to the end of the input has no terminating line;
    # record it here so it is not silently dropped.
    if message:
        messages.append({'name': message, 'members': members})
    return messages

def find_rst_service(lines):
    """Return the method signatures documented in RST lines.

    Each line matching ``rst_method_pattern`` contributes its signature
    (group 1); the result keeps file order.
    """
    return [hit.group(1)
            for hit in map(rst_method_pattern.match, lines)
            if hit]

def read_rst(path):
    """Read the RST file at *path* and summarise the API it documents.

    Returns a dict with two keys: ``'messages'`` (the result of
    ``find_rst_message``) and ``'services'`` (the result of
    ``find_rst_service``, sorted).
    """
    with open(path) as rst_file:
        content = rst_file.readlines()
    summary = {}
    summary['messages'] = find_rst_message(content)
    summary['services'] = sorted(find_rst_service(content))
    return summary

def compare_idl_and_rst(idl_path, rst_path):
    """Compare the API in an IDL file with the API documented in an RST file.

    Messages and services are compared separately.  For each section that
    differs, the RST path and both versions are printed.

    Returns True if any section differs, False otherwise.
    """
    idl = read_idl(idl_path)
    rst = read_rst(rst_path)

    error = False
    for section in ('messages', 'services'):
        if idl[section] != rst[section]:
            # Single-argument print() calls produce the same text on
            # Python 2 and Python 3.
            print(rst_path)
            print('idl: %s' % (idl[section],))
            print('rst: %s' % (rst[section],))
            error = True

    return error

def run(langs, jubatus_root, document_root):
    """Check every server IDL against its RST document for each language.

    IDL files are taken from ``<jubatus_root>/jubatus/server/server/*.idl``
    and matched with ``<document_root>/source/<lang>/api_<name>.rst``.
    A missing RST file is reported on stdout but does not set the error
    flag.  If any existing pair differs, the process exits with status -1.
    """
    # find in JUBATUS_root/jubatus/server/server/*.idl
    # The listing does not depend on the language, so build it once; it is
    # sorted because glob returns names in arbitrary order.
    pattern = os.path.join(jubatus_root, 'jubatus', 'server', 'server', '*.idl')
    idl_paths = sorted(glob.glob(pattern))

    error = False
    for lang in langs:
        for idl_path in idl_paths:
            api = os.path.splitext(os.path.basename(idl_path))[0]
            rst_path = os.path.join(document_root, 'source', lang, 'api_%s.rst' % api)
            if not os.path.exists(rst_path):
                print('RST is not found: %s' % rst_path)
            else:
                error |= compare_idl_and_rst(idl_path, rst_path)
    if error:
        sys.exit(-1)

if __name__ == '__main__':
    # Command-line entry point: parse the options and run the check.
    parser = argparse.ArgumentParser()
    parser.add_argument('-j', '--jubatus', required=True,
                        help='path to Jubatus directory')
    parser.add_argument('-d', '--document', default='.',
                        help='path to document directory (default: ".")')
    # With action='append', argparse appends command-line values to a
    # non-empty default instead of replacing it, so the default languages
    # are applied after parsing rather than passed as `default`.
    parser.add_argument('-l', '--lang', action='append', default=None,
                        help='language to check (default: "ja", "en")')

    args = parser.parse_args()
    run(args.lang or ['ja', 'en'], args.jubatus, args.document)
2 changes: 1 addition & 1 deletion source/en/faq_rpc_err_workaround.rst
Expand Up @@ -47,7 +47,7 @@ Ruby
+ When RPC methods mismatching, type mismatching errors occur by RPC, ``Jubatus::Common::InterfaceMismatch`` exception will be raised.
When algorithm errors occur by RPC, ``MessagePack::RPC::RemoteError`` or ``MessagePack::RPC::CallError`` exception will be raised.
You should catch these exceptions and close sessions explicitly.
You can catch any RPC exceptions by ``RPCError`` excluding ``TrasportError`` and ``TimeoutError`` .
You can catch any RPC exceptions by ``RPCError`` excluding ``TransportError`` and ``TimeoutError`` .

.. code-block:: ruby
Expand Down
4 changes: 2 additions & 2 deletions source/en/fv_convert.rst
Expand Up @@ -72,7 +72,7 @@ The following is an example of a datum.
Name of keys cannot contain "$" sign.

For example, a datum consists of ``std::vector<std::pair<std::string, std::string> >`` , ``std::vector<std::pair<std::stirng, double> >`` and ``std::vector<std::pair<std::string, std::string> >`` in C++.
For example, a datum consists of ``std::vector<std::pair<std::string, std::string> >`` , ``std::vector<std::pair<std::string, double> >`` and ``std::vector<std::pair<std::string, std::string> >`` in C++.
``std::pair<T,U>`` (resp. ``std::vector<T>``) is to C++ what tuple (resp. vector) is to Python.

Flow of Data Conversion
Expand Down Expand Up @@ -531,7 +531,7 @@ In CLASS_types (CLASS is either ``string`` or ``num``), we should specify "dynam
Path to the plugin may either be a full path, or a file name if the plugin is in the default plugin directory (``$PREFIX/lib/jubatus/plugin`` or ``$PREFIX/lib64/jubatus/plugin`` in most cases).
Argument of the function is specified by other parameters.

In Jubatus we can make use of three pre-defined plugins which aim to extraction of features from strings.
In Jubatus we can make use of two pre-defined plugins which aim to extract features from strings.
Note that some plugins are not available depending on your compile options.

.. describe:: libmecab_splitter.so
Expand Down
7 changes: 4 additions & 3 deletions source/en/misc.rst
Expand Up @@ -8,9 +8,10 @@ Publications

2013
~~~~

2013/06/26-27
Shohei Hido, **Jubatus: real-time and highly-scalable machine learning platform**, Hadoop Summit 2013, San Jose. [`link <http://hadoopsummit2013.uservoice.com/forums/196822-future-of-apache-hadoop/suggestions/3714873-jubatus-real-time-and-highly-scalable-machine-lea>`__] *To Appear*
2013/12/09
Shohei Hido, Seiya Tokui, Satoshi Oda, **Jubatus: An Open Source Platform for Distributed Online Machine Learning**, NIPS 2013 Workshop on Big Learning, Lake Tahoe. [`link <http://biglearn.org/index.php/Papers#posters>`__, `pdf <http://www.biglearn.org/2013/files/papers/biglearning2013_submission_24.pdf>`__]
2013/06/27
Shohei Hido, **Jubatus: real-time and highly-scalable machine learning platform**, Hadoop Summit 2013, San Jose. [`slide <http://www.slideshare.net/pfi/hido-june27-1150amroom210c>`__]
2013/03/22
**Smarter instant analysis of "Current" with big data: Jubatus** [`video <http://www.youtube.com/watch?v=HOEqnZqvtUI>`__]

Expand Down
2 changes: 1 addition & 1 deletion source/en/quickstart.rst
Expand Up @@ -157,7 +157,7 @@ Please add these lines to ``pom.xml`` of your project.
<dependency>
<groupId>us.jubat</groupId>
<artifactId>jubatus</artifactId>
<version>0.5.1</version>
<version>[0.5,)</version>
</dependency>
</dependencies>
Expand Down
16 changes: 16 additions & 0 deletions source/en/server.rst
Expand Up @@ -95,6 +95,22 @@ Here is a example of MessagePack-IDL with annotation.
int clear()
}

The following RPC methods for server are automatically appended to each service by ``jenerator``:

.. code-block:: c++

#@random #@analysis #@pass
string get_config()

#@broadcast #@analysis #@all_and
bool save(0: string id)

#@broadcast #@update #@all_and
bool load(0: string id)

#@broadcast #@analysis #@merge
map<string, map<string, string> > get_status()


Building ``jenerator``
~~~~~~~~~~~~~~~~~~~~~~
Expand Down
60 changes: 33 additions & 27 deletions source/en/tutorial/regression_python.rst
Expand Up @@ -7,7 +7,7 @@ Here we explain the sample program of Regression in Python.
Source_code
--------------------------------

In this sample program, we will explain 1) how to configure the learning-algorithms that used by Jubatus, with the example file 'rent.json'; 2) how to train and predict by 'main.py' with the training data in 'rent-data.csv' file and the estimation data in 'myhome.yml' file. Here are their source codes.
In this sample program, we will explain 1) how to configure the learning algorithms that are used by Jubatus, with the example file 'rent.json'; 2) how to train and predict by 'jubahomes.py' with the training data in 'rent-data.csv' file and the estimation data in 'myhome.yml' file. Here is their source code.


**rent.json**
Expand Down Expand Up @@ -37,18 +37,27 @@ In this sample program, we will explain 1) how to configure the learning-algorit
}
}
**main.py**
**jubahomes.py**

.. code-block:: python
:linenos:
#!/usr/bin/env python
import argparse
import yaml
from jubatus.common import Datum
from jubatus.regression.client import Regression
from jubatus.regression.types import *
from jubahomes.version import get_version
VERSION = (0, 0, 1, '')
def get_version():
version_string = '%s.%s.%s' % VERSION[0:3]
if len(VERSION[3]):
version_string += '-' + VERSION[3]
return version_string
def parse_options():
parser = argparse.ArgumentParser()
Expand Down Expand Up @@ -118,6 +127,9 @@ In this sample program, we will explain 1) how to configure the learning-algorit
result = client.estimate(analyze_data)
print 'rent ....', round(result[0], 1)
if __name__ == '__main__':
main()
**myhome.yml**
Expand Down Expand Up @@ -174,7 +186,7 @@ This JSON file give the configuration information. Here are the meanings of the
In addition, the 'regularization_weight' above plays various roles in different algorithms, so please be careful in configuring its values in different algorithms.


**main.py**
**jubahomes.py**


We explain the learning and prediction processes.
Expand All @@ -183,7 +195,7 @@ To write the Client program for Regression, we can use the Regression class defi
There are two methods used in this program. The 'train' method for learning process, and the 'estimate' method for prediction with the trained model.

1. Connect to Jubatus Server
Connect to Jubatus Server (Line 35)
Connect to Jubatus Server (Line 44)

Setting the IP addr, RPC port number of Jubatus Server and the unique name for task identification in Zookeeper.

Expand Down Expand Up @@ -243,32 +255,32 @@ There are two methods used in this program. The 'train' method for learning proc

Here is the detailed process for making the training data in this sample.

Next, read the source file (CSV file) of the training data line by line (Line 40-58).
Split the data read from each line in CSV file, by the ',' mark (Line 48).
Next, read the source file (CSV file) of the training data line by line (Line 49-67).
Split the data read from each line in CSV file, by the ',' mark (Line 57).

The string items and double items are stored into the Datum consturctor of as a dictionary object (Line 49-55), respectively.
Finally, the Datum is appended with the rent label, so as to be used as one piece of training data (argument 'train' in Line 55).
The string items and double items are stored into the Datum constructor as a dictionary object (Line 58-63), respectively.
Finally, the Datum is appended with the rent label, so as to be used as one piece of training data (argument 'train' in Line 64).

3. Model Training (update learning model)
Input the training data generated in step.2 into the train() method (Line 58).
Input the training data generated in step.2 into the train() method (Line 67).
The parameter specifies the train_data generated in step.2.

4. Prepare the prediction data
Prepare the prediction data in the similar way of training Datum creation.
Here, we generate the data for prediction by using the YAML file (please download the library `JYaml <http://jyaml.sourceforge.net/download.html>`_ )
Here, we generate the data for prediction by using the YAML file (please download the library `PyYaml <http://pyyaml.org/>`_ )
YAML is one kind of data format, in which objects and structure data are serialized.

Read the YAML file (myhome.yml) by yaml.load() and get the return value in dict type (Line 65).
Generate the prediction Datum by using the simliar process as in step 2 (Line 66-72).
Read the YAML file (myhome.yml) by yaml.load() and get the return value in dict type (Line 74).
Generate the prediction Datum by using a process similar to that in step 2 (Line 75-81).

Add the Datum into the prediction data list, and send it into the estimate() method in "Regression" for prediction.

5. Prediction based on trained model
The prediction results are returned as a list by the estimate() method (Line 74).
The prediction results are returned as a list by the estimate() method (Line 83).

6. Output the result
The prediction results are returned in the same order of the prediction data. (In this sample, only one prediction data is used, thus only one result is returned.)
The result is rounded at 2nd decimal for output, because it is in Float type (Line 76).
The result is rounded at 2nd decimal for output, because it is in Float type (Line 85).


------------------------------------
Expand All @@ -284,17 +296,11 @@ Run the sample program


* For Jubatus Client
Install the command line aplication for using this sample program.

::

$ sudo python setup.py install

Specify the option by using the command below.

::

$ jubahomes -t dat/rent-data.csv -a dat/myhome.yml
$ python jubahomes.py -t ../dat/rent-data.csv -a ../dat/myhome.yml

-t :CSV file name (if there is training data)
-a :YML file name (required)
Expand All @@ -306,12 +312,12 @@ Run the sample program
train ... 145
rent .... 9.9

You can change the dat/myhome.yaml file to predict housing rent under various conditions.
You can change the myhome.yaml file to predict housing rent under various conditions.

::

$ edit dat/myhome.yml
$ jubahomes -a dat/myhome.yml
$ edit dat/myhome.yml
$ jubahomes -a dat/myhome.yml
$ edit ../dat/myhome.yml
$ python jubahomes.py -a ../dat/myhome.yml
$ edit ../dat/myhome.yml
$ python jubahomes.py -a ../dat/myhome.yml
:
2 changes: 1 addition & 1 deletion source/en/tutorial_distributed.rst
Expand Up @@ -66,7 +66,7 @@ Jubatus proxies proxy RPC requests from clients to servers.
In distributed environment, make RPC requests from clients to proxies, not directly to servers.

Jubatus proxies are provided for each Jubatus servers.
For the classifier, ``jubaclassifier_keeper`` is the corresponding proxy.
For the classifier, ``jubaclassifier_proxy`` is the corresponding proxy.

::

Expand Down
2 changes: 1 addition & 1 deletion source/ja/faq_rpc_err_workaround.rst
Expand Up @@ -46,7 +46,7 @@ Ruby

+ RPC呼び出しで、メソッド名や型の不一致が発生した場合は ``Jubatus::Common::InterfaceMismatch`` 例外が発生します。
アルゴリズムのエラーが発生した場合は ``MessagePack::RPC::RemoteError`` または ``MessagePack::RPC::CallError`` 例外が発生します。
これらの例外を捕捉し、接続を明示的に破棄します。 ``TrasportError`` および ``TimeoutError``以外の ``RPCError`` をまとめて捕捉してもよいでしょう。
これらの例外を捕捉し、接続を明示的に破棄します。 ``TransportError`` および ``TimeoutError`` 以外の ``RPCError`` をまとめて捕捉してもよいでしょう。

.. code-block:: ruby
Expand Down

0 comments on commit a0a0256

Please sign in to comment.