Permalink
Browse files

заготовка

  • Loading branch information...
kmike committed Aug 24, 2012
0 parents commit 9ed199d8a6846438ddf75caca8329026f58db8c9
@@ -0,0 +1,22 @@
+#projects
+\.idea
+
+#temp files
+\.pyc
+\.orig
+
+#os files
+\.DS_Store
+Thumbs.db
+
+#project-specific files
+\.tox
+stuff
+MANIFEST$
+^docs/_
+^build
+^pymorphy2.egg-info
+^pymorphy2/.*\.html$
+^dicts
+^pymorphy2/.*\.pxd$
+
@@ -0,0 +1,7 @@
+include AUTHORS.rst
+include README.rst
+include CHANGES.rst
+include docs/Makefile
+include docs/make.bat
+include docs/conf.py
+recursive-include docs *.rst
@@ -0,0 +1,32 @@
+pymorphy II
+===========
+
+Morphological analyzer (POS tagger + inflection engine)
+for the Russian language.
+
+Docs: http://pymorphy2.rtfd.org
+
+Source code:
+
+* https://github.com/kmike/pymorphy2
+* https://bitbucket.org/kmike/pymorphy2
+
+Bug tracker: https://github.com/kmike/pymorphy2/issues
+
+Licensed under MIT.
+
+(ru) pymorphy II
+----------------
+
+Морфологический анализатор для русского языка.
+
+Документация: http://pymorphy2.rtfd.org
+
+Исходный код:
+
+* https://github.com/kmike/pymorphy2
+* https://bitbucket.org/kmike/pymorphy2
+
+Баг-трекер: https://github.com/kmike/pymorphy2/issues
+
+Лицензия - MIT.
@@ -0,0 +1,2 @@
+# -*- coding: utf-8 -*-
+from __future__ import absolute_import
@@ -0,0 +1,49 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+"""
+Pymorphy2 benchmark utility.
+
+Usage:
+ bench.py run [--verbose]
+ bench.py -h | --help
+ bench.py --version
+
+Options:
+ -v --verbose Be more verbose
+
+"""
+import logging
+import sys
+import os
+from pymorphy2.vendor import docopt
+
+sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
+
+
+import pymorphy2
+
+from benchmarks import speed
+
+logger = logging.getLogger('pymorphy2.bench')
+logger.addHandler(logging.StreamHandler())
+
+
+def main():
+ """ CLI interface dispatcher """
+ args = docopt(__doc__, version=pymorphy2.__version__)
+
+ if args['--verbose']:
+ logger.setLevel(logging.DEBUG)
+ else:
+ logger.setLevel(logging.INFO)
+
+ if args['run']:
+ speed.bench_all()
+
+ #logger.debug(args)
+ return 0
+
+
+if __name__ == '__main__':
+ import sys
+ sys.exit(main())
@@ -0,0 +1,56 @@
+# -*- coding: utf-8 -*-
+from __future__ import absolute_import, unicode_literals, division
+import logging
+import codecs
+import os
+
+from pymorphy2 import data, tagger
+from benchmarks import utils
+
+# Logger used for all benchmark progress and result messages in this module.
+logger = logging.getLogger('pymorphy2.bench')
+
+# Word list read by load_words(): 'unigrams.cyr.lc' in this module's directory.
+DATA_PATH = os.path.join(os.path.dirname(__file__), 'unigrams.cyr.lc')
+# Dictionary loaded by get_dict(): 'ru.dict' one directory above this module.
+DICT_PATH = os.path.abspath(
+ os.path.join(os.path.dirname(__file__), '..', 'ru.dict')
+)
+
+def get_dict():
+ return data.load_dict(DICT_PATH)
+
+def load_words(path=DATA_PATH):
+ words = []
+ with codecs.open(path, 'r', 'utf8') as f:
+ for line in f:
+ word, count, ipm = line.split()
+ count = int(count)
+ words.append((word.upper(), count))
+ return words
+
+def scale_usages(words, result_size):
+ total = sum(w[1] for w in words) + len(words) # add-one smoothing
+ return [(w[0], int(round((w[1]+1)*result_size/total))) for w in words]
+
+def bench_tag():
+ logger.debug("loading benchmark data...")
+ all_words = load_words()
+
+ words = all_words#[0:2000]
+ #words = scale_usages(words, corpus_size)
+
+ total_usages = sum(w[1] for w in words)
+
+ logger.debug("benchmarking...")
+ logger.debug("Words: %d, usages: %d", len(words), total_usages)
+
+ def _run():
+ for word, cnt in words:
+ for x in range(cnt):
+ tagger.tag(dct, word)
+
+ logger.info(" tagger.tag: %0.0f words/sec", utils.measure(_run, total_usages, 1))
+
+
+
+def bench_all():
+ """ Run all benchmarks """
+ bench_tag()
Oops, something went wrong.

0 comments on commit 9ed199d

Please sign in to comment.