Skip to content

Commit 3e65819

Browse files
authored
Merge b42b40b into cde8733
2 parents cde8733 + b42b40b commit 3e65819

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

54 files changed: 182 additions (+182) and 408 deletions (-408).

benchmarks/__init__.py

Lines changed: 0 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1,2 +0,0 @@
1-
# -*- coding: utf-8 -*-
2-
from __future__ import absolute_import

benchmarks/bench.py

Lines changed: 2 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,3 @@
1-
#!/usr/bin/env python
2-
# -*- coding: utf-8 -*-
31
"""
42
Pymorphy2 benchmark utility.
53
@@ -15,8 +13,9 @@
1513
1614
"""
1715
import logging
18-
import sys
1916
import os
17+
import sys
18+
2019
from docopt import docopt
2120

2221
sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))

benchmarks/shrink-unigrams.ipynb

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -18,7 +18,6 @@
1818
"cell_type": "code",
1919
"collapsed": false,
2020
"input": [
21-
"from __future__ import unicode_literals\n",
2221
"import random\n",
2322
"import math\n",
2423
"random.seed(0)\n",

benchmarks/speed.py

Lines changed: 4 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -1,13 +1,11 @@
1-
# -*- coding: utf-8 -*-
2-
from __future__ import absolute_import, unicode_literals, division
3-
import logging
41
import codecs
5-
import os
6-
import functools
72
import datetime
3+
import functools
4+
import logging
5+
import os
86

9-
from pymorphy2 import MorphAnalyzer
107
from benchmarks import utils
8+
from pymorphy2 import MorphAnalyzer
119

1210
logger = logging.getLogger('pymorphy2.bench')
1311

benchmarks/utils.py

Lines changed: 2 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -1,8 +1,7 @@
1-
# -*- coding: utf-8 -*-
2-
from __future__ import absolute_import, unicode_literals, division
1+
import gc
32
import time
43
import timeit
5-
import gc
4+
65

76
def measure(func, inner_iterations=1, repeats=5):
87
"""

docs/conf.py

Lines changed: 2 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,3 @@
1-
# -*- coding: utf-8 -*-
2-
#
31
# pymorphy2 documentation build configuration file, created by
42
# sphinx-quickstart on Sun Jul 29 04:34:30 2012.
53
#
@@ -10,9 +8,9 @@
108
#
119
# All configuration values have a default; values that are commented out
1210
# serve to show the default.
13-
from __future__ import unicode_literals
1411

15-
import sys, os
12+
import os
13+
import sys
1614

1715
# If extensions (or modules to document with autodoc) are in another directory,
1816
# add these directories to sys.path here. If the directory is relative to the

pymorphy2/__init__.py

Lines changed: 1 addition & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1,3 +1,2 @@
1-
# -*- coding: utf-8 -*-
2-
from .version import __version__
31
from .analyzer import MorphAnalyzer
2+
from .version import __version__

pymorphy2/analyzer.py

Lines changed: 10 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -1,16 +1,14 @@
1-
# -*- coding: utf-8 -*-
2-
from __future__ import print_function, unicode_literals, division
3-
import os
4-
import heapq
51
import collections
2+
import heapq
63
import logging
7-
import threading
84
import operator
5+
import os
6+
import threading
97
import warnings
108

9+
import pymorphy2.lang
1110
from pymorphy2 import opencorpora_dict
1211
from pymorphy2.dawg import ConditionalProbDistDAWG
13-
import pymorphy2.lang
1412

1513
logger = logging.getLogger(__name__)
1614

@@ -66,7 +64,7 @@ def normalized(self):
6664
# return self._dict.build_paradigm_info(self.para_id)
6765

6866

69-
class ProbabilityEstimator(object):
67+
class ProbabilityEstimator:
7068
def __init__(self, dict_path):
7169
cpd_path = os.path.join(dict_path, 'p_t_given_w.intdawg')
7270
self.p_t_given_w = ConditionalProbDistDAWG().load(cpd_path)
@@ -140,14 +138,12 @@ def lang_dict_path(lang):
140138
return lang_paths[lang]
141139

142140
raise ValueError(
143-
"Can't find a dictionary for language %r. Installed languages: %r. "
144-
"Try installing pymorphy2-dicts-%s package." % (
145-
lang, list(lang_paths.keys()), lang
146-
)
141+
f"Can't find a dictionary for language {lang!r}. Installed languages: {list(lang_paths.keys())!r}. "
142+
f"Try installing pymorphy2-dicts-{lang} package."
147143
)
148144

149145

150-
class MorphAnalyzer(object):
146+
class MorphAnalyzer:
151147
"""
152148
Morphological analyzer for Russian language.
153149
@@ -293,8 +289,8 @@ def choose_language(cls, dictionary, lang):
293289
if dictionary.lang != lang:
294290
# allow incorrect 'lang' values, but show a warning
295291
warnings.warn(
296-
"Dictionary language (%r) doesn't match "
297-
"analyzer language (%r)." % (dictionary.lang, lang)
292+
f"Dictionary language ({dictionary.lang!r}) doesn't match "
293+
f"analyzer language ({lang!r})."
298294
)
299295

300296
return lang

pymorphy2/cache.py

Lines changed: 0 additions & 49 deletions
This file was deleted.

pymorphy2/cli.py

Lines changed: 14 additions & 29 deletions
Original file line number | Diff line number | Diff line change
@@ -1,19 +1,12 @@
1-
# -*- coding: utf-8 -*-
2-
from __future__ import absolute_import
3-
from __future__ import unicode_literals, print_function, division
4-
5-
import sys
61
import logging
7-
import time
8-
import codecs
92
import operator
3+
import sys
4+
import time
5+
from functools import lru_cache
106

117
import pymorphy2
12-
from pymorphy2.cache import lru_cache, memoized_with_single_argument
13-
from pymorphy2.utils import get_mem_usage
148
from pymorphy2.tokenizers import simple_word_tokenize
15-
16-
PY2 = sys.version_info[0] == 2
9+
from pymorphy2.utils import get_mem_usage
1710

1811
# Hacks are here to make docstring compatible with both
1912
# docopt and sphinx.ext.autodoc.
@@ -80,10 +73,7 @@ def main(argv=None):
8073
else:
8174
score, lemmatize, tag = True, True, True
8275

83-
if PY2:
84-
out_file = codecs.getwriter('utf8')(sys.stdout)
85-
else:
86-
out_file = sys.stdout
76+
out_file = sys.stdout
8777

8878
return parse(
8979
morph=morph,
@@ -112,14 +102,9 @@ def main(argv=None):
112102
def _open_for_read(fn):
113103
""" Open a file for reading """
114104
if fn in ['-', '', None]:
115-
if PY2:
116-
return codecs.getreader('utf8')(sys.stdin)
117-
else:
118-
return sys.stdin
119-
if PY2:
120-
return codecs.open(fn, 'rt', encoding='utf8')
121-
else:
122-
return open(fn, 'rt', encoding='utf8')
105+
return sys.stdin
106+
107+
return open(fn, 'rt', encoding='utf8')
123108

124109

125110
# ============================ Commands ===========================
@@ -178,7 +163,7 @@ def parse(morph, in_file, out_file, tokenize, score, normal_form, tag,
178163

179164
_parse = parser.parse
180165
if cache_size == 'unlim':
181-
_parse = memoized_with_single_argument({})(_parse)
166+
_parse = lru_cache(None)(_parse)
182167
else:
183168
cache_size = int(cache_size)
184169
if cache_size:
@@ -189,7 +174,7 @@ def parse(morph, in_file, out_file, tokenize, score, normal_form, tag,
189174
_write(_parse(token))
190175

191176

192-
class _TokenParserFormatter(object):
177+
class _TokenParserFormatter:
193178
"""
194179
This class defines its `parse` method based on arguments passed.
195180
Some ugly code is to make all ifs work only once, not for each token.
@@ -218,14 +203,14 @@ def __init__(self, morph, score, normal_form, tag, newlines, thresh):
218203
if score:
219204
def _parse_token(tok):
220205
seq = [
221-
"%s:%0.3f=%s" % (p.normal_form, p.score, p.tag)
206+
f"{p.normal_form}:{p.score:0.3f}={p.tag}"
222207
for p in morph_parse(tok) if p.score >= thresh
223208
]
224209
return tpl % (tok, join(seq))
225210
else:
226211
def _parse_token(tok):
227212
seq = [
228-
"%s:%s" % (p.normal_form, p.tag)
213+
f"{p.normal_form}:{p.tag}"
229214
for p in morph_parse(tok) if p.score >= thresh
230215
]
231216
return tpl % (tok, join(seq))
@@ -241,7 +226,7 @@ def _parse_token(tok):
241226
key=val, reverse=True
242227
)
243228
if score:
244-
seq = ["%s:%0.3f" % (lemma, w) for (lemma, w) in items]
229+
seq = [f"{lemma}:{w:0.3f}" for (lemma, w) in items]
245230
else:
246231
seq = [lemma for (lemma, w) in items]
247232

@@ -250,7 +235,7 @@ def _parse_token(tok):
250235
if score:
251236
def _parse_token(tok):
252237
seq = [
253-
"%0.3f=%s" % (p.score, p.tag)
238+
f"{p.score:0.3f}={p.tag}"
254239
for p in morph_parse(tok) if p.score >= thresh
255240
]
256241
return tpl % (tok, join(seq))

0 commit comments

Comments
 (0)