1
- # -*- coding: utf-8 -*-
2
- from __future__ import absolute_import
3
- from __future__ import unicode_literals , print_function , division
4
-
5
- import sys
6
1
import logging
7
- import time
8
- import codecs
9
2
import operator
3
+ import sys
4
+ import time
5
+ from functools import lru_cache
10
6
11
7
import pymorphy2
12
- from pymorphy2 .cache import lru_cache , memoized_with_single_argument
13
- from pymorphy2 .utils import get_mem_usage
14
8
from pymorphy2 .tokenizers import simple_word_tokenize
15
-
16
- PY2 = sys .version_info [0 ] == 2
9
+ from pymorphy2 .utils import get_mem_usage
17
10
18
11
# Hacks are here to make docstring compatible with both
19
12
# docopt and sphinx.ext.autodoc.
@@ -80,10 +73,7 @@ def main(argv=None):
80
73
else :
81
74
score , lemmatize , tag = True , True , True
82
75
83
- if PY2 :
84
- out_file = codecs .getwriter ('utf8' )(sys .stdout )
85
- else :
86
- out_file = sys .stdout
76
+ out_file = sys .stdout
87
77
88
78
return parse (
89
79
morph = morph ,
@@ -112,14 +102,9 @@ def main(argv=None):
112
102
def _open_for_read(fn):
    """Return a readable text stream for *fn*.

    :param fn: path of the file to read. ``'-'``, the empty string and
        ``None`` all mean "read from standard input".
    :returns: ``sys.stdin`` itself for the standard-input spellings;
        otherwise a new file object opened in text mode and decoded as
        UTF-8.
    :raises OSError: propagated from ``open()`` when *fn* cannot be opened.

    A file opened here belongs to the caller, who is responsible for
    closing it. ``sys.stdin`` is handed back untouched (it is not wrapped
    or re-encoded), so the caller must take care not to close it
    unintentionally.
    """
    # The Python 2 ``codecs`` wrappers are gone: on Python 3 both
    # ``sys.stdin`` and ``open(..., 'rt')`` already yield ``str``.
    if fn in ('-', '', None):
        return sys.stdin

    return open(fn, 'rt', encoding='utf8')
123
108
124
109
125
110
# ============================ Commands ===========================
@@ -178,7 +163,7 @@ def parse(morph, in_file, out_file, tokenize, score, normal_form, tag,
178
163
179
164
_parse = parser .parse
180
165
if cache_size == 'unlim' :
181
- _parse = memoized_with_single_argument ({} )(_parse )
166
+ _parse = lru_cache ( None )(_parse )
182
167
else :
183
168
cache_size = int (cache_size )
184
169
if cache_size :
@@ -189,7 +174,7 @@ def parse(morph, in_file, out_file, tokenize, score, normal_form, tag,
189
174
_write (_parse (token ))
190
175
191
176
192
- class _TokenParserFormatter ( object ) :
177
+ class _TokenParserFormatter :
193
178
"""
194
179
This class defines its `parse` method based on arguments passed.
195
180
Some ugly code is to make all ifs work only once, not for each token.
@@ -218,14 +203,14 @@ def __init__(self, morph, score, normal_form, tag, newlines, thresh):
218
203
if score :
219
204
def _parse_token (tok ):
220
205
seq = [
221
- "%s:%0.3f=%s" % ( p .normal_form , p .score , p .tag )
206
+ f" { p .normal_form } : { p .score :0.3f } = { p .tag } "
222
207
for p in morph_parse (tok ) if p .score >= thresh
223
208
]
224
209
return tpl % (tok , join (seq ))
225
210
else :
226
211
def _parse_token (tok ):
227
212
seq = [
228
- "%s:%s" % ( p .normal_form , p .tag )
213
+ f" { p .normal_form } : { p .tag } "
229
214
for p in morph_parse (tok ) if p .score >= thresh
230
215
]
231
216
return tpl % (tok , join (seq ))
@@ -241,7 +226,7 @@ def _parse_token(tok):
241
226
key = val , reverse = True
242
227
)
243
228
if score :
244
- seq = ["%s:% 0.3f" % ( lemma , w ) for (lemma , w ) in items ]
229
+ seq = [f" { lemma } : { w : 0.3f} " for (lemma , w ) in items ]
245
230
else :
246
231
seq = [lemma for (lemma , w ) in items ]
247
232
@@ -250,7 +235,7 @@ def _parse_token(tok):
250
235
if score :
251
236
def _parse_token (tok ):
252
237
seq = [
253
- "% 0.3f=%s" % ( p . score , p . tag )
238
+ f" { p . score : 0.3f} = { p . tag } "
254
239
for p in morph_parse (tok ) if p .score >= thresh
255
240
]
256
241
return tpl % (tok , join (seq ))
0 commit comments