pymorphy2-fork · insolor · Sep 8, 2023 · Sep 8, 2023 · Sep 8, 2023 · Sep 8, 2023
diff --git a/benchmarks/__init__.py b/benchmarks/__init__.py
@@ -1,2 +0,0 @@
-# -*- coding: utf-8 -*-
-from __future__ import absolute_import

diff --git a/benchmarks/bench.py b/benchmarks/bench.py
@@ -1,5 +1,3 @@
-#!/usr/bin/env python
-# -*- coding: utf-8 -*-
 """
 Pymorphy2 benchmark utility.
 
@@ -15,8 +13,9 @@
 
 """
 import logging
-import sys
 import os
+import sys
+
 from docopt import docopt
 
 sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))

diff --git a/benchmarks/shrink-unigrams.ipynb b/benchmarks/shrink-unigrams.ipynb
@@ -18,7 +18,6 @@
      "cell_type": "code",
      "collapsed": false,
      "input": [
-      "from __future__ import unicode_literals\n",
       "import random\n",
       "import math\n",
       "random.seed(0)\n",

diff --git a/benchmarks/speed.py b/benchmarks/speed.py
@@ -1,13 +1,11 @@
-# -*- coding: utf-8 -*-
-from __future__ import absolute_import, unicode_literals, division
-import logging
 import codecs
-import os
-import functools
 import datetime
+import functools
+import logging
+import os
 
-from pymorphy2 import MorphAnalyzer
 from benchmarks import utils
+from pymorphy2 import MorphAnalyzer
 
 logger = logging.getLogger('pymorphy2.bench')
 

diff --git a/benchmarks/utils.py b/benchmarks/utils.py
@@ -1,8 +1,7 @@
-# -*- coding: utf-8 -*-
-from __future__ import absolute_import, unicode_literals, division
+import gc
 import time
 import timeit
-import gc
+
 
 def measure(func, inner_iterations=1, repeats=5):
     """

diff --git a/docs/conf.py b/docs/conf.py
@@ -1,5 +1,3 @@
-# -*- coding: utf-8 -*-
-#
 # pymorphy2 documentation build configuration file, created by
 # sphinx-quickstart on Sun Jul 29 04:34:30 2012.
 #
@@ -10,9 +8,9 @@
 #
 # All configuration values have a default; values that are commented out
 # serve to show the default.
-from __future__ import unicode_literals
 
-import sys, os
+import os
+import sys
 
 # If extensions (or modules to document with autodoc) are in another directory,
 # add these directories to sys.path here. If the directory is relative to the

diff --git a/pymorphy2/__init__.py b/pymorphy2/__init__.py
@@ -1,3 +1,2 @@
-# -*- coding: utf-8 -*-
-from .version import __version__
 from .analyzer import MorphAnalyzer
+from .version import __version__
diff --git a/pymorphy2/analyzer.py b/pymorphy2/analyzer.py
@@ -1,16 +1,14 @@
-# -*- coding: utf-8 -*-
-from __future__ import print_function, unicode_literals, division
-import os
-import heapq
 import collections
+import heapq
 import logging
-import threading
 import operator
+import os
+import threading
 import warnings
 
+import pymorphy2.lang
 from pymorphy2 import opencorpora_dict
 from pymorphy2.dawg import ConditionalProbDistDAWG
-import pymorphy2.lang
 
 logger = logging.getLogger(__name__)
 
@@ -66,7 +64,7 @@ def normalized(self):
     #     return self._dict.build_paradigm_info(self.para_id)
 
 
-class ProbabilityEstimator(object):
+class ProbabilityEstimator:
     def __init__(self, dict_path):
         cpd_path = os.path.join(dict_path, 'p_t_given_w.intdawg')
         self.p_t_given_w = ConditionalProbDistDAWG().load(cpd_path)
@@ -140,14 +138,12 @@ def lang_dict_path(lang):
         return lang_paths[lang]
 
     raise ValueError(
-        "Can't find a dictionary for language %r. Installed languages: %r. "
-        "Try installing pymorphy2-dicts-%s package." % (
-            lang, list(lang_paths.keys()), lang
-        )
+        f"Can't find a dictionary for language {lang!r}. Installed languages: {list(lang_paths.keys())!r}. "
+        f"Try installing pymorphy2-dicts-{lang} package."
     )
 
 
-class MorphAnalyzer(object):
+class MorphAnalyzer:
     """
     Morphological analyzer for Russian language.
 
@@ -293,8 +289,8 @@ def choose_language(cls, dictionary, lang):
         if dictionary.lang != lang:
             # allow incorrect 'lang' values, but show a warning
             warnings.warn(
-                "Dictionary language (%r) doesn't match "
-                "analyzer language (%r)." % (dictionary.lang, lang)
+                f"Dictionary language ({dictionary.lang!r}) doesn't match "
+                f"analyzer language ({lang!r})."
             )
 
         return lang

diff --git a/pymorphy2/cache.py b/pymorphy2/cache.py
diff --git a/pymorphy2/cli.py b/pymorphy2/cli.py
@@ -1,19 +1,12 @@
-# -*- coding: utf-8 -*-
-from __future__ import absolute_import
-from __future__ import unicode_literals, print_function, division
-
-import sys
 import logging
-import time
-import codecs
 import operator
+import sys
+import time
+from functools import lru_cache
 
 import pymorphy2
-from pymorphy2.cache import lru_cache, memoized_with_single_argument
-from pymorphy2.utils import get_mem_usage
 from pymorphy2.tokenizers import simple_word_tokenize
-
-PY2 = sys.version_info[0] == 2
+from pymorphy2.utils import get_mem_usage
 
 # Hacks are here to make docstring compatible with both
 # docopt and sphinx.ext.autodoc.
@@ -80,10 +73,7 @@ def main(argv=None):
         else:
             score, lemmatize, tag = True, True, True
 
-        if PY2:
-            out_file = codecs.getwriter('utf8')(sys.stdout)
-        else:
-            out_file = sys.stdout
+        out_file = sys.stdout
 
         return parse(
             morph=morph,
@@ -112,14 +102,9 @@ def main(argv=None):
 def _open_for_read(fn):
     """ Open a file for reading """
     if fn in ['-', '', None]:
-        if PY2:
-            return codecs.getreader('utf8')(sys.stdin)
-        else:
-            return sys.stdin
-    if PY2:
-        return codecs.open(fn, 'rt', encoding='utf8')
-    else:
-        return open(fn, 'rt', encoding='utf8')
+        return sys.stdin
+
+    return open(fn, 'rt', encoding='utf8')
 
 
 # ============================ Commands ===========================
@@ -178,7 +163,7 @@ def parse(morph, in_file, out_file, tokenize, score, normal_form, tag,
 
     _parse = parser.parse
     if cache_size == 'unlim':
-        _parse = memoized_with_single_argument({})(_parse)
+        _parse = lru_cache(None)(_parse)
     else:
         cache_size = int(cache_size)
         if cache_size:
@@ -189,7 +174,7 @@ def parse(morph, in_file, out_file, tokenize, score, normal_form, tag,
         _write(_parse(token))
 
 
-class _TokenParserFormatter(object):
+class _TokenParserFormatter:
     """
     This class defines its `parse` method based on arguments passed.
     Some ugly code is to make all ifs work only once, not for each token.
@@ -218,14 +203,14 @@ def __init__(self, morph, score, normal_form, tag, newlines, thresh):
                 if score:
                     def _parse_token(tok):
                         seq = [
-                            "%s:%0.3f=%s" % (p.normal_form, p.score, p.tag)
+                            f"{p.normal_form}:{p.score:0.3f}={p.tag}"
                             for p in morph_parse(tok) if p.score >= thresh
                         ]
                         return tpl % (tok, join(seq))
                 else:
                     def _parse_token(tok):
                         seq = [
-                            "%s:%s" % (p.normal_form, p.tag)
+                            f"{p.normal_form}:{p.tag}"
                             for p in morph_parse(tok) if p.score >= thresh
                         ]
                         return tpl % (tok, join(seq))
@@ -241,7 +226,7 @@ def _parse_token(tok):
                         key=val, reverse=True
                     )
                     if score:
-                        seq = ["%s:%0.3f" % (lemma, w) for (lemma, w) in items]
+                        seq = [f"{lemma}:{w:0.3f}" for (lemma, w) in items]
                     else:
                         seq = [lemma for (lemma, w) in items]
 
@@ -250,7 +235,7 @@ def _parse_token(tok):
             if score:
                 def _parse_token(tok):
                     seq = [
-                        "%0.3f=%s" % (p.score, p.tag)
+                        f"{p.score:0.3f}={p.tag}"
                         for p in morph_parse(tok) if p.score >= thresh
                     ]
                     return tpl % (tok, join(seq))

diff --git a/pymorphy2/dawg.py b/pymorphy2/dawg.py
@@ -1,6 +1,3 @@
-# -*- coding: utf-8 -*-
-from __future__ import absolute_import, division
-
 try:
     from dawg import DAWG, RecordDAWG, IntCompletionDAWG
     EXTENSION_AVAILABLE = True
@@ -25,14 +22,14 @@ class WordsDawg(RecordDAWG):
     # We are storing 2 unsigned short ints as values:
     # the paradigm ID and the form index (inside paradigm).
     # Byte order is big-endian (this makes word forms properly sorted).
-    DATA_FORMAT = str(">HH")
+    DATA_FORMAT = ">HH"
 
     def __init__(self, data=None):
         if data is None:
-            super(WordsDawg, self).__init__(self.DATA_FORMAT)
+            super().__init__(self.DATA_FORMAT)
         else:
             assert_can_create()
-            super(WordsDawg, self).__init__(self.DATA_FORMAT, data)
+            super().__init__(self.DATA_FORMAT, data)
 
 
 class PredictionSuffixesDAWG(WordsDawg):
@@ -43,7 +40,7 @@ class PredictionSuffixesDAWG(WordsDawg):
     # We are storing 3 unsigned short ints as values:
     # count, the paradigm ID and the form index (inside paradigm).
     # Byte order is big-endian (this makes word forms properly sorted).
-    DATA_FORMAT = str(">HHH")
+    DATA_FORMAT = ">HHH"
 
 
 class ConditionalProbDistDAWG(IntCompletionDAWG):
@@ -52,17 +49,17 @@ class ConditionalProbDistDAWG(IntCompletionDAWG):
 
     def __init__(self, data=None):
         if data is None:
-            super(ConditionalProbDistDAWG, self).__init__()
+            super().__init__()
         else:
             assert_can_create()
             dawg_data = (
-                ("%s:%s" % (word, tag), int(prob*self.MULTIPLIER))
+                (f"{word}:{tag}", int(prob * self.MULTIPLIER))
                 for (word, tag), prob in data
             )
-            super(ConditionalProbDistDAWG, self).__init__(dawg_data)
+            super().__init__(dawg_data)
 
     def prob(self, word, tag):
-        dawg_key = "%s:%s" % (word, tag)
+        dawg_key = f"{word}:{tag}"
         return self.get(dawg_key, 0) / self.MULTIPLIER
 
 
@@ -71,7 +68,7 @@ def is_prefixed(self, word):
         return bool(self.prefixes(word))
 
 
-class PythonPrefixMatcher(object):
+class PythonPrefixMatcher:
     def __init__(self, prefixes):
         self._prefixes = tuple(prefixes)
 

diff --git a/pymorphy2/lang/__init__.py b/pymorphy2/lang/__init__.py
@@ -1,3 +1 @@
-# -*- coding: utf-8 -*-
-from __future__ import absolute_import
 from . import ru, uk
diff --git a/pymorphy2/lang/ru/__init__.py b/pymorphy2/lang/ru/__init__.py
@@ -1,3 +1 @@
-# -*- coding: utf-8 -*-
-from __future__ import absolute_import
 from .config import *
diff --git a/pymorphy2/lang/ru/config.py b/pymorphy2/lang/ru/config.py
@@ -1,8 +1,6 @@
-# -*- coding: utf-8 -*-
 """
 Constants and configuration for Russian language.
 """
-from __future__ import absolute_import, unicode_literals
 from pymorphy2 import units
 
 # paradigm prefixes used for dictionary compilation

diff --git a/pymorphy2/lang/uk/__init__.py b/pymorphy2/lang/uk/__init__.py
@@ -1,3 +1 @@
-# -*- coding: utf-8 -*-
-from __future__ import absolute_import
 from .config import *
diff --git a/pymorphy2/lang/uk/_prefixes.py b/pymorphy2/lang/uk/_prefixes.py
@@ -1,6 +1,3 @@
-# -*- coding: utf-8 -*-
-from __future__ import absolute_import, unicode_literals
-
 # Prefixes which don't change the word parse.
 # The list is from
 # https://github.com/languagetool-org/languagetool/blob/master/languagetool-language-modules/uk/src/main/resources/org/languagetool/resource/uk/dash_prefixes.txt

diff --git a/pymorphy2/lang/uk/config.py b/pymorphy2/lang/uk/config.py
@@ -1,8 +1,6 @@
-# -*- coding: utf-8 -*-
 """
 Constants and configuration for Ukrainian language.
 """
-from __future__ import absolute_import, unicode_literals
 from pymorphy2 import units
 from ._prefixes import KNOWN_PREFIXES
Original file line number	Diff line number	Diff line change
		@@ -1,2 +0,0 @@
		# -*- coding: utf-8 -*-
		from __future__ import absolute_import