diff --git a/pymorphy2/constants.py b/pymorphy2/constants.py index 50c66d6..950e05c 100644 --- a/pymorphy2/constants.py +++ b/pymorphy2/constants.py @@ -1,7 +1,12 @@ # -*- coding: utf-8 -*- from __future__ import absolute_import, unicode_literals -PARADIGM_PREFIXES = ["", "по", "наи"] + +LANG_PARADIGM_PREFIXES = { + "ru": ["", "по", "наи"], + "ua": ["", "най", "якнай", "щонай"], +} + PREDICTION_PREFIXES = [ "авиа", diff --git a/pymorphy2/opencorpora_dict/compile.py b/pymorphy2/opencorpora_dict/compile.py index b1b72e3..820a12a 100644 --- a/pymorphy2/opencorpora_dict/compile.py +++ b/pymorphy2/opencorpora_dict/compile.py @@ -18,7 +18,6 @@ izip = zip from pymorphy2 import dawg -from pymorphy2.constants import PARADIGM_PREFIXES from pymorphy2.utils import longest_common_substring, largest_elements logger = logging.getLogger(__name__) @@ -62,7 +61,6 @@ def compile_parsed_dict(parsed_dict, compile_options=None): min_ending_freq=2, min_paradigm_popularity=3, max_suffix_length=5, - paradigm_prefixes=PARADIGM_PREFIXES, ) options.update(compile_options or {}) paradigm_prefixes = options["paradigm_prefixes"] diff --git a/tests/test_opencorpora_dict.py b/tests/test_opencorpora_dict.py index 7775d97..fbbc692 100644 --- a/tests/test_opencorpora_dict.py +++ b/tests/test_opencorpora_dict.py @@ -1,7 +1,9 @@ # -*- coding: utf-8 -*- from __future__ import absolute_import, unicode_literals - import os + +import pytest + import pymorphy2 from pymorphy2.opencorpora_dict.compile import ( _to_paradigm, @@ -10,10 +12,9 @@ from pymorphy2.opencorpora_dict.parse import parse_opencorpora_xml from pymorphy2.dawg import assert_can_create from pymorphy2.test_suite_generator import make_test_suite -from pymorphy2.constants import PARADIGM_PREFIXES - -import pytest +from pymorphy2.constants import LANG_PARADIGM_PREFIXES +RU_PARADIGM_PREFIXES = LANG_PARADIGM_PREFIXES["ru"] class TestToyDictionary: @@ -54,6 +55,7 @@ def test_convert_to_pymorphy2(self, tmpdir): options = { 'min_paradigm_popularity': 0, 'min_ending_freq': 0, + 'paradigm_prefixes': RU_PARADIGM_PREFIXES, } convert_to_pymorphy2(self.XML_PATH, out_path, overwrite=True, compile_options=options, source_name='toy') @@ -76,7 +78,7 @@ def test_simple(self): ["ярче", "COMP,Qual"], ["ярчей", "COMP,Qual V-ej"], ] - stem, forms = _to_paradigm(lexeme, PARADIGM_PREFIXES) + stem, forms = _to_paradigm(lexeme, RU_PARADIGM_PREFIXES) assert stem == "ярче" assert forms == ( ("", "COMP,Qual", ""), @@ -88,7 +90,7 @@ def test_single_prefix(self): ["ярче", "COMP,Qual"], ["поярче", "COMP,Qual Cmp2"], ] - stem, forms = _to_paradigm(lexeme, PARADIGM_PREFIXES) + stem, forms = _to_paradigm(lexeme, RU_PARADIGM_PREFIXES) assert stem == "ярче" assert forms == ( ("", "COMP,Qual", ""), @@ -103,7 +105,7 @@ def test_multiple_prefixes(self): ["поярчей", "COMP,Qual Cmp2,V-ej"], ["наиярчайший", "ADJF,Supr,Qual masc,sing,nomn"], ] - stem, forms = _to_paradigm(lexeme, PARADIGM_PREFIXES) + stem, forms = _to_paradigm(lexeme, RU_PARADIGM_PREFIXES) assert stem == 'ярч' def test_multiple_prefixes_2(self): @@ -112,7 +114,7 @@ def test_multiple_prefixes_2(self): ["наиподробнейший", 2], ["поподробнее", 3] ] - stem, forms = _to_paradigm(lexeme, PARADIGM_PREFIXES) + stem, forms = _to_paradigm(lexeme, RU_PARADIGM_PREFIXES) assert stem == 'подробне' assert forms == ( ("йший", 1, ""), @@ -127,7 +129,7 @@ def test_platina(self): ["поплатиновее", 3], ["поплатиновей", 4], ] - stem, forms = _to_paradigm(lexeme, PARADIGM_PREFIXES) + stem, forms = _to_paradigm(lexeme, RU_PARADIGM_PREFIXES) assert forms == ( ("е", 1, ""), ("й", 2, ""), @@ -138,7 +140,7 @@ def test_platina(self): def test_no_prefix(self): lexeme = [["английский", 1], ["английского", 2]] - stem, forms = _to_paradigm(lexeme, PARADIGM_PREFIXES) + stem, forms = _to_paradigm(lexeme, RU_PARADIGM_PREFIXES) assert stem == 'английск' assert forms == ( ("ий", 1, ""), @@ -147,7 +149,7 @@ def test_no_prefix(self): def test_single(self): lexeme = [["английски", 1]] - stem, forms = _to_paradigm(lexeme, PARADIGM_PREFIXES) + stem, forms = _to_paradigm(lexeme, RU_PARADIGM_PREFIXES) assert stem == 'английски' assert forms == (("", 1, ""),)