Skip to content

Commit

Permalink
add Ukrainian paradigm prefixes
Browse files — browse the repository at this point in the history
  • Loading branch information
kmike committed Jan 30, 2015
1 parent e285d9b commit 82f7d66
Show file tree
Hide file tree
Showing 3 changed files with 19 additions and 14 deletions.
7 changes: 6 additions & 1 deletion pymorphy2/constants.py
@@ -1,7 +1,12 @@
# -*- coding: utf-8 -*-
from __future__ import absolute_import, unicode_literals

PARADIGM_PREFIXES = ["", "по", "наи"]

# Paradigm prefixes grouped by language key. Each value is the list that is
# handed to the dictionary compiler as its ``paradigm_prefixes`` option and to
# ``_to_paradigm``; the empty string entry stands for "no prefix".
# NOTE(review): "ua" is the ISO 3166 country code for Ukraine, while the
# ISO 639-1 language code for Ukrainian is "uk" -- confirm which key callers
# are expected to use before relying on it.
LANG_PARADIGM_PREFIXES = {
"ru": ["", "по", "наи"],
"ua": ["", "най", "якнай", "щонай"],
}


PREDICTION_PREFIXES = [
"авиа",
Expand Down
2 changes: 0 additions & 2 deletions pymorphy2/opencorpora_dict/compile.py
Expand Up @@ -18,7 +18,6 @@
izip = zip

from pymorphy2 import dawg
from pymorphy2.constants import PARADIGM_PREFIXES
from pymorphy2.utils import longest_common_substring, largest_elements

logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -62,7 +61,6 @@ def compile_parsed_dict(parsed_dict, compile_options=None):
min_ending_freq=2,
min_paradigm_popularity=3,
max_suffix_length=5,
paradigm_prefixes=PARADIGM_PREFIXES,
)
options.update(compile_options or {})
paradigm_prefixes = options["paradigm_prefixes"]
Expand Down
24 changes: 13 additions & 11 deletions tests/test_opencorpora_dict.py
@@ -1,7 +1,9 @@
# -*- coding: utf-8 -*-
from __future__ import absolute_import, unicode_literals

import os

import pytest

import pymorphy2
from pymorphy2.opencorpora_dict.compile import (
_to_paradigm,
Expand All @@ -10,10 +12,9 @@
from pymorphy2.opencorpora_dict.parse import parse_opencorpora_xml
from pymorphy2.dawg import assert_can_create
from pymorphy2.test_suite_generator import make_test_suite
from pymorphy2.constants import PARADIGM_PREFIXES

import pytest
from pymorphy2.constants import LANG_PARADIGM_PREFIXES

RU_PARADIGM_PREFIXES = LANG_PARADIGM_PREFIXES["ru"]

class TestToyDictionary:

Expand Down Expand Up @@ -54,6 +55,7 @@ def test_convert_to_pymorphy2(self, tmpdir):
options = {
'min_paradigm_popularity': 0,
'min_ending_freq': 0,
'paradigm_prefixes': RU_PARADIGM_PREFIXES,
}
convert_to_pymorphy2(self.XML_PATH, out_path, overwrite=True,
compile_options=options, source_name='toy')
Expand All @@ -76,7 +78,7 @@ def test_simple(self):
["ярче", "COMP,Qual"],
["ярчей", "COMP,Qual V-ej"],
]
stem, forms = _to_paradigm(lexeme, PARADIGM_PREFIXES)
stem, forms = _to_paradigm(lexeme, RU_PARADIGM_PREFIXES)
assert stem == "ярче"
assert forms == (
("", "COMP,Qual", ""),
Expand All @@ -88,7 +90,7 @@ def test_single_prefix(self):
["ярче", "COMP,Qual"],
["поярче", "COMP,Qual Cmp2"],
]
stem, forms = _to_paradigm(lexeme, PARADIGM_PREFIXES)
stem, forms = _to_paradigm(lexeme, RU_PARADIGM_PREFIXES)
assert stem == "ярче"
assert forms == (
("", "COMP,Qual", ""),
Expand All @@ -103,7 +105,7 @@ def test_multiple_prefixes(self):
["поярчей", "COMP,Qual Cmp2,V-ej"],
["наиярчайший", "ADJF,Supr,Qual masc,sing,nomn"],
]
stem, forms = _to_paradigm(lexeme, PARADIGM_PREFIXES)
stem, forms = _to_paradigm(lexeme, RU_PARADIGM_PREFIXES)
assert stem == 'ярч'

def test_multiple_prefixes_2(self):
Expand All @@ -112,7 +114,7 @@ def test_multiple_prefixes_2(self):
["наиподробнейший", 2],
["поподробнее", 3]
]
stem, forms = _to_paradigm(lexeme, PARADIGM_PREFIXES)
stem, forms = _to_paradigm(lexeme, RU_PARADIGM_PREFIXES)
assert stem == 'подробне'
assert forms == (
("йший", 1, ""),
Expand All @@ -127,7 +129,7 @@ def test_platina(self):
["поплатиновее", 3],
["поплатиновей", 4],
]
stem, forms = _to_paradigm(lexeme, PARADIGM_PREFIXES)
stem, forms = _to_paradigm(lexeme, RU_PARADIGM_PREFIXES)
assert forms == (
("е", 1, ""),
("й", 2, ""),
Expand All @@ -138,7 +140,7 @@ def test_platina(self):

def test_no_prefix(self):
lexeme = [["английский", 1], ["английского", 2]]
stem, forms = _to_paradigm(lexeme, PARADIGM_PREFIXES)
stem, forms = _to_paradigm(lexeme, RU_PARADIGM_PREFIXES)
assert stem == 'английск'
assert forms == (
("ий", 1, ""),
Expand All @@ -147,7 +149,7 @@ def test_no_prefix(self):

def test_single(self):
lexeme = [["английски", 1]]
stem, forms = _to_paradigm(lexeme, PARADIGM_PREFIXES)
stem, forms = _to_paradigm(lexeme, RU_PARADIGM_PREFIXES)
assert stem == 'английски'
assert forms == (("", 1, ""),)

Expand Down

0 comments on commit 82f7d66

Please sign in to comment.