Permalink
Browse files

some tests for dictionary creation

  • Loading branch information...
1 parent 372647c commit 9588c01d2517f560e5436bf89723429f4b4149e6 @kmike committed Mar 21, 2013
View
Oops, something went wrong.
@@ -254,11 +254,11 @@ def _suffixes_prediction_data(words, paradigm_popularity, gramtab, paradigms, su
# [form_prefix_id]["suffix"]["POS"][(para_id, idx)] => number of occurrences
# this is for selecting most popular parses
- endings = collections.defaultdict(
- lambda: collections.defaultdict(
- lambda: collections.defaultdict(
- lambda: collections.defaultdict(int)))
- )
+ endings = {}
+ for form_prefix_id in range(len(PARADIGM_PREFIXES)):
+ endings[form_prefix_id] = collections.defaultdict(
+ lambda: collections.defaultdict(
+ lambda: collections.defaultdict(int)))
logger.debug('calculating prediction data: checking word endings..')
for word, (para_id, idx) in words:
@@ -57,6 +57,9 @@ def load_dict(path, gramtab_format='opencorpora-int'):
prediction_suffixes_dawgs = []
for prefix_id in range(len(paradigm_prefixes)):
fn = _f('prediction-suffixes-%s.dawg' % prefix_id)
+
+ assert os.path.exists(fn)
+
prediction_suffixes_dawgs.append(dawg.PredictionSuffixesDAWG().load(fn))
return LoadedDictionary(meta, gramtab, suffixes, paradigms, words,
@@ -1,7 +1,64 @@
# -*- coding: utf-8 -*-
from __future__ import absolute_import, unicode_literals
-from pymorphy2.opencorpora_dict.compile import _to_paradigm
+import os
+import pymorphy2
+from pymorphy2.opencorpora_dict.compile import (_to_paradigm,
+ convert_to_pymorphy2)
+from pymorphy2.opencorpora_dict.parse import parse_opencorpora_xml
+from pymorphy2.dawg import assert_can_create
+
+import pytest
+
+
class TestToyDictionary(object):
    """Checks that run against the small ``toy_dict.xml`` fixture.

    One test parses the XML directly; the other compiles it into a
    pymorphy2 dictionary and loads the result with ``MorphAnalyzer``.
    """

    # Fixture location, resolved relative to this test module.
    XML_PATH = os.path.join(
        os.path.dirname(__file__),
        '..',
        'dev_data',
        'toy_dict.xml'
    )

    def test_parse_xml(self):
        """Parse the fixture and spot-check every section of the result."""
        dct = parse_opencorpora_xml(self.XML_PATH)
        assert dct.version == '0.92'
        assert dct.revision == '389440'

        assert dct.links[0] == ('5', '6', '1')
        assert len(dct.links) == 12

        assert dct.grammemes[1] == ('NOUN', 'POST', 'СУЩ', 'имя существительное')
        assert len(dct.grammemes) == 111

        assert dct.lexemes['14'] == [('ёжиться', 'INFN,impf,intr')]

    def test_convert_to_pymorphy2(self, tmpdir):
        """Compile the fixture into ``tmpdir`` and analyse a word with it.

        The test is skipped when ``assert_can_create`` raises
        ``NotImplementedError``.
        """
        # Debugging hint: to see compilation progress, set
        # pymorphy2.opencorpora_dict.compile.logger to logging.DEBUG and
        # attach a logging.StreamHandler() to it.

        try:
            assert_can_create()
        except NotImplementedError as e:
            # pytest.skip() raises its own exception, so no explicit ``raise``
            # is needed; it expects a reason string, not an exception object.
            pytest.skip(str(e))

        # create a dictionary
        out_path = str(tmpdir.join('dicts'))
        # NOTE(review): zero thresholds presumably keep the tiny fixture from
        # being pruned while prediction data is built -- confirm against
        # convert_to_pymorphy2.
        options = {
            'min_paradigm_popularity': 0,
            'min_ending_freq': 0,
        }
        convert_to_pymorphy2(self.XML_PATH, out_path, overwrite=True,
                             prediction_options=options)

        # use it
        morph = pymorphy2.MorphAnalyzer(out_path)
        assert morph.tag('ёжиться') == [morph.TagClass('INFN,impf,intr')]
+
class TestToParadigm(object):

0 comments on commit 9588c01

Please sign in to comment.