Skip to content

Commit

Permalink
[tegaki-python] Small kana support.
Browse files Browse the repository at this point in the history
You can now write small kana by writing them in the bottom-right corner.
  • Loading branch information
mblondel committed May 29, 2010
1 parent 60ec374 commit ddfd689
Show file tree
Hide file tree
Showing 3 changed files with 78 additions and 7 deletions.
8 changes: 4 additions & 4 deletions tegaki-python/tegaki/engines/tegakizinnia.py
Expand Up @@ -21,7 +21,7 @@

import os

from tegaki.recognizer import Recognizer, RecognizerError
from tegaki.recognizer import Results, Recognizer, RecognizerError
from tegaki.trainer import Trainer, TrainerError

try:
Expand All @@ -39,7 +39,7 @@ def open(self, path):
ret = self._recognizer.open(path)
if not ret: raise RecognizerError, "Could not open!"

def recognize(self, writing, n=10):
def _recognize(self, writing, n=10):
s = zinnia.Character()

s.set_width(writing.get_width())
Expand All @@ -55,8 +55,8 @@ def recognize(self, writing, n=10):
result = self._recognizer.classify(s, n+1)
size = result.size()

return [(result.value(i), result.score(i)) \
for i in range(0, (size - 1))]
return Results([(result.value(i), result.score(i)) \
for i in range(0, (size - 1))])

RECOGNIZER_CLASS = ZinniaRecognizer

Expand Down
44 changes: 41 additions & 3 deletions tegaki-python/tegaki/recognizer.py
Expand Up @@ -26,6 +26,34 @@
from tegaki.engine import Engine
from tegaki.dictutils import SortedDict

# Lookup table from full-size hiragana to the corresponding small hiragana.
# Only the characters listed here have a small form in this table; anything
# else is left unchanged by the code that consults it.
SMALL_HIRAGANA = {
"あ":"ぁ","い":"ぃ","う":"ぅ","え":"ぇ","お":"ぉ","つ":"っ",
"や":"ゃ","ゆ":"ゅ","よ":"ょ","わ":"ゎ"
}

# Lookup table from full-size katakana to the corresponding small katakana.
# Mirrors SMALL_HIRAGANA entry for entry.
SMALL_KATAKANA = {
"ア":"ァ","イ":"ィ","ウ":"ゥ","エ":"ェ","オ":"ォ","ツ":"ッ",
"ヤ":"ャ","ユ":"ュ","ヨ":"ョ","ワ":"ヮ"
}

class Results(list):
    """
    List of recognition results.

    Each item is an indexable pair whose first element is the candidate
    and whose second element is its score.
    """

    def get_candidates(self):
        """Return the first element (the candidate) of every item."""
        return [entry[0] for entry in self]

    def get_scores(self):
        """Return the second element (the score) of every item."""
        return [entry[1] for entry in self]

    def to_small_kana(self):
        """
        Return a new Results object in which every candidate found in
        SMALL_HIRAGANA or SMALL_KATAKANA is replaced by its small form.

        Candidates missing from both tables are kept as they are, and the
        scores are carried over unchanged.
        """
        converted = []
        for candidate in self.get_candidates():
            # Hiragana table first, then katakana, as two successive lookups.
            candidate = SMALL_HIRAGANA.get(candidate, candidate)
            candidate = SMALL_KATAKANA.get(candidate, candidate)
            converted.append(candidate)
        return Results(zip(converted, self.get_scores()))

class RecognizerError(Exception):
"""
Raised when something went wrong in a Recognizer.
Expand All @@ -51,6 +79,7 @@ class Recognizer(Engine):

def __init__(self):
self._model = None
self._lang = None

@classmethod
def get_available_recognizers(cls):
Expand Down Expand Up @@ -197,9 +226,9 @@ def set_model(self, model_name):

self.set_options(meta)

path = meta["path"]
if "language" in meta: self._lang = meta["language"]

self.open(path)
self.open(meta["path"])

# To be implemented by child class
def recognize(self, writing, n=10):
Expand All @@ -217,7 +246,16 @@ def recognize(self, writing, n=10):
A model must be loaded with open or set_model() beforehand.
"""
raise NotImplementedError
is_small = False
if self._lang == "ja":
is_small = writing.is_small()

results = self._recognize(writing, n)

if is_small:
return results.to_small_kana()
else:
return results


if __name__ == "__main__":
Expand Down
33 changes: 33 additions & 0 deletions tegaki-python/tests/test_recognizer.py
@@ -0,0 +1,33 @@
# -*- coding: utf-8 -*-

# Copyright (C) 2010 The Tegaki project contributors
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.

# Contributors to this file:
# - Mathieu Blondel

import unittest

from tegaki.recognizer import *

class ResultsTest(unittest.TestCase):
    """Unit tests for the Results container."""

    def testToSmallKana(self):
        """
        Check that the full-size ユ / ゆ stored at index 2 come back as
        their small forms after to_small_kana().
        """
        res = Results([("マ",1),("チ",2),("ユ",3),("ー",4)]).to_small_kana()
        res2 = Results([("ま",1),("ち",2),("ゆ",3),("ー",4)]).to_small_kana()
        # assertEqual is the supported name; assertEquals is a deprecated
        # alias that newer unittest versions no longer provide.
        self.assertEqual(res[2][0], "ュ")
        self.assertEqual(res2[2][0], "ゅ")

0 comments on commit ddfd689

Please sign in to comment.