Skip to content
This repository has been archived by the owner on Jul 22, 2022. It is now read-only.

Commit

Permalink
Merge 9d60486 into fb71b1a
Browse files Browse the repository at this point in the history
  • Loading branch information
miurahr committed May 17, 2021
2 parents fb71b1a + 9d60486 commit ac8653e
Show file tree
Hide file tree
Showing 4 changed files with 28 additions and 34 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/run-benchmark.yml
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ jobs:
python-version: ${{ matrix.python-version }}
- name: Install dependencies
run: |
pip install -U pip
pip install -U pip wheel setuptools
pip install tox tox-gh-actions
- name: Run benchmark with tox
run: |
Expand Down
3 changes: 3 additions & 0 deletions setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -42,9 +42,12 @@ package_dir =
packages = find:
provides = pykakasi
setup_requires =
pygtrie
wheel
setuptools>=42
setuptools-scm[toml]>=3.5.0
install_requires =
pygtrie
jaconv
deprecated
importlib_metadata;python_version<"3.8"
Expand Down
22 changes: 10 additions & 12 deletions src/kakasidict.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,9 @@
import os
import pickle
import re
from typing import Dict, List, Optional, Tuple, Union
from typing import Dict, List, Optional, Union

import pygtrie # type: ignore # noqa

root_dir = os.path.abspath(os.path.dirname(os.path.dirname(__file__)))

Expand Down Expand Up @@ -73,7 +75,7 @@ def maketrans(self, src, dst):
# for kanwadict

def _makekanwa(self, sources: List[str], dst: str):
self.records: Dict[int, Dict[str, List[Tuple[str, ...]]]] = {}
self.records = pygtrie.CharTrie()
for src in sources:
with open(src, "r", encoding="utf-8") as f:
for line in f:
Expand Down Expand Up @@ -119,20 +121,16 @@ def _parse_kakasi_dict(self, line: str) -> None:
}

def _updaterec(self, kanji: str, yomi, tail) -> None:
key = ord(kanji[0])
if tail == "":
if key in self.records:
if kanji in self.records[key]:
rec = self.records[key][kanji]
if kanji in self.records:
rec = self.records.get(kanji)
if yomi not in rec:
rec.append(yomi)
self.records[key].update({kanji: rec})
else:
self.records[key][kanji] = [yomi]
self.records[kanji] = rec
else:
self.records[key] = {}
self.records[key][kanji] = [yomi]
self.records[kanji] = [yomi]
else:
for c in self._cletters.get(tail, ""):
for c in self._cletters.get(tail, ()):
self._updaterec(kanji + c, yomi + c, "")

def kanwaout(self, out):
Expand Down
35 changes: 14 additions & 21 deletions src/pykakasi/kanji.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@
import threading
from typing import Tuple

import pygtrie # type: ignore # noqa

from .properties import Configurations


Expand All @@ -24,20 +26,18 @@ def convert(self, itext: str) -> Tuple[str, int]:
Hstr = ""
text = self._itaiji.convert(itext)
num_vs = len(itext) - len(text)
table = self._kanwa.load(text[0])
if table is None:
res = self._kanwa.search(text)
if not bool(res):
return "", 0
for (k, v) in table.items():
length = len(k)
if len(text) >= length:
if text.startswith(k):
for yomi in v:
if max_len < length:
Hstr = yomi
max_len = length
length = len(res.key)
max_len = length
for yomi in res.value:
# FIXME: how to select from multiple candidates
Hstr = yomi
break
for _ in range(
num_vs
): # when converting string with kanji wit variation selector, calculate max_len again
): # when converting string with variation selector, calculate max_len again
if max_len > len(itext):
break
elif text[max_len - 1] != itext[max_len - 1]:
Expand All @@ -52,13 +52,6 @@ def convert(self, itext: str) -> Tuple[str, int]:
pass
return (Hstr, max_len)

def _isCletter(self, literal: str, c: str) -> bool:
if (0x3041 <= ord(c) <= 0x309F) and (
literal in self._cl_table[ord(c) - 0x3040]
): # ぁ:= u\3041
return True
return False

def _is_vschr(self, ch):
    """Return True when *ch* is a Unicode variation selector.

    Two ranges are recognised:

    * U+FE00..U+FE02 -- the first three selectors of the basic
      Variation Selectors block.
    * U+E0100..U+E01EF -- the Variation Selectors Supplement.

    The supplement's upper bound was previously written as ``0x0E1EF``
    (U+E1EF), which is smaller than the lower bound ``0x0E0100``, so that
    half of the test could never succeed.  It is now ``0xE01EF``.
    """
    code = ord(ch)
    return 0xE0100 <= code <= 0xE01EF or 0xFE00 <= code <= 0xFE02

Expand Down Expand Up @@ -97,6 +90,7 @@ class Kanwa:
def __new__(cls, *p, **k):
    """Create an instance whose attributes are shared by every instance.

    The new object's ``__dict__`` is rebound to the class-level
    ``_shared_state`` mapping, so an attribute set through one instance is
    visible through all others.
    """
    instance = object.__new__(cls, *p, **k)
    # Rebind the per-instance namespace to the single class-wide dict.
    instance.__dict__ = cls._shared_state
    # Annotation only: declares the attribute's type, assigns nothing.
    instance._jisyo_table: pygtrie.CharTrie
    return instance

def __init__(self):
Expand All @@ -107,6 +101,5 @@ def __init__(self):
with open(dictpath, "rb") as d:
self._jisyo_table = pickle.load(d)

def load(self, char: str):
key = ord(char[0])
return self._jisyo_table.get(key, None)
def search(self, key):
    """Look up the longest dictionary entry that is a prefix of *key*.

    Delegates to ``longest_prefix`` on the loaded ``_jisyo_table`` and
    returns its result unchanged.
    """
    table = self._jisyo_table
    return table.longest_prefix(key)

0 comments on commit ac8653e

Please sign in to comment.