Skip to content
This repository has been archived by the owner on Nov 30, 2023. It is now read-only.

Commit

Permalink
Work in progress (Part Deux)
Browse files Browse the repository at this point in the history
  • Loading branch information
jdlorimer committed Jan 10, 2019
1 parent d7fb084 commit 6640223
Show file tree
Hide file tree
Showing 22 changed files with 424 additions and 495 deletions.
1 change: 1 addition & 0 deletions .gitignore
@@ -1,5 +1,6 @@
*~
.coverage
.hypothesis/
.mypy_cache/
.prospector.yaml
.python-version
Expand Down
2 changes: 1 addition & 1 deletion .travis.yml
Expand Up @@ -7,7 +7,7 @@ before_install:
- pip install --upgrade pytest

install:
- pip install pytest-cov python-coveralls
- pip install hypothesis pytest-cov python-coveralls

before_script:
- export PYTHONPATH=.
Expand Down
39 changes: 17 additions & 22 deletions chinese/behavior.py
Expand Up @@ -18,13 +18,13 @@

from .bopomofo import bopomofo
from .color import colorize, colorize_dict, colorize_fuse
from .hanzi import separate_chars, silhouette, simplify, traditional
from .hanzi import get_silhouette, get_simp, get_trad, split_hanzi
from .main import config, dictionary
from .ruby import hide_ruby, ruby
from .sound import no_sound, sound
from .transcribe import accentuate, no_tone, separate_trans, transcribe
from .transcribe import accentuate, no_tone, split_transcript, transcribe
from .translate import translate
from .util import cleanup, get_first, has_field, hide, set_all
from .util import cleanup, erase_fields, get_first, has_field, hide, set_all


def get_classifier(hanzi, note):
Expand Down Expand Up @@ -95,14 +95,14 @@ def fill_all_defs(hanzi, note):


def fill_silhouette(hanzi, note):
m = silhouette(hanzi)
m = get_silhouette(hanzi)
set_all(config['fields']['silhouette'], note, to=m)


def format_transcription(note):
def format_transcript(note):
t = colorize(
accentuate(
separate_trans(
split_transcript(
cleanup(get_first(config['fields']['transcription'], note))
)
)
Expand All @@ -111,12 +111,12 @@ def format_transcription(note):
set_all(config['fields']['transcription'], note, to=t)


def fill_transcription(hanzi, note):
def fill_transcript(hanzi, note):
n_filled = 0
separated = separate_chars(hanzi)
separated = split_hanzi(hanzi)

for key, target, func, only_one in [
('transcription', None, format_transcription, True),
('transcription', None, format_transcript, True),
('pinyin', 'Pinyin', format_pinyin, True),
('pinyinTaiwan', 'Pinyin (Taiwan)', format_taiwan_pinyin, True),
('cantonese', 'Cantonese', format_cantonese, False),
Expand All @@ -136,7 +136,7 @@ def fill_transcription(hanzi, note):
def format_pinyin(note):
t = colorize(
accentuate(
separate_trans(
split_transcript(
cleanup(get_first(config['fields']['pinyin'], note)), True
)
)
Expand All @@ -148,7 +148,7 @@ def format_pinyin(note):
def format_taiwan_pinyin(note):
t = colorize(
accentuate(
separate_trans(
split_transcript(
cleanup(get_first(config['fields']['pinyinTaiwan'], note)),
True,
)
Expand All @@ -160,7 +160,7 @@ def format_taiwan_pinyin(note):

def format_cantonese(note):
t = colorize(
separate_trans(cleanup(get_first(config['fields']['cantonese'], note)))
split_transcript(cleanup(get_first(config['fields']['cantonese'], note)))
)
t = hide(t, no_tone(t))
set_all(config['fields']['cantonese'], note, to=t)
Expand All @@ -173,7 +173,7 @@ def fill_bopomofo(hanzi, note):
syllables = cleanup(field).split()
n_filled = 0
else:
syllables = transcribe(separate_chars(hanzi), 'Bopomofo')
syllables = transcribe(split_hanzi(hanzi), 'Bopomofo')
n_filled = 1

text = colorize(syllables)
Expand Down Expand Up @@ -216,7 +216,7 @@ def fill_simp(hanzi, note):
if not get_first(config['fields']['simplified'], note) == '':
return

s = simplify(hanzi)
s = get_simp(hanzi)
if s is not None and s != hanzi:
set_all(config['fields']['simplified'], note, to=s)
else:
Expand All @@ -227,7 +227,7 @@ def fill_trad(hanzi, note):
if not get_first(config['fields']['traditional'], note) == '':
return

t = traditional(hanzi)
t = get_trad(hanzi)
if t is not None and t != hanzi:
set_all(config['fields']['traditional'], note, to=t)
else:
Expand Down Expand Up @@ -278,11 +278,6 @@ def fill_all_rubies(hanzi, note):
set_all(config['fields'][ruby_field], note, to=rubified)


def erase_fields(note):
    """Set every configured field group on *note* to the empty string.

    Iterates over the values of ``config['fields']`` and hands each one to
    ``set_all`` with ``to=''``.
    """
    field_groups = config['fields'].values()
    for group in field_groups:
        set_all(group, note, to='')


def update_fields(note, focus_field, fields):
if 'addon' in note.model():
model = note.model()['addon']
Expand All @@ -308,7 +303,7 @@ def update_fields(note, focus_field, fields):
elif focus_field in config['fields']['hanzi']:
if copy[focus_field]:
fill_all_defs(hanzi, copy)
fill_transcription(hanzi, copy)
fill_transcript(hanzi, copy)
fill_color(hanzi, copy)
fill_sound(hanzi, copy)
fill_simp(hanzi, copy)
Expand All @@ -318,7 +313,7 @@ def update_fields(note, focus_field, fields):
else:
erase_fields(copy)
elif focus_field in config['fields']['transcription']:
format_transcription(copy)
format_transcript(copy)
fill_color(hanzi, copy)
fill_all_rubies(hanzi, copy)
elif focus_field in config['fields']['pinyin']:
Expand Down
72 changes: 30 additions & 42 deletions chinese/color.py
Expand Up @@ -18,10 +18,17 @@

from re import IGNORECASE, sub

from .consts import pinyin_regex, half_ruby_regex, ruby_regex
from .hanzi import separate_chars
from .consts import (
COLOR_RUBY_TEMPLATE,
COLOR_TEMPLATE,
pinyin_regex,
half_ruby_regex,
HANZI_RANGE,
ruby_regex,
)
from .hanzi import split_hanzi
from .sound import extract_sound_tags
from .transcribe import accentuate, separate_trans, tone_number
from .transcribe import tone_number, sanitize_transcript
from .util import align, cleanup, is_punc, no_color


Expand Down Expand Up @@ -64,41 +71,7 @@ def repl(p):
return ' '.join(colorized)


def colorize_fuse(chars, trans, ruby=False):
    """Colorize hanzi based on pinyin tone.

    If ruby=True, then annotate hanzi with pinyin.

    The hanzi are split one by one (``grouped=False``) and paired with the
    sanitized transcription through ``align``. Pairs with a missing side are
    dropped; pairs where both sides are punctuation emit the character as-is.
    """

    plain_template = '<span class="tone{tone}">{chars}</span>'
    ruby_template = (
        '<span class="tone{tone}"><ruby>{chars}<rt>{trans}</rt></ruby></span>'
    )

    hanzi_units = separate_chars(cleanup(chars), grouped=False)
    syllables = sanitize_pinyin(trans)
    pieces = []

    for hanzi, syllable in align(hanzi_units, syllables):
        if hanzi is None or syllable is None:
            # align produced an unmatched item; nothing to colorize.
            continue
        if is_punc(hanzi) and is_punc(syllable):
            pieces.append(hanzi)
        elif ruby:
            pieces.append(
                ruby_template.format(
                    tone=tone_number(syllable), chars=hanzi, trans=syllable
                )
            )
        else:
            pieces.append(
                plain_template.format(tone=tone_number(syllable), chars=hanzi)
            )

    return ''.join(pieces)


def colorize_dict(text):
"""Colorize text in the form: 你好[ni3 hao].
As used in the local dictionaries.
"""

def _sub(p):
s = ''
hanzi = p.group(1)
Expand All @@ -115,10 +88,25 @@ def _sub(p):

return s

return sub(r'([\u3400-\u9fff|]+)\[(.*?)\]', _sub, text)
return sub(r'([\%s|]+)\[(.*?)\]' % HANZI_RANGE, _sub, text)


def sanitize_pinyin(pinyin, grouped=False):
    """Normalize a pinyin string into a flat list of whitespace-free tokens.

    The input goes through ``no_color`` and ``cleanup``, is split with
    ``separate_trans`` (honoring *grouped*), and is passed to ``accentuate``.
    The result is joined on spaces and split again on whitespace, so any
    token that still contains spaces is broken into separate list items.
    """
    stripped = cleanup(no_color(pinyin))
    syllables = accentuate(separate_trans(stripped, grouped))
    return ' '.join(syllables).split()
def colorize_fuse(chars, transcript, ruby=False):
    """Colorize hanzi according to the tone of the matching syllable.

    *chars* is cleaned up and split one hanzi at a time (``grouped=False``);
    *transcript* is normalized with ``sanitize_transcript``. The two
    sequences are paired through ``align`` and each pair is rendered as:

    * nothing, when either side of the pair is ``None``;
    * the bare character, when both sides are punctuation;
    * ``COLOR_RUBY_TEMPLATE`` (character plus transcript) when *ruby* is
      true, otherwise ``COLOR_TEMPLATE`` (character only).

    Returns the rendered pieces concatenated into a single string.
    """
    hanzi_units = split_hanzi(cleanup(chars), grouped=False)
    syllables = sanitize_transcript(transcript)
    pieces = []

    for hanzi, syllable in align(hanzi_units, syllables):
        if hanzi is None or syllable is None:
            # Unmatched item from align; there is nothing to colorize.
            continue
        if is_punc(hanzi) and is_punc(syllable):
            pieces.append(hanzi)
        elif ruby:
            pieces.append(
                COLOR_RUBY_TEMPLATE.format(
                    tone=tone_number(syllable),
                    chars=hanzi,
                    transcript=syllable,
                )
            )
        else:
            pieces.append(
                COLOR_TEMPLATE.format(tone=tone_number(syllable), chars=hanzi)
            )

    return ''.join(pieces)
58 changes: 13 additions & 45 deletions chinese/config.json
Expand Up @@ -8,9 +8,9 @@
"transcription": "Pinyin",
"fields": {
"hanzi": [
"Hanzi",
"Chinese",
"Expression",
"Hanzi",
"中文",
"汉字",
"漢字"
Expand All @@ -31,8 +31,8 @@
"英语"
],
"german": [
"Deutsch",
"German",
"Deutsch",
"德文",
"德語",
"德语"
Expand All @@ -48,15 +48,13 @@
"Reading"
],
"pinyin": [
"PY",
"Pinyin",
"大陆拼音",
"大陸拼音",
"拼音"
],
"pinyinTaiwan": [
"PYTW",
"PinyinTW",
"Pinyin (Taiwan)",
"台湾拼音",
"台灣拼音",
"臺灣拼音"
Expand All @@ -76,97 +74,67 @@
],
"bopomofo": [
"Bopomofo",
"Zhuyin",
"ㄅㄆㄇㄈ",
"注音符号",
"注音符號",
"註音符號"
],
"sound": [
"Audio",
"Sound",
"Spoken",
"Audio",
"声音",
"聲音"
],
"mandarinSound": [
"Sound (Mandarin)",
"Sound - Mandarin"
],
"cantoneseSound": [
"Sound (Cantonese)",
"Sound - Cantonese"
],
"simplified": [
"Simp",
"Simp.",
"Simplified",
"简体",
"简体字",
"简化",
"简化字",
"簡化",
"簡化字",
"簡體",
"簡體字"
],
"traditional": [
"Trad",
"Trad.",
"Traditional",
"繁体",
"繁体字",
"繁體",
"繁體字"
],
"classifier": [
"Classifier",
"MW",
"Mean Word",
"Mean",
"Measure Word",
"量詞",
"量词"
],
"alternative": [
"Also Written",
"Alt",
"Alternative"
],
"color": [
"Color",
"Colored Hanzi",
"Colour",
"Coloured Hanzi",
"彩色"
],
"colorPinyin": [
"ColorPY",
"ColourPY"
],
"colorPinyinTaiwan": [
"ColorPYTW",
"ColourPYTW"
],
"colorCantonese": [
"ColorCANT",
"ColourCANT"
],
"colorBopomofo": [
"ColorBPMF",
"ColourBPMF"
],
"ruby": [
"Ruby"
],
"rubyPinyin": [
"RubyPY"
"Ruby (Pinyin)"
],
"rubyPinyinTaiwan": [
"RubyPYTW"
],
"rubyCantonese": [
"RubyCANT"
"Ruby (Taiwan Pinyin)"
],
"rubyBopomofo": [
"RubyBPMF"
"Ruby (Bopomofo)"
],
"rubyCantonese": [
"Ruby (Cantonese)"
],
"silhouette": [
"Silhouette"
Expand Down

0 comments on commit 6640223

Please sign in to comment.