From f1851b9271e36986974722ffdaf0e52426fe021a Mon Sep 17 00:00:00 2001 From: luozijun Date: Mon, 3 Jun 2019 23:28:07 +0800 Subject: [PATCH] update. --- crates/syllable/{src => scripts}/gen.py | 0 crates/syllable/scripts/pinyin_table_gen.rs | 249 ++++++++++++++++ crates/syllable/src/initial.rs | 1 + crates/syllable/src/letter.rs | 298 +++++++++++++++++--- crates/syllable/src/lib.rs | 67 +++++ crates/syllable/src/main.rs | 219 ++------------ crates/syllable/src/pinyin.rs | 289 +++++++++++++------ crates/syllable/src/rhyme.rs | 76 ++--- crates/syllable/src/tone.rs | 24 +- 9 files changed, 861 insertions(+), 362 deletions(-) rename crates/syllable/{src => scripts}/gen.py (100%) create mode 100644 crates/syllable/scripts/pinyin_table_gen.rs diff --git a/crates/syllable/src/gen.py b/crates/syllable/scripts/gen.py similarity index 100% rename from crates/syllable/src/gen.py rename to crates/syllable/scripts/gen.py diff --git a/crates/syllable/scripts/pinyin_table_gen.rs b/crates/syllable/scripts/pinyin_table_gen.rs new file mode 100644 index 0000000..a206163 --- /dev/null +++ b/crates/syllable/scripts/pinyin_table_gen.rs @@ -0,0 +1,249 @@ + + +pub(crate) const ________: &'static str = " "; + +// ∅ b p m f d t n l g k h j q x zh ch sh r z c s +pub static PINYIN_TABLE: [&'static str; 924] = [ +// ∅, b, p, m, f, d, t, n, l, g, k, h, j, q, x, zh, ch, sh, r, z, c, s, +// Group a Finals + ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, "zhi", "chi", "shi", "ri", "zi", "ci", "si", + "a", "ba", "pa", "ma", "fa", "da", "ta", "na", "la", "ga", "ka", "ha", ________, ________, ________, "zha", "cha", "sha", ________, "za", "ca", "sa", + "e", ________, ________, "me", ________, "de", "te", "ne", "le", "ge", "ke", "he", ________, ________, ________, "zhe", "che", "she", "re", "ze", "ce", "se", + "ê", ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, + "er", ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, + "ai", "bai", "pai", "mai", ________, "dai", "tai", "nai", "lai", "gai", "kai", "hai", ________, ________, ________, "zhai", "chai", "shai", ________, "zai", "cai", "sai", + "ei", "bei", "pei", "mei", "fei", "dei", "tei", "nei", "lei", "gei", "kei", "hei", ________, ________, ________, "zhei", ________, "shei", ________, "zei", ________, "sei", + "ao", "bao", "pao", "mao", ________, "dao", "tao", "nao", "lao", "gao", "kao", "hao", ________, ________, ________, "zhao", "chao", "shao", "rao", "zao", "cao", "sao", + "ou", ________, "pou", "mou", "fou", "dou", "tou", "nou", "lou", "gou", "kou", "hou", ________, ________, ________, "zhou", "chou", "shou", "rou", "zou", "cou", "sou", + "an", "ban", "pan", "man", "fan", "dan", "tan", "nan", "lan", "gan", "kan", "han", ________, ________, ________, "zhan", "chan", "shan", "ran", "zan", "can", "san", + "en", "ben", "pen", "men", "fen", "den", ________, "nen", ________, "gen", "ken", "hen", ________, ________, ________, "zhen", "chen", "shen", "ren", "zen", "cen", "sen", + "ang", "bang", "pang", "mang", "fang", "dang", "tang", "nang", "lang", "gang", "kang", "hang", ________, ________, ________, "zhang", "chang", "shang", "rang", "zang", "cang", "sang", + "eng", "beng", "peng", "meng", "feng", "deng", "teng", "neng", "leng", "geng", "keng", "heng", ________, ________, ________, "zheng", "cheng", "sheng", "reng", "zeng", "ceng", "seng", + // 注: "ong" 不存在 + +// Group i Finals + "yi", "bi", "pi", "mi", ________, "di", "ti", "ni", "li", ________, ________, ________, "ji", "qi", "xi", ________, ________, ________, ________, ________, ________, ________, + "ya", ________, ________, ________, ________, "dia", ________, "nia", "lia", ________, ________, ________, "jia", "qia", "xia", ________, ________, ________, ________, ________, ________, ________, + "yo", ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, + "ye", "bie", "pie", "mie", ________, "die", "tie", "nie", "lie", ________, ________, ________, "jie", "qie", "xie", ________, ________, ________, ________, ________, ________, ________, + // "yai", ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, + "yao", "biao", "piao", "miao", "fiao", "diao", "tiao", "niao", "liao", ________, ________, ________, "jiao", "qiao", "xiao", ________, ________, ________, ________, ________, ________, ________, + "you", ________, ________, "miu", ________, "diu", ________, "niu", "liu", ________, ________, ________, "jiu", "qiu", "xiu", ________, ________, ________, ________, ________, ________, ________, + "yan", "bian", "pian", "mian", ________, "dian", "tian", "nian", "lian", ________, ________, ________, "jian", "qian", "xian", ________, ________, ________, ________, ________, ________, ________, + "yin", "bin", "pin", "min", ________, ________, ________, "nin", "lin", ________, ________, ________, "jin", "qin", "xin", ________, ________, ________, ________, ________, ________, ________, + "yang", "biang", ________, ________, ________, "diang", ________, "niang", "liang", ________, ________, ________, "jiang", "qiang", "xiang", ________, ________, ________, ________, ________, ________, ________, + "ying", "bing", "ping", "ming", ________, "ding", "ting", "ning", "ling", ________, ________, ________, "jing", "qing", "xing", ________, ________, ________, ________, ________, ________, ________, + "yong", ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, "jiong", "qiong", "xiong", ________, ________, ________, ________, ________, ________, ________, + +// Group u Finals + "wu", "bu", "pu", "mu", "fu", "du", "tu", "nu", "lu", "gu", "ku", "hu", ________, ________, ________, "zhu", "chu", "shu", "ru", "zu", "cu", "su", + "wa", ________, ________, ________, ________, ________, ________, ________, ________, "gua", "kua", "hua", ________, ________, ________, "zhua", "chua", "shua", "rua", ________, ________, ________, + "wo", "bo", "po", "mo", "fo", "duo", "tuo", "nuo", "luo", "guo", "kuo", "huo", ________, ________, ________, "zhuo", "chuo", "shuo", "ruo", "zuo", "cuo", "suo", + "wai", ________, ________, ________, ________, ________, ________, ________, ________, "guai", "kuai", "huai", ________, ________, ________, "zhuai", "chuai", "shuai", ________, ________, ________, ________, + "wei", ________, ________, ________, ________, "dui", "tui", ________, ________, "gui", "kui", "hui", ________, ________, ________, "zhui", "chui", "shui", "rui", "zui", "cui", "sui", + "wan", ________, ________, ________, ________, "duan", "tuan", "nuan", "luan", "guan", "kuan", "huan", ________, ________, ________, "zhuan", "chuan", "shuan", "ruan", "zuan", "cuan", "suan", + "wen", ________, ________, ________, ________, "dun", "tun", "nun", "lun", "gun", "kun", "hun", ________, ________, ________, "zhun", "chun", "shun", "run", "zun", "cun", "sun", + "wang", ________, ________, ________, ________, ________, ________, ________, ________, "guang", "kuang", "huang", ________, ________, ________, "zhuang", "chuang", "shuang", ________, ________, ________, ________, + "weng", ________, ________, ________, ________, "dong", "tong", "nong", "long", "gong", "kong", "hong", ________, ________, ________, "zhong", "chong", "shong", "rong", "zong", "cong", "song", + +// Group ü Finals + "yu", ________, ________, ________, ________, ________, ________, "nü", "lü", ________, ________, ________, "ju", "qu", "xu", ________, ________, ________, ________, ________, ________, ________, + "yue", ________, ________, ________, ________, ________, ________, "nüe", "lüe", ________, ________, ________, "jue", "que", "xue", ________, ________, ________, ________, ________, ________, ________, + "yuan", ________, ________, ________, ________, ________, ________, ________, "lüan", ________, ________, ________, "juan", "quan", "xuan", ________, ________, ________, ________, ________, ________, ________, + "yun", ________, ________, ________, ________, ________, ________, ________, "lün", ________, ________, ________, "jun", "qun", "xun", ________, ________, ________, ________, ________, ________, ________, + +// 儿化音节 (er) + "wor", "banr", "pir", "mianr", "fur", "dianr", "tangr", "nar", ________, "ger", "kour", "hair", "jinr", ________, "xiar", "zher", ________, "shir", ________, ________, ________, ________, + "wanr", ________, ________, ________, ________, "dingr", "tuir", "nür", ________, "ganr", "kongr", "haor", ________, ________, "xianr", ________, ________, "shuir", ________, ________, ________, ________, + ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, "huar", ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, + ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, "huor", ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, + ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, "huir", ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, + +]; + +// 汉语拼音中标声调位置的规则如下: +// +// 1. 如果有a,则标在a上。 +// 2. 如果没有a,但有o或e,则标在这两个字母上。这两个字母不会同时出现。 +// 3. 如果也没有o和e,则一定有i、u或ü。如果i和u同时出现,则标在第二个韵母上。 +// 这是特别针对ui和iu而言的(这两个音的实际读音应该是uei和iou)。 +// 如果i和u不同时出现,则标在出现的那个韵母上。 +// +// "a", "ā", "á", "ǎ", "à", +// "e", "ē", "é", "ě", "è", +// "i", "ī", "í", "ǐ", "ì", +// "m", "m̄", "ḿ", "m̌", "m̀", +// "n", "n̄", "ń", "ň", "ǹ", +// "o", "ō", "ó", "ǒ", "ò", +// "u", "ū", "ú", "ǔ", "ù", +// "ê", "ê̄", "ế", "ê̌", "ề", +// "ü", "ǖ", "ǘ", "ǚ", "ǜ", + +fn mark(s: &str, ch_start: usize, ch_end: usize, tone: usize) -> String { + // vowel + // let a = [ "a", "ā", "á", "ǎ", "à", ]; + // let e = [ "e", "ē", "é", "ě", "è", ]; + // let i = [ "i", "ī", "í", "ǐ", "ì", ]; + // let m = [ "m", "m̄", "ḿ", "m̌", "m̀", ]; + // let n = [ "n", "n̄", "ń", "ň", "ǹ", ]; + // let o = [ "o", "ō", "ó", "ǒ", "ò", ]; + // let u = [ "u", "ū", "ú", "ǔ", "ù", ]; + // let e2 = [ "ê", "ê̄", "ế", "ê̌", "ề", ]; + // let u2 = [ "ü", "ǖ", "ǘ", "ǚ", "ǜ", ]; + + let a = [ "a", "a1", "a2", "a3", "a4", ]; + let e = [ "e", "e1", "e2", "e3", "e4", ]; + let i = [ "i", "i1", "i2", "i3", "i4", ]; + let m = [ "m", "m1", "m2", "m3", "m4", ]; + let n = [ "n", "n1", "n2", "n3", "n4", ]; + let o = [ "o", "o1", "o2", "o3", "o4", ]; + let u = [ "u", "ū", "ú", "ǔ", "ù", ]; + let e2 = [ "ê", "ê1", "ê2", "ê3", "ê4", ]; + let u2 = [ "ü", "ü1", "ü2", "ü3", "ü4", ]; + + let ch = &s[ch_start..ch_end]; + match ch { + "a" => s.replace(ch, a[tone]), + "e" => s.replace(ch, e[tone]), + "i" => s.replace(ch, i[tone]), + "m" => s.replace(ch, m[tone]), + "n" => s.replace(ch, n[tone]), + "o" => s.replace(ch, o[tone]), + "u" => s.replace(ch, u[tone]), + "ê" => s.replace(ch, e2[tone]), + "ü" => s.replace(ch, u2[tone]), + _ => unreachable!(), + } +} + + +fn codegen(tone_index: usize, style: [ [&'static str; 5]; 9]) { + let mut pos = 0usize; + for py in PINYIN_TABLE.iter() { + if pos == 0 { + print!(" "); + } + + if pos != 0 && pos % 22 == 0 { + print!("\n "); + } + + if py == &________ { + print!("________, "); + } else { + let a_index = py.find("a"); + let o_index = py.find("o"); + let e_index = py.find("e"); + + let i_index = py.find("i"); + let u_index = py.find("u"); + let u2_index = py.find("ü"); + + let (index, size) = if a_index.is_some() { + (a_index.unwrap(), 1) + } else if o_index.is_some() { + (o_index.unwrap(), 1) + } else if e_index.is_some() { + (e_index.unwrap(), 1) + } else { + if i_index.is_some() && u_index.is_some() { + assert!(u2_index.is_none()); + (std::cmp::max(i_index.unwrap(), u_index.unwrap()), 1) + } else { + if i_index.is_some() { + assert!(u_index.is_none()); + assert!(u2_index.is_none()); + + (i_index.unwrap(), 1) + } else if u_index.is_some() { + assert!(i_index.is_none()); + assert!(u2_index.is_none()); + + (u_index.unwrap(), 1) + } else if u2_index.is_some() { + assert!(i_index.is_none()); + assert!(u_index.is_none()); + + (u2_index.unwrap(), 2) + } else { + if let Some(index) = py.find("ê") { + (index, 2) + } else { + println!(" {:?} unreachable ...", py); + unreachable!() + } + } + } + }; + + let start = index; + let end = index + size; + + // vowel + let ch = &py[start..end]; + let pinyin_with_tone = match ch { + "a" => py.replace(ch, style[0][tone_index]), + "e" => py.replace(ch, style[1][tone_index]), + "i" => py.replace(ch, style[2][tone_index]), + "m" => py.replace(ch, style[3][tone_index]), + "n" => py.replace(ch, style[4][tone_index]), + "o" => py.replace(ch, style[5][tone_index]), + "u" => py.replace(ch, style[6][tone_index]), + "ê" => py.replace(ch, style[7][tone_index]), + "ü" => py.replace(ch, style[8][tone_index]), + _ => unreachable!(), + }; + print!("{:>8}, ", format!("\"{}\"", pinyin_with_tone)); + } + + pos += 1; + } + println!(); +} + +fn main() { + let styles = [ + [ + [ "a", "ā", "á", "ǎ", "à", ], + [ "e", "ē", "é", "ě", "è", ], + [ "i", "ī", "í", "ǐ", "ì", ], + [ "m", "m̄", "ḿ", "m̌", "m̀", ], + [ "n", "n̄", "ń", "ň", "ǹ", ], + [ "o", "ō", "ó", "ǒ", "ò", ], + [ "u", "ū", "ú", "ǔ", "ù", ], + [ "ê", "ê̄", "ế", "ê̌", "ề", ], + [ "ü", "ǖ", "ǘ", "ǚ", "ǜ", ], + ], + [ + [ "a", "a1", "a2", "a3", "a4", ], + [ "e", "e1", "e2", "e3", "e4", ], + [ "i", "i1", "i2", "i3", "i4", ], + [ "m", "m1", "m2", "m3", "m4", ], + [ "n", "n1", "n2", "n3", "n4", ], + [ "o", "o1", "o2", "o3", "o4", ], + [ "u", "u1", "u2", "u3", "u4", ], + [ "ê", "ê1", "ê2", "ê3", "ê4", ], + [ "ü", "ü1", "ü2", "ü3", "ü4", ], + ], + ]; + + println!("// 符号音调"); + println!("// 第一声: 音调符号标记形式"); + codegen(1, styles[0]); + println!("// 第二声: 音调符号标记形式"); + codegen(2, styles[0]); + println!("// 第三声: 音调符号标记形式"); + codegen(3, styles[0]); + println!("// 第四声: 音调符号标记形式"); + codegen(4, styles[0]); + + println!("// 数字音调"); + println!("// 第一声: 数字音调标记"); + codegen(1, styles[1]); + println!("// 第二声: 数字音调标记"); + codegen(2, styles[1]); + println!("// 第三声: 数字音调标记"); + codegen(3, styles[1]); + println!("// 第四声: 数字音调标记"); + codegen(4, styles[1]); +} \ No newline at end of file diff --git a/crates/syllable/src/initial.rs b/crates/syllable/src/initial.rs index e912f06..9fc8e26 100644 --- a/crates/syllable/src/initial.rs +++ b/crates/syllable/src/initial.rs @@ -14,6 +14,7 @@ pub static INITIAL_TABLE: [&'static str; 21] = [ "b", "p", "m", "f", "d", "t", "n", "l", "g", "k", "h", "j", "q", "x", "zh", "ch", "sh", "r", "z", "c", "s", + // "ẑ", "ĉ", "ŝ", ]; // 8 Bits diff --git a/crates/syllable/src/letter.rs b/crates/syllable/src/letter.rs index 1edf86b..44f3058 100644 --- a/crates/syllable/src/letter.rs +++ b/crates/syllable/src/letter.rs @@ -32,6 +32,15 @@ pub static SUPER_SCRIPT_TABLE: [&'static str; 10] = [ ]; +// "ê\u{304}", "ê\u{30c}", +// "ǹ", +// "ǹg", +// "ḿ", +// "ế", +// "ề" +// +// n,m,ng,hm,ê +// n、ng、m、hn、hng、hm、ê // 扬抑符 e-circumflex: ê Ê // 分音符 u-diaeresis: ü Ü @@ -40,15 +49,6 @@ pub static SUPER_SCRIPT_TABLE: [&'static str; 10] = [ // /// 拼音字母表 pub static LETTER_TABLE: [&'static str; 166] = [ - "0", "1", "2", "3", "4", - "5", "6", "7", "8", "9", - "⁰", "¹", "²", "³", "⁴", - "⁵", "⁶", "⁷", "⁸", "⁹", - // 音调符号 - "˙", "ˉ", "ˊ", "ˇ", "ˋ", - // 隔音符号 - "'", - // 声母表码位段 "b", "p", "m", "f", "d", "t", "n", "l", "g", "k", @@ -83,6 +83,15 @@ pub static LETTER_TABLE: [&'static str; 166] = [ "U", "Ū", "Ú", "Ǔ", "Ù", "Ê", "Ê̄", "Ế", "Ê̌", "Ề", "Ü", "Ǖ", "Ǘ", "Ǚ", "Ǜ", + + "0", "1", "2", "3", "4", + "5", "6", "7", "8", "9", + "⁰", "¹", "²", "³", "⁴", + "⁵", "⁶", "⁷", "⁸", "⁹", + // 音调符号 + "˙", "ˉ", "ˊ", "ˇ", "ˋ", + // 隔音符号 + "'", ]; @@ -93,7 +102,7 @@ pub struct Letter(pub(crate) u8); impl Letter { pub const MIN: Self = Letter(0); - pub const MAX: Self = Letter(140); + pub const MAX: Self = Letter(166); #[inline] pub fn as_str(&self) -> &'static str { @@ -132,14 +141,6 @@ impl Letter { unimplemented!() } - pub fn len_gbk(self) -> usize { - unimplemented!() - } - - pub fn len_gb2312(self) -> usize { - unimplemented!() - } - pub fn len_gb18030(self) -> usize { unimplemented!() } @@ -156,24 +157,63 @@ impl Letter { unimplemented!() } - pub fn encode_gbk(self, _dst: &mut [u8]) -> usize { + pub fn encode_gb18030(self, _dst: &mut [u8]) -> usize { unimplemented!() } - pub fn encode_gb2312(self, _dst: &mut [u8]) -> usize { - unimplemented!() + #[inline] + pub fn is_lowercase(self) -> bool { + // 5 * 14 = 70 + match self.0 { + 0 ..= 69 => true, + 70 ..= 139 => false, + _ => false, + } } - pub fn encode_gb18030(self, _dst: &mut [u8]) -> usize { - unimplemented!() + #[inline] + pub fn is_uppercase(self) -> bool { + match self.0 { + 0 ..= 69 => false, + 70 ..= 139 => true, + _ => false, + } } - pub fn is_lowercase(self) -> bool { - unimplemented!() + #[inline] + pub fn is_simplified(&self) -> bool { + // "ẑ", "ĉ", "ŝ", "ŋ" + // 21 22 23 24 + // "Ẑ", "Ĉ", "Ŝ", "Ŋ", + // 96 97 98 99 + match self.0 { + 21 ..= 24 => true, + 96 ..= 99 => true, + _ => false, + } } - pub fn is_uppercase(self) -> bool { - unimplemented!() + #[inline] + pub fn is_primitive(&self) -> bool { + !self.is_simplified() + } + + #[inline] + pub fn to_lowercase(&self) -> Self { + match self.0 { + 0 ..= 69 => *self, + 70 ..= 139 => Self(self.0 - 70), + _ => *self, + } + } + + #[inline] + pub fn to_uppercase(&self) -> Self { + match self.0 { + 0 ..= 69 => Self(self.0 + 70), + 70 ..= 139 => *self, + _ => *self, + } } pub fn is_id_start(self) -> bool { @@ -192,19 +232,205 @@ impl Letter { unimplemented!() } - - + // 包括 `w/W/y/Y` pub fn is_initial(self) -> bool { - unimplemented!() - } - pub fn is_final(self) -> bool { - unimplemented!() + match self.0 { + 0 ..= 19 => true, + 21 ..= 23 => true, // "ẑ", "ĉ", "ŝ" + 70 ..= 89 => true, + 96 ..= 98 => true, // "Ẑ", "Ĉ", "Ŝ" + _ => false, + } } pub fn is_vowel(self) -> bool { - unimplemented!() + match self.0 { + 25 ..= 69 => true, + 95 ..= 139 => true, + _ => false, + } } - pub fn is_rhyme(self) -> bool { - unimplemented!() +} + + +impl std::str::FromStr for Letter { + type Err = (); + + fn from_str(s: &str) -> Result { + match s { + "b" => Ok(Letter(0)), + "p" => Ok(Letter(1)), + "m" => Ok(Letter(2)), + "f" => Ok(Letter(3)), + "d" => Ok(Letter(4)), + "t" => Ok(Letter(5)), + "n" => Ok(Letter(6)), + "l" => Ok(Letter(7)), + "g" => Ok(Letter(8)), + "k" => Ok(Letter(9)), + "h" => Ok(Letter(10)), + "j" => Ok(Letter(11)), + "q" => Ok(Letter(12)), + "x" => Ok(Letter(13)), + "r" => Ok(Letter(14)), + "z" => Ok(Letter(15)), + "c" => Ok(Letter(16)), + "s" => Ok(Letter(17)), + "w" => Ok(Letter(18)), + "y" => Ok(Letter(19)), + "v" => Ok(Letter(20)), + "ẑ" => Ok(Letter(21)), + "ĉ" => Ok(Letter(22)), + "ŝ" => Ok(Letter(23)), + "ŋ" => Ok(Letter(24)), + "a" => Ok(Letter(25)), + "ā" => Ok(Letter(26)), + "á" => Ok(Letter(27)), + "ǎ" => Ok(Letter(28)), + "à" => Ok(Letter(29)), + "e" => Ok(Letter(30)), + "ē" => Ok(Letter(31)), + "é" => Ok(Letter(32)), + "ě" => Ok(Letter(33)), + "è" => Ok(Letter(34)), + "i" => Ok(Letter(35)), + "ī" => Ok(Letter(36)), + "í" => Ok(Letter(37)), + "ǐ" => Ok(Letter(38)), + "ì" => Ok(Letter(39)), + "m" => Ok(Letter(40)), + "m̄" => Ok(Letter(41)), + "ḿ" => Ok(Letter(42)), + "m̌" => Ok(Letter(43)), + "m̀" => Ok(Letter(44)), + "n" => Ok(Letter(45)), + "n̄" => Ok(Letter(46)), + "ń" => Ok(Letter(47)), + "ň" => Ok(Letter(48)), + "ǹ" => Ok(Letter(49)), + "o" => Ok(Letter(50)), + "ō" => Ok(Letter(51)), + "ó" => Ok(Letter(52)), + "ǒ" => Ok(Letter(53)), + "ò" => Ok(Letter(54)), + "u" => Ok(Letter(55)), + "ū" => Ok(Letter(56)), + "ú" => Ok(Letter(57)), + "ǔ" => Ok(Letter(58)), + "ù" => Ok(Letter(59)), + "ê" => Ok(Letter(60)), + "ê̄" => Ok(Letter(61)), + "ế" => Ok(Letter(62)), + "ê̌" => Ok(Letter(63)), + "ề" => Ok(Letter(64)), + "ü" => Ok(Letter(65)), + "ǖ" => Ok(Letter(66)), + "ǘ" => Ok(Letter(67)), + "ǚ" => Ok(Letter(68)), + "ǜ" => Ok(Letter(69)), + "B" => Ok(Letter(70)), + "P" => Ok(Letter(71)), + "M" => Ok(Letter(72)), + "F" => Ok(Letter(73)), + "D" => Ok(Letter(74)), + "T" => Ok(Letter(75)), + "N" => Ok(Letter(76)), + "L" => Ok(Letter(77)), + "G" => Ok(Letter(78)), + "K" => Ok(Letter(79)), + "H" => Ok(Letter(80)), + "J" => Ok(Letter(81)), + "Q" => Ok(Letter(82)), + "X" => Ok(Letter(83)), + "R" => Ok(Letter(84)), + "Z" => Ok(Letter(85)), + "C" => Ok(Letter(86)), + "S" => Ok(Letter(87)), + "W" => Ok(Letter(88)), + "Y" => Ok(Letter(89)), + "V" => Ok(Letter(90)), + "Ẑ" => Ok(Letter(91)), + "Ĉ" => Ok(Letter(92)), + "Ŝ" => Ok(Letter(93)), + "Ŋ" => Ok(Letter(94)), + "A" => Ok(Letter(95)), + "Ā" => Ok(Letter(96)), + "Á" => Ok(Letter(97)), + "Ǎ" => Ok(Letter(98)), + "À" => Ok(Letter(99)), + "E" => Ok(Letter(100)), + "Ē" => Ok(Letter(101)), + "É" => Ok(Letter(102)), + "Ě" => Ok(Letter(103)), + "È" => Ok(Letter(104)), + "I" => Ok(Letter(105)), + "Ī" => Ok(Letter(106)), + "Í" => Ok(Letter(107)), + "Ǐ" => Ok(Letter(108)), + "Ì" => Ok(Letter(109)), + "M" => Ok(Letter(110)), + "M̄" => Ok(Letter(111)), + "Ḿ" => Ok(Letter(112)), + "M̌" => Ok(Letter(113)), + "M̀" => Ok(Letter(114)), + "N" => Ok(Letter(115)), + "N̄" => Ok(Letter(116)), + "Ń" => Ok(Letter(117)), + "Ň" => Ok(Letter(118)), + "Ǹ" => Ok(Letter(119)), + "O" => Ok(Letter(120)), + "Ō" => Ok(Letter(121)), + "Ó" => Ok(Letter(122)), + "Ǒ" => Ok(Letter(123)), + "Ò" => Ok(Letter(124)), + "U" => Ok(Letter(125)), + "Ū" => Ok(Letter(126)), + "Ú" => Ok(Letter(127)), + "Ǔ" => Ok(Letter(128)), + "Ù" => Ok(Letter(129)), + "Ê" => Ok(Letter(130)), + "Ê̄" => Ok(Letter(131)), + "Ế" => Ok(Letter(132)), + "Ê̌" => Ok(Letter(133)), + "Ề" => Ok(Letter(134)), + "Ü" => Ok(Letter(135)), + "Ǖ" => Ok(Letter(136)), + "Ǘ" => Ok(Letter(137)), + "Ǚ" => Ok(Letter(138)), + "Ǜ" => Ok(Letter(139)), + "0" => Ok(Letter(140)), + "1" => Ok(Letter(141)), + "2" => Ok(Letter(142)), + "3" => Ok(Letter(143)), + "4" => Ok(Letter(144)), + "5" => Ok(Letter(145)), + "6" => Ok(Letter(146)), + "7" => Ok(Letter(147)), + "8" => Ok(Letter(148)), + "9" => Ok(Letter(149)), + "⁰" => Ok(Letter(150)), + "¹" => Ok(Letter(151)), + "²" => Ok(Letter(152)), + "³" => Ok(Letter(153)), + "⁴" => Ok(Letter(154)), + "⁵" => Ok(Letter(155)), + "⁶" => Ok(Letter(156)), + "⁷" => Ok(Letter(157)), + "⁸" => Ok(Letter(158)), + "⁹" => Ok(Letter(159)), + "˙" => Ok(Letter(160)), + "ˉ" => Ok(Letter(161)), + "ˊ" => Ok(Letter(162)), + "ˇ" => Ok(Letter(163)), + "ˋ" => Ok(Letter(164)), + "'" => Ok(Letter(165)), + _ => Err(()), + } } +} + + +#[test] +fn test_letter() { + } \ No newline at end of file diff --git a/crates/syllable/src/lib.rs b/crates/syllable/src/lib.rs index ff10868..b7f9cfe 100644 --- a/crates/syllable/src/lib.rs +++ b/crates/syllable/src/lib.rs @@ -9,3 +9,70 @@ pub mod pinyin; pub(crate) const SP: &'static str = " "; + + +// LetterIter +pub struct Letters { + +} + +impl Iterator for Letters { + type Item = letter::Letter; + + fn next(&mut self) -> Option { + unimplemented!() + } +} + + +pub trait Syllable { + // 拼音表偏移量 + fn offset(&self) -> u16; + + // 音调 + fn tone(&self) -> tone::Tone; + // 拼音音调标记形式 + fn tone_format(&self) -> tone::ToneFormat; + + // 元音字母 + fn vowel(&self) -> letter::Letter; + // 携带音调的元音字母 + fn tone_mark(&self) -> letter::Letter; + + // 松散数据 + // 声母部分 + fn initials(&self) -> Option<&'static str>; // 支持输出 Y/W + // 韵母部分,改写或补写后的形式 + // 不携带声调 以及简写形式字母 + fn finals(&self) -> &'static str; + + // 结构化数据 + // 声母 + fn consonant(&self) -> Option; // 不支持 Y/W + // 韵母 (原始形式) + fn rhyme(&self) -> rhyme::Rhyme; + + // 儿化音结尾 ? + fn is_er(&self) -> bool; + + // 是否为简写形式 + fn is_simplified(&self) -> bool; + + // 拼音字母列表 + fn letters(&self) -> Letters; // 类似于 String::chars() -> Chars + + // 不带声调以及不使用简写字母的形式 + fn plain(&self) -> Self; + // 转换为简写形式 + fn simplified(&self) -> Self; + + // 输出 + fn as_str(&self) -> &'static str; + + // 重设音调 + fn with_tone(&self, tone: tone::Tone) -> Self; + // 重设音调标记形式 + fn with_tone_fotmat(&self, tone_format: tone::ToneFormat) -> Self; + // 重设音调以及音调标记形式 + fn formart(&self, tone: tone::Tone, tone_format: tone::ToneFormat) -> Self; +} diff --git a/crates/syllable/src/main.rs b/crates/syllable/src/main.rs index 66408f6..2dd06bc 100644 --- a/crates/syllable/src/main.rs +++ b/crates/syllable/src/main.rs @@ -1,199 +1,30 @@ -extern crate syllable; +#[allow(unused_imports)] -pub(crate) const ________: &'static str = " "; +use syllable::pinyin::{ PinYin, PINYIN_TABLE, }; +use syllable::tone::{ Tone, ToneFormat, }; -// ∅ b p m f d t n l g k h j q x zh ch sh r z c s -pub static PINYIN_TABLE: [&'static str; 924] = [ -// ∅, b, p, m, f, d, t, n, l, g, k, h, j, q, x, zh, ch, sh, r, z, c, s, -// Group a Finals - ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, "zhi", "chi", "shi", "ri", "zi", "ci", "si", - "a", "ba", "pa", "ma", "fa", "da", "ta", "na", "la", "ga", "ka", "ha", ________, ________, ________, "zha", "cha", "sha", ________, "za", "ca", "sa", - "e", ________, ________, "me", ________, "de", "te", "ne", "le", "ge", "ke", "he", ________, ________, ________, "zhe", "che", "she", "re", "ze", "ce", "se", - "ê", ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, - "er", ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, - "ai", "bai", "pai", "mai", ________, "dai", "tai", "nai", "lai", "gai", "kai", "hai", ________, ________, ________, "zhai", "chai", "shai", ________, "zai", "cai", "sai", - "ei", "bei", "pei", "mei", "fei", "dei", "tei", "nei", "lei", "gei", "kei", "hei", ________, ________, ________, "zhei", ________, "shei", ________, "zei", ________, "sei", - "ao", "bao", "pao", "mao", ________, "dao", "tao", "nao", "lao", "gao", "kao", "hao", ________, ________, ________, "zhao", "chao", "shao", "rao", "zao", "cao", "sao", - "ou", ________, "pou", "mou", "fou", "dou", "tou", "nou", "lou", "gou", "kou", "hou", ________, ________, ________, "zhou", "chou", "shou", "rou", "zou", "cou", "sou", - "an", "ban", "pan", "man", "fan", "dan", "tan", "nan", "lan", "gan", "kan", "han", ________, ________, ________, "zhan", "chan", "shan", "ran", "zan", "can", "san", - "en", "ben", "pen", "men", "fen", "den", ________, "nen", ________, "gen", "ken", "hen", ________, ________, ________, "zhen", "chen", "shen", "ren", "zen", "cen", "sen", - "ang", "bang", "pang", "mang", "fang", "dang", "tang", "nang", "lang", "gang", "kang", "hang", ________, ________, ________, "zhang", "chang", "shang", "rang", "zang", "cang", "sang", - "eng", "beng", "peng", "meng", "feng", "deng", "teng", "neng", "leng", "geng", "keng", "heng", ________, ________, ________, "zheng", "cheng", "sheng", "reng", "zeng", "ceng", "seng", - // 注: "ong" 不存在 - -// Group i Finals - "yi", "bi", "pi", "mi", ________, "di", "ti", "ni", "li", ________, ________, ________, "ji", "qi", "xi", ________, ________, ________, ________, ________, ________, ________, - "ya", ________, ________, ________, ________, "dia", ________, "nia", "lia", ________, ________, ________, "jia", "qia", "xia", ________, ________, ________, ________, ________, ________, ________, - "yo", ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, - "ye", "bie", "pie", "mie", ________, "die", "tie", "nie", "lie", ________, ________, ________, "jie", "qie", "xie", ________, ________, ________, ________, ________, ________, ________, - // "yai", ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, - "yao", "biao", "piao", "miao", "fiao", "diao", "tiao", "niao", "liao", ________, ________, ________, "jiao", "qiao", "xiao", ________, ________, ________, ________, ________, ________, ________, - "you", ________, ________, "miu", ________, "diu", ________, "niu", "liu", ________, ________, ________, "jiu", "qiu", "xiu", ________, ________, ________, ________, ________, ________, ________, - "yan", "bian", "pian", "mian", ________, "dian", "tian", "nian", "lian", ________, ________, ________, "jian", "qian", "xian", ________, ________, ________, ________, ________, ________, ________, - "yin", "bin", "pin", "min", ________, ________, ________, "nin", "lin", ________, ________, ________, "jin", "qin", "xin", ________, ________, ________, ________, ________, ________, ________, - "yang", "biang", ________, ________, ________, "diang", ________, "niang", "liang", ________, ________, ________, "jiang", "qiang", "xiang", ________, ________, ________, ________, ________, ________, ________, - "ying", "bing", "ping", "ming", ________, "ding", "ting", "ning", "ling", ________, ________, ________, "jing", "qing", "xing", ________, ________, ________, ________, ________, ________, ________, - "yong", ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, "jiong", "qiong", "xiong", ________, ________, ________, ________, ________, ________, ________, - -// Group u Finals - "wu", "bu", "pu", "mu", "fu", "du", "tu", "nu", "lu", "gu", "ku", "hu", ________, ________, ________, "zhu", "chu", "shu", "ru", "zu", "cu", "su", - "wa", ________, ________, ________, ________, ________, ________, ________, ________, "gua", "kua", "hua", ________, ________, ________, "zhua", "chua", "shua", "rua", ________, ________, ________, - "wo", "bo", "po", "mo", "fo", "duo", "tuo", "nuo", "luo", "guo", "kuo", "huo", ________, ________, ________, "zhuo", "chuo", "shuo", "ruo", "zuo", "cuo", "suo", - "wai", ________, ________, ________, ________, ________, ________, ________, ________, "guai", "kuai", "huai", ________, ________, ________, "zhuai", "chuai", "shuai", ________, ________, ________, ________, - "wei", ________, ________, ________, ________, "dui", "tui", ________, ________, "gui", "kui", "hui", ________, ________, ________, "zhui", "chui", "shui", "rui", "zui", "cui", "sui", - "wan", ________, ________, ________, ________, "duan", "tuan", "nuan", "luan", "guan", "kuan", "huan", ________, ________, ________, "zhuan", "chuan", "shuan", "ruan", "zuan", "cuan", "suan", - "wen", ________, ________, ________, ________, "dun", "tun", "nun", "lun", "gun", "kun", "hun", ________, ________, ________, "zhun", "chun", "shun", "run", "zun", "cun", "sun", - "wang", ________, ________, ________, ________, ________, ________, ________, ________, "guang", "kuang", "huang", ________, ________, ________, "zhuang", "chuang", "shuang", ________, ________, ________, ________, - "weng", ________, ________, ________, ________, "dong", "tong", "nong", "long", "gong", "kong", "hong", ________, ________, ________, "zhong", "chong", "shong", "rong", "zong", "cong", "song", - -// Group ü Finals - "yu", ________, ________, ________, ________, ________, ________, "nü", "lü", ________, ________, ________, "ju", "qu", "xu", ________, ________, ________, ________, ________, ________, ________, - "yue", ________, ________, ________, ________, ________, ________, "nüe", "lüe", ________, ________, ________, "jue", "que", "xue", ________, ________, ________, ________, ________, ________, ________, - "yuan", ________, ________, ________, ________, ________, ________, ________, "lüan", ________, ________, ________, "juan", "quan", "xuan", ________, ________, ________, ________, ________, ________, ________, - "yun", ________, ________, ________, ________, ________, ________, ________, "lün", ________, ________, ________, "jun", "qun", "xun", ________, ________, ________, ________, ________, ________, ________, - -// 儿化音节 (er) - "wor", "banr", "pir", "mianr", "fur", "dianr", "tangr", "nar", ________, "ger", "kour", "hair", "jinr", ________, "xiar", "zher", ________, "shir", ________, ________, ________, ________, - "wanr", ________, ________, ________, ________, "dingr", "tuir", "nür", ________, "ganr", "kongr", "haor", ________, ________, "xianr", ________, ________, "shuir", ________, ________, ________, ________, - ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, "huar", ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, - ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, "huor", ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, - ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, "huir", ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, - -]; - -// 汉语拼音中标声调位置的规则如下: -// -// 1. 如果有a,则标在a上。 -// 2. 如果没有a,但有o或e,则标在这两个字母上。这两个字母不会同时出现。 -// 3. 如果也没有o和e,则一定有i、u或ü。如果i和u同时出现,则标在第二个韵母上。 -// 这是特别针对ui和iu而言的(这两个音的实际读音应该是uei和iou)。 -// 如果i和u不同时出现,则标在出现的那个韵母上。 -// -// "a", "ā", "á", "ǎ", "à", -// "e", "ē", "é", "ě", "è", -// "i", "ī", "í", "ǐ", "ì", -// "m", "m̄", "ḿ", "m̌", "m̀", -// "n", "n̄", "ń", "ň", "ǹ", -// "o", "ō", "ó", "ǒ", "ò", -// "u", "ū", "ú", "ǔ", "ù", -// "ê", "ê̄", "ế", "ê̌", "ề", -// "ü", "ǖ", "ǘ", "ǚ", "ǜ", - -fn mark(s: &str, ch_start: usize, ch_end: usize, tone: usize) -> String { - // vowel - // let a = [ "a", "ā", "á", "ǎ", "à", ]; - // let e = [ "e", "ē", "é", "ě", "è", ]; - // let i = [ "i", "ī", "í", "ǐ", "ì", ]; - // let m = [ "m", "m̄", "ḿ", "m̌", "m̀", ]; - // let n = [ "n", "n̄", "ń", "ň", "ǹ", ]; - // let o = [ "o", "ō", "ó", "ǒ", "ò", ]; - // let u = [ "u", "ū", "ú", "ǔ", "ù", ]; - // let e2 = [ "ê", "ê̄", "ế", "ê̌", "ề", ]; - // let u2 = [ "ü", "ǖ", "ǘ", "ǚ", "ǜ", ]; - - let a = [ "a", "a1", "a2", "a3", "a4", ]; - let e = [ "e", "e1", "e2", "e3", "e4", ]; - let i = [ "i", "i1", "i2", "i3", "i4", ]; - let m = [ "m", "m1", "m2", "m3", "m4", ]; - let n = [ "n", "n1", "n2", "n3", "n4", ]; - let o = [ "o", "o1", "o2", "o3", "o4", ]; - let u = [ "u", "ū", "ú", "ǔ", "ù", ]; - let e2 = [ "ê", "ê1", "ê2", "ê3", "ê4", ]; - let u2 = [ "ü", "ü1", "ü2", "ü3", "ü4", ]; - - let ch = &s[ch_start..ch_end]; - match ch { - "a" => s.replace(ch, a[tone]), - "e" => s.replace(ch, e[tone]), - "i" => s.replace(ch, i[tone]), - "m" => s.replace(ch, m[tone]), - "n" => s.replace(ch, n[tone]), - "o" => s.replace(ch, o[tone]), - "u" => s.replace(ch, u[tone]), - "ê" => s.replace(ch, e2[tone]), - "ü" => s.replace(ch, u2[tone]), - _ => unreachable!(), - } -} - - -fn codegen(tone_index: usize) { - // let mut tone1: Vec = vec![]; - // let mut tone2: Vec = vec![]; - // let mut tone3: Vec = vec![]; - // let mut tone4: Vec = vec![]; - - let mut pos = 0usize; - for py in PINYIN_TABLE.iter() { - if py != &" " { - let a_index = py.find("a"); - let o_index = py.find("o"); - let e_index = py.find("e"); - - let i_index = py.find("i"); - let u_index = py.find("u"); - let u2_index = py.find("ü"); - - let (index, size) = if a_index.is_some() { - (a_index.unwrap(), 1) - } else if o_index.is_some() { - (o_index.unwrap(), 1) - } else if e_index.is_some() { - (e_index.unwrap(), 1) - } else { - if i_index.is_some() && u_index.is_some() { - assert!(u2_index.is_none()); - (std::cmp::max(i_index.unwrap(), u_index.unwrap()), 1) - } else { - if i_index.is_some() { - assert!(u_index.is_none()); - assert!(u2_index.is_none()); - - (i_index.unwrap(), 1) - } else if u_index.is_some() { - assert!(i_index.is_none()); - assert!(u2_index.is_none()); - - (u_index.unwrap(), 1) - } else if u2_index.is_some() { - assert!(i_index.is_none()); - assert!(u_index.is_none()); - - (u2_index.unwrap(), 2) - } else { - if let Some(index) = py.find("ê") { - (index, 2) - } else { - println!(" {:?} unreachable ...", py); - unreachable!() - } - } - } - }; - - let start = index; - let end = index + size; - - print!("{:>8}, ", format!("\"{}\"", mark(py, start, end, tone_index)) ); - } else { - print!("________, "); - } - - if pos != 0 && pos % 22 == 21 { - print!("\n "); - } - - pos += 1; - } -} fn main() { - println!("// 第一声"); - codegen(1); - println!("// 第二声"); - codegen(2); - println!("// 第三声"); - codegen(3); - println!("// 第四声"); - codegen(4); + let s = PinYin::new_unchecked(21 << 1); + println!("{:?}", s); + println!("{:?}", s.consonant()); + + println!("{:?}", s.format(Tone::Second, ToneFormat::Plain)); + println!("{:?}", s.format(Tone::Second, ToneFormat::Mark)); + println!("{:?}", s.format(Tone::Second, ToneFormat::Number)); + + // let i = s.0 as usize; + + // // println!("{} {:?}", s, &PINYIN_TABLE[i + 10 .. i+100]); + // println!("{:?}", PINYIN_TABLE[i],); + // println!("{:?}", PINYIN_TABLE[i + 924 * 1]); + // println!("{:?}", PINYIN_TABLE[i + 924 * 2]); + // println!("{:?}", PINYIN_TABLE[i + 924 * 3]); + // println!("{:?}", PINYIN_TABLE[i + 924 * 4]); + + // println!("{:?}", PINYIN_TABLE[i + 924 * 4 + 924 * 1]); + // println!("{:?}", PINYIN_TABLE[i + 924 * 4 + 924 * 2]); + // println!("{:?}", PINYIN_TABLE[i + 924 * 4 + 924 * 3]); + // println!("{:?}", PINYIN_TABLE[i + 924 * 4 + 924 * 4]); + } \ No newline at end of file diff --git a/crates/syllable/src/pinyin.rs b/crates/syllable/src/pinyin.rs index d3b8155..90a2abb 100644 --- a/crates/syllable/src/pinyin.rs +++ b/crates/syllable/src/pinyin.rs @@ -4,6 +4,7 @@ use crate::SP; use crate::tone::{ Tone, ToneFormat, }; use crate::initial::{ INITIAL_TABLE, Initial, }; use crate::rhyme::Rhyme; +use crate::letter::Letter; const ________: &'static str = SP; @@ -11,7 +12,7 @@ const ________: &'static str = SP; // 拼音音节总表 // ∅ b p m f d t n l g k h j q x zh ch sh r z c s -// Len: 924 * 5 = 4620 +// Len: 924 + 924 * 4 * 2 = 8316 pub static PINYIN_TABLE: [&'static str; 8316] = [ // ∅, b, p, m, f, d, t, n, l, g, k, h, j, q, x, zh, ch, sh, r, z, c, s, // Group a Finals @@ -69,7 +70,8 @@ pub static PINYIN_TABLE: [&'static str; 8316] = [ ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, "huir", ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, -// 第一声 +// 调符 +// 第一声: 音调符号标记形式 ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, "zhī", "chī", "shī", "rī", "zī", "cī", "sī", "ā", "bā", "pā", "mā", "fā", "dā", "tā", "nā", "lā", "gā", "kā", "hā", ________, ________, ________, "zhā", "chā", "shā", ________, "zā", "cā", "sā", "ē", ________, ________, "mē", ________, "dē", "tē", "nē", "lē", "gē", "kē", "hē", ________, ________, ________, "zhē", "chē", "shē", "rē", "zē", "cē", "sē", @@ -112,8 +114,7 @@ pub static PINYIN_TABLE: [&'static str; 8316] = [ ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, "huār", ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, "huōr", ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, "huīr", ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, - -// 第二声 +// 第二声: 音调符号标记形式 ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, "zhí", "chí", "shí", "rí", "zí", "cí", "sí", "á", "bá", "pá", "má", "fá", "dá", "tá", "ná", "lá", "gá", "ká", "há", ________, ________, ________, "zhá", "chá", "shá", ________, "zá", "cá", "sá", "é", ________, ________, "mé", ________, "dé", "té", "né", "lé", "gé", "ké", "hé", ________, ________, ________, "zhé", "ché", "shé", "ré", "zé", "cé", "sé", @@ -156,8 +157,7 @@ pub static PINYIN_TABLE: [&'static str; 8316] = [ ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, "huár", ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, "huór", ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, "huír", ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, - -// 第三声 +// 第三声: 音调符号标记形式 ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, "zhǐ", "chǐ", "shǐ", "rǐ", "zǐ", "cǐ", "sǐ", "ǎ", "bǎ", "pǎ", "mǎ", "fǎ", "dǎ", "tǎ", "nǎ", "lǎ", "gǎ", "kǎ", "hǎ", ________, ________, ________, "zhǎ", "chǎ", "shǎ", ________, "zǎ", "cǎ", "sǎ", "ě", ________, ________, "mě", ________, "dě", "tě", "ně", "lě", "gě", "kě", "hě", ________, ________, ________, "zhě", "chě", "shě", "rě", "zě", "cě", "sě", @@ -200,8 +200,7 @@ pub static PINYIN_TABLE: [&'static str; 8316] = [ ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, "huǎr", ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, "huǒr", ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, "huǐr", ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, - -// 第四声 +// 第四声: 音调符号标记形式 ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, "zhì", "chì", "shì", "rì", "zì", "cì", "sì", "à", "bà", "pà", "mà", "fà", "dà", "tà", "nà", "là", "gà", "kà", "hà", ________, ________, ________, "zhà", "chà", "shà", ________, "zà", "cà", "sà", "è", ________, ________, "mè", ________, "dè", "tè", "nè", "lè", "gè", "kè", "hè", ________, ________, ________, "zhè", "chè", "shè", "rè", "zè", "cè", "sè", @@ -244,9 +243,8 @@ pub static PINYIN_TABLE: [&'static str; 8316] = [ ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, "huàr", ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, "huòr", ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, "huìr", ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, - -// 数字标记方法 -// 第一声 +// 数字音调 +// 第一声: 数字音调标记 ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, "zhi1", "chi1", "shi1", "ri1", "zi1", "ci1", "si1", "a1", "ba1", "pa1", "ma1", "fa1", "da1", "ta1", "na1", "la1", "ga1", "ka1", "ha1", ________, ________, ________, "zha1", "cha1", "sha1", ________, "za1", "ca1", "sa1", "e1", ________, ________, "me1", ________, "de1", "te1", "ne1", "le1", "ge1", "ke1", "he1", ________, ________, ________, "zhe1", "che1", "she1", "re1", "ze1", "ce1", "se1", @@ -265,31 +263,31 @@ pub static PINYIN_TABLE: [&'static str; 8316] = [ "yo1", ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, "ye1", "bie1", "pie1", "mie1", ________, "die1", "tie1", "nie1", "lie1", ________, ________, ________, "jie1", "qie1", "xie1", ________, ________, ________, ________, ________, ________, ________, "ya1o", "bia1o", "pia1o", "mia1o", "fia1o", "dia1o", "tia1o", "nia1o", "lia1o", ________, ________, ________, "jia1o", "qia1o", "xia1o", ________, ________, ________, ________, ________, ________, ________, - "yo1u", ________, ________, "miū", ________, "diū", ________, "niū", "liū", ________, ________, ________, "jiū", "qiū", "xiū", ________, ________, ________, ________, ________, ________, ________, + "yo1u", ________, ________, "miu1", ________, "diu1", ________, "niu1", "liu1", ________, ________, ________, "jiu1", "qiu1", "xiu1", ________, ________, ________, ________, ________, ________, ________, "ya1n", "bia1n", "pia1n", "mia1n", ________, "dia1n", "tia1n", "nia1n", "lia1n", ________, ________, ________, "jia1n", "qia1n", "xia1n", ________, ________, ________, ________, ________, ________, ________, "yi1n", "bi1n", "pi1n", "mi1n", ________, ________, ________, "ni1n", "li1n", ________, ________, ________, "ji1n", "qi1n", "xi1n", ________, ________, ________, ________, ________, ________, ________, "ya1ng", "bia1ng", ________, ________, ________, "dia1ng", ________, "nia1ng", "lia1ng", ________, ________, ________, "jia1ng", "qia1ng", "xia1ng", ________, ________, ________, ________, ________, ________, ________, "yi1ng", "bi1ng", "pi1ng", "mi1ng", ________, "di1ng", "ti1ng", "ni1ng", "li1ng", ________, ________, ________, "ji1ng", "qi1ng", "xi1ng", ________, ________, ________, ________, ________, ________, ________, "yo1ng", ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, "jio1ng", "qio1ng", "xio1ng", ________, ________, ________, ________, ________, ________, ________, - "wū", "bū", "pū", "mū", "fū", "dū", "tū", "nū", "lū", "gū", "kū", "hū", ________, ________, ________, "zhū", "chū", "shū", "rū", "zū", "cū", "sū", + "wu1", "bu1", "pu1", "mu1", "fu1", "du1", "tu1", "nu1", "lu1", "gu1", "ku1", "hu1", ________, ________, ________, "zhu1", "chu1", "shu1", "ru1", "zu1", "cu1", "su1", "wa1", ________, ________, ________, ________, ________, ________, ________, ________, "gua1", "kua1", "hua1", ________, ________, ________, "zhua1", "chua1", "shua1", "rua1", ________, ________, ________, "wo1", "bo1", "po1", "mo1", "fo1", "duo1", "tuo1", "nuo1", "luo1", "guo1", "kuo1", "huo1", ________, ________, ________, "zhuo1", "chuo1", "shuo1", "ruo1", "zuo1", "cuo1", "suo1", "wa1i", ________, ________, ________, ________, ________, ________, ________, ________, "gua1i", "kua1i", "hua1i", ________, ________, ________, "zhua1i", "chua1i", "shua1i", ________, ________, ________, ________, "we1i", ________, ________, ________, ________, "dui1", "tui1", ________, ________, "gui1", "kui1", "hui1", ________, ________, ________, "zhui1", "chui1", "shui1", "rui1", "zui1", "cui1", "sui1", "wa1n", ________, ________, ________, ________, "dua1n", "tua1n", "nua1n", "lua1n", "gua1n", "kua1n", "hua1n", ________, ________, ________, "zhua1n", "chua1n", "shua1n", "rua1n", "zua1n", "cua1n", "sua1n", - "we1n", ________, ________, ________, ________, "dūn", "tūn", "nūn", "lūn", "gūn", "kūn", "hūn", ________, ________, ________, "zhūn", "chūn", "shūn", "rūn", "zūn", "cūn", "sūn", + "we1n", ________, ________, ________, ________, "du1n", "tu1n", "nu1n", "lu1n", "gu1n", "ku1n", "hu1n", ________, ________, ________, "zhu1n", "chu1n", "shu1n", "ru1n", "zu1n", "cu1n", "su1n", "wa1ng", ________, ________, ________, ________, ________, ________, ________, ________, "gua1ng", "kua1ng", "hua1ng", ________, ________, ________, "zhua1ng", "chua1ng", "shua1ng", ________, ________, ________, ________, "we1ng", ________, ________, ________, ________, "do1ng", "to1ng", "no1ng", "lo1ng", "go1ng", "ko1ng", "ho1ng", ________, ________, ________, "zho1ng", "cho1ng", "sho1ng", "ro1ng", "zo1ng", "co1ng", "so1ng", - "yū", ________, ________, ________, ________, ________, ________, "nü1", "lü1", ________, ________, ________, "jū", "qū", "xū", ________, ________, ________, ________, ________, ________, ________, + "yu1", ________, ________, ________, ________, ________, ________, "nü1", "lü1", ________, ________, ________, "ju1", "qu1", "xu1", ________, ________, ________, ________, ________, ________, ________, "yue1", ________, ________, ________, ________, ________, ________, "nüe1", "lüe1", ________, ________, ________, "jue1", "que1", "xue1", ________, ________, ________, ________, ________, ________, ________, "yua1n", ________, ________, ________, ________, ________, ________, ________, "lüa1n", ________, ________, ________, "jua1n", "qua1n", "xua1n", ________, ________, ________, ________, ________, ________, ________, - "yūn", ________, ________, ________, ________, ________, ________, ________, "lü1n", ________, ________, ________, "jūn", "qūn", "xūn", ________, ________, ________, ________, ________, ________, ________, - "wo1r", "ba1nr", "pi1r", "mia1nr", "fūr", "dia1nr", "ta1ngr", "na1r", ________, "ge1r", "ko1ur", "ha1ir", "ji1nr", ________, "xia1r", "zhe1r", ________, "shi1r", ________, ________, ________, ________, + "yu1n", ________, ________, ________, ________, ________, ________, ________, "lü1n", ________, ________, ________, "ju1n", "qu1n", "xu1n", ________, ________, ________, ________, ________, ________, ________, + "wo1r", "ba1nr", "pi1r", "mia1nr", "fu1r", "dia1nr", "ta1ngr", "na1r", ________, "ge1r", "ko1ur", "ha1ir", "ji1nr", ________, "xia1r", "zhe1r", ________, "shi1r", ________, ________, ________, ________, "wa1nr", ________, ________, ________, ________, "di1ngr", "tui1r", "nü1r", ________, "ga1nr", "ko1ngr", "ha1or", ________, ________, "xia1nr", ________, ________, "shui1r", ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, "hua1r", ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, "huo1r", ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, "hui1r", ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, -// 第二声 +// 第二声: 数字音调标记 ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, "zhi2", "chi2", "shi2", "ri2", "zi2", "ci2", "si2", "a2", "ba2", "pa2", "ma2", "fa2", "da2", "ta2", "na2", "la2", "ga2", "ka2", "ha2", ________, ________, ________, "zha2", "cha2", "sha2", ________, "za2", "ca2", "sa2", "e2", ________, ________, "me2", ________, "de2", "te2", "ne2", "le2", "ge2", "ke2", "he2", ________, ________, ________, "zhe2", "che2", "she2", "re2", "ze2", "ce2", "se2", @@ -308,31 +306,31 @@ pub static PINYIN_TABLE: [&'static str; 8316] = [ "yo2", ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, "ye2", "bie2", "pie2", "mie2", ________, "die2", "tie2", "nie2", "lie2", ________, ________, ________, "jie2", "qie2", "xie2", ________, ________, ________, ________, ________, ________, ________, "ya2o", "bia2o", "pia2o", "mia2o", "fia2o", "dia2o", "tia2o", "nia2o", "lia2o", ________, ________, ________, "jia2o", "qia2o", "xia2o", ________, ________, ________, ________, ________, ________, ________, - "yo2u", ________, ________, "miú", ________, "diú", ________, "niú", "liú", ________, ________, ________, "jiú", "qiú", "xiú", ________, ________, ________, ________, ________, ________, ________, + "yo2u", ________, ________, "miu2", ________, "diu2", ________, "niu2", "liu2", ________, ________, ________, "jiu2", "qiu2", "xiu2", ________, ________, ________, ________, ________, ________, ________, "ya2n", "bia2n", "pia2n", "mia2n", ________, "dia2n", "tia2n", "nia2n", "lia2n", ________, ________, ________, "jia2n", "qia2n", "xia2n", ________, ________, ________, ________, ________, ________, ________, "yi2n", "bi2n", "pi2n", "mi2n", ________, ________, ________, "ni2n", "li2n", ________, ________, ________, "ji2n", "qi2n", "xi2n", ________, ________, ________, ________, ________, ________, ________, "ya2ng", "bia2ng", ________, ________, ________, "dia2ng", ________, "nia2ng", "lia2ng", ________, ________, ________, "jia2ng", "qia2ng", "xia2ng", ________, ________, ________, ________, ________, ________, ________, "yi2ng", "bi2ng", "pi2ng", "mi2ng", ________, "di2ng", "ti2ng", "ni2ng", "li2ng", ________, ________, ________, "ji2ng", "qi2ng", "xi2ng", ________, ________, ________, ________, ________, ________, ________, "yo2ng", ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, "jio2ng", "qio2ng", "xio2ng", ________, ________, ________, ________, ________, ________, ________, - "wú", "bú", "pú", "mú", "fú", "dú", "tú", "nú", "lú", "gú", "kú", "hú", ________, ________, ________, "zhú", "chú", "shú", "rú", "zú", "cú", "sú", + "wu2", "bu2", "pu2", "mu2", "fu2", "du2", "tu2", "nu2", "lu2", "gu2", "ku2", "hu2", ________, ________, ________, "zhu2", "chu2", "shu2", "ru2", "zu2", "cu2", "su2", "wa2", ________, ________, ________, ________, ________, ________, ________, ________, "gua2", "kua2", "hua2", ________, ________, ________, "zhua2", "chua2", "shua2", "rua2", ________, ________, ________, "wo2", "bo2", "po2", "mo2", "fo2", "duo2", "tuo2", "nuo2", "luo2", "guo2", "kuo2", "huo2", ________, ________, ________, "zhuo2", "chuo2", "shuo2", "ruo2", "zuo2", "cuo2", "suo2", "wa2i", ________, ________, ________, ________, ________, ________, ________, ________, "gua2i", "kua2i", "hua2i", ________, ________, ________, "zhua2i", "chua2i", "shua2i", ________, ________, ________, ________, "we2i", ________, ________, ________, ________, "dui2", "tui2", ________, ________, "gui2", "kui2", "hui2", ________, ________, ________, "zhui2", "chui2", "shui2", "rui2", "zui2", "cui2", "sui2", "wa2n", ________, ________, ________, ________, "dua2n", "tua2n", "nua2n", "lua2n", "gua2n", "kua2n", "hua2n", ________, ________, ________, "zhua2n", "chua2n", "shua2n", "rua2n", "zua2n", "cua2n", "sua2n", - "we2n", ________, ________, ________, ________, "dún", "tún", "nún", "lún", "gún", "kún", "hún", ________, ________, ________, "zhún", "chún", "shún", "rún", "zún", "cún", "sún", + "we2n", ________, ________, ________, ________, "du2n", "tu2n", "nu2n", "lu2n", "gu2n", "ku2n", "hu2n", ________, ________, ________, "zhu2n", "chu2n", "shu2n", "ru2n", "zu2n", "cu2n", "su2n", "wa2ng", ________, ________, ________, ________, ________, ________, ________, ________, "gua2ng", "kua2ng", "hua2ng", ________, ________, ________, "zhua2ng", "chua2ng", "shua2ng", ________, ________, ________, ________, "we2ng", ________, ________, ________, ________, "do2ng", "to2ng", "no2ng", "lo2ng", "go2ng", "ko2ng", "ho2ng", ________, ________, ________, "zho2ng", "cho2ng", "sho2ng", "ro2ng", "zo2ng", "co2ng", "so2ng", - "yú", ________, ________, ________, ________, ________, ________, "nü2", "lü2", ________, ________, ________, "jú", "qú", "xú", ________, ________, ________, ________, ________, ________, ________, + "yu2", ________, ________, ________, ________, ________, ________, "nü2", "lü2", ________, ________, ________, "ju2", "qu2", "xu2", ________, ________, ________, ________, ________, ________, ________, "yue2", ________, ________, ________, ________, ________, ________, "nüe2", "lüe2", ________, ________, ________, "jue2", "que2", "xue2", ________, ________, ________, ________, ________, ________, ________, "yua2n", ________, ________, ________, ________, ________, ________, ________, "lüa2n", ________, ________, ________, "jua2n", "qua2n", "xua2n", ________, ________, ________, ________, ________, ________, ________, - "yún", ________, ________, ________, ________, ________, ________, ________, "lü2n", ________, ________, ________, "jún", "qún", "xún", ________, ________, ________, ________, ________, ________, ________, - "wo2r", "ba2nr", "pi2r", "mia2nr", "fúr", "dia2nr", "ta2ngr", "na2r", ________, "ge2r", "ko2ur", "ha2ir", "ji2nr", ________, "xia2r", "zhe2r", ________, "shi2r", ________, ________, ________, ________, + "yu2n", ________, ________, ________, ________, ________, ________, ________, "lü2n", ________, ________, ________, "ju2n", "qu2n", "xu2n", ________, ________, ________, ________, ________, ________, ________, + "wo2r", "ba2nr", "pi2r", "mia2nr", "fu2r", "dia2nr", "ta2ngr", "na2r", ________, "ge2r", "ko2ur", "ha2ir", "ji2nr", ________, "xia2r", "zhe2r", ________, "shi2r", ________, ________, ________, ________, "wa2nr", ________, ________, ________, ________, "di2ngr", "tui2r", "nü2r", ________, "ga2nr", "ko2ngr", "ha2or", ________, ________, "xia2nr", ________, ________, "shui2r", ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, "hua2r", ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, "huo2r", ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, "hui2r", ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, -// 第三声 +// 第三声: 数字音调标记 ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, "zhi3", "chi3", "shi3", "ri3", "zi3", "ci3", "si3", "a3", "ba3", "pa3", "ma3", "fa3", "da3", "ta3", "na3", "la3", "ga3", "ka3", "ha3", ________, ________, ________, "zha3", "cha3", "sha3", ________, "za3", "ca3", "sa3", "e3", ________, ________, "me3", ________, "de3", "te3", "ne3", "le3", "ge3", "ke3", "he3", ________, ________, ________, "zhe3", "che3", "she3", "re3", "ze3", "ce3", "se3", @@ -351,31 +349,31 @@ pub static PINYIN_TABLE: [&'static str; 8316] = [ "yo3", ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, "ye3", "bie3", "pie3", "mie3", ________, "die3", "tie3", "nie3", "lie3", ________, ________, ________, "jie3", "qie3", "xie3", ________, ________, ________, ________, ________, ________, ________, "ya3o", "bia3o", "pia3o", "mia3o", "fia3o", "dia3o", "tia3o", "nia3o", "lia3o", ________, ________, ________, "jia3o", "qia3o", "xia3o", ________, ________, ________, ________, ________, ________, ________, - "yo3u", ________, ________, "miǔ", ________, "diǔ", ________, "niǔ", "liǔ", ________, ________, ________, "jiǔ", "qiǔ", "xiǔ", ________, ________, ________, ________, ________, ________, ________, + "yo3u", ________, ________, "miu3", ________, "diu3", ________, "niu3", "liu3", ________, ________, ________, "jiu3", "qiu3", "xiu3", ________, ________, ________, ________, ________, ________, ________, "ya3n", "bia3n", "pia3n", "mia3n", ________, "dia3n", "tia3n", "nia3n", "lia3n", ________, ________, ________, "jia3n", "qia3n", "xia3n", ________, ________, ________, ________, ________, ________, ________, "yi3n", "bi3n", "pi3n", "mi3n", ________, ________, ________, "ni3n", "li3n", ________, ________, ________, "ji3n", "qi3n", "xi3n", ________, ________, ________, ________, ________, ________, ________, "ya3ng", "bia3ng", ________, ________, ________, "dia3ng", ________, "nia3ng", "lia3ng", ________, ________, ________, "jia3ng", "qia3ng", "xia3ng", ________, ________, ________, ________, ________, ________, ________, "yi3ng", "bi3ng", "pi3ng", "mi3ng", ________, "di3ng", "ti3ng", "ni3ng", "li3ng", ________, ________, ________, "ji3ng", "qi3ng", "xi3ng", ________, ________, ________, ________, ________, ________, ________, "yo3ng", ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, "jio3ng", "qio3ng", "xio3ng", ________, ________, ________, ________, ________, ________, ________, - "wǔ", "bǔ", "pǔ", "mǔ", "fǔ", "dǔ", "tǔ", "nǔ", "lǔ", "gǔ", "kǔ", "hǔ", ________, ________, ________, "zhǔ", "chǔ", "shǔ", "rǔ", "zǔ", "cǔ", "sǔ", + "wu3", "bu3", "pu3", "mu3", "fu3", "du3", "tu3", "nu3", "lu3", "gu3", "ku3", "hu3", ________, ________, ________, "zhu3", "chu3", "shu3", "ru3", "zu3", "cu3", "su3", "wa3", ________, ________, ________, ________, ________, ________, ________, ________, "gua3", "kua3", "hua3", ________, ________, ________, "zhua3", "chua3", "shua3", "rua3", ________, ________, ________, "wo3", "bo3", "po3", "mo3", "fo3", "duo3", "tuo3", "nuo3", "luo3", "guo3", "kuo3", "huo3", ________, ________, ________, "zhuo3", "chuo3", "shuo3", "ruo3", "zuo3", "cuo3", "suo3", "wa3i", ________, ________, ________, ________, ________, ________, ________, ________, "gua3i", "kua3i", "hua3i", ________, ________, ________, "zhua3i", "chua3i", "shua3i", ________, ________, ________, ________, "we3i", ________, ________, ________, ________, "dui3", "tui3", ________, ________, "gui3", "kui3", "hui3", ________, ________, ________, "zhui3", "chui3", "shui3", "rui3", "zui3", "cui3", "sui3", "wa3n", ________, ________, ________, ________, "dua3n", "tua3n", "nua3n", "lua3n", "gua3n", "kua3n", "hua3n", ________, ________, ________, "zhua3n", "chua3n", "shua3n", "rua3n", "zua3n", "cua3n", "sua3n", - "we3n", ________, ________, ________, ________, "dǔn", "tǔn", "nǔn", "lǔn", "gǔn", "kǔn", "hǔn", ________, ________, ________, "zhǔn", "chǔn", "shǔn", "rǔn", "zǔn", "cǔn", "sǔn", + "we3n", ________, ________, ________, ________, "du3n", "tu3n", "nu3n", "lu3n", "gu3n", "ku3n", "hu3n", ________, ________, ________, "zhu3n", "chu3n", "shu3n", "ru3n", "zu3n", "cu3n", "su3n", "wa3ng", ________, ________, ________, ________, ________, ________, ________, ________, "gua3ng", "kua3ng", "hua3ng", ________, ________, ________, "zhua3ng", "chua3ng", "shua3ng", ________, ________, ________, ________, "we3ng", ________, ________, ________, ________, "do3ng", "to3ng", "no3ng", "lo3ng", "go3ng", "ko3ng", "ho3ng", ________, ________, ________, "zho3ng", "cho3ng", "sho3ng", "ro3ng", "zo3ng", "co3ng", "so3ng", - "yǔ", ________, ________, ________, ________, ________, ________, "nü3", "lü3", ________, ________, ________, "jǔ", "qǔ", "xǔ", ________, ________, ________, ________, ________, ________, ________, + "yu3", ________, ________, ________, ________, ________, ________, "nü3", "lü3", ________, ________, ________, "ju3", "qu3", "xu3", ________, ________, ________, ________, ________, ________, ________, "yue3", ________, ________, ________, ________, ________, ________, "nüe3", "lüe3", ________, ________, ________, "jue3", "que3", "xue3", ________, ________, ________, ________, ________, ________, ________, "yua3n", ________, ________, ________, ________, ________, ________, ________, "lüa3n", ________, ________, ________, "jua3n", "qua3n", "xua3n", ________, ________, ________, ________, ________, ________, ________, - "yǔn", ________, ________, ________, ________, ________, ________, ________, "lü3n", ________, ________, ________, "jǔn", "qǔn", "xǔn", ________, ________, ________, ________, ________, ________, ________, - "wo3r", "ba3nr", "pi3r", "mia3nr", "fǔr", "dia3nr", "ta3ngr", "na3r", ________, "ge3r", "ko3ur", "ha3ir", "ji3nr", ________, "xia3r", "zhe3r", ________, "shi3r", ________, ________, ________, ________, + "yu3n", ________, ________, ________, ________, ________, ________, ________, "lü3n", ________, ________, ________, "ju3n", "qu3n", "xu3n", ________, ________, ________, ________, ________, ________, ________, + "wo3r", "ba3nr", "pi3r", "mia3nr", "fu3r", "dia3nr", "ta3ngr", "na3r", ________, "ge3r", "ko3ur", "ha3ir", "ji3nr", ________, "xia3r", "zhe3r", ________, "shi3r", ________, ________, ________, ________, "wa3nr", ________, ________, ________, ________, "di3ngr", "tui3r", "nü3r", ________, "ga3nr", "ko3ngr", "ha3or", ________, ________, "xia3nr", ________, ________, "shui3r", ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, "hua3r", ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, "huo3r", ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, "hui3r", ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, -// 第四声 +// 第四声: 数字音调标记 ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, "zhi4", "chi4", "shi4", "ri4", "zi4", "ci4", "si4", "a4", "ba4", "pa4", "ma4", "fa4", "da4", "ta4", "na4", "la4", "ga4", "ka4", "ha4", ________, ________, ________, "zha4", "cha4", "sha4", ________, "za4", "ca4", "sa4", "e4", ________, ________, "me4", ________, "de4", "te4", "ne4", "le4", "ge4", "ke4", "he4", ________, ________, ________, "zhe4", "che4", "she4", "re4", "ze4", "ce4", "se4", @@ -394,26 +392,26 @@ pub static PINYIN_TABLE: [&'static str; 8316] = [ "yo4", ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, "ye4", "bie4", "pie4", "mie4", ________, "die4", "tie4", "nie4", "lie4", ________, ________, ________, "jie4", "qie4", "xie4", ________, ________, ________, ________, ________, ________, ________, "ya4o", "bia4o", "pia4o", "mia4o", "fia4o", "dia4o", "tia4o", "nia4o", "lia4o", ________, ________, ________, "jia4o", "qia4o", "xia4o", ________, ________, ________, ________, ________, ________, ________, - "yo4u", ________, ________, "miù", ________, "diù", ________, "niù", "liù", ________, ________, ________, "jiù", "qiù", "xiù", ________, ________, ________, ________, ________, ________, ________, + "yo4u", ________, ________, "miu4", ________, "diu4", ________, "niu4", "liu4", ________, ________, ________, "jiu4", "qiu4", "xiu4", ________, ________, ________, ________, ________, ________, ________, "ya4n", "bia4n", "pia4n", "mia4n", ________, "dia4n", "tia4n", "nia4n", "lia4n", ________, ________, ________, "jia4n", "qia4n", "xia4n", ________, ________, ________, ________, ________, ________, ________, "yi4n", "bi4n", "pi4n", "mi4n", ________, ________, ________, "ni4n", "li4n", ________, ________, ________, "ji4n", "qi4n", "xi4n", ________, ________, ________, ________, ________, ________, ________, "ya4ng", "bia4ng", ________, ________, ________, "dia4ng", ________, "nia4ng", "lia4ng", ________, ________, ________, "jia4ng", "qia4ng", "xia4ng", ________, ________, ________, ________, ________, ________, ________, "yi4ng", "bi4ng", "pi4ng", "mi4ng", ________, "di4ng", "ti4ng", "ni4ng", "li4ng", ________, ________, ________, "ji4ng", "qi4ng", "xi4ng", ________, ________, ________, ________, ________, ________, ________, "yo4ng", ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, "jio4ng", "qio4ng", "xio4ng", ________, ________, ________, ________, ________, ________, ________, - "wù", "bù", "pù", "mù", "fù", "dù", "tù", "nù", "lù", "gù", "kù", "hù", ________, ________, ________, "zhù", "chù", "shù", "rù", "zù", "cù", "sù", + "wu4", "bu4", "pu4", "mu4", "fu4", "du4", "tu4", "nu4", "lu4", "gu4", "ku4", "hu4", ________, ________, ________, "zhu4", "chu4", "shu4", "ru4", "zu4", "cu4", "su4", "wa4", ________, ________, ________, ________, ________, ________, ________, ________, "gua4", "kua4", "hua4", ________, ________, ________, "zhua4", "chua4", "shua4", "rua4", ________, ________, ________, "wo4", "bo4", "po4", "mo4", "fo4", "duo4", "tuo4", "nuo4", "luo4", "guo4", "kuo4", "huo4", ________, ________, ________, "zhuo4", "chuo4", "shuo4", "ruo4", "zuo4", "cuo4", "suo4", "wa4i", ________, ________, ________, ________, ________, ________, ________, ________, "gua4i", "kua4i", "hua4i", ________, ________, ________, "zhua4i", "chua4i", "shua4i", ________, ________, ________, ________, "we4i", ________, ________, ________, ________, "dui4", "tui4", ________, ________, "gui4", "kui4", "hui4", ________, ________, ________, "zhui4", "chui4", "shui4", "rui4", "zui4", "cui4", "sui4", "wa4n", ________, ________, ________, ________, "dua4n", "tua4n", "nua4n", "lua4n", "gua4n", "kua4n", "hua4n", ________, ________, ________, "zhua4n", "chua4n", "shua4n", "rua4n", "zua4n", "cua4n", "sua4n", - "we4n", ________, ________, ________, ________, "dùn", "tùn", "nùn", "lùn", "gùn", "kùn", "hùn", ________, ________, ________, "zhùn", "chùn", "shùn", "rùn", "zùn", "cùn", "sùn", + "we4n", ________, ________, ________, ________, "du4n", "tu4n", "nu4n", "lu4n", "gu4n", "ku4n", "hu4n", ________, ________, ________, "zhu4n", "chu4n", "shu4n", "ru4n", "zu4n", "cu4n", "su4n", "wa4ng", ________, ________, ________, ________, ________, ________, ________, ________, "gua4ng", "kua4ng", "hua4ng", ________, ________, ________, "zhua4ng", "chua4ng", "shua4ng", ________, ________, ________, ________, "we4ng", ________, ________, ________, ________, "do4ng", "to4ng", "no4ng", "lo4ng", "go4ng", "ko4ng", "ho4ng", ________, ________, ________, "zho4ng", "cho4ng", "sho4ng", "ro4ng", "zo4ng", "co4ng", "so4ng", - "yù", ________, ________, ________, ________, ________, ________, "nü4", "lü4", ________, ________, ________, "jù", "qù", "xù", ________, ________, ________, ________, ________, ________, ________, + "yu4", ________, ________, ________, ________, ________, ________, "nü4", "lü4", ________, ________, ________, "ju4", "qu4", "xu4", ________, ________, ________, ________, ________, ________, ________, "yue4", ________, ________, ________, ________, ________, ________, "nüe4", "lüe4", ________, ________, ________, "jue4", "que4", "xue4", ________, ________, ________, ________, ________, ________, ________, "yua4n", ________, ________, ________, ________, ________, ________, ________, "lüa4n", ________, ________, ________, "jua4n", "qua4n", "xua4n", ________, ________, ________, ________, ________, ________, ________, - "yùn", ________, ________, ________, ________, ________, ________, ________, "lü4n", ________, ________, ________, "jùn", "qùn", "xùn", ________, ________, ________, ________, ________, ________, ________, - "wo4r", "ba4nr", "pi4r", "mia4nr", "fùr", "dia4nr", "ta4ngr", "na4r", ________, "ge4r", "ko4ur", "ha4ir", "ji4nr", ________, "xia4r", "zhe4r", ________, "shi4r", ________, ________, ________, ________, + "yu4n", ________, ________, ________, ________, ________, ________, ________, "lü4n", ________, ________, ________, "ju4n", "qu4n", "xu4n", ________, ________, ________, ________, ________, ________, ________, + "wo4r", "ba4nr", "pi4r", "mia4nr", "fu4r", "dia4nr", "ta4ngr", "na4r", ________, "ge4r", "ko4ur", "ha4ir", "ji4nr", ________, "xia4r", "zhe4r", ________, "shi4r", ________, ________, ________, ________, "wa4nr", ________, ________, ________, ________, "di4ngr", "tui4r", "nü4r", ________, "ga4nr", "ko4ngr", "ha4or", ________, ________, "xia4nr", ________, ________, "shui4r", ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, "hua4r", ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, "huo4r", ________, ________, ________, ________, ________, ________, ________, ________, ________, ________, @@ -421,64 +419,96 @@ pub static PINYIN_TABLE: [&'static str; 8316] = [ ]; + // 16 Bits // 低 第1位 被用作指示拼音是否为简写形式 #[derive(PartialEq, Eq, Copy, Clone)] -pub struct PinYin(u16); +pub struct PinYin(pub u16); // NOTE: 稳定后, `pub` 需要改成 `pub(crate)` impl PinYin { + + // 需要自行移位 ( offset << 1) #[inline] pub const fn new_unchecked(offset: u16) -> Self { Self(offset) } + // 拼音表偏移量 #[inline] pub fn offset(&self) -> u16 { self.0 >> 1 } + // 声调 #[inline] pub fn tone(&self) -> Tone { - // Len: 924 * 5 = 4620 + // Len: 924 + 924 * 4 * 2 = 8316 + let offset = self.offset(); + match offset { + 0 ..= 923 => Tone::Neutral, + 924 ..= 1847 => Tone::First, + 1848 ..= 2771 => Tone::Second, + 2772 ..= 3695 => Tone::Third, + 3696 ..= 4619 => Tone::Fourth, + + 4620 ..= 5543 => Tone::First, + 5544 ..= 6467 => Tone::Second, + 6468 ..= 7391 => Tone::Third, + 7392 ..= 8315 => Tone::Fourth, + _ => unreachable!(), + } + } + // 当前拼音音调标记格式 + #[inline] + pub fn tone_format(&self) -> ToneFormat { let offset = self.offset(); match offset { - 0 ... 923 => Tone::Neutral, - 924 ... 1847 => Tone::First, - 1848 ... 2771 => Tone::Second, - 2772 ... 3695 => Tone::Third, - 3696 ... 4619 => Tone::Fourth, - - 4620 ... 5543 => Tone::First, - 5544 ... 6467 => Tone::Second, - 6468 ... 7391 => Tone::Third, - 7392 ... 8315 => Tone::Fourth, + 0 ..= 923 => ToneFormat::Plain, + + 924 ..= 1847 => ToneFormat::Mark, + 1848 ..= 2771 => ToneFormat::Mark, + 2772 ..= 3695 => ToneFormat::Mark, + 3696 ..= 4619 => ToneFormat::Mark, + + 4620 ..= 5543 => ToneFormat::Number, + 5544 ..= 6467 => ToneFormat::Number, + 6468 ..= 7391 => ToneFormat::Number, + 7392 ..= 8315 => ToneFormat::Number, _ => unreachable!(), } } + // 元音字母 + pub fn vowel(&self) -> Letter { + unimplemented!() + } + + // 携带音调的元音字母 + pub fn tone_mark(&self) -> Letter { + unimplemented!() + } + /// 不带声调以及不使用简写字母的形式 #[inline] - pub fn plain(&self) -> &'static str { + pub fn plain(&self) -> Self { let offset = self.offset(); let idx = match offset { - 0 ... 923 => offset, - 924 ... 1847 => offset - 924, - 1848 ... 2771 => offset - 1848, - 2772 ... 3695 => offset - 2772, - 3696 ... 4619 => offset - 3696, - - 4620 ... 5543 => offset - 4620, - 5544 ... 6467 => offset - 5544, - 6468 ... 7391 => offset - 6468, - 7392 ... 8315 => offset - 7392, + 0 ..= 923 => offset, + 924 ..= 1847 => offset - 924, + 1848 ..= 2771 => offset - 1848, + 2772 ..= 3695 => offset - 2772, + 3696 ..= 4619 => offset - 3696, + + 4620 ..= 5543 => offset - 4620, + 5544 ..= 6467 => offset - 5544, + 6468 ..= 7391 => offset - 6468, + 7392 ..= 8315 => offset - 7392, _ => unreachable!(), }; - let s = PINYIN_TABLE[idx as usize]; - debug_assert!(s != ________); - s + Self(idx << 1) } #[inline] @@ -486,8 +516,39 @@ impl PinYin { self.0 & 0b0000_0000_0000_0001 == 1 } + // 松散数据 + // 声母部分 + #[inline] + pub fn initials(&self) -> Option<&'static str> { + match self.consonant() { + Some(consonant) => Some(consonant.as_str()), + None => { + // 支持输出 Y/W + let s = self.as_str(); + if s.starts_with("j") { + Some("j") + } else if s.starts_with("w") { + Some("w") + } else { + None + } + } + } + } + + // 韵母部分,改写或补写后的形式 + // 不携带声调 以及简写形式字母 + #[inline] + pub fn finals(&self) -> &'static str { + // TODO + unimplemented!() + } + + + // 结构化数据 + // 声母 #[inline] - pub fn initials(&self) -> Option { + pub fn consonant(&self) -> Option { const INITIALS_NUM: u16 = 22; // NOTE: 21 + 1 , 添加了一个站位声母 let idx = self.offset() % INITIALS_NUM; @@ -495,22 +556,24 @@ impl PinYin { return None; } - // TODO: 需要处理补写和转写的规则 - // 如 `Y` 和 `W` 以及 `J/Q/X` let idx = idx - 1; debug_assert!((idx as usize) < INITIAL_TABLE.len()); let offset = idx as u8; + let initial = Initial(offset << 1); + if self.is_simplified() { - Some(Initial( (offset << 1) | 0b0000_0001 )) + Some(initial.simplified()) } else { - Some(Initial(offset)) + Some(initial) } } + // 韵母 (原始形式) #[inline] - pub fn finals(&self) -> Rhyme { + pub fn rhyme(&self) -> Rhyme { + // TODO unimplemented!() } @@ -520,43 +583,91 @@ impl PinYin { if self.is_simplified() { *self } else { - Self(self.0 | 0b0000_0000_0000_1000 | (self.0 & 0b0000_0000_0000_0111) ) + Self(self.0 | 0b0000_0000_0000_0001) } } // 重新设置音调 #[inline] pub fn with_tone(&self, tone: Tone) -> Self { - match tone { - Tone::Neutral => Self(self.0 | 0b0000_0000_0000_0000), - Tone::First => Self(self.0 | 0b0000_0000_0000_0001), - Tone::Second => Self(self.0 | 0b0000_0000_0000_0010), - Tone::Third => Self(self.0 | 0b0000_0000_0000_0011), - Tone::Fourth => Self(self.0 | 0b0000_0000_0000_0100), - } + let tone_format = self.tone_format(); + + self.format(tone, tone_format) + } + + // 重新设置音调的标记形式 + #[inline] + pub fn with_tone_fotmat(&self, tone_format: ToneFormat) -> Self { + let tone = self.tone(); + + self.format(tone, tone_format) } // 格式化输出 #[inline] - pub fn format(&self, fmt: ToneFormat) -> String { - let initials = self.initials(); - let finals = self.finals(); - - format!("{}{}", - initials.map(|initials| initials.format(fmt)).unwrap_or(""), - finals.format(fmt) - ) + pub fn format(&self, tone: Tone, tone_format: ToneFormat) -> Self { + use crate::tone::ToneFormat::*; + use crate::tone::Tone::*; + + let plain = self.plain(); + let plain_offset = plain.offset(); + + let offset = match tone_format { + Plain => plain_offset, + Mark => { + match tone { + Neutral => plain_offset, + First => plain_offset + 924 * 1, + Second => plain_offset + 924 * 2, + Third => plain_offset + 924 * 3, + Fourth => plain_offset + 924 * 4, + } + }, + Number => { + match tone { + Neutral => plain_offset, + First => plain_offset + 924 * 4 + 924 * 1, + Second => plain_offset + 924 * 4 + 924 * 2, + Third => plain_offset + 924 * 4 + 924 * 3, + Fourth => plain_offset + 924 * 4 + 924 * 4, + } + }, + }; + + if self.is_simplified() { + Self(offset << 1).simplified() + } else { + Self(offset << 1) + } + } + + #[inline] + pub fn as_str(&self) -> &'static str { + // TODO: 处理简写形式 ( 需要为列建立一个对应表 ) + let s = PINYIN_TABLE[self.offset() as usize]; + debug_assert!(s != ________); + + s } } impl fmt::Debug for PinYin { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - write!(f, "{:?}", self.format(ToneFormat::Symbol)) + write!(f, "{:?}", self.as_str()) } } impl fmt::Display for PinYin { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - write!(f, "{}", self.format(ToneFormat::Symbol)) + write!(f, "{}", self.as_str()) } -} \ No newline at end of file +} + +#[test] +fn test_format() { + let s = PinYin::new_unchecked(21 << 1); + assert_eq!(s.as_str(), "si"); + assert_eq!(s.format(Tone::Second, ToneFormat::Plain).as_str(), "si"); + assert_eq!(s.format(Tone::Second, ToneFormat::Mark).as_str(), "sí"); + assert_eq!(s.format(Tone::Second, ToneFormat::Number).as_str(), "si2"); +} diff --git a/crates/syllable/src/rhyme.rs b/crates/syllable/src/rhyme.rs index 6e1f43f..574adf3 100644 --- a/crates/syllable/src/rhyme.rs +++ b/crates/syllable/src/rhyme.rs @@ -155,9 +155,9 @@ impl Rhyme { pub fn has_single_vowel(&self) -> bool { // 单元音 韵母 match self.0 { - 0 ... 5 => true, - 16 ... 21 => true, - 32 ... 37 => true, + 0 ..= 5 => true, + 16 ..= 21 => true, + 32 ..= 37 => true, // NOTE: 鼻音需要加入判断中来吗? _ => false, } @@ -167,9 +167,9 @@ impl Rhyme { pub fn has_multi_vowel(&self) -> bool { // 复元音 韵母 match self.0 { - 6 ... 9 => true, - 22 ... 25 => true, - 38 ... 41 => true, + 6 ..= 9 => true, + 22 ..= 25 => true, + 38 ..= 41 => true, // NOTE: 鼻音需要加入判断中来吗? _ => false, } @@ -179,9 +179,9 @@ impl Rhyme { pub fn has_nasal(&self) -> bool { // 携带鼻音的韵母 match self.0 { - 10 ... 15 => true, - 26 ... 31 => true, - 42 ... 47 => true, + 10 ..= 15 => true, + 26 ..= 31 => true, + 42 ..= 47 => true, _ => false, } } @@ -300,40 +300,42 @@ impl Rhyme { // 格式化输出韵母部分 (注: 这个输出不会应用 补写和改写 规则) #[inline] pub fn format(&self, fmt: ToneFormat) -> &'static str { - match fmt { - ToneFormat::Ignore => { - self.as_str() - }, - ToneFormat::Symbol => { - unimplemented!() - }, - ToneFormat::Digit => { - unimplemented!() - }, - ToneFormat::Index => { - unimplemented!() - }, - } + // match fmt { + // ToneFormat::Ignore => { + // self.as_str() + // }, + // ToneFormat::Symbol => { + // unimplemented!() + // }, + // ToneFormat::Digit => { + // unimplemented!() + // }, + // ToneFormat::Index => { + // unimplemented!() + // }, + // } + unimplemented!() } // 格式化输出韵母部分 // 注: 这个输出会根据 声母部分来 应用 补写和改写 规则 #[inline] pub fn format_with_initials(&self, _initials: Option, fmt: ToneFormat) -> &'static str { - match fmt { - ToneFormat::Ignore => { - self.as_str() - }, - ToneFormat::Symbol => { - unimplemented!() - }, - ToneFormat::Digit => { - unimplemented!() - }, - ToneFormat::Index => { - unimplemented!() - }, - } + unimplemented!() + // match fmt { + // ToneFormat::Ignore => { + // self.as_str() + // }, + // ToneFormat::Symbol => { + // unimplemented!() + // }, + // ToneFormat::Digit => { + // unimplemented!() + // }, + // ToneFormat::Index => { + // unimplemented!() + // }, + // } } } diff --git a/crates/syllable/src/tone.rs b/crates/syllable/src/tone.rs index 78b61ad..860bf72 100644 --- a/crates/syllable/src/tone.rs +++ b/crates/syllable/src/tone.rs @@ -9,18 +9,30 @@ use std::fmt; // 声调: ˉ ˊ ˇ ˋ ˙ +// +// 调值: tone pitch // 上标音高 +// 调符: tone mark // 音调修饰符 +// 调值: tone value // 数字音调 /// 音调标记方式 #[derive(Debug, PartialEq, Eq, Copy, Clone)] pub enum ToneFormat { /// 不标示音调 - Ignore, + Plain, + /// 带音调符号的拼音字母 ( fan, fān ) - Symbol, - /// 数字法 ( fan, fɑn⁵⁵, fan³⁵ ) - Digit, - /// 声序法 ( fan, fan1 ) - Index, + Mark, + + // NOTE: 不支持 + // /// 调值标记法 ( fan, fɑn⁵⁵, fan³⁵ ) + // Pitch, + + // 数字标记法 ( fan, fa1n ) + Number, + + // NOTE: 暂不支持 + // /// 声序法 ( fan, fan1 ) + // Index, }