From c8d18fd29e2edc9f2ef1d6fa59a7036054a5f860 Mon Sep 17 00:00:00 2001 From: smdn Date: Thu, 22 Dec 2022 19:31:53 +0900 Subject: [PATCH] change namespaces to Smdn.Text.Ondulish --- .../Smdn.Text.Ondulish/KanaUtils.cs | 175 ++++--- .../Smdn.Text.Ondulish/Translator.cs | 491 +++++++++--------- .../Smdn.Text.Ondulish/KanaUtils.cs | 74 +-- .../Smdn.Text.Ondulish/Translator.cs | 218 ++++---- 4 files changed, 478 insertions(+), 480 deletions(-) diff --git a/src/Smdn.Text.Ondulish/Smdn.Text.Ondulish/KanaUtils.cs b/src/Smdn.Text.Ondulish/Smdn.Text.Ondulish/KanaUtils.cs index 8953076..3adc9a2 100644 --- a/src/Smdn.Text.Ondulish/Smdn.Text.Ondulish/KanaUtils.cs +++ b/src/Smdn.Text.Ondulish/Smdn.Text.Ondulish/KanaUtils.cs @@ -25,111 +25,110 @@ using System; using System.Text; -namespace Smdn.Applications.OndulishTranslator { - public static class KanaUtils { - private const char wideHiraganaStart = '\u3041'; - private const char wideHiraganaEnd = '\u3096'; +namespace Smdn.Text.Ondulish; - private const char wideKatakanaStart = '\u30a1'; - private const char wideKatakanaEnd = '\u30f6'; +public static class KanaUtils { + private const char wideHiraganaStart = '\u3041'; + private const char wideHiraganaEnd = '\u3096'; - private const int offsetFromHiraganaToKatakana = ((int)wideKatakanaStart - (int)wideHiraganaStart); + private const char wideKatakanaStart = '\u30a1'; + private const char wideKatakanaEnd = '\u30f6'; - private const char wideKatakanaExEnd = '\u30fa'; + private const int offsetFromHiraganaToKatakana = ((int)wideKatakanaStart - (int)wideHiraganaStart); - private static readonly string[] wideToNarrowKatakanaMap = new[] { - "ァ", "ア", "ィ", "イ", "ゥ", "ウ", "ェ", "エ", "ォ", "オ", "カ", "ガ", "キ", "ギ", "ク", // 30A1 - 30AF - "グ", "ケ", "ゲ", "コ", "ゴ", "サ", "ザ", "シ", "ジ", "ス", "ズ", "セ", "ゼ", "ソ", "ゾ", "タ", // 30B0 - 30BF - "ダ", "チ", "ヂ", "ッ", "ツ", "ヅ", "テ", "デ", "ト", "ド", "ナ", "ニ", "ヌ", "ネ", "ノ", "ハ", // 30C0 - 30CF - "バ", "パ", "ヒ", "ビ", "ピ", "フ", "ブ", "プ", "ヘ", "ベ", "ペ", "ホ", "ボ", "ポ", "マ", "ミ", // 30D0 - 30DF - "ム", "メ", "モ", "ャ", "ヤ", "ュ", "ユ", "ョ", "ヨ", "ラ", "リ", "ル", "レ", "ロ", "ヮ", "ワ", // 30E0 - 30EF - "ヰ", "ヱ", "ヲ", "ン", "ヴ", "ヵ", "ヶ", "ヷ", "ヸ", "ヹ", "ヺ", // 30F0 - 30FA - }; + private const char wideKatakanaExEnd = '\u30fa'; - public static string ConvertWideHiraganaToKatakana(string input) - { + private static readonly string[] wideToNarrowKatakanaMap = new[] { + "ァ", "ア", "ィ", "イ", "ゥ", "ウ", "ェ", "エ", "ォ", "オ", "カ", "ガ", "キ", "ギ", "ク", // 30A1 - 30AF + "グ", "ケ", "ゲ", "コ", "ゴ", "サ", "ザ", "シ", "ジ", "ス", "ズ", "セ", "ゼ", "ソ", "ゾ", "タ", // 30B0 - 30BF + "ダ", "チ", "ヂ", "ッ", "ツ", "ヅ", "テ", "デ", "ト", "ド", "ナ", "ニ", "ヌ", "ネ", "ノ", "ハ", // 30C0 - 30CF + "バ", "パ", "ヒ", "ビ", "ピ", "フ", "ブ", "プ", "ヘ", "ベ", "ペ", "ホ", "ボ", "ポ", "マ", "ミ", // 30D0 - 30DF + "ム", "メ", "モ", "ャ", "ヤ", "ュ", "ユ", "ョ", "ヨ", "ラ", "リ", "ル", "レ", "ロ", "ヮ", "ワ", // 30E0 - 30EF + "ヰ", "ヱ", "ヲ", "ン", "ヴ", "ヵ", "ヶ", "ヷ", "ヸ", "ヹ", "ヺ", // 30F0 - 30FA + }; + + public static string ConvertWideHiraganaToKatakana(string input) + { #if NETFRAMEWORK - var inputChars = input.ToCharArray(); - var outputChars = new char[inputChars.Length]; + var inputChars = input.ToCharArray(); + var outputChars = new char[inputChars.Length]; + + for (var index = 0; index < inputChars.Length; index++) { + if (wideHiraganaStart <= inputChars[index] && inputChars[index] <= wideHiraganaEnd) + outputChars[index] = (char)((int)inputChars[index] + offsetFromHiraganaToKatakana); + else + outputChars[index] = inputChars[index]; + } - for (var index = 0; index < inputChars.Length; index++) { - if (wideHiraganaStart <= inputChars[index] && inputChars[index] <= wideHiraganaEnd) - outputChars[index] = (char)((int)inputChars[index] + offsetFromHiraganaToKatakana); + return new string(outputChars); +#else + return string.Create(input.Length, input, (chars, s) => { + for (var index = 0; index < chars.Length; index++) { + if (wideHiraganaStart <= s[index] && s[index] <= wideHiraganaEnd) + chars[index] = (char)(s[index] + offsetFromHiraganaToKatakana); else - outputChars[index] = inputChars[index]; + chars[index] = s[index]; } - - return new string(outputChars); -#else - return string.Create(input.Length, input, (chars, s) => { - for (var index = 0; index < chars.Length; index++) { - if (wideHiraganaStart <= s[index] && s[index] <= wideHiraganaEnd) - chars[index] = (char)(s[index] + offsetFromHiraganaToKatakana); - else - chars[index] = s[index]; - } - }); + }); #endif - } + } - public static string ConvertWideKatakanaToHiragana(string input) - { + public static string ConvertWideKatakanaToHiragana(string input) + { #if NETFRAMEWORK - var inputChars = input.ToCharArray(); - var outputChars = new char[inputChars.Length]; - - for (var index = 0; index < inputChars.Length; index++) { - if (wideKatakanaStart <= inputChars[index] && inputChars[index] <= wideKatakanaEnd) - outputChars[index] = (char)((int)inputChars[index] - offsetFromHiraganaToKatakana); - else - outputChars[index] = inputChars[index]; - } - - return new string(outputChars); -#else - return string.Create(input.Length, input, (chars, s) => { - for (var index = 0; index < chars.Length; index++) { - if (wideKatakanaStart <= s[index] && s[index] <= wideKatakanaEnd) - chars[index] = (char)(s[index] - offsetFromHiraganaToKatakana); - else - chars[index] = s[index]; - } - }); -#endif + var inputChars = input.ToCharArray(); + var outputChars = new char[inputChars.Length]; + + for (var index = 0; index < inputChars.Length; index++) { + if (wideKatakanaStart <= inputChars[index] && inputChars[index] <= wideKatakanaEnd) + outputChars[index] = (char)((int)inputChars[index] - offsetFromHiraganaToKatakana); + else + outputChars[index] = inputChars[index]; } - public static string ConvertWideKatakanaToNarrowKatakana(string input) - { - var inputChars = input.ToCharArray(); - var output = new StringBuilder(); - - for (var index = 0; index < inputChars.Length; index++) { - if (wideKatakanaStart <= inputChars[index] && inputChars[index] <= wideKatakanaExEnd) - output.Append(wideToNarrowKatakanaMap[inputChars[index] - wideKatakanaStart]); - else if (inputChars[index] == 'ー') - output.Append('ー'); - else if (inputChars[index] == '゛') - output.Append('゙'); - else if (inputChars[index] == '゜') - output.Append('゚'); - else if (inputChars[index] == '?') - output.Append('?'); - else if (inputChars[index] == '!') - output.Append('!'); - else if (inputChars[index] == '、') - output.Append('、'); - else if (inputChars[index] == '。') - output.Append('。'); - else if (inputChars[index] == ',') - output.Append(','); - else if (inputChars[index] == '.') - output.Append('.'); + return new string(outputChars); +#else + return string.Create(input.Length, input, (chars, s) => { + for (var index = 0; index < chars.Length; index++) { + if (wideKatakanaStart <= s[index] && s[index] <= wideKatakanaEnd) + chars[index] = (char)(s[index] - offsetFromHiraganaToKatakana); else - output.Append(inputChars[index]); + chars[index] = s[index]; } + }); +#endif + } - return output.ToString(); + public static string ConvertWideKatakanaToNarrowKatakana(string input) + { + var inputChars = input.ToCharArray(); + var output = new StringBuilder(); + + for (var index = 0; index < inputChars.Length; index++) { + if (wideKatakanaStart <= inputChars[index] && inputChars[index] <= wideKatakanaExEnd) + output.Append(wideToNarrowKatakanaMap[inputChars[index] - wideKatakanaStart]); + else if (inputChars[index] == 'ー') + output.Append('ー'); + else if (inputChars[index] == '゛') + output.Append('゙'); + else if (inputChars[index] == '゜') + output.Append('゚'); + else if (inputChars[index] == '?') + output.Append('?'); + else if (inputChars[index] == '!') + output.Append('!'); + else if (inputChars[index] == '、') + output.Append('、'); + else if (inputChars[index] == '。') + output.Append('。'); + else if (inputChars[index] == ',') + output.Append(','); + else if (inputChars[index] == '.') + output.Append('.'); + else + output.Append(inputChars[index]); } + + return output.ToString(); } } - diff --git a/src/Smdn.Text.Ondulish/Smdn.Text.Ondulish/Translator.cs b/src/Smdn.Text.Ondulish/Smdn.Text.Ondulish/Translator.cs index 30669c5..ced9e64 100644 --- a/src/Smdn.Text.Ondulish/Smdn.Text.Ondulish/Translator.cs +++ b/src/Smdn.Text.Ondulish/Smdn.Text.Ondulish/Translator.cs @@ -33,315 +33,314 @@ using MeCab; using MeCabConsts = MeCab.MeCab; -namespace Smdn.Applications.OndulishTranslator { - public class Translator : IDisposable { - public IReadOnlyDictionary PhraseDictionary { get; } - public IReadOnlyDictionary WordDictionary { get; } +namespace Smdn.Text.Ondulish; - private Tagger tagger; +public class Translator : IDisposable { + public IReadOnlyDictionary PhraseDictionary { get; } + public IReadOnlyDictionary WordDictionary { get; } - public Translator(string taggerArgs, string dictionaryDirectory) - { - tagger = new Tagger(taggerArgs); + private Tagger tagger; - PhraseDictionary = LoadDictionary(System.IO.Path.Combine(dictionaryDirectory, "dictionary-phrases.csv")); - WordDictionary = LoadDictionary(System.IO.Path.Combine(dictionaryDirectory, "dictionary-words.csv")); - } + public Translator(string taggerArgs, string dictionaryDirectory) + { + tagger = new Tagger(taggerArgs); - private static readonly char[] dictionaryPunctuationChars = new[] {'!', '?', '!', '?', '、', '。'}; + PhraseDictionary = LoadDictionary(System.IO.Path.Combine(dictionaryDirectory, "dictionary-phrases.csv")); + WordDictionary = LoadDictionary(System.IO.Path.Combine(dictionaryDirectory, "dictionary-words.csv")); + } - private static SortedList LoadDictionary(string dictionaryPath) - { - var dictionary = new SortedList(new WordDictionaryComparer()); + private static readonly char[] dictionaryPunctuationChars = new[] {'!', '?', '!', '?', '、', '。'}; - using (var reader = new CsvReader(dictionaryPath, Encoding.UTF8)) { - foreach (var entries in reader.ReadRecords()) { - if (entries.Count < 3) - continue; + private static SortedList LoadDictionary(string dictionaryPath) + { + var dictionary = new SortedList(new WordDictionaryComparer()); - var entry = entries[0].Trim(); + using (var reader = new CsvReader(dictionaryPath, Encoding.UTF8)) { + foreach (var entries in reader.ReadRecords()) { + if (entries.Count < 3) + continue; - if (entry.StartsWith('#')) - continue; // comment line + var entry = entries[0].Trim(); - var key = entries[1].Trim().RemoveChars(dictionaryPunctuationChars); + if (entry.StartsWith('#')) + continue; // comment line - dictionary[KanaUtils.ConvertWideHiraganaToKatakana(key)] = entries[2].Trim(); - } + var key = entries[1].Trim().RemoveChars(dictionaryPunctuationChars); + + dictionary[KanaUtils.ConvertWideHiraganaToKatakana(key)] = entries[2].Trim(); } + } #if false - foreach (var e in dictionary) { - Console.WriteLine("{0} => {1}", e.Key, e.Value); - } + foreach (var e in dictionary) { + Console.WriteLine("{0} => {1}", e.Key, e.Value); + } #endif - return dictionary; - } + return dictionary; + } - private class WordDictionaryComparer : IComparer { - public int Compare(string x, string y) - => x.Length == y.Length - ? StringComparer.Ordinal.Compare(x, y) - : y.Length - x.Length; - } + private class WordDictionaryComparer : IComparer { + public int Compare(string x, string y) + => x.Length == y.Length + ? StringComparer.Ordinal.Compare(x, y) + : y.Length - x.Length; + } - public void Dispose() - { - tagger?.Dispose(); - tagger = null; - } + public void Dispose() + { + tagger?.Dispose(); + tagger = null; + } - public string Translate(string input, bool convertKatakanaToNarrow) - { - var sb = new StringBuilder(input.Length * 2); - var sw = new StringWriter(sb); + public string Translate(string input, bool convertKatakanaToNarrow) + { + var sb = new StringBuilder(input.Length * 2); + var sw = new StringWriter(sb); - Translate(input, convertKatakanaToNarrow, sw); + Translate(input, convertKatakanaToNarrow, sw); - return sb.ToString(); - } + return sb.ToString(); + } - public void Translate(string input, bool convertKatakanaToNarrow, TextWriter output) - { - var reader = new StringReader(input); + public void Translate(string input, bool convertKatakanaToNarrow, TextWriter output) + { + var reader = new StringReader(input); - for (var line = reader.ReadLine(); line is not null; line = reader.ReadLine()) { - if (string.IsNullOrWhiteSpace(line)) { - output.WriteLine(line); - continue; - } + for (var line = reader.ReadLine(); line is not null; line = reader.ReadLine()) { + if (string.IsNullOrWhiteSpace(line)) { + output.WriteLine(line); + continue; + } - var fragments = - ConvertWithDictionary( - ConvertToKatakana(line), - PhraseDictionary - ) - .SelectMany(f => - f.ConvertedText == null - ? ConvertWithDictionary(f.SourceText, WordDictionary) - : Enumerable.Repeat(f, 1) - ) - .SelectMany(f => - f.ConvertedText == null - ? ConvertWithDictionary(f.SourceText, phonemeDictionary) - : Enumerable.Repeat(f, 1) + var fragments = + ConvertWithDictionary( + ConvertToKatakana(line), + PhraseDictionary + ) + .SelectMany(f => + f.ConvertedText == null + ? ConvertWithDictionary(f.SourceText, WordDictionary) + : Enumerable.Repeat(f, 1) + ) + .SelectMany(f => + f.ConvertedText == null + ? ConvertWithDictionary(f.SourceText, phonemeDictionary) + : Enumerable.Repeat(f, 1) + ) + .Select(f => + new TextFragment( + f.SourceText, + f.ConvertedText ?? KanaUtils.ConvertWideHiraganaToKatakana(f.SourceText) // redundant? ) - .Select(f => - new TextFragment( - f.SourceText, - f.ConvertedText ?? KanaUtils.ConvertWideHiraganaToKatakana(f.SourceText) // redundant? - ) - ); - - if (convertKatakanaToNarrow) - fragments = fragments.Select(f => - new TextFragment( - f.SourceText, - KanaUtils.ConvertWideKatakanaToNarrowKatakana(f.ConvertedText) - ) - ); - - output.WriteLine( - string.Concat( - fragments.Select(fragment => fragment.ConvertedText) + ); + + if (convertKatakanaToNarrow) + fragments = fragments.Select(f => + new TextFragment( + f.SourceText, + KanaUtils.ConvertWideKatakanaToNarrowKatakana(f.ConvertedText) ) ); - } - output.Flush(); + output.WriteLine( + string.Concat( + fragments.Select(fragment => fragment.ConvertedText) + ) + ); } - private static readonly char[] featureSplitter = new[] {','}; + output.Flush(); + } - private string ConvertToKatakana(string input) - { - input = input.Replace(",", ","); // XXX: feature splitter + private static readonly char[] featureSplitter = new[] {','}; - var ret = new StringBuilder(input.Length * 2); + private string ConvertToKatakana(string input) + { + input = input.Replace(",", ","); // XXX: feature splitter - for (var node = tagger.parseToNode(input); node != null; node = node.next) { - if (node.stat == MeCabConsts.MECAB_BOS_NODE || node.stat == MeCabConsts.MECAB_EOS_NODE) - continue; + var ret = new StringBuilder(input.Length * 2); + + for (var node = tagger.parseToNode(input); node != null; node = node.next) { + if (node.stat == MeCabConsts.MECAB_BOS_NODE || node.stat == MeCabConsts.MECAB_EOS_NODE) + continue; #if false - Console.WriteLine("feature: {0}", node.feature); + Console.WriteLine("feature: {0}", node.feature); #endif - var featureEntries = node.feature.Split(featureSplitter); + var featureEntries = node.feature.Split(featureSplitter); - if (8 <= featureEntries.Length) { - switch (featureEntries[6]) { - case "ぶっ殺す": ret.Append("ブッコロス"); break; // ipadic says 'ぶっとばす' - default: ret.Append(featureEntries[7]); break; - } - } - else { - ret.Append(node.surface); + if (8 <= featureEntries.Length) { + switch (featureEntries[6]) { + case "ぶっ殺す": ret.Append("ブッコロス"); break; // ipadic says 'ぶっとばす' + default: ret.Append(featureEntries[7]); break; } } + else { + ret.Append(node.surface); + } + } #if false - Console.WriteLine("{0} {1}", input, ret); + Console.WriteLine("{0} {1}", input, ret); #endif - return ret.ToString(); - } + return ret.ToString(); + } - readonly struct TextFragment { - public readonly string SourceText; - public readonly string ConvertedText; + readonly struct TextFragment { + public readonly string SourceText; + public readonly string ConvertedText; - public TextFragment(string sourceText, string convertedText) - { - this.SourceText = sourceText; - this.ConvertedText = convertedText; - } - } - - private static bool FindMostLeftAndLongestCandidate( - string input, - int startIndex, - IReadOnlyDictionary dictionary, - out int position, - out KeyValuePair candidate - ) + public TextFragment(string sourceText, string convertedText) { - position = int.MaxValue; - candidate = default; - - foreach (var entry in dictionary) { - var pos = input.IndexOf(entry.Key, startIndex, StringComparison.Ordinal); + this.SourceText = sourceText; + this.ConvertedText = convertedText; + } + } - if (0 <= pos && pos < position) { - position = pos; - candidate = entry; - } + private static bool FindMostLeftAndLongestCandidate( + string input, + int startIndex, + IReadOnlyDictionary dictionary, + out int position, + out KeyValuePair candidate + ) + { + position = int.MaxValue; + candidate = default; + + foreach (var entry in dictionary) { + var pos = input.IndexOf(entry.Key, startIndex, StringComparison.Ordinal); + + if (0 <= pos && pos < position) { + position = pos; + candidate = entry; } - - return position != int.MaxValue; } - private static IEnumerable ConvertWithDictionary( - string input, - IReadOnlyDictionary dictionary - ) - { - var offset = 0; + return position != int.MaxValue; + } - while (FindMostLeftAndLongestCandidate(input, offset, dictionary, out var position, out var candidate)) { - if (offset < position) - yield return new TextFragment(input.Substring(offset, position - offset), null); + private static IEnumerable ConvertWithDictionary( + string input, + IReadOnlyDictionary dictionary + ) + { + var offset = 0; - yield return new TextFragment(candidate.Key, candidate.Value); + while (FindMostLeftAndLongestCandidate(input, offset, dictionary, out var position, out var candidate)) { + if (offset < position) + yield return new TextFragment(input.Substring(offset, position - offset), null); - offset = position + candidate.Key.Length; - } + yield return new TextFragment(candidate.Key, candidate.Value); - yield return new TextFragment(input.Substring(offset), null); + offset = position + candidate.Key.Length; } - private class ReadOnlyOrderedDictionary : IReadOnlyDictionary { - private readonly IReadOnlyList> dictionary; + yield return new TextFragment(input.Substring(offset), null); + } - public TValue this[TKey key] => throw new NotImplementedException(); - public IEnumerable Keys => throw new NotImplementedException(); - public IEnumerable Values => throw new NotImplementedException(); - public int Count => dictionary.Count; + private class ReadOnlyOrderedDictionary : IReadOnlyDictionary { + private readonly IReadOnlyList> dictionary; - public ReadOnlyOrderedDictionary(IEnumerable<(TKey key, TValue value)> dictionary) - : this( - (dictionary ?? throw new ArgumentNullException(nameof(dictionary))) - .Select(pair => new KeyValuePair(pair.key, pair.value)) - .ToList() - ) - { } + public TValue this[TKey key] => throw new NotImplementedException(); + public IEnumerable Keys => throw new NotImplementedException(); + public IEnumerable Values => throw new NotImplementedException(); + public int Count => dictionary.Count; - public ReadOnlyOrderedDictionary(IReadOnlyList> dictionary) - { - this.dictionary = dictionary ?? throw new ArgumentNullException(nameof(dictionary)); - } + public ReadOnlyOrderedDictionary(IEnumerable<(TKey key, TValue value)> dictionary) + : this( + (dictionary ?? throw new ArgumentNullException(nameof(dictionary))) + .Select(pair => new KeyValuePair(pair.key, pair.value)) + .ToList() + ) + { } - public bool ContainsKey(TKey key) - => throw new NotImplementedException(); + public ReadOnlyOrderedDictionary(IReadOnlyList> dictionary) + { + this.dictionary = dictionary ?? throw new ArgumentNullException(nameof(dictionary)); + } - public IEnumerator> GetEnumerator() - => dictionary.GetEnumerator(); + public bool ContainsKey(TKey key) + => throw new NotImplementedException(); - public bool TryGetValue(TKey key, out TValue value) - => throw new NotImplementedException(); + public IEnumerator> GetEnumerator() + => dictionary.GetEnumerator(); - IEnumerator IEnumerable.GetEnumerator() - => dictionary.GetEnumerator(); - } + public bool TryGetValue(TKey key, out TValue value) + => throw new NotImplementedException(); - private readonly IReadOnlyDictionary phonemeDictionary = new ReadOnlyOrderedDictionary(new[] { - // 最優先 - ("ル", "ドゥ"), - ("ム", "ヴ"), - ("ボー", "ポッ"), - ("ドー", "ドゥー"), - ("スナ", "スダ"), - ("スルナ", "ドゥルダ"), - ("スル", "ドゥル"), - ("デモ", "デロ"), - ("ンヤ", "ッニャ"), - ("ネイ", "ニッ"), - ("ネエ", "ニェ"), - ("デス", "ディス"), - ("ウラ", "ルラ"), - ("トオ", "ドーゥ"), - ("いじゃ", "チョナ"), - ("とは", "トヴァ"), - - // 母音 - ("ア", "ア゛"), - ("ウ", "ル"), - ("ヤ", "ャ"), - - // 摩擦音 - ("サ", "ザァ"), - ("ス", "ズ"), - ("ゼ", "デ"), - - ("ハ", "ヴァ"), - ("ヒ", "ビィ"), - ("フ", "ヴ"), - ("ヘ", "ベ"), - ("ホ", "ボ"), - - ("ブ", "ム"), - - ("ゼ", "デ"), - - // 破裂音 - ("ク", "グ"), - ("キ", "ク"), - - ("タ", "ダ"), - ("チ", "ディ"), - ("ツ", "ヅ"), - ("テ", "デ"), - ("ト", "ドゥ"), - - ("ピ", "ヴィ"), - - // 鼻音 - ("ニ", "ディ"), - ("ヌ", "ズ"), - ("ネ", "ベ"), - ("ノ", "ド"), - - ("マ", "バ"), - ("ミ", "ヴィ"), - ("メ", "ベ"), - ("モ", "ボ"), - - // 流音 - ("リ", "ディ"), - ("レ", "リ"), - ("ロ", "ド"), - }); + IEnumerator IEnumerable.GetEnumerator() + => dictionary.GetEnumerator(); } -} + private readonly IReadOnlyDictionary phonemeDictionary = new ReadOnlyOrderedDictionary(new[] { + // 最優先 + ("ル", "ドゥ"), + ("ム", "ヴ"), + ("ボー", "ポッ"), + ("ドー", "ドゥー"), + ("スナ", "スダ"), + ("スルナ", "ドゥルダ"), + ("スル", "ドゥル"), + ("デモ", "デロ"), + ("ンヤ", "ッニャ"), + ("ネイ", "ニッ"), + ("ネエ", "ニェ"), + ("デス", "ディス"), + ("ウラ", "ルラ"), + ("トオ", "ドーゥ"), + ("いじゃ", "チョナ"), + ("とは", "トヴァ"), + + // 母音 + ("ア", "ア゛"), + ("ウ", "ル"), + ("ヤ", "ャ"), + + // 摩擦音 + ("サ", "ザァ"), + ("ス", "ズ"), + ("ゼ", "デ"), + + ("ハ", "ヴァ"), + ("ヒ", "ビィ"), + ("フ", "ヴ"), + ("ヘ", "ベ"), + ("ホ", "ボ"), + + ("ブ", "ム"), + + ("ゼ", "デ"), + + // 破裂音 + ("ク", "グ"), + ("キ", "ク"), + + ("タ", "ダ"), + ("チ", "ディ"), + ("ツ", "ヅ"), + ("テ", "デ"), + ("ト", "ドゥ"), + + ("ピ", "ヴィ"), + + // 鼻音 + ("ニ", "ディ"), + ("ヌ", "ズ"), + ("ネ", "ベ"), + ("ノ", "ド"), + + ("マ", "バ"), + ("ミ", "ヴィ"), + ("メ", "ベ"), + ("モ", "ボ"), + + // 流音 + ("リ", "ディ"), + ("レ", "リ"), + ("ロ", "ド"), + }); +} diff --git a/tests/Smdn.Text.Ondulish/Smdn.Text.Ondulish/KanaUtils.cs b/tests/Smdn.Text.Ondulish/Smdn.Text.Ondulish/KanaUtils.cs index d75c321..4d5a233 100644 --- a/tests/Smdn.Text.Ondulish/Smdn.Text.Ondulish/KanaUtils.cs +++ b/tests/Smdn.Text.Ondulish/Smdn.Text.Ondulish/KanaUtils.cs @@ -2,43 +2,43 @@ using NUnit.Framework; -namespace Smdn.Applications.OndulishTranslator { - [TestFixture] - public class KanaUtilsTests { - [TestCase("ABC", "ABC")] - [TestCase("abc", "abc")] - [TestCase("ABC", "ABC")] - [TestCase("abc", "abc")] - [TestCase("$%&", "$%&")] - [TestCase("あいうえお", "アイウエオ")] - [TestCase("アイウエオ", "アイウエオ")] - [TestCase("わをん", "ワヲン")] - [TestCase("日本語", "日本語")] - [TestCase("\u3040\u3041\u3096\u3097", "\u3040\u30a1\u30f6\u3097")] - public void TestConvertWideHiraganaToKatakana(string input, string expected) - { - Assert.AreEqual( - expected, - KanaUtils.ConvertWideHiraganaToKatakana(input) - ); - } +namespace Smdn.Text.Ondulish; - [TestCase("ABC", "ABC")] - [TestCase("abc", "abc")] - [TestCase("ABC", "ABC")] - [TestCase("abc", "abc")] - [TestCase("$%&", "$%&")] - [TestCase("アイウエオ", "あいうえお")] - [TestCase("あいうえお", "あいうえお")] - [TestCase("ワヲン", "わをん")] - [TestCase("日本語", "日本語")] - [TestCase("\u3040\u30a1\u30f6\u3097", "\u3040\u3041\u3096\u3097")] - public void TestConvertWideKatakanaToHiragana(string input, string expected) - { - Assert.AreEqual( - expected, - KanaUtils.ConvertWideKatakanaToHiragana(input) - ); - } +[TestFixture] +public class KanaUtilsTests { + [TestCase("ABC", "ABC")] + [TestCase("abc", "abc")] + [TestCase("ABC", "ABC")] + [TestCase("abc", "abc")] + [TestCase("$%&", "$%&")] + [TestCase("あいうえお", "アイウエオ")] + [TestCase("アイウエオ", "アイウエオ")] + [TestCase("わをん", "ワヲン")] + [TestCase("日本語", "日本語")] + [TestCase("\u3040\u3041\u3096\u3097", "\u3040\u30a1\u30f6\u3097")] + public void TestConvertWideHiraganaToKatakana(string input, string expected) + { + Assert.AreEqual( + expected, + KanaUtils.ConvertWideHiraganaToKatakana(input) + ); + } + + [TestCase("ABC", "ABC")] + [TestCase("abc", "abc")] + [TestCase("ABC", "ABC")] + [TestCase("abc", "abc")] + [TestCase("$%&", "$%&")] + [TestCase("アイウエオ", "あいうえお")] + [TestCase("あいうえお", "あいうえお")] + [TestCase("ワヲン", "わをん")] + [TestCase("日本語", "日本語")] + [TestCase("\u3040\u30a1\u30f6\u3097", "\u3040\u3041\u3096\u3097")] + public void TestConvertWideKatakanaToHiragana(string input, string expected) + { + Assert.AreEqual( + expected, + KanaUtils.ConvertWideKatakanaToHiragana(input) + ); } } \ No newline at end of file diff --git a/tests/Smdn.Text.Ondulish/Smdn.Text.Ondulish/Translator.cs b/tests/Smdn.Text.Ondulish/Smdn.Text.Ondulish/Translator.cs index 2724b93..04079d6 100644 --- a/tests/Smdn.Text.Ondulish/Smdn.Text.Ondulish/Translator.cs +++ b/tests/Smdn.Text.Ondulish/Smdn.Text.Ondulish/Translator.cs @@ -4,133 +4,133 @@ using NUnit.Framework; -namespace Smdn.Applications.OndulishTranslator { - [TestFixture] - public class TranslatorTests { - private static Translator Create() - { - var codeBaseDir = Path.GetDirectoryName( +namespace Smdn.Text.Ondulish; + +[TestFixture] +public class TranslatorTests { + private static Translator Create() + { + var codeBaseDir = Path.GetDirectoryName( #if NET6_0_OR_GREATER - Path.GetDirectoryName(Environment.ProcessPath) + Path.GetDirectoryName(Environment.ProcessPath) #else - Assembly.GetEntryAssembly().Location + Assembly.GetEntryAssembly().Location #endif - ); - var taggerArg = "-r " + Path.Combine(codeBaseDir, "mecabrc"); + ); + var taggerArg = "-r " + Path.Combine(codeBaseDir, "mecabrc"); - return new Translator(taggerArg, dictionaryDirectory: codeBaseDir); - } + return new Translator(taggerArg, dictionaryDirectory: codeBaseDir); + } - [TestCase("オンドゥル", "オンドゥル")] - [TestCase("変身", "ヘシン")] - [TestCase("橘さん", "ダディャーナザァーン")] - [TestCase("本当に裏切ったんですか", "オンドゥルルラギッタンディスカー")] - [TestCase("決着を", "ケッチャコ")] - [TestCase("俺は貴様をぶっころす", "オレァクサムヲムッコロス")] - [TestCase("俺は貴様をぶっ殺す", "オレァクサムヲムッコロス")] - [TestCase("俺は!貴様を!", "オレァ!クサムヲ!")] - [TestCase("俺は!貴様か!", "オレァ!クサムカァ!")] - [TestCase("俺の体はボロボロだ", "オデノカラダハボドボドダ")] - [TestCase("あいうえお", "ア゛イルエオ")] - [TestCase("#$%&'", "#$%&'")] - [TestCase(@""","", "","",", @""","","","",")] - [TestCase(@"変身😆😄", @"ヘシン😆😄")] - [TestCase(@"オンドゥル😆😄", @"オンドゥル😆😄")] - public void TestTranslate(string input, string expected) - { - using (var t = Create()) { - Assert.AreEqual( - expected, - t.Translate(input, convertKatakanaToNarrow: false).TrimEnd() - ); - } + [TestCase("オンドゥル", "オンドゥル")] + [TestCase("変身", "ヘシン")] + [TestCase("橘さん", "ダディャーナザァーン")] + [TestCase("本当に裏切ったんですか", "オンドゥルルラギッタンディスカー")] + [TestCase("決着を", "ケッチャコ")] + [TestCase("俺は貴様をぶっころす", "オレァクサムヲムッコロス")] + [TestCase("俺は貴様をぶっ殺す", "オレァクサムヲムッコロス")] + [TestCase("俺は!貴様を!", "オレァ!クサムヲ!")] + [TestCase("俺は!貴様か!", "オレァ!クサムカァ!")] + [TestCase("俺の体はボロボロだ", "オデノカラダハボドボドダ")] + [TestCase("あいうえお", "ア゛イルエオ")] + [TestCase("#$%&'", "#$%&'")] + [TestCase(@""","", "","",", @""","","","",")] + [TestCase(@"変身😆😄", @"ヘシン😆😄")] + [TestCase(@"オンドゥル😆😄", @"オンドゥル😆😄")] + public void TestTranslate(string input, string expected) + { + using (var t = Create()) { + Assert.AreEqual( + expected, + t.Translate(input, convertKatakanaToNarrow: false).TrimEnd() + ); } + } - [TestCase("オンドゥル", "オンドゥル")] - [TestCase("変身", "ヘシン")] - [TestCase("橘さん", "ダディャーナザァーン")] - [TestCase("俺は貴様をぶっころす", "オレァクサムヲムッコロス")] - [TestCase(@"変身😆😄", @"ヘシン😆😄")] - [TestCase("あいするな", "ア゙イドゥルダ")] - public void TestTranslateToNarrowKatakana(string input, string expected) - { - using (var t = Create()) { - Assert.AreEqual( - expected, - t.Translate(input, convertKatakanaToNarrow: true).TrimEnd() - ); - } + [TestCase("オンドゥル", "オンドゥル")] + [TestCase("変身", "ヘシン")] + [TestCase("橘さん", "ダディャーナザァーン")] + [TestCase("俺は貴様をぶっころす", "オレァクサムヲムッコロス")] + [TestCase(@"変身😆😄", @"ヘシン😆😄")] + [TestCase("あいするな", "ア゙イドゥルダ")] + public void TestTranslateToNarrowKatakana(string input, string expected) + { + using (var t = Create()) { + Assert.AreEqual( + expected, + t.Translate(input, convertKatakanaToNarrow: true).TrimEnd() + ); } + } - [TestCase("相手は俺だ", "アンギョン和田")] // be translated terms with kanji chars - [TestCase("貴様、相手は俺だ", "チサマ、アンギョン和田")] - public void TestTranslate_SpecialCase(string input, string expected) - { - using (var t = Create()) { - Assert.AreEqual( - expected, - t.Translate(input, convertKatakanaToNarrow: false).TrimEnd() - ); - } + [TestCase("相手は俺だ", "アンギョン和田")] // be translated terms with kanji chars + [TestCase("貴様、相手は俺だ", "チサマ、アンギョン和田")] + public void TestTranslate_SpecialCase(string input, string expected) + { + using (var t = Create()) { + Assert.AreEqual( + expected, + t.Translate(input, convertKatakanaToNarrow: false).TrimEnd() + ); } + } - [TestCase("めかぶ", "ベカム")] - [TestCase("かてる", "カデドゥ")] - [TestCase("あいするな", "ア゛イドゥルダ")] - [TestCase("あいする", "ア゛イドゥル")] - [TestCase("あいすな", "ア゛イスダ")] - [TestCase("あいす", "ア゛イズ")] - [TestCase("あいでも", "ア゛イデロ")] - [TestCase("あいで", "ア゛イデ")] - [TestCase("あいに", "ア゛イディ")] - [TestCase("あいる", "ア゛イドゥ")] - [TestCase("ぼーる", "ポッドゥ")] - [TestCase("ばーる", "バードゥ")] - [TestCase("おんどぅる", "オンドゥル")] - [TestCase("おんどぅ", "オンドゥ")] - public void TestTranslatePhoneme(string input, string expected) - { - using (var t = Create()) { - Assert.AreEqual( - expected, - t.Translate(input, convertKatakanaToNarrow: false).TrimEnd() - ); - } + [TestCase("めかぶ", "ベカム")] + [TestCase("かてる", "カデドゥ")] + [TestCase("あいするな", "ア゛イドゥルダ")] + [TestCase("あいする", "ア゛イドゥル")] + [TestCase("あいすな", "ア゛イスダ")] + [TestCase("あいす", "ア゛イズ")] + [TestCase("あいでも", "ア゛イデロ")] + [TestCase("あいで", "ア゛イデ")] + [TestCase("あいに", "ア゛イディ")] + [TestCase("あいる", "ア゛イドゥ")] + [TestCase("ぼーる", "ポッドゥ")] + [TestCase("ばーる", "バードゥ")] + [TestCase("おんどぅる", "オンドゥル")] + [TestCase("おんどぅ", "オンドゥ")] + public void TestTranslatePhoneme(string input, string expected) + { + using (var t = Create()) { + Assert.AreEqual( + expected, + t.Translate(input, convertKatakanaToNarrow: false).TrimEnd() + ); } + } - [Test] - public void TestTranslateDictionaryTerm_Words() - { - using (var t = Create()) { - foreach (var pair in t.WordDictionary) { - const string inputPrepend = "あ"; - const string outputPrepend = "ア゛"; - const string inputAppend = "う"; - const string outputAppend = "ル"; + [Test] + public void TestTranslateDictionaryTerm_Words() + { + using (var t = Create()) { + foreach (var pair in t.WordDictionary) { + const string inputPrepend = "あ"; + const string outputPrepend = "ア゛"; + const string inputAppend = "う"; + const string outputAppend = "ル"; - Assert.AreEqual( - outputPrepend + pair.Value + outputAppend, - t.Translate(inputPrepend + pair.Key + inputAppend, convertKatakanaToNarrow: false).TrimEnd() - ); - } + Assert.AreEqual( + outputPrepend + pair.Value + outputAppend, + t.Translate(inputPrepend + pair.Key + inputAppend, convertKatakanaToNarrow: false).TrimEnd() + ); } } + } - [Test] - public void TestTranslateDictionaryTerm_Phrases() - { - using (var t = Create()) { - foreach (var pair in t.PhraseDictionary) { - const string inputPrepend = "あ"; - const string outputPrepend = "ア゛"; - const string inputAppend = "う"; - const string outputAppend = "ル"; + [Test] + public void TestTranslateDictionaryTerm_Phrases() + { + using (var t = Create()) { + foreach (var pair in t.PhraseDictionary) { + const string inputPrepend = "あ"; + const string outputPrepend = "ア゛"; + const string inputAppend = "う"; + const string outputAppend = "ル"; - Assert.AreEqual( - outputPrepend + pair.Value + outputAppend, - t.Translate(inputPrepend + pair.Key + inputAppend, convertKatakanaToNarrow: false).TrimEnd() - ); - } + Assert.AreEqual( + outputPrepend + pair.Value + outputAppend, + t.Translate(inputPrepend + pair.Key + inputAppend, convertKatakanaToNarrow: false).TrimEnd() + ); } } }