From fcb5932d762f3ef7be2deea694f589129c9fa541 Mon Sep 17 00:00:00 2001 From: berryzplus Date: Mon, 29 Mar 2021 23:13:38 +0900 Subject: [PATCH] =?UTF-8?q?CCodeBase=E3=81=AE=E5=A4=89=E6=8F=9B=E3=83=86?= =?UTF-8?q?=E3=82=B9=E3=83=88=E3=82=92=E8=BF=BD=E5=8A=A0=E3=81=99=E3=82=8B?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 無理なく実装できる部分を先行して実装する。 --- sakura_core/charset/CCodeBase.h | 48 +++- sakura_core/charset/CCodeFactory.h | 12 +- tests/unittests/test-ccodebase.cpp | 385 ++++++++++++++++++++++++++++- 3 files changed, 440 insertions(+), 5 deletions(-) diff --git a/sakura_core/charset/CCodeBase.h b/sakura_core/charset/CCodeBase.h index 2caab31323..97ae94bbcc 100644 --- a/sakura_core/charset/CCodeBase.h +++ b/sakura_core/charset/CCodeBase.h @@ -27,6 +27,10 @@ #define SAKURA_CCODEBASE_1AB194FB_933C_495E_A3A3_62E117C72644_H_ #pragma once +#include +#include +#include + #include "mem/CNativeW.h" #include "CEol.h" @@ -39,6 +43,12 @@ enum EConvertResult{ struct CommonSetting_Statusbar; +//! Type representing the source binary sequence to convert (a std::basic_string_view). +using BinarySequenceView = std::basic_string_view; + +//! Type representing the binary sequence restored from Unicode (a std::basic_string). +using BinarySequence = std::basic_string; + /*! 文字コード基底クラス。 @@ -47,8 +57,42 @@ struct CommonSetting_Statusbar; */ class CCodeBase{ public: - virtual ~CCodeBase(){} -// virtual bool IsCode(const CMemory* pMem){return false;} //!< 特定コードであればtrue + virtual ~CCodeBase() noexcept = default; + + /*! + Converts a byte sequence in the specific character code to Unicode by copying cSrc into a CMemory and delegating to CodeToUnicode(const CMemory&, CNativeW*). + + @param [in] cSrc binary sequence to convert + @param [out,opt] pResult optional; when non-null it receives true only if the delegated conversion returned RESULT_COMPLETE + @returns Unicode string in the form specified by Sakura Editor + */ + virtual CNativeW CodeToUnicode( BinarySequenceView cSrc, bool* pResult = nullptr ) + { + CMemory cmemSrc( cSrc.data(), cSrc.size() ); + CNativeW cDest; + auto result = CodeToUnicode( cmemSrc, &cDest ); + if( pResult ){ + *pResult = result == RESULT_COMPLETE; + } + return cDest; + } + + /*! Converts a Unicode sequence to the specific character code: delegates to the CMemory-based UnicodeToCode overload and returns the raw result bytes as a BinarySequence; pResult, when non-null, receives true only for RESULT_COMPLETE.
+ Unicodeを特定コードにデコードする + + @param [in] cSrc 変換対象のUnicodeシーケンス + @param [out,opt] pResult 変換結果を受け取る変数 + @returns バイナリシーケンス + */ + virtual BinarySequence UnicodeToCode( const CNativeW& cSrc, bool* pResult = nullptr ) + { + CMemory cDest; + auto result = UnicodeToCode( cSrc, &cDest ); + if( pResult ){ + *pResult = result == RESULT_COMPLETE; + } + return BinarySequence( static_cast(cDest.GetRawPtr()), cDest.GetRawLength() ); + } //文字コード変換 virtual EConvertResult CodeToUnicode(const CMemory& cSrc, CNativeW* pDst)=0; //!< 特定コード → UNICODE 変換 diff --git a/sakura_core/charset/CCodeFactory.h b/sakura_core/charset/CCodeFactory.h index 1f3d8f397e..85eb6a6133 100644 --- a/sakura_core/charset/CCodeFactory.h +++ b/sakura_core/charset/CCodeFactory.h @@ -27,7 +27,8 @@ #define SAKURA_CCODEFACTORY_A5C6C204_F9BD_42BA_A5CD_1B086833CCA4_H_ #pragma once -class CCodeBase; +#include +#include "charset/CCodeBase.h" class CCodeFactory{ public: @@ -36,5 +37,14 @@ class CCodeFactory{ ECodeType eCodeType, //!< 文字コード int nFlag //!< bit 0: MIME Encodeされたヘッダをdecodeするかどうか ); + + //! Creates the CCodeBase instance matching eCodeType: calls CreateCodeBase( eCodeType, 0 ) and hands the result to a std::unique_ptr + static std::unique_ptr CreateCodeBase( + ECodeType eCodeType //!< character code + ) + { + return std::unique_ptr( CreateCodeBase( eCodeType, 0 ) ); + } }; + #endif /* SAKURA_CCODEFACTORY_A5C6C204_F9BD_42BA_A5CD_1B086833CCA4_H_ */ diff --git a/tests/unittests/test-ccodebase.cpp b/tests/unittests/test-ccodebase.cpp index 5fe4c60ca9..d1692badeb 100644 --- a/tests/unittests/test-ccodebase.cpp +++ b/tests/unittests/test-ccodebase.cpp @@ -23,8 +23,9 @@ */ #include -#include -#include "charset/CCodeBase.h" +#include "charset/CCodeFactory.h" + +#include TEST(CCodeBase, MIMEHeaderDecode) { @@ -64,3 +65,383 @@ TEST(CCodeBase, MIMEHeaderDecode) EXPECT_TRUE(CCodeBase::MIMEHeaderDecode(source6.c_str(), source6.length(), &m, CODE_JIS)); EXPECT_STREQ(static_cast(m.GetRawPtr()), source6.c_str()); } + +/*! Character-code conversion test for Shift-JIS (codeSJis).
+ * @brief 文字コード変換のテスト + */ +TEST(CCodeBase, codeSJis) +{ + const auto eCodeType = CODE_SJIS; + auto pCodeBase = CCodeFactory::CreateCodeBase( eCodeType ); + + // 7-bit ASCII range (identity conversion); the view passed below spans _countof(mbsAscii) bytes, i.e. it includes the literal's terminating NUL + constexpr const auto& mbsAscii = "\x01\x02\x03\x04\x05\x06\a\b\t\n\v\f\r\x0E\x0F\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1A\x1B\x1C\x1D\x1E\x1F !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\x7F"; + constexpr const auto& wcsAscii = L"\x01\x02\x03\x04\x05\x06\a\b\t\n\v\f\r\x0E\x0F\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1A\x1B\x1C\x1D\x1E\x1F !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\x7F"; + + bool bComplete1_1 = false; + auto encoded1 = pCodeBase->CodeToUnicode( BinarySequenceView( reinterpret_cast(mbsAscii), _countof(mbsAscii) ), &bComplete1_1 ); + EXPECT_STREQ( wcsAscii, encoded1.GetStringPtr() ); + EXPECT_TRUE( bComplete1_1 ); + + bool bComplete1_2 = false; + auto decoded1 = pCodeBase->UnicodeToCode( encoded1, &bComplete1_2 ); + EXPECT_EQ( 0, memcmp( mbsAscii, decoded1.data(), decoded1.size() ) ); + EXPECT_TRUE( bComplete1_2 ); + + // Kana/kanji conversion (Shift-JIS specification) + constexpr const auto& wcsKanaKanji = L"カナかなカナ漢字"; + constexpr const auto& mbsKanaKanji = "\xB6\xC5\x82\xA9\x82\xC8\x83\x4A\x83\x69\x8A\xBF\x8E\x9A"; + + bool bComplete2_1 = false; + auto encoded2 = pCodeBase->CodeToUnicode( BinarySequenceView( reinterpret_cast(mbsKanaKanji), _countof(mbsKanaKanji) ), &bComplete2_1 ); + ASSERT_STREQ( wcsKanaKanji, encoded2.GetStringPtr() ); + ASSERT_TRUE( bComplete2_1 ); + + bool bComplete2_2 = false; + auto decoded2 = pCodeBase->UnicodeToCode( encoded2, &bComplete2_2 ); + ASSERT_EQ( 0, memcmp( mbsKanaKanji, decoded2.data(), decoded2.size() ) ); + ASSERT_TRUE( bComplete2_2 ); + + // Characters that cannot be converted from Unicode (Shift-JIS specification) + // 1. A character that can be converted SJIS => Unicode but cannot be converted back is regarded as a conversion failure. + // This applies to the roughly 400 characters called the NEC-selected IBM extension characters. + // 2. Lead byte out of range + // (ch1 >= 0x81 && ch1 <= 0x9F) || + // (ch1 >= 0xE0 && ch1 <= 0xFC) + // 3. Trail byte out of range:
後続バイトが範囲外 + // ch2 >= 0x40 && ch2 != 0xFC && + // ch2 <= 0x7F + constexpr const auto& mbsCantConvSJis = + "\x87\x40\xED\x40\xFA\x40" // "①纊ⅰ" NEC extension, NEC-selected IBM extension, IBM extension + "\x80\x40\xFD\x40\xFE\x40\xFF\x40" // invalid first byte + "\x81\x0A\x81\x7F\x81\xFD\x81\xFE\x81\xFF" // invalid second byte + ; + constexpr const auto& wcsCantConvSJis = + L"①\xDCED\xDC40ⅰ" + L"\xDC80@\xDCFD@\xDCFE@\xDCFF@" + L"\xDC81\n\xDC81\x7F\xDC81\xDCFD\xDC81\xDCFE\xDC81\xDCFF" + ; + + bool bComplete3_1 = true; + auto encoded3 = pCodeBase->CodeToUnicode( BinarySequenceView( reinterpret_cast(mbsCantConvSJis), _countof(mbsCantConvSJis) ), &bComplete3_1 ); + ASSERT_STREQ( wcsCantConvSJis, encoded3.GetStringPtr() ); + ASSERT_TRUE( bComplete3_1 ); //👈 Specification bug: the input cannot be converted, so false should be returned. + + // Characters that cannot be converted from Unicode (Shift-JIS specification) + constexpr const auto& wcsOGuy = L"森鷗外"; + constexpr const auto& mbsOGuy = "\x90\x58\x3F\x8A\x4F"; //森?外 + + bool bComplete4_2 = true; + auto decoded4 = pCodeBase->UnicodeToCode( wcsOGuy, &bComplete4_2 ); + ASSERT_EQ( 0, memcmp( mbsOGuy, decoded4.data(), decoded4.size() ) ); + ASSERT_FALSE( bComplete4_2 ); +} + +/*! Character-code conversion test for EUC-JP (codeEucJp).
+ * @brief 文字コード変換のテスト + */ +TEST(CCodeBase, codeEucJp) +{ + const auto eCodeType = CODE_EUC; + auto pCodeBase = CCodeFactory::CreateCodeBase( eCodeType ); + + // 7-bit ASCII range (identity conversion); the view passed below spans _countof(mbsAscii) bytes, i.e. it includes the literal's terminating NUL + constexpr const auto& mbsAscii = "\x01\x02\x03\x04\x05\x06\a\b\t\n\v\f\r\x0E\x0F\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1A\x1B\x1C\x1D\x1E\x1F !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\x7F"; + constexpr const auto& wcsAscii = L"\x01\x02\x03\x04\x05\x06\a\b\t\n\v\f\r\x0E\x0F\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1A\x1B\x1C\x1D\x1E\x1F !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\x7F"; + + bool bComplete1_1 = false; + auto encoded1 = pCodeBase->CodeToUnicode( BinarySequenceView( reinterpret_cast(mbsAscii), _countof(mbsAscii) ), &bComplete1_1 ); + EXPECT_STREQ( wcsAscii, encoded1.GetStringPtr() ); + EXPECT_TRUE( bComplete1_1 ); + + bool bComplete1_2 = false; + auto decoded1 = pCodeBase->UnicodeToCode( encoded1, &bComplete1_2 ); + EXPECT_EQ( 0, memcmp( mbsAscii, decoded1.data(), decoded1.size() ) ); + EXPECT_TRUE( bComplete1_2 ); + + // Kana/kanji conversion (EUC-JP specification) + constexpr const auto& wcsKanaKanji = L"カナかなカナ漢字"; + constexpr const auto& mbsKanaKanji = "\x8E\xB6\x8E\xC5\xA4\xAB\xA4\xCA\xA5\xAB\xA5\xCA\xB4\xC1\xBB\xFA"; + + bool bComplete2_1 = false; + auto encoded2 = pCodeBase->CodeToUnicode( BinarySequenceView( reinterpret_cast(mbsKanaKanji), _countof(mbsKanaKanji) ), &bComplete2_1 ); + ASSERT_STREQ( wcsKanaKanji, encoded2.GetStringPtr() ); + ASSERT_TRUE( bComplete2_1 ); + + bool bComplete2_2 = false; + auto decoded2 = pCodeBase->UnicodeToCode( encoded2, &bComplete2_2 ); + ASSERT_EQ( 0, memcmp( mbsKanaKanji, decoded2.data(), decoded2.size() ) ); + ASSERT_TRUE( bComplete2_2 ); + + // Characters that cannot be converted from Unicode (EUC-JP specification) + // (pending: the input and expected literals below are still empty) + constexpr const auto& mbsCantConvEucJp = + "" // invalid first byte + "" // invalid second byte + ; + constexpr const auto& wcsCantConvEucJp = + L"" + L"" + ; + + bool bComplete3_1 = true; +
auto encoded3 = pCodeBase->CodeToUnicode( BinarySequenceView( reinterpret_cast(mbsCantConvEucJp), _countof(mbsCantConvEucJp) ), &bComplete3_1 ); + //ASSERT_STREQ( wcsCantConvEucJp, encoded3.GetStringPtr() ); + //ASSERT_FALSE( bComplete3_1 ); + + // Characters that cannot be converted from Unicode (EUC-JP specification) + constexpr const auto& wcsOGuy = L"森鷗外"; + constexpr const auto& mbsOGuy = "\xBF\xB9\x3F\xB3\xB0"; //森?外 + + // Proper EUC-JP can represent "森鷗外" exactly, so the behavior asserted here is considered a defect. + //constexpr const auto& wcsOGuy = L"森鷗外"; + //constexpr const auto& mbsOGuy = "\xBF\xB9\x8F\xEC\xBF\xB3\xB0"; + + bool bComplete4_2 = true; + auto decoded4 = pCodeBase->UnicodeToCode( wcsOGuy, &bComplete4_2 ); + ASSERT_EQ( 0, memcmp( mbsOGuy, decoded4.data(), decoded4.size() ) ); + ASSERT_FALSE( bComplete4_2 ); +} + +/*! + * @brief Character-code conversion test for Latin1 (ASCII round trip only) + */ +TEST(CCodeBase, codeLatin1) +{ + const auto eCodeType = CODE_LATIN1; + auto pCodeBase = CCodeFactory::CreateCodeBase( eCodeType ); + + // 7-bit ASCII range (identity conversion); the view passed below spans _countof(mbsAscii) bytes, i.e. it includes the literal's terminating NUL + constexpr const auto& mbsAscii = "\x01\x02\x03\x04\x05\x06\a\b\t\n\v\f\r\x0E\x0F\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1A\x1B\x1C\x1D\x1E\x1F !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\x7F"; + constexpr const auto& wcsAscii = L"\x01\x02\x03\x04\x05\x06\a\b\t\n\v\f\r\x0E\x0F\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1A\x1B\x1C\x1D\x1E\x1F !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\x7F"; + + bool bComplete1_1 = false; + auto encoded1 = pCodeBase->CodeToUnicode( BinarySequenceView( reinterpret_cast(mbsAscii), _countof(mbsAscii) ), &bComplete1_1 ); + EXPECT_STREQ( wcsAscii, encoded1.GetStringPtr() ); + EXPECT_TRUE( bComplete1_1 ); + + bool bComplete1_2 = false; + auto decoded1 = pCodeBase->UnicodeToCode( encoded1, &bComplete1_2 ); + EXPECT_EQ( 0, memcmp( mbsAscii, decoded1.data(), decoded1.size() ) ); + EXPECT_TRUE( bComplete1_2 ); + + // Latin1 does not support kana/kanji conversion +} + +/*! Character-code conversion test for UTF-8 (codeUtf8).
+ * @brief 文字コード変換のテスト + */ +TEST(CCodeBase, codeUtf8) +{ + const auto eCodeType = CODE_UTF8; + auto pCodeBase = CCodeFactory::CreateCodeBase( eCodeType ); + + // 7-bit ASCII range (identity conversion); the view passed below spans _countof(mbsAscii) bytes, i.e. it includes the literal's terminating NUL + constexpr const auto& mbsAscii = "\x01\x02\x03\x04\x05\x06\a\b\t\n\v\f\r\x0E\x0F\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1A\x1B\x1C\x1D\x1E\x1F !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\x7F"; + constexpr const auto& wcsAscii = L"\x01\x02\x03\x04\x05\x06\a\b\t\n\v\f\r\x0E\x0F\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1A\x1B\x1C\x1D\x1E\x1F !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\x7F"; + + bool bComplete1_1 = false; + auto encoded1 = pCodeBase->CodeToUnicode( BinarySequenceView( reinterpret_cast(mbsAscii), _countof(mbsAscii) ), &bComplete1_1 ); + EXPECT_STREQ( wcsAscii, encoded1.GetStringPtr() ); + EXPECT_TRUE( bComplete1_1 ); + + bool bComplete1_2 = false; + auto decoded1 = pCodeBase->UnicodeToCode( encoded1, &bComplete1_2 ); + EXPECT_EQ( 0, memcmp( mbsAscii, decoded1.data(), decoded1.size() ) ); + EXPECT_TRUE( bComplete1_2 ); + + // Kana/kanji conversion (UTF-8 specification); the expected bytes come from the u8 literal of the same text + constexpr const auto& wcsKanaKanji = L"カナかなカナ漢字"; + constexpr const auto& mbsKanaKanji = u8"カナかなカナ漢字"; + + bool bComplete2_1 = false; + auto encoded2 = pCodeBase->CodeToUnicode( BinarySequenceView( reinterpret_cast(mbsKanaKanji), _countof(mbsKanaKanji) ), &bComplete2_1 ); + ASSERT_STREQ( wcsKanaKanji, encoded2.GetStringPtr() ); + ASSERT_TRUE( bComplete2_1 ); + + bool bComplete2_2 = false; + auto decoded2 = pCodeBase->UnicodeToCode( encoded2, &bComplete2_2 ); + ASSERT_EQ( 0, memcmp( mbsKanaKanji, decoded2.data(), decoded2.size() ) ); + ASSERT_TRUE( bComplete2_2 ); +} + +/*! Character-code conversion test for CESU-8 (codeUtf8_OracleImplementation).
+ * @brief 文字コード変換のテスト + */ +TEST(CCodeBase, codeUtf8_OracleImplementation) +{ + const auto eCodeType = CODE_CESU8; + auto pCodeBase = CCodeFactory::CreateCodeBase( eCodeType ); + + // 7-bit ASCII range (identity conversion); the view passed below spans _countof(mbsAscii) bytes, i.e. it includes the literal's terminating NUL + constexpr const auto& mbsAscii = "\x01\x02\x03\x04\x05\x06\a\b\t\n\v\f\r\x0E\x0F\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1A\x1B\x1C\x1D\x1E\x1F !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\x7F"; + constexpr const auto& wcsAscii = L"\x01\x02\x03\x04\x05\x06\a\b\t\n\v\f\r\x0E\x0F\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1A\x1B\x1C\x1D\x1E\x1F !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\x7F"; + + bool bComplete1_1 = false; + auto encoded1 = pCodeBase->CodeToUnicode( BinarySequenceView( reinterpret_cast(mbsAscii), _countof(mbsAscii) ), &bComplete1_1 ); + EXPECT_STREQ( wcsAscii, encoded1.GetStringPtr() ); + EXPECT_TRUE( bComplete1_1 ); + + bool bComplete1_2 = false; + auto decoded1 = pCodeBase->UnicodeToCode( encoded1, &bComplete1_2 ); + EXPECT_EQ( 0, memcmp( mbsAscii, decoded1.data(), decoded1.size() ) ); + EXPECT_TRUE( bComplete1_2 ); + + // Kana/kanji conversion (UTF-8 specification); the expected bytes come from the u8 literal of the same text + constexpr const auto& wcsKanaKanji = L"カナかなカナ漢字"; + constexpr const auto& mbsKanaKanji = u8"カナかなカナ漢字"; + + bool bComplete2_1 = false; + auto encoded2 = pCodeBase->CodeToUnicode( BinarySequenceView( reinterpret_cast(mbsKanaKanji), _countof(mbsKanaKanji) ), &bComplete2_1 ); + ASSERT_STREQ( wcsKanaKanji, encoded2.GetStringPtr() ); + ASSERT_TRUE( bComplete2_1 ); + + bool bComplete2_2 = false; + auto decoded2 = pCodeBase->UnicodeToCode( encoded2, &bComplete2_2 ); + ASSERT_EQ( 0, memcmp( mbsKanaKanji, decoded2.data(), decoded2.size() ) ); + ASSERT_TRUE( bComplete2_2 ); +} + +/*! Character-code conversion test for UTF-16LE (codeUtf16Le).
+ * @brief 文字コード変換のテスト + */ +TEST(CCodeBase, codeUtf16Le) +{ + const auto eCodeType = CODE_UNICODE; + auto pCodeBase = CCodeFactory::CreateCodeBase( eCodeType ); + + // 7-bit ASCII range (identity conversion) + constexpr auto& mbsAscii = "\x01\x02\x03\x04\x05\x06\a\b\t\n\v\f\r\x0E\x0F\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1A\x1B\x1C\x1D\x1E\x1F !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\x7F"; + constexpr auto& wcsAscii = L"\x01\x02\x03\x04\x05\x06\a\b\t\n\v\f\r\x0E\x0F\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1A\x1B\x1C\x1D\x1E\x1F !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\x7F"; + + // Build the little-endian binary: every element of the literal (the range-for also visits its terminating NUL) is appended to bin as one element + std::basic_string bin; + for( const auto ch : mbsAscii ){ + bin.append( 1, ch ); + } + + bool bComplete1_1 = false; + auto encoded1 = pCodeBase->CodeToUnicode( BinarySequenceView( reinterpret_cast(bin.data()), bin.size() * sizeof(decltype(bin)::value_type) ), &bComplete1_1 ); + ASSERT_STREQ( wcsAscii, encoded1.GetStringPtr() ); + ASSERT_TRUE( bComplete1_1 ); + + bool bComplete1_2 = false; + auto decoded1 = pCodeBase->UnicodeToCode( encoded1, &bComplete1_2 ); + ASSERT_EQ( 0, memcmp( bin.data(), decoded1.data(), decoded1.size() ) ); + ASSERT_TRUE( bComplete1_2 ); + + // Kana/kanji conversion (UTF-16LE specification) + constexpr const auto& wcsKanaKanji = L"カナかなカナ漢字"; + + // Build the little-endian binary: bin is reused after clear() and filled from the wide literal element by element + bin.clear(); + for( const auto ch : wcsKanaKanji ){ + bin.append( 1, ch ); + } + + bool bComplete2_1 = false; + auto encoded2 = pCodeBase->CodeToUnicode( BinarySequenceView( reinterpret_cast(bin.data()), bin.size() * sizeof(decltype(bin)::value_type) ), &bComplete2_1 ); + ASSERT_STREQ( wcsKanaKanji, encoded2.GetStringPtr() ); + ASSERT_TRUE( bComplete2_1 ); + + bool bComplete2_2 = false; + auto decoded2 = pCodeBase->UnicodeToCode( encoded2, &bComplete2_2 ); + ASSERT_EQ( 0, memcmp( bin.data(), decoded2.data(), decoded2.size() ) ); + ASSERT_TRUE( bComplete2_2 ); +} + +/*! Character-code conversion test for UTF-16BE (codeUtf16Be).
+ * @brief 文字コード変換のテスト + */ +TEST(CCodeBase, codeUtf16Be) +{ + const auto eCodeType = CODE_UNICODEBE; + auto pCodeBase = CCodeFactory::CreateCodeBase( eCodeType ); + + // 7-bit ASCII range (identity conversion) + constexpr auto& mbsAscii = "\x01\x02\x03\x04\x05\x06\a\b\t\n\v\f\r\x0E\x0F\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1A\x1B\x1C\x1D\x1E\x1F !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\x7F"; + constexpr auto& wcsAscii = L"\x01\x02\x03\x04\x05\x06\a\b\t\n\v\f\r\x0E\x0F\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1A\x1B\x1C\x1D\x1E\x1F !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\x7F"; + + // Build the big-endian binary: every element of the literal (the range-for also visits its terminating NUL) is byte-swapped with _byteswap_ushort and appended to bin + std::basic_string bin; + for( const auto ch : mbsAscii ){ + bin.append( 1, ::_byteswap_ushort( ch ) ); + } + + bool bComplete1_1 = false; + auto encoded = pCodeBase->CodeToUnicode( BinarySequenceView( reinterpret_cast(bin.data()), bin.size() * sizeof(decltype(bin)::value_type)), &bComplete1_1 ); + ASSERT_STREQ( wcsAscii, encoded.GetStringPtr() ); + ASSERT_TRUE( bComplete1_1 ); + + bool bComplete1_2 = false; + auto decoded = pCodeBase->UnicodeToCode( encoded, &bComplete1_2 ); + ASSERT_EQ( 0, memcmp( bin.data(), decoded.data(), decoded.size() ) ); + ASSERT_TRUE( bComplete1_2 ); + + // Kana/kanji conversion (UTF-16BE specification) + constexpr const auto& wcsKanaKanji = L"カナかなカナ漢字"; + + // Build the big-endian binary: bin is reused after clear() and filled with the byte-swapped elements of the wide literal + bin.clear(); + for( const auto ch : wcsKanaKanji ){ + bin.append( 1, ::_byteswap_ushort( ch ) ); + } + + bool bComplete2_1 = false; + auto encoded2 = pCodeBase->CodeToUnicode( BinarySequenceView( reinterpret_cast(bin.data()), bin.size() * sizeof(decltype(bin)::value_type) ), &bComplete2_1 ); + ASSERT_STREQ( wcsKanaKanji, encoded2.GetStringPtr() ); + ASSERT_TRUE( bComplete2_1 ); + + bool bComplete2_2 = false; + auto decoded2 = pCodeBase->UnicodeToCode( encoded2, &bComplete2_2 ); + ASSERT_EQ( 0, memcmp( bin.data(), decoded2.data(), decoded2.size() ) ); + ASSERT_TRUE( bComplete2_2 ); +} + +/*! Character-code conversion test for UTF-32LE (codeUtf32Le).
+ * @brief 文字コード変換のテスト + */ +TEST(CCodeBase, codeUtf32Le) +{ + const auto eCodeType = (ECodeType)12000; + auto pCodeBase = CCodeFactory::CreateCodeBase( eCodeType ); + + // 7-bit ASCII range (identity conversion) + constexpr auto& mbsAscii = "\x01\x02\x03\x04\x05\x06\a\b\t\n\v\f\r\x0E\x0F\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1A\x1B\x1C\x1D\x1E\x1F !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\x7F"; + constexpr auto& wcsAscii = L"\x01\x02\x03\x04\x05\x06\a\b\t\n\v\f\r\x0E\x0F\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1A\x1B\x1C\x1D\x1E\x1F !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\x7F"; + + // Build the little-endian binary: every element of the literal (the range-for also visits its terminating NUL) is appended to bin as one element + std::basic_string bin; + for( const auto ch : mbsAscii ){ + bin.append( 1, ch ); + } + + bool bComplete1_1 = false; + auto encoded = pCodeBase->CodeToUnicode( BinarySequenceView( reinterpret_cast(bin.data()), bin.size() * sizeof(decltype(bin)::value_type)), &bComplete1_1 ); + ASSERT_STREQ( wcsAscii, encoded.GetStringPtr() ); + ASSERT_TRUE( bComplete1_1 ); + + bool bComplete1_2 = false; + auto decoded = pCodeBase->UnicodeToCode( encoded, &bComplete1_2 ); + ASSERT_EQ( 0, memcmp( bin.data(), decoded.data(), decoded.size() ) ); + ASSERT_TRUE( bComplete1_2 ); + + // Kana/kanji conversion (UTF-32LE specification) + constexpr const auto& wcsKanaKanji = L"カナかなカナ漢字"; + + // Build the little-endian binary: bin is reused after clear() and filled from the wide literal element by element + bin.clear(); + for( const auto ch : wcsKanaKanji ){ + bin.append( 1, ch ); + } + + bool bComplete2_1 = false; + auto encoded2 = pCodeBase->CodeToUnicode( BinarySequenceView( reinterpret_cast(bin.data()), bin.size() * sizeof(decltype(bin)::value_type) ), &bComplete2_1 ); + ASSERT_STREQ( wcsKanaKanji, encoded2.GetStringPtr() ); + ASSERT_TRUE( bComplete2_1 ); + + bool bComplete2_2 = false; + auto decoded2 = pCodeBase->UnicodeToCode( encoded2, &bComplete2_2 ); + ASSERT_EQ( 0, memcmp( bin.data(), decoded2.data(), decoded2.size() ) ); + ASSERT_TRUE( bComplete2_2 ); +}