From dd0c526f63046fbe14c111abbec610436b498445 Mon Sep 17 00:00:00 2001 From: "D.Miwa" Date: Sat, 11 May 2024 15:26:50 +0900 Subject: [PATCH 1/3] =?UTF-8?q?=E3=83=95=E3=82=A1=E3=82=A4=E3=83=AB?= =?UTF-8?q?=E8=AA=AD=E3=81=BF=E8=BE=BC=E3=81=BF=E3=81=AB=E6=8E=9B=E3=81=8B?= =?UTF-8?q?=E3=82=8B=E6=99=82=E9=96=93=E3=82=92=E7=9F=AD=E7=B8=AE=E3=81=99?= =?UTF-8?q?=E3=82=8B?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- sakura_core/charset/CCodeBase.cpp | 2 +- sakura_core/charset/CUtf8.cpp | 2 +- sakura_core/charset/codechecker.h | 5 ++--- sakura_core/mem/CNativeW.cpp | 2 +- sakura_core/parse/CWordParse.cpp | 2 +- sakura_core/view/CTextMetrics.cpp | 2 +- 6 files changed, 7 insertions(+), 8 deletions(-) diff --git a/sakura_core/charset/CCodeBase.cpp b/sakura_core/charset/CCodeBase.cpp index 62877bbe52..5f525503b9 100644 --- a/sakura_core/charset/CCodeBase.cpp +++ b/sakura_core/charset/CCodeBase.cpp @@ -55,7 +55,7 @@ std::wstring CCodeBase::CodeToHex(const CNativeW& cSrc, const CommonSetting_Stat EConvertResult CCodeBase::UnicodeToHex(const wchar_t* cSrc, const int iSLen, WCHAR* pDst, const CommonSetting_Statusbar* psStatusbar) { // IVS - if (iSLen >= 3 && IsVariationSelector(cSrc + 1)) { + if (iSLen >= 3 && IsVariationSelector(cSrc + 1, iSLen - 1)) { if (psStatusbar->m_bDispSPCodepoint) { auto_sprintf(pDst, L"%04X, U+%05X", cSrc[0], ConvertToUtf32(cSrc + 1)); } diff --git a/sakura_core/charset/CUtf8.cpp b/sakura_core/charset/CUtf8.cpp index 73016d2fe6..8357ad373c 100644 --- a/sakura_core/charset/CUtf8.cpp +++ b/sakura_core/charset/CUtf8.cpp @@ -227,7 +227,7 @@ EConvertResult CUtf8::_UnicodeToHex(const wchar_t* cSrc, const int iSLen, WCHAR* if (IsUTF16High(cSrc[0]) && iSLen >= 2 && IsUTF16Low(cSrc[1])) { cBuff._GetMemory()->SetRawDataHoldBuffer(cSrc, 4); } - else if (iSLen >= 3 && IsVariationSelector(cSrc + 1)) { + else if (iSLen >= 3 && IsVariationSelector(cSrc + 1, iSLen - 1)) { cBuff._GetMemory()->SetRawDataHoldBuffer(cSrc, sizeof(wchar_t) * 3); } else { diff --git a/sakura_core/charset/codechecker.h b/sakura_core/charset/codechecker.h index 738a24e7cf..c0f71c3f55 100644 --- a/sakura_core/charset/codechecker.h +++ b/sakura_core/charset/codechecker.h @@ -391,9 +391,8 @@ char32_t ConvertToUtf32(std::wstring_view text) { /*! * 文字列がIVSの異体字セレクタで始まっているか判定する */ -inline bool IsVariationSelector(std::wstring_view text) { - const auto cp = ConvertToUtf32(text); - return 0xe0100 <= cp && cp <= 0xe01ef; +inline bool IsVariationSelector(const wchar_t* pStr, size_t nLen) { + return (2 <= nLen) && (pStr[0] == 0xDB40) && (0xDD00 <= pStr[1]) && (pStr[1] <= 0xDDEF); } //! 上位バイトと下位バイトを交換 (主に UTF-16 LE/BE 向け) diff --git a/sakura_core/mem/CNativeW.cpp b/sakura_core/mem/CNativeW.cpp index dbd8e8a98d..d50a04a0d9 100644 --- a/sakura_core/mem/CNativeW.cpp +++ b/sakura_core/mem/CNativeW.cpp @@ -397,7 +397,7 @@ CLogicInt CNativeW::GetSizeOfChar( const wchar_t* pData, int nDataLen, int nIdx } // IVSの異体字セレクタチェック - if (IsVariationSelector(pData + nIdx + 1)) { + if (IsVariationSelector(pData + nIdx + 1, nDataLen - (nIdx + 1))) { // 正字 + 異体字セレクタで3個分 return CLogicInt(3); } diff --git a/sakura_core/parse/CWordParse.cpp b/sakura_core/parse/CWordParse.cpp index b1f46c98d7..8886c0a35e 100644 --- a/sakura_core/parse/CWordParse.cpp +++ b/sakura_core/parse/CWordParse.cpp @@ -187,7 +187,7 @@ ECharKind CWordParse::WhatKindOfChar( } // IVS(正字 + 異体字セレクタ) else if (nCharChars == 3 && - IsVariationSelector(pData + nIdx + 1)) + IsVariationSelector(pData + nIdx + 1, pDataLen - (nIdx + 1))) { ret = CK_ZEN_ETC; // 全角のその他(漢字など) } diff --git a/sakura_core/view/CTextMetrics.cpp b/sakura_core/view/CTextMetrics.cpp index 182c849031..9b78209e76 100644 --- a/sakura_core/view/CTextMetrics.cpp +++ b/sakura_core/view/CTextMetrics.cpp @@ -109,7 +109,7 @@ const int* CTextMetrics::GenerateDxArray( vResultArray.push_back(cache.CalcPxWidthByFont(pText[i]) + spacing); nIndent += vResultArray.back(); - if (IsVariationSelector(pText + i + 1)) { + if (IsVariationSelector(pText + i + 1, nLength - (i + 1))) { vResultArray.push_back(0); vResultArray.push_back(0); i += 2; From fe2e51bcf51ed2372d138d79630f35267121dcfa Mon Sep 17 00:00:00 2001 From: "D.Miwa" Date: Sat, 11 May 2024 16:05:52 +0900 Subject: [PATCH 2/3] =?UTF-8?q?IsVariationSelector=E3=81=AE=E3=83=86?= =?UTF-8?q?=E3=82=B9=E3=83=88=E3=82=92=E8=BF=BD=E5=8A=A0=E3=81=99=E3=82=8B?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- tests/unittests/test-codechecker.cpp | 23 +++++++++++++++++++++++ tests/unittests/test-ctextmetrics.cpp | 2 +- 2 files changed, 24 insertions(+), 1 deletion(-) diff --git a/tests/unittests/test-codechecker.cpp b/tests/unittests/test-codechecker.cpp index f7d84b1b01..9ef32514d1 100644 --- a/tests/unittests/test-codechecker.cpp +++ b/tests/unittests/test-codechecker.cpp @@ -45,3 +45,26 @@ TEST(ConvertToUtf32, BinaryOnSurrogate) const auto& s = L"\xdcff"; EXPECT_EQ(0, ConvertToUtf32(s)); } + +TEST(IsVariationSelector, VariationSelectorCheck) +{ + // IVS開始 + const auto& ivs1 = L"\U000E0100"; + EXPECT_TRUE(IsVariationSelector(ivs1, 2)); + + // IVS終了 + const auto& ivs2 = L"\U000E01EF"; + EXPECT_TRUE(IsVariationSelector(ivs2, 2)); + + // 長さ不足 + EXPECT_FALSE(IsVariationSelector(ivs1, 0)); + EXPECT_FALSE(IsVariationSelector(ivs1, 1)); + + // IVS開始-1 + const auto& notivs1 = L"\U000E00FF"; + EXPECT_FALSE(IsVariationSelector(notivs1, 2)); + + // IVS終了+1 + const auto& notivs2 = L"\U000E01F0"; + EXPECT_FALSE(IsVariationSelector(notivs2, 2)); +} diff --git a/tests/unittests/test-ctextmetrics.cpp b/tests/unittests/test-ctextmetrics.cpp index 4dfeee8970..1e0f2d0af8 100644 --- a/tests/unittests/test-ctextmetrics.cpp +++ b/tests/unittests/test-ctextmetrics.cpp @@ -225,7 +225,7 @@ TEST(CTextMetrics, GenerateDxArray8) // IVSのVariantSelectorが続く文字列は先頭1文字 + 幅0×2で生成する std::vector v; FakeCache1 cache; - CTextMetrics::GenerateDxArray(&v, L"葛󠄀", 2, 0, 0, 0, 10, cache); + CTextMetrics::GenerateDxArray(&v, L"葛󠄀", 3, 0, 0, 0, 10, cache); EXPECT_TRUE(v[0]); EXPECT_FALSE(v[1]); EXPECT_FALSE(v[2]); From 8df88f1d299d172e0e73e68f5966c9ee4bacff56 Mon Sep 17 00:00:00 2001 From: "D.Miwa" Date: Sun, 12 May 2024 22:57:30 +0900 Subject: [PATCH 3/3] =?UTF-8?q?IsVariationSelector=E3=81=AE=E5=BC=95?= =?UTF-8?q?=E6=95=B0=E3=82=92std::wstring=5Fview=E5=9E=8B=E3=81=AB?= =?UTF-8?q?=E6=88=BB=E3=81=99?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- sakura_core/charset/CCodeBase.cpp | 2 +- sakura_core/charset/CUtf8.cpp | 2 +- sakura_core/charset/codechecker.h | 4 ++-- sakura_core/mem/CNativeW.cpp | 2 +- sakura_core/parse/CWordParse.cpp | 2 +- sakura_core/view/CTextMetrics.cpp | 2 +- tests/unittests/test-codechecker.cpp | 32 +++++++++++++--------------- 7 files changed, 22 insertions(+), 24 deletions(-) diff --git a/sakura_core/charset/CCodeBase.cpp b/sakura_core/charset/CCodeBase.cpp index 5f525503b9..62877bbe52 100644 --- a/sakura_core/charset/CCodeBase.cpp +++ b/sakura_core/charset/CCodeBase.cpp @@ -55,7 +55,7 @@ std::wstring CCodeBase::CodeToHex(const CNativeW& cSrc, const CommonSetting_Stat EConvertResult CCodeBase::UnicodeToHex(const wchar_t* cSrc, const int iSLen, WCHAR* pDst, const CommonSetting_Statusbar* psStatusbar) { // IVS - if (iSLen >= 3 && IsVariationSelector(cSrc + 1, iSLen - 1)) { + if (iSLen >= 3 && IsVariationSelector(cSrc + 1)) { if (psStatusbar->m_bDispSPCodepoint) { auto_sprintf(pDst, L"%04X, U+%05X", cSrc[0], ConvertToUtf32(cSrc + 1)); } diff --git a/sakura_core/charset/CUtf8.cpp b/sakura_core/charset/CUtf8.cpp index 8357ad373c..73016d2fe6 100644 --- a/sakura_core/charset/CUtf8.cpp +++ b/sakura_core/charset/CUtf8.cpp @@ -227,7 +227,7 @@ EConvertResult CUtf8::_UnicodeToHex(const wchar_t* cSrc, const int iSLen, WCHAR* if (IsUTF16High(cSrc[0]) && iSLen >= 2 && IsUTF16Low(cSrc[1])) { cBuff._GetMemory()->SetRawDataHoldBuffer(cSrc, 4); } - else if (iSLen >= 3 && IsVariationSelector(cSrc + 1, iSLen - 1)) { + else if (iSLen >= 3 && IsVariationSelector(cSrc + 1)) { cBuff._GetMemory()->SetRawDataHoldBuffer(cSrc, sizeof(wchar_t) * 3); } else { diff --git a/sakura_core/charset/codechecker.h b/sakura_core/charset/codechecker.h index c0f71c3f55..5a2e483c5a 100644 --- a/sakura_core/charset/codechecker.h +++ b/sakura_core/charset/codechecker.h @@ -391,8 +391,8 @@ char32_t ConvertToUtf32(std::wstring_view text) { /*! * 文字列がIVSの異体字セレクタで始まっているか判定する */ -inline bool IsVariationSelector(const wchar_t* pStr, size_t nLen) { - return (2 <= nLen) && (pStr[0] == 0xDB40) && (0xDD00 <= pStr[1]) && (pStr[1] <= 0xDDEF); +inline bool IsVariationSelector(std::wstring_view text) { + return (2 <= text.size()) && (text[0] == 0xDB40) && (0xDD00 <= text[1]) && (text[1] <= 0xDDEF); } //! 上位バイトと下位バイトを交換 (主に UTF-16 LE/BE 向け) diff --git a/sakura_core/mem/CNativeW.cpp b/sakura_core/mem/CNativeW.cpp index d50a04a0d9..1365913ff2 100644 --- a/sakura_core/mem/CNativeW.cpp +++ b/sakura_core/mem/CNativeW.cpp @@ -397,7 +397,7 @@ CLogicInt CNativeW::GetSizeOfChar( const wchar_t* pData, int nDataLen, int nIdx } // IVSの異体字セレクタチェック - if (IsVariationSelector(pData + nIdx + 1, nDataLen - (nIdx + 1))) { + if (IsVariationSelector(std::wstring_view(pData + nIdx + 1, nDataLen - (nIdx + 1)))) { // 正字 + 異体字セレクタで3個分 return CLogicInt(3); } diff --git a/sakura_core/parse/CWordParse.cpp b/sakura_core/parse/CWordParse.cpp index 8886c0a35e..a09f7cf3d6 100644 --- a/sakura_core/parse/CWordParse.cpp +++ b/sakura_core/parse/CWordParse.cpp @@ -187,7 +187,7 @@ ECharKind CWordParse::WhatKindOfChar( } // IVS(正字 + 異体字セレクタ) else if (nCharChars == 3 && - IsVariationSelector(pData + nIdx + 1, pDataLen - (nIdx + 1))) + IsVariationSelector(std::wstring_view(pData + nIdx + 1, pDataLen - (nIdx + 1)))) { ret = CK_ZEN_ETC; // 全角のその他(漢字など) } diff --git a/sakura_core/view/CTextMetrics.cpp b/sakura_core/view/CTextMetrics.cpp index 9b78209e76..9d225b3d15 100644 --- a/sakura_core/view/CTextMetrics.cpp +++ b/sakura_core/view/CTextMetrics.cpp @@ -109,7 +109,7 @@ const int* CTextMetrics::GenerateDxArray( vResultArray.push_back(cache.CalcPxWidthByFont(pText[i]) + spacing); nIndent += vResultArray.back(); - if (IsVariationSelector(pText + i + 1, nLength - (i + 1))) { + if (IsVariationSelector(std::wstring_view(pText + i + 1, nLength - (i + 1)))) { vResultArray.push_back(0); vResultArray.push_back(0); i += 2; diff --git a/tests/unittests/test-codechecker.cpp b/tests/unittests/test-codechecker.cpp index 9ef32514d1..1b5c2f9aa0 100644 --- a/tests/unittests/test-codechecker.cpp +++ b/tests/unittests/test-codechecker.cpp @@ -48,23 +48,21 @@ TEST(ConvertToUtf32, BinaryOnSurrogate) TEST(IsVariationSelector, VariationSelectorCheck) { - // IVS開始 - const auto& ivs1 = L"\U000E0100"; - EXPECT_TRUE(IsVariationSelector(ivs1, 2)); + // 異体字セレクタ開始 + const auto& vs1 = L"\U000E0100"; + EXPECT_TRUE(IsVariationSelector(vs1)); - // IVS終了 - const auto& ivs2 = L"\U000E01EF"; - EXPECT_TRUE(IsVariationSelector(ivs2, 2)); + // 異体字セレクタ終了 + const auto& vs2 = L"\U000E01EF"; + EXPECT_TRUE(IsVariationSelector(vs2)); - // 長さ不足 - EXPECT_FALSE(IsVariationSelector(ivs1, 0)); - EXPECT_FALSE(IsVariationSelector(ivs1, 1)); - - // IVS開始-1 - const auto& notivs1 = L"\U000E00FF"; - EXPECT_FALSE(IsVariationSelector(notivs1, 2)); - - // IVS終了+1 - const auto& notivs2 = L"\U000E01F0"; - EXPECT_FALSE(IsVariationSelector(notivs2, 2)); + // 非該当文字列 + const auto& notvs1 = L""; + EXPECT_FALSE(IsVariationSelector(notvs1)); + const auto& notvs2 = L"\xDB40"; + EXPECT_FALSE(IsVariationSelector(notvs2)); + const auto& notvs3 = L"\U000E00FF"; + EXPECT_FALSE(IsVariationSelector(notvs3)); + const auto& notvs4 = L"\U000E01F0"; + EXPECT_FALSE(IsVariationSelector(notvs4)); }