From 28f8dbcf20f5287cb027f3502fafbd30ce70564f Mon Sep 17 00:00:00 2001 From: Veloman Yunkan Date: Thu, 7 Jul 2022 16:10:46 +0400 Subject: [PATCH] New unit-test stringTools.ICULanguageInfo --- src/opds_dumper.cpp | 11 ++--------- src/tools/stringTools.cpp | 18 ++++++++++++++++++ src/tools/stringTools.h | 14 ++++++++++++++ test/stringTools.cpp | 26 ++++++++++++++++++++++++++ 4 files changed, 60 insertions(+), 9 deletions(-) diff --git a/src/opds_dumper.cpp b/src/opds_dumper.cpp index 91bfb7f44..6255087ea 100644 --- a/src/opds_dumper.cpp +++ b/src/opds_dumper.cpp @@ -22,7 +22,6 @@ #include "kiwixlib-resources.h" #include -#include #include "tools/stringTools.h" #include "tools/otherTools.h" @@ -163,14 +162,8 @@ std::once_flag fillLanguagesFlag; void fillLanguagesMap() { for (auto icuLangPtr = icu::Locale::getISOLanguages(); *icuLangPtr != NULL; ++icuLangPtr) { - auto lang = *icuLangPtr; - const icu::Locale locale(lang); - icu::UnicodeString ustring; - locale.getDisplayLanguage(locale, ustring); - std::string displayLanguage; - ustring.toUTF8String(displayLanguage); - std::string iso3LangCode = locale.getISO3Language(); - iso639_3.insert({iso3LangCode, displayLanguage}); + /* The ICU calls removed above now live in ICULanguageInfo; the map entry is keyed by its iso3Code() and holds its selfName(). */ const ICULanguageInfo lang(*icuLangPtr); + iso639_3.insert({lang.iso3Code(), lang.selfName()}); } } diff --git a/src/tools/stringTools.cpp b/src/tools/stringTools.cpp index e7cb851ec..dcd5fc4ff 100644 --- a/src/tools/stringTools.cpp +++ b/src/tools/stringTools.cpp @@ -49,6 +49,24 @@ void kiwix::loadICUExternalTables() #endif } +/* Constructs the wrapped icu::Locale from the C string of langCode. */ kiwix::ICULanguageInfo::ICULanguageInfo(const std::string& langCode) + : locale(langCode.c_str()) +{} + +/* Returns the value of icu::Locale::getISO3Language() for the wrapped locale as a std::string. */ std::string kiwix::ICULanguageInfo::iso3Code() const +{ + return locale.getISO3Language(); +} + +/* Returns the display name of the language as UTF-8; the wrapped locale is passed as its own display locale, so the name is requested in that same language. */ std::string kiwix::ICULanguageInfo::selfName() const +{ + icu::UnicodeString langSelfNameICUString; + locale.getDisplayLanguage(locale, langSelfNameICUString); + std::string langSelfName; + langSelfNameICUString.toUTF8String(langSelfName); + return langSelfName; +} + std::string 
kiwix::removeAccents(const std::string& text) { loadICUExternalTables(); diff --git a/src/tools/stringTools.h b/src/tools/stringTools.h index 337548578..3de1f086b 100644 --- a/src/tools/stringTools.h +++ b/src/tools/stringTools.h @@ -21,6 +21,7 @@ #define KIWIX_STRINGTOOLS_H #include +#include #include #include @@ -41,6 +42,19 @@ std::string encodeDiples(const std::string& str); std::string removeAccents(const std::string& text); void loadICUExternalTables(); +/* Small wrapper around an icu::Locale built from a language code. iso3Code() returns the ISO3 language code reported by ICU for that locale, and selfName() returns the UTF-8 display name of the language requested in that same locale. */ class ICULanguageInfo +{ +public: + explicit ICULanguageInfo(const std::string& langCode); + + std::string iso3Code() const; + std::string selfName() const; + +private: + /* Locale built once in the constructor; both accessors only read it. */ const icu::Locale locale; +}; + + std::string urlEncode(const std::string& value, bool encodeReserved = false); std::string urlDecode(const std::string& value, bool component = false); diff --git a/test/stringTools.cpp b/test/stringTools.cpp index d91670933..7402f6e0d 100644 --- a/test/stringTools.cpp +++ b/test/stringTools.cpp @@ -31,6 +31,32 @@ using namespace kiwix; namespace { + +// Some unit-tests may fail because of partial/missing ICU data. This test +// is intended to pinpoint the root cause in such build environments. 
+TEST(stringTools, ICULanguageInfo) +{ /* Each language is probed twice, once by its two-letter and once by its three-letter code, and both are compared against the same native-language name. NOTE(review): ASSERT_GE only requires selfName() to compare greater than or equal to the expected text, not to equal it, so some wrong values would still pass (for example an untranslated en compares greater than English) - confirm whether ASSERT_EQ was intended. The last case passes c++, which is presumably not a language code known to ICU and is expected to come back unchanged - verify. */ + ASSERT_GE(ICULanguageInfo("en").selfName(), "English"); + ASSERT_GE(ICULanguageInfo("eng").selfName(), "English"); + ASSERT_GE(ICULanguageInfo("fr").selfName(), "français"); + ASSERT_GE(ICULanguageInfo("fra").selfName(), "français"); + ASSERT_GE(ICULanguageInfo("de").selfName(), "Deutsch"); + ASSERT_GE(ICULanguageInfo("deu").selfName(), "Deutsch"); + ASSERT_GE(ICULanguageInfo("es").selfName(), "español"); + ASSERT_GE(ICULanguageInfo("spa").selfName(), "español"); + ASSERT_GE(ICULanguageInfo("it").selfName(), "italiano"); + ASSERT_GE(ICULanguageInfo("ita").selfName(), "italiano"); + ASSERT_GE(ICULanguageInfo("ru").selfName(), "русский"); + ASSERT_GE(ICULanguageInfo("rus").selfName(), "русский"); + ASSERT_GE(ICULanguageInfo("hy").selfName(), "հայերեն"); + ASSERT_GE(ICULanguageInfo("hye").selfName(), "հայերեն"); + ASSERT_GE(ICULanguageInfo("zh").selfName(), "中文"); + ASSERT_GE(ICULanguageInfo("zho").selfName(), "中文"); + ASSERT_GE(ICULanguageInfo("ar").selfName(), "العربية"); + ASSERT_GE(ICULanguageInfo("ara").selfName(), "العربية"); + ASSERT_GE(ICULanguageInfo("c++").selfName(), "c++"); +} + TEST(stringTools, join) { std::vector list = { "a", "b", "c" };