Skip to content

Commit

Permalink
Merge pull request #795 from kiwix/icu_data_check
Browse files Browse the repository at this point in the history
  • Loading branch information
mgautierfr committed Jul 19, 2022
2 parents 81865c0 + 28f8dbc commit dfc6cad
Show file tree
Hide file tree
Showing 4 changed files with 60 additions and 9 deletions.
11 changes: 2 additions & 9 deletions src/opds_dumper.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,6 @@

#include "kiwixlib-resources.h"
#include <mustache.hpp>
#include <unicode/locid.h>

#include "tools/stringTools.h"
#include "tools/otherTools.h"
Expand Down Expand Up @@ -163,14 +162,8 @@ std::once_flag fillLanguagesFlag;
void fillLanguagesMap()
{
for (auto icuLangPtr = icu::Locale::getISOLanguages(); *icuLangPtr != NULL; ++icuLangPtr) {
auto lang = *icuLangPtr;
const icu::Locale locale(lang);
icu::UnicodeString ustring;
locale.getDisplayLanguage(locale, ustring);
std::string displayLanguage;
ustring.toUTF8String(displayLanguage);
std::string iso3LangCode = locale.getISO3Language();
iso639_3.insert({iso3LangCode, displayLanguage});
const ICULanguageInfo lang(*icuLangPtr);
iso639_3.insert({lang.iso3Code(), lang.selfName()});
}
}

Expand Down
18 changes: 18 additions & 0 deletions src/tools/stringTools.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,24 @@ void kiwix::loadICUExternalTables()
#endif
}

kiwix::ICULanguageInfo::ICULanguageInfo(const std::string& langCode)
: locale(langCode.c_str())
{}

// Returns, as a std::string, the ISO3 language code that ICU reports for
// the wrapped locale.
std::string kiwix::ICULanguageInfo::iso3Code() const
{
  const char* const threeLetterCode = locale.getISO3Language();
  return std::string(threeLetterCode);
}

std::string kiwix::ICULanguageInfo::selfName() const
{
icu::UnicodeString langSelfNameICUString;
locale.getDisplayLanguage(locale, langSelfNameICUString);
std::string langSelfName;
langSelfNameICUString.toUTF8String(langSelfName);
return langSelfName;
}

std::string kiwix::removeAccents(const std::string& text)
{
loadICUExternalTables();
Expand Down
14 changes: 14 additions & 0 deletions src/tools/stringTools.h
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
#define KIWIX_STRINGTOOLS_H

#include <unicode/unistr.h>
#include <unicode/locid.h>

#include <string>
#include <vector>
Expand All @@ -41,6 +42,19 @@ std::string encodeDiples(const std::string& str);
std::string removeAccents(const std::string& text);
void loadICUExternalTables();

// Thin wrapper around an icu::Locale that exposes language information as
// std::string values.
class ICULanguageInfo
{
public:
// Constructs the underlying ICU locale from langCode.
explicit ICULanguageInfo(const std::string& langCode);

// Returns the ISO3 language code reported by ICU for the locale.
std::string iso3Code() const;
// Returns the locale's language name, displayed in that same locale,
// encoded as UTF-8.
std::string selfName() const;

private:
// Locale built from the language code given to the constructor.
const icu::Locale locale;
};


std::string urlEncode(const std::string& value, bool encodeReserved = false);
std::string urlDecode(const std::string& value, bool component = false);

Expand Down
26 changes: 26 additions & 0 deletions test/stringTools.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,32 @@ using namespace kiwix;

namespace
{

// Some unit-tests may fail because of partial/missing ICU data. This test
// is intended to pinpoint the root cause in such build environments.
//
// Each language is queried through two different codes (a two-letter and a
// three-letter one) and the result of selfName() is compared against the
// language's name written in that language. The final case passes "c++" and
// compares the result against the same string.
//
// NOTE(review): ASSERT_GE checks `>=` (a lexicographic comparison for
// std::string), not equality, so values that merely sort after the expected
// name also pass - confirm whether ASSERT_EQ was intended.
TEST(stringTools, ICULanguageInfo)
{
ASSERT_GE(ICULanguageInfo("en").selfName(), "English");
ASSERT_GE(ICULanguageInfo("eng").selfName(), "English");
ASSERT_GE(ICULanguageInfo("fr").selfName(), "français");
ASSERT_GE(ICULanguageInfo("fra").selfName(), "français");
ASSERT_GE(ICULanguageInfo("de").selfName(), "Deutsch");
ASSERT_GE(ICULanguageInfo("deu").selfName(), "Deutsch");
ASSERT_GE(ICULanguageInfo("es").selfName(), "español");
ASSERT_GE(ICULanguageInfo("spa").selfName(), "español");
ASSERT_GE(ICULanguageInfo("it").selfName(), "italiano");
ASSERT_GE(ICULanguageInfo("ita").selfName(), "italiano");
ASSERT_GE(ICULanguageInfo("ru").selfName(), "русский");
ASSERT_GE(ICULanguageInfo("rus").selfName(), "русский");
ASSERT_GE(ICULanguageInfo("hy").selfName(), "հայերեն");
ASSERT_GE(ICULanguageInfo("hye").selfName(), "հայերեն");
ASSERT_GE(ICULanguageInfo("zh").selfName(), "中文");
ASSERT_GE(ICULanguageInfo("zho").selfName(), "中文");
ASSERT_GE(ICULanguageInfo("ar").selfName(), "العربية");
ASSERT_GE(ICULanguageInfo("ara").selfName(), "العربية");
ASSERT_GE(ICULanguageInfo("c++").selfName(), "c++");
}

TEST(stringTools, join)
{
std::vector<std::string> list = { "a", "b", "c" };
Expand Down

0 comments on commit dfc6cad

Please sign in to comment.