Large diffs are not rendered by default.

@@ -8,18 +8,18 @@
#include <boost/filesystem.hpp>

/// Helpers shared by the song parsers: duet singer labels and functions that parse a string into a variable.
/// NOTE(review): every declaration below appears twice with identical text, which looks like the removed and
/// added sides of a whitespace-only diff hunk -- confirm before treating both copies as real code.
namespace SongParserUtil {
/// Label text for the second duet singer
const std::string DUET_P2 = "Duet singer"; // FIXME
/// Label text for parts sung by both duet singers
const std::string DUET_BOTH = "Both singers"; // FIXME
/// Parse an int from string and assign it to a variable
void assign(int& var, std::string const& str);
/// Parse an unsigned int from string and assign it to a variable
void assign(unsigned& var, std::string const& str);
/// Parse a double from string and assign it to a variable (str is taken by value, unlike the other overloads)
void assign(double& var, std::string str);
/// Parse a boolean from string and assign it to a variable
void assign(bool& var, std::string const& str);
/// Erase last character if it matches (ch defaults to a space)
void eraseLast(std::string& s, char ch = ' ');
// Second copy of the declarations above starts here.
const std::string DUET_P2 = "Duet singer"; // FIXME
const std::string DUET_BOTH = "Both singers"; // FIXME
/// Parse an int from string and assign it to a variable
void assign(int& var, std::string const& str);
/// Parse an unsigned int from string and assign it to a variable
void assign(unsigned& var, std::string const& str);
/// Parse a double from string and assign it to a variable
void assign(double& var, std::string str);
/// Parse a boolean from string and assign it to a variable
void assign(bool& var, std::string const& str);
/// Erase last character if it matches
void eraseLast(std::string& s, char ch = ' ');
}

/// Parses song files
@@ -1,7 +1,7 @@
#include "unicode.hh"
#include "configuration.hh"

#include <boost/regex.hpp>
#include <regex>
#include <boost/scoped_ptr.hpp>
#include <unicode/unistr.h>
#include <unicode/ustream.h>
@@ -12,63 +12,72 @@ UErrorCode UnicodeUtil::m_icuError = U_ZERO_ERROR;
icu::RuleBasedCollator UnicodeUtil::m_dummyCollator(icu::UnicodeString(""), icu::Collator::PRIMARY, m_icuError);

/// Detect the character encoding of str with ICU's charset detector.
/// Returns a MatchResult pair: the name from ucsdet_getName and the confidence from ucsdet_getConfidence.
/// Throws std::runtime_error when m_icuError reports a failure after ucsdet_setText.
/// NOTE(review): the body appears twice; the second copy additionally enables the input filter and
/// builds a std::pair<std::string,int>. This looks like both sides of a diff hunk -- confirm which copy is current.
MatchResult UnicodeUtil::getCharset (std::string const& str) {
// --- First copy of the body (returns std::pair<const char*,int>) ---
// retval is declared but never used in the visible code.
MatchResult retval;
LocalUCharsetDetectorPointer m_chardet(ucsdet_open(&UnicodeUtil::m_icuError));
auto string = str.c_str();
// Length -1: presumably tells ICU the text is NUL-terminated -- confirm against the ucsdet_setText documentation.
ucsdet_setText(m_chardet.getAlias(), string, -1, &m_icuError);
// m_icuError is a static member and is not reset in this function, so a failure left by an earlier call would also land here.
if (U_FAILURE(UnicodeUtil::m_icuError)) {
std::string err = std::string("unicode/error: Couldn't pass text to CharsetDetector: ");
err.append(u_errorName(m_icuError));
throw std::runtime_error(err);
}
else {
// NOTE(review): match is passed on without a null check -- confirm ucsdet_detect cannot return null here.
const UCharsetMatch* match = ucsdet_detect(m_chardet.getAlias(), &m_icuError);
return std::pair<const char*,int>(ucsdet_getName(match, &m_icuError), ucsdet_getConfidence(match, &m_icuError));
}
// --- Second copy of the body (returns std::pair<std::string,int>) ---
MatchResult retval;
LocalUCharsetDetectorPointer m_chardet(ucsdet_open(&UnicodeUtil::m_icuError));
// Turns on the detector's input filter before the text is set.
ucsdet_enableInputFilter(m_chardet.getAlias(), true);
auto string = str.c_str();
ucsdet_setText(m_chardet.getAlias(), string, -1, &m_icuError);
if (U_FAILURE(UnicodeUtil::m_icuError)) {
std::string err = std::string("unicode/error: Couldn't pass text to CharsetDetector: ");
err += u_errorName(m_icuError);
throw std::runtime_error(err);
}
else {
// NOTE(review): the name returned by ucsdet_getName is converted to std::string without a null check -- confirm it cannot be null.
const UCharsetMatch* match = ucsdet_detect(m_chardet.getAlias(), &m_icuError);
return std::pair<std::string,int>(ucsdet_getName(match, &m_icuError), ucsdet_getConfidence(match, &m_icuError));
}
}

/// Convert the contents of _stream to UTF-8 in place, using UnicodeUtil::getCharset to guess the source encoding.
/// _filename is only used in the warning messages; in the second copy of the body an empty _filename also
/// skips detection and treats the text as UTF-8.
/// NOTE(review): the body appears twice (two different algorithms); this looks like both sides of a diff hunk --
/// confirm which copy is current.
void convertToUTF8(std::stringstream &_stream, std::string _filename) {
// --- First copy of the body ---
std::string data = _stream.str();
MatchResult match = UnicodeUtil::getCharset(data);
// This outer ustring is never used; the conversion branch below declares its own.
icu::UnicodeString ustring;
if (match.second >= 50) { // fairly good match?
std::string charset = match.first;
if (charset == "UTF-8") {
// Already UTF-8: only a leading byte-order mark needs to be removed.
if (data.substr(0, 3) == "\xEF\xBB\xBF") {
if (config["game/bom_warnings"].b()) {
std::clog << "unicode/warning: " << _filename << " UTF-8 BOM ignored. Please avoid editors that add a BOM to UTF-8 (e.g. Notepad)." << std::endl;
}
_stream.str(data.substr(3)); // Remove BOM if there is one
}
}
else {
// Any other detected charset: decode with ICU and write the UTF-8 result back into the stream.
if (!_filename.empty()) { std::clog << "unicode/warning: " << _filename << " is not UTF-8... (" << charset << ") detected. Use a text-editor or other utility to convert your files." << std::endl; }
std::string _str;
const char* tmp = data.c_str();
icu::UnicodeString ustring = icu::UnicodeString(tmp, charset.c_str());
_stream.str(ustring.toUTF8String(_str));
}
}
else { // If we're not confident in any particular charset, filter out anything but ASCII
// Characters outside 0x20..0x7E become '?'.
// NOTE(review): the filtered text in tmp is not written back to _stream in the visible code.
std::string tmp;
for (char ch; _stream.get(ch);) tmp += (ch >= 0x20 && ch < 0x7F) ? ch : '?';
}
// --- Second copy of the body ---
std::string data = _stream.str();
MatchResult match;
if (!_filename.empty()) {
match = UnicodeUtil::getCharset(data);
}
else {
match = std::pair<std::string,int>("UTF-8",100); // If there's no filename, assume it's internal text and thus utf-8.
}
// This outer ustring is never used; the conversion branch below declares its own.
icu::UnicodeString ustring;
// A UTF-8 byte-order mark overrides whatever was detected: strip it and force UTF-8 with confidence 100.
if (data.substr(0, 3) == "\xEF\xBB\xBF") {
if (config["game/bom_warnings"].b()) {
std::clog << "unicode/warning: " << _filename << " UTF-8 BOM ignored. Please avoid editors that add a BOM to UTF-8 (e.g. Notepad)." << std::endl;
}
match.first = "UTF-8";
match.second = 100;
_stream.str(data.substr(3)); // Remove BOM if there is one
}
if (match.second > 10 && match.second < 50) { // 50 is a really good match, 10 means an encoding that could be conceivably used to display the text.
if (match.first == "ISO-8859-1" || match.first == "ISO-8859-2") {
match.first = "UTF-8";
match.second = 75; // Mostly western characters. Let's treat it as a UTF-8 false-negative.
}
}
// Below confidence 50 the stream is left unchanged (apart from the BOM removal above).
if (match.second >= 50) { // fairly good match?
std::string charset = match.first;
if (charset != "UTF-8") {
// Decode from the detected charset with ICU and write the UTF-8 result back into the stream.
if (!_filename.empty()) { std::clog << "unicode/warning: " << _filename << " does not appear to be UTF-8... (" << charset << ") detected." << std::endl; }
std::string _str;
const char* tmp = data.c_str();
icu::UnicodeString ustring = icu::UnicodeString(tmp, charset.c_str());
_stream.str(ustring.toUTF8String(_str));
}
}
}

/// Convenience overload: wraps str in a stringstream, runs the stream overload with an empty filename
/// and returns the resulting stream contents.
/// NOTE(review): the three statements appear twice with identical text, which looks like both sides of a
/// whitespace-only diff hunk; the second copy follows a return statement.
std::string convertToUTF8(std::string const& str) {
std::stringstream ss(str);
// Empty filename: no file name is available to put in the warning messages.
convertToUTF8(ss, std::string());
return ss.str();
// Second copy of the statements above starts here.
std::stringstream ss(str);
convertToUTF8(ss, std::string());
return ss.str();
}

/// Build a sorting key for str: convert it to UTF-8, then apply a regex built from the terms in
/// config["game/sorting_ignore"]. The pattern has the form ^((term1|term2|...)\s(.+))$ and the replacement
/// references groups 3 and 2, presumably to move a leading ignored term behind the rest of the text.
/// NOTE(review): the body appears twice (boost::regex, then std::regex with icase); this looks like both
/// sides of a diff hunk -- confirm which copy is current.
std::string unicodeCollate(std::string const& str) {
// --- First copy of the body (boost::regex, case-sensitive) ---
ConfigItem::StringList termsToCollate = config["game/sorting_ignore"].sl();
std::string pattern = std::string("^((");
// The closing part of the pattern is appended only inside the loop, so an empty term list leaves the
// pattern as "^((". First/last detection compares term values, not positions.
for (auto term: termsToCollate) {
if (term != termsToCollate.front()) { pattern.append(std::string("|")); }
pattern.append(term);
if (term == termsToCollate.back()) { pattern.append(std::string(")\\s(.+))$")); }
}
std::string collated = boost::regex_replace(convertToUTF8(str), boost::regex(pattern), "\\3 \\2");
return collated;
// --- Second copy of the body (std::regex, case-insensitive) ---
ConfigItem::StringList termsToCollate = config["game/sorting_ignore"].sl();
std::string pattern = std::string("^((");
for (auto term: termsToCollate) {
if (term != termsToCollate.front()) { pattern += std::string("|"); }
pattern += term;
if (term == termsToCollate.back()) { pattern += std::string(")\\s(.+))$"); }
}
// NOTE(review): std::regex_replace's default format syntax refers to captures as $n -- confirm that
// "\\3 \\2" is substituted as intended rather than emitted literally.
std::string collated = std::regex_replace(convertToUTF8(str), std::regex(pattern, std::regex_constants::icase), "\\3 \\2");
return collated;
}
@@ -11,17 +11,17 @@ void convertToUTF8(std::stringstream &_stream, std::string _filename);
/// Return a UTF-8 version of str
std::string convertToUTF8(std::string const& str);
/// Return str converted to UTF-8 and rewritten for sorting according to config["game/sorting_ignore"]
std::string unicodeCollate(std::string const& str);

/// Result of charset detection: (charset name, confidence).
/// NOTE(review): MatchResult is defined twice with different first types (const char* and std::string);
/// this looks like both sides of a diff hunk -- confirm which definition is current.
typedef std::pair<const char*, int> MatchResult;
typedef std::pair<std::string, int> MatchResult;

/// Holder for the static ICU state and the charset detection entry point.
/// NOTE(review): the members appear twice and three lines repeat after the first closing brace; this looks
/// like both sides of a diff hunk -- confirm which copy is current.
struct UnicodeUtil {

UnicodeUtil() {};
~UnicodeUtil() {};
// Songs is a friend, so it can reach the private static members below.
friend class Songs;

/// Detect the charset of str; returns a MatchResult (name, confidence)
static MatchResult getCharset(std::string const& str);

// Second copy of the members above starts here.
UnicodeUtil() {};
~UnicodeUtil() {};
friend class Songs;
static MatchResult getCharset(std::string const& str);
private:
/// Error code handed to the ICU calls; static, so shared by every caller
static UErrorCode m_icuError;
/// Static collator instance
static icu::RuleBasedCollator m_dummyCollator;
};
static UErrorCode m_icuError;
static icu::RuleBasedCollator m_dummyCollator;
};
@@ -139,7 +139,7 @@ function main {
mkdir build
cd build

cmake -DCMAKE_INSTALL_PREFIX=$TEMPDIR -DENABLE_TOOLS=${ENABLE_TOOLS} -DCMAKE_BUILD_TYPE=${RELTYPE} -DENABLE_WEBSERVER=ON -DCMAKE_VERBOSE_MAKEFILE=1 -DFreetype_INCLUDE_DIR=/opt/local/include/freetype2 -DCMAKE_OSX_DEPLOYMENT_TARGET=${DEPLOYMENT_TARGET} -DFontconfig_INCLUDE_DIR=/opt/local/include/fontconfig -DPng_INCLUDE_DIR=/opt/local/include/libpng -DAVCodec_INCLUDE_DIR=/opt/local/include/libavcodec -DAVFormat_INCLUDE_DIR=/opt/local/include/libavformat -DSWScale_INCLUDE_DIR=/opt/local/include/libswscale -DFreetype_INCLUDE_DIR=/opt/local/include/freetype2/ -DLibXML2_LIBRARY=/opt/local/lib/libxml2.dylib -DLibXML2_INCLUDE_DIR=/opt/local/include/libxml2 -DGlibmmConfig_INCLUDE_DIR=/opt/local/lib/glibmm-2.4/include -DGlibConfig_INCLUDE_DIR=/opt/local/lib/glib-2.0/include -DCMAKE_C_COMPILER=/usr/bin/clang -DCMAKE_CXX_COMPILER=/usr/bin/clang++ -DCMAKE_C_FLAGS="-arch x86_64" -DCMAKE_EXPORT_COMPILE_COMMANDS=ON -DSHARE_INSTALL=Resources -DLOCALE_DIR=Resources/Locales -DCMAKE_CXX_FLAGS="-Wall -Wextra -stdlib=libc++ -arch x86_64" -DCMAKE_EXE_LINKER_FLAGS="-stdlib=libc++ -lc++ -lc++abi -arch x86_64" -DCMAKE_OSX_ARCHITECTURES="x86_64" ../..
cmake -DCMAKE_INSTALL_PREFIX=$TEMPDIR -DENABLE_TOOLS=${ENABLE_TOOLS} -DCMAKE_BUILD_TYPE=${RELTYPE} -DENABLE_WEBSERVER=ON -DCMAKE_VERBOSE_MAKEFILE=1 -DFreetype_INCLUDE_DIR=/opt/local/include/freetype2 -DCMAKE_OSX_DEPLOYMENT_TARGET=${DEPLOYMENT_TARGET} -DFontconfig_INCLUDE_DIR=/opt/local/include/fontconfig -DPng_INCLUDE_DIR=/opt/local/include/libpng -DAVCodec_INCLUDE_DIR=/opt/local/include/libavcodec -DAVFormat_INCLUDE_DIR=/opt/local/include/libavformat -DSWScale_INCLUDE_DIR=/opt/local/include/libswscale -DFreetype_INCLUDE_DIR=/opt/local/include/freetype2/ -DLibXML2_LIBRARY=/opt/local/lib/libxml2.dylib -DLibXML2_INCLUDE_DIR=/opt/local/include/libxml2 -DGlibmmConfig_INCLUDE_DIR=/opt/local/lib/glibmm-2.4/include -DGlibConfig_INCLUDE_DIR=/opt/local/lib/glib-2.0/include -DCMAKE_C_COMPILER=/usr/bin/clang -DCMAKE_CXX_COMPILER=/usr/bin/clang++ -DCMAKE_C_FLAGS="-arch x86_64" -DCMAKE_EXPORT_COMPILE_COMMANDS=ON -DSHARE_INSTALL=Resources -DLOCALE_DIR=Resources/Locales -DCMAKE_CXX_FLAGS="-std=c++1z -Wall -Wextra -stdlib=libc++ -arch x86_64" -DCMAKE_EXE_LINKER_FLAGS="-stdlib=libc++ -lc++ -lc++abi -arch x86_64" -DCMAKE_OSX_ARCHITECTURES="x86_64" ../..

# -DLibXML++Config_INCLUDE_DIR=/opt/local/lib/libxml++-2.6/include
make -j${MAKE_JOBS} install # You can change the -j value in order to spawn more build threads.