Skip to content

Commit

Permalink
Rework iconv encoding detection
Browse files Browse the repository at this point in the history
The "WCHAR_T" iconv charset name doesn't seem to be as portable as we thought,
so at this point it is easier to pick the right encoding using preprocessor macros.
  • Loading branch information
sfan5 committed Mar 30, 2024
1 parent d1a1aed commit 008d6be
Show file tree
Hide file tree
Showing 2 changed files with 35 additions and 32 deletions.
37 changes: 24 additions & 13 deletions src/util/serialize.h
Expand Up @@ -24,25 +24,36 @@ with this program; if not, write to the Free Software Foundation, Inc.,
#include "ieee_float.h"

#include "config.h"
// Provide byte-order macros (__BYTE_ORDER, __LITTLE_ENDIAN, __BIG_ENDIAN).
// The whole block is skipped when the build configuration does not set
// HAVE_ENDIAN_H, in which case none of these macros are provided here.
#if HAVE_ENDIAN_H
#ifdef _WIN32
// No header is included on Windows; the macros are defined by hand so that
// __BYTE_ORDER compares equal to __LITTLE_ENDIAN and unequal to __BIG_ENDIAN.
#define __BYTE_ORDER 0
#define __LITTLE_ENDIAN 0
#define __BIG_ENDIAN 1
#elif defined(__MACH__) && defined(__APPLE__)
// Apple platforms
#include <machine/endian.h>
#elif defined(__FreeBSD__) || defined(__DragonFly__)
// FreeBSD / DragonFly
#include <sys/endian.h>
#else
// every other platform
#include <endian.h>
#endif
#endif
#include <cstring> // for memcpy
#include <cassert>
#include <iostream>
#include <string>
#include <string_view>

/*
 * Make sure the macros BYTE_ORDER, LITTLE_ENDIAN and BIG_ENDIAN are available.
 *
 * 1. Include the platform header that is expected to provide them (Windows
 *    gets a hard-coded value instead of a header).
 * 2. Fill in whatever is still missing: default values for LITTLE_ENDIAN and
 *    BIG_ENDIAN, and BYTE_ORDER taken from an underscore-prefixed variant or
 *    from the compiler's predefined __BYTE_ORDER__.
 * 3. Refuse to compile if BYTE_ORDER could not be determined, since an
 *    undefined macro would silently evaluate to 0 inside #if expressions.
 */
#ifdef _WIN32
/* same value as the LITTLE_ENDIAN default below */
#define BYTE_ORDER 1234
#elif defined(__MACH__) && defined(__APPLE__)
#include <machine/endian.h>
#elif defined(__FreeBSD__) || defined(__DragonFly__)
#include <sys/endian.h>
#elif HAVE_ENDIAN_H
#include <endian.h>
#elif !defined(__BYTE_ORDER__)
/* neither a header nor a compiler-provided byte order is available */
#error "Can't detect endian (missing header)"
#endif
#ifndef LITTLE_ENDIAN
#define LITTLE_ENDIAN 1234
#endif
#ifndef BIG_ENDIAN
#define BIG_ENDIAN 4321
#endif
#if !defined(BYTE_ORDER) && defined(_BYTE_ORDER)
#define BYTE_ORDER _BYTE_ORDER
#elif !defined(BYTE_ORDER) && defined(__BYTE_ORDER)
#define BYTE_ORDER __BYTE_ORDER
#elif !defined(BYTE_ORDER) && defined(__BYTE_ORDER__)
/* GCC/Clang predefine the byte order; map it without assuming its numeric values */
#if defined(__ORDER_LITTLE_ENDIAN__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
#define BYTE_ORDER LITTLE_ENDIAN
#elif defined(__ORDER_BIG_ENDIAN__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
#define BYTE_ORDER BIG_ENDIAN
#endif
#endif
#ifndef BYTE_ORDER
#error "Can't detect endian (BYTE_ORDER is not defined)"
#endif

#define FIXEDPOINT_FACTOR 1000.0f

// 0x7FFFFFFF / 1000.0f is not serializable.
Expand Down
30 changes: 11 additions & 19 deletions src/util/string.cpp
Expand Up @@ -18,7 +18,7 @@ with this program; if not, write to the Free Software Foundation, Inc.,
*/

#include "string.h"
#include "pointer.h"
#include "serialize.h" // BYTE_ORDER
#include "numeric.h"
#include "log.h"

Expand Down Expand Up @@ -67,20 +67,16 @@ static bool convert(const char *to, const char *from, char *outbuf,
return true;
}

// Name of the iconv charset used for wchar_t data on this platform.
#if !defined(__ANDROID__) && \
	(defined(__NetBSD__) || defined(__OpenBSD__) || defined(__FreeBSD__))
// provides BYTE_ORDER / BIG_ENDIAN for the selection below
#include <sys/endian.h>
#endif

#if defined(__ANDROID__)
// iconv on Android does not agree about the size of wchar_t, so UTF-32 is named explicitly
const char *DEFAULT_ENCODING = "UTF-32LE";
#elif (defined(__NetBSD__) || defined(__OpenBSD__) || defined(__FreeBSD__)) && \
	BYTE_ORDER == BIG_ENDIAN
// NetBSD does not accept "WCHAR_T" as an iconv charset; the same explicit
// name is used for OpenBSD and FreeBSD (big endian case)
const char *DEFAULT_ENCODING = "UTF-32BE";
#elif defined(__NetBSD__) || defined(__OpenBSD__) || defined(__FreeBSD__)
// as above, for any byte order that is not big endian
const char *DEFAULT_ENCODING = "UTF-32LE";
#else
// everywhere else iconv is asked for its own wchar_t charset
const char *DEFAULT_ENCODING = "WCHAR_T";
#endif
// Choose the iconv charset name for wchar_t: UTF-16 when wide chars are
// 2 bytes, UTF-32 when they are 4 bytes, with the BE/LE suffix following
// BYTE_ORDER. Any other wide char size is rejected at compile time.
static_assert(sizeof(wchar_t) == 2 || sizeof(wchar_t) == 4, "Unexpected wide char size");
constexpr const char *DEFAULT_ENCODING =
	(sizeof(wchar_t) == 2)
		? ((BYTE_ORDER == BIG_ENDIAN) ? "UTF-16BE" : "UTF-16LE")
		: ((BYTE_ORDER == BIG_ENDIAN) ? "UTF-32BE" : "UTF-32LE");

std::wstring utf8_to_wide(std::string_view input)
{
Expand All @@ -93,10 +89,6 @@ std::wstring utf8_to_wide(std::string_view input)
std::wstring out;
out.resize(outbuf_size / sizeof(wchar_t));

#if defined(__ANDROID__) || defined(__NetBSD__) || defined(__OpenBSD__) || defined(__FreeBSD__)
static_assert(sizeof(wchar_t) == 4, "Unexpected wide char size");
#endif

char *outbuf = reinterpret_cast<char*>(&out[0]);
if (!convert(DEFAULT_ENCODING, "UTF-8", outbuf, &outbuf_size, inbuf, inbuf_size)) {
infostream << "Couldn't convert UTF-8 string 0x" << hex_encode(input)
Expand Down

0 comments on commit 008d6be

Please sign in to comment.