Skip to content
This repository has been archived by the owner. It is now read-only.
Permalink
Browse files

It turns out that UCS2 and UCS4 are defined as big-endian encodings

  • Loading branch information
slouken committed Oct 28, 2012
1 parent 2e6e2c7 commit 0fe99b939f5335661944a97448d7d30f5575d360
Showing with 85 additions and 61 deletions.
  1. +2 −2 include/SDL_stdinc.h
  2. +83 −59 src/stdlib/SDL_iconv.c
@@ -748,8 +748,8 @@ extern DECLSPEC char *SDLCALL SDL_iconv_string(const char *tocode,
const char *inbuf,
size_t inbytesleft);
#define SDL_iconv_utf8_locale(S) SDL_iconv_string("", "UTF-8", S, SDL_strlen(S)+1)
#define SDL_iconv_utf8_ucs2(S) (Uint16 *)SDL_iconv_string("UCS-2", "UTF-8", S, SDL_strlen(S)+1)
#define SDL_iconv_utf8_ucs4(S) (Uint32 *)SDL_iconv_string("UCS-4", "UTF-8", S, SDL_strlen(S)+1)
#define SDL_iconv_utf8_ucs2(S) (Uint16 *)SDL_iconv_string("UCS-2-INTERNAL", "UTF-8", S, SDL_strlen(S)+1)
#define SDL_iconv_utf8_ucs4(S) (Uint32 *)SDL_iconv_string("UCS-4-INTERNAL", "UTF-8", S, SDL_strlen(S)+1)

/* Ends C function definitions when using C++ */
#ifdef __cplusplus
@@ -87,15 +87,21 @@ enum
ENCODING_UTF32, /* Needs byte order marker */
ENCODING_UTF32BE,
ENCODING_UTF32LE,
ENCODING_UCS2, /* Native byte order assumed */
ENCODING_UCS4, /* Native byte order assumed */
ENCODING_UCS2BE,
ENCODING_UCS2LE,
ENCODING_UCS4BE,
ENCODING_UCS4LE,
};
/*
 * Byte-order-neutral aliases for the UTF-16/UTF-32/UCS-2/UCS-4 encoding IDs.
 * Each ENCODING_*NATIVE name resolves to the big-endian (BE) enumerator when
 * SDL_BYTEORDER equals SDL_BIG_ENDIAN, and to the little-endian (LE)
 * enumerator otherwise.
 */
#if SDL_BYTEORDER == SDL_BIG_ENDIAN
#define ENCODING_UTF16NATIVE ENCODING_UTF16BE
#define ENCODING_UTF32NATIVE ENCODING_UTF32BE
#define ENCODING_UCS2NATIVE ENCODING_UCS2BE
#define ENCODING_UCS4NATIVE ENCODING_UCS4BE
#else
/* Any byte order other than SDL_BIG_ENDIAN is handled as little-endian. */
#define ENCODING_UTF16NATIVE ENCODING_UTF16LE
#define ENCODING_UTF32NATIVE ENCODING_UTF32LE
#define ENCODING_UCS2NATIVE ENCODING_UCS2LE
#define ENCODING_UCS4NATIVE ENCODING_UCS4LE
#endif

struct _SDL_iconv_t
@@ -128,10 +134,16 @@ static struct
{ "UTF-32BE", ENCODING_UTF32BE },
{ "UTF32LE", ENCODING_UTF32LE },
{ "UTF-32LE", ENCODING_UTF32LE },
{ "UCS2", ENCODING_UCS2 },
{ "UCS-2", ENCODING_UCS2 },
{ "UCS4", ENCODING_UCS4 },
{ "UCS-4", ENCODING_UCS4 },
{ "UCS2", ENCODING_UCS2BE },
{ "UCS-2", ENCODING_UCS2BE },
{ "UCS-2LE", ENCODING_UCS2LE },
{ "UCS-2BE", ENCODING_UCS2BE },
{ "UCS-2-INTERNAL", ENCODING_UCS2NATIVE },
{ "UCS4", ENCODING_UCS4BE },
{ "UCS-4", ENCODING_UCS4BE },
{ "UCS-4LE", ENCODING_UCS4LE },
{ "UCS-4BE", ENCODING_UCS4BE },
{ "UCS-4-INTERNAL", ENCODING_UCS4NATIVE },
/* *INDENT-ON* */
};

@@ -518,6 +530,29 @@ SDL_iconv(SDL_iconv_t cd,
(Uint32) (W2 & 0x3FF)) + 0x10000;
}
break;
case ENCODING_UCS2LE:
{
Uint8 *p = (Uint8 *) src;
if (srclen < 2) {
return SDL_ICONV_EINVAL;
}
ch = ((Uint32) p[1] << 8) | (Uint32) p[0];
src += 2;
srclen -= 2;
}
break;
case ENCODING_UCS2BE:
{
Uint8 *p = (Uint8 *) src;
if (srclen < 2) {
return SDL_ICONV_EINVAL;
}
ch = ((Uint32) p[0] << 8) | (Uint32) p[1];
src += 2;
srclen -= 2;
}
break;
case ENCODING_UCS4BE:
case ENCODING_UTF32BE:
{
Uint8 *p = (Uint8 *) src;
@@ -531,6 +566,7 @@ SDL_iconv(SDL_iconv_t cd,
srclen -= 4;
}
break;
case ENCODING_UCS4LE:
case ENCODING_UTF32LE:
{
Uint8 *p = (Uint8 *) src;
@@ -544,28 +580,6 @@ SDL_iconv(SDL_iconv_t cd,
srclen -= 4;
}
break;
case ENCODING_UCS2:
{
Uint16 *p = (Uint16 *) src;
if (srclen < 2) {
return SDL_ICONV_EINVAL;
}
ch = *p;
src += 2;
srclen -= 2;
}
break;
case ENCODING_UCS4:
{
Uint32 *p = (Uint32 *) src;
if (srclen < 4) {
return SDL_ICONV_EINVAL;
}
ch = *p;
src += 4;
srclen -= 4;
}
break;
}

/* Encode a character */
@@ -728,64 +742,74 @@ SDL_iconv(SDL_iconv_t cd,
}
}
break;
case ENCODING_UTF32BE:
case ENCODING_UCS2BE:
{
Uint8 *p = (Uint8 *) dst;
if (ch > 0x10FFFF) {
if (ch > 0xFFFF) {
ch = UNKNOWN_UNICODE;
}
if (dstlen < 4) {
if (dstlen < 2) {
return SDL_ICONV_E2BIG;
}
p[0] = (Uint8) (ch >> 24);
p[1] = (Uint8) (ch >> 16);
p[2] = (Uint8) (ch >> 8);
p[3] = (Uint8) ch;
dst += 4;
dstlen -= 4;
p[0] = (Uint8) (ch >> 8);
p[1] = (Uint8) ch;
dst += 2;
dstlen -= 2;
}
break;
case ENCODING_UTF32LE:
case ENCODING_UCS2LE:
{
Uint8 *p = (Uint8 *) dst;
if (ch > 0x10FFFF) {
if (ch > 0xFFFF) {
ch = UNKNOWN_UNICODE;
}
if (dstlen < 4) {
if (dstlen < 2) {
return SDL_ICONV_E2BIG;
}
p[3] = (Uint8) (ch >> 24);
p[2] = (Uint8) (ch >> 16);
p[1] = (Uint8) (ch >> 8);
p[0] = (Uint8) ch;
dst += 4;
dstlen -= 4;
dst += 2;
dstlen -= 2;
}
break;
case ENCODING_UCS2:
case ENCODING_UTF32BE:
if (ch > 0x10FFFF) {
ch = UNKNOWN_UNICODE;
}
case ENCODING_UCS4BE:
if (ch > 0x7FFFFFFF) {
ch = UNKNOWN_UNICODE;
}
{
Uint16 *p = (Uint16 *) dst;
if (ch > 0xFFFF) {
ch = UNKNOWN_UNICODE;
}
if (dstlen < 2) {
Uint8 *p = (Uint8 *) dst;
if (dstlen < 4) {
return SDL_ICONV_E2BIG;
}
*p = (Uint16) ch;
dst += 2;
dstlen -= 2;
p[0] = (Uint8) (ch >> 24);
p[1] = (Uint8) (ch >> 16);
p[2] = (Uint8) (ch >> 8);
p[3] = (Uint8) ch;
dst += 4;
dstlen -= 4;
}
break;
case ENCODING_UCS4:
case ENCODING_UTF32LE:
if (ch > 0x10FFFF) {
ch = UNKNOWN_UNICODE;
}
case ENCODING_UCS4LE:
if (ch > 0x7FFFFFFF) {
ch = UNKNOWN_UNICODE;
}
{
Uint32 *p = (Uint32 *) dst;
if (ch > 0x7FFFFFFF) {
ch = UNKNOWN_UNICODE;
}
Uint8 *p = (Uint8 *) dst;
if (dstlen < 4) {
return SDL_ICONV_E2BIG;
}
*p = ch;
p[3] = (Uint8) (ch >> 24);
p[2] = (Uint8) (ch >> 16);
p[1] = (Uint8) (ch >> 8);
p[0] = (Uint8) ch;
dst += 4;
dstlen -= 4;
}

0 comments on commit 0fe99b9

Please sign in to comment.