Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
tree: e498158f68
Fetching contributors…

Cannot retrieve contributors at this time

305 lines (274 sloc) 7.297 kb
// charset.cpp - part of TweakPNG
/*
Copyright (C) 2008 Jason Summers
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
See the file tweakpng-src.txt for more information.
*/
// Note: I am aware of the WideCharToMultiByte and MultiByteToWideChar Windows
// API functions, but they don't work the same on all versions of Windows,
// and anyway I was interested in trying to write my own functions to do this.
#include "twpng-config.h"
#include <windows.h>
#include <tchar.h>
#include <malloc.h>
#include "tweakpng.h"
#ifdef UNICODE
int convert_tchar_to_latin1(const TCHAR *src, int srclen,
char **pdst, int *pdstlen)
{
UINT16 c;
int spos,dpos;
unsigned char *dst;
*pdst=NULL;
*pdstlen=0;
dst = (unsigned char*)malloc(srclen+1); // This memory won't all be used if there are surrogate pairs.
if(!dst) return 0;
dpos=0;
for(spos=0;spos<srclen;spos++) {
c=(UINT16)src[spos];
if( (c & 0xfc00)==0xd800 ) {
// first word of a surrogate pair
}
else if( (c & 0xfc00)==0xdc00 ) {
// second word of a surrogate pair
dst[dpos++] = '?';
}
else if(c>0xff) {
// character not representable in latin-1
dst[dpos++] = '?';
}
else {
dst[dpos++] = (unsigned char)c;
}
}
dst[dpos] = '\0';
*pdst = (char*)dst;
*pdstlen = dpos;
return 1;
}
#else
// Non-Unicode build: copies srclen bytes from src unchanged into a newly
// malloc'd buffer and appends a NUL terminator.
// On success returns 1, stores the buffer in *pdst and srclen in *pdstlen.
// On allocation failure returns 0 with *pdst=NULL and *pdstlen=0.
int convert_tchar_to_latin1(const TCHAR *src, int srclen,
 char **pdst, int *pdstlen)
{
	char *copy;

	*pdstlen = 0;
	*pdst = NULL;

	copy = (char*)malloc(srclen+1);
	if(copy==NULL) {
		return 0;
	}

	memcpy(copy, src, srclen);
	copy[srclen] = '\0';

	*pdstlen = srclen;
	*pdst = copy;
	return 1;
}
#endif
int convert_latin1_to_tchar(const char *src, int srclen,
TCHAR **pdst, int *pdstlen)
{
TCHAR *dst;
int i;
*pdst=NULL;
*pdstlen=0;
dst=(TCHAR*)malloc(sizeof(TCHAR)*(srclen+1));
if(!dst) return 0;
for(i=0;i<srclen;i++) {
dst[i] = ((unsigned char)src[i]);
}
dst[srclen]='\0';
*pdst=dst;
*pdstlen=srclen;
return 1;
}
#ifdef UNICODE
// Computes how many bytes of UTF-16 output the UTF-8 input would need.
// srclen is the input length in bytes. Only lead bytes are counted:
//   0x00..0x7f  (1-byte character)           -> one UTF-16 unit
//   0xc0..0xef  (lead of 2- or 3-byte char)  -> one UTF-16 unit
//   0xf0..0xf7  (lead of 4-byte character)   -> two UTF-16 units
// Continuation bytes (0x80..0xbf) and invalid bytes (0xf8..0xff)
// contribute nothing. The result does not include a terminator.
static int utf8_to_utf16_count_bytes(const void *src, int srclen)
{
	const unsigned char *bytes = (const unsigned char*)src;
	int utf16_units = 0;
	int pos;

	for(pos=0; pos<srclen; pos++) {
		unsigned char b = bytes[pos];

		if(b>=0x80 && b<=0xbf) {
			continue; // continuation byte, already accounted for by its lead
		}
		if(b>=0xf8) {
			continue; // never valid in utf8
		}

		if(b>=0xf0) {
			utf16_units += 2; // needs a surrogate pair
		}
		else {
			utf16_units += 1;
		}
	}

	// Each UTF-16 unit occupies 2 bytes.
	return utf16_units*2;
}
// Converts srclen bytes of UTF-8 from src into a newly malloc'd,
// zero-terminated UTF-16 string.
// On success returns 1, stores the buffer in *pdst and the number of
// WCHARs written (not counting the terminator) in *pdstlen.
// On allocation failure returns 0 with *pdst=NULL and *pdstlen=0.
//
// Handling of malformed input:
//  - Continuation bytes that do not belong to a sequence are ignored.
//  - A sequence cut short by a new lead byte or an ASCII byte is discarded.
//  - Bytes 0xf8..0xff are ignored.
//  - Sequences that decode into the surrogate range, or above U+10FFFF,
//    are replaced by U+FFFD.
//  - Overlong encodings are not rejected.
int convert_utf8_to_utf16(const void *src, int srclen,
 WCHAR **pdst, int *pdstlen)
{
	const unsigned char *in = (const unsigned char*)src;
	WCHAR *dst;
	int pending_char = 0;
	// Must start at 0 so that a leading continuation byte is treated as
	// stray instead of reading an indeterminate value.
	int utf8_more_bytes_expected = 0;
	unsigned char c;
	int dstpos;
	int i;
	int memneeded;

	*pdst = NULL;
	*pdstlen = 0;

	// The size pass reserves one unit per 1-3 byte lead and two units per
	// 4-byte lead; the loop below never writes more than that per sequence.
	// The extra bytes leave room for the terminator.
	memneeded = utf8_to_utf16_count_bytes(src,srclen);
	dst = (WCHAR*)malloc(memneeded+10);
	if(!dst) return 0;

	dstpos=0;
	for(i=0;i<srclen;i++) {
		c = in[i];

		if(c<=0x7f) {
			// 1-byte utf8 character; also abandons any unfinished sequence
			pending_char = 0;
			utf8_more_bytes_expected = 0;
			dst[dstpos++] = c;
		}
		else if(c<=0xbf) {
			// non-initial byte of a multi-byte utf8 character
			if(utf8_more_bytes_expected<=0) {
				continue; // stray continuation byte
			}
			pending_char = (pending_char<<6)|(c&0x3f);
			utf8_more_bytes_expected--;
			if(utf8_more_bytes_expected!=0) {
				continue; // sequence not complete yet
			}

			if(pending_char>=0xd800 && pending_char<=0xdfff) {
				// unrepresentable character in the surrogate-pair range
				dst[dstpos++] = 0xfffd;
			}
			else if(pending_char<=0xffff) {
				// normal single-word character
				dst[dstpos++] = (WCHAR)pending_char;
			}
			else if(pending_char<=0x10ffff) {
				// Character representable using a surrogate pair:
				// high word carries the top 10 bits of (cp-0x10000),
				// low word carries the bottom 10 bits.
				dst[dstpos++] = (WCHAR)(0xd800 | ((pending_char-0x10000)>>10));
				dst[dstpos++] = (WCHAR)(0xdc00 | ((pending_char-0x10000)&0x03ff));
			}
			else {
				// Beyond the Unicode range; the high-word bits would not fit
				// in 10 bits and would yield a malformed pair.
				dst[dstpos++] = 0xfffd;
			}
		}
		else if(c<=0xdf) {
			// 1st byte of a 2-byte utf8 character
			pending_char = c&0x1f;
			utf8_more_bytes_expected = 1;
		}
		else if(c<=0xef) {
			// 1st byte of a 3-byte utf8 character
			pending_char = c&0x0f;
			utf8_more_bytes_expected = 2;
		}
		else if(c<=0xf7) {
			// 1st byte of a 4-byte utf8 character
			pending_char = c&0x07;
			utf8_more_bytes_expected = 3;
		}
		else {
			// invalid byte
			pending_char = 0;
			utf8_more_bytes_expected = 0;
		}
	}

	dst[dstpos] = '\0';
	*pdst = dst;
	*pdstlen = dstpos;
	return 1;
}
#endif
#ifdef UNICODE
// Computes how many bytes of UTF-8 output the UTF-16 input would need.
// srclen is the input length in WCHARs. Per code unit:
//   0x0000..0x007f                 -> 1 byte
//   0x0080..0x07ff                 -> 2 bytes
//   0xd800..0xdbff (high surrogate) -> 0 bytes (counted with its partner)
//   0xdc00..0xdfff (low surrogate)  -> 4 bytes (the whole pair)
//   anything else                  -> 3 bytes
// The result does not include a terminator.
static int utf16_to_utf8_count_bytes(const WCHAR *src, int srclen)
{
	int total = 0;
	int k;

	for(k=0; k<srclen; k++) {
		int unit = (int)src[k];
		int nbytes;

		if(unit>=0xd800 && unit<=0xdbff) {
			nbytes = 0; // first word of a surrogate pair
		}
		else if(unit>=0xdc00 && unit<=0xdfff) {
			nbytes = 4; // second word: the pair as a whole is 4 bytes
		}
		else if(unit<=0x7f) {
			nbytes = 1;
		}
		else if(unit<=0x07ff) {
			nbytes = 2;
		}
		else {
			nbytes = 3;
		}

		total += nbytes;
	}

	return total;
}
// Converts srclen WCHARs of UTF-16 from src into a newly malloc'd,
// NUL-terminated UTF-8 string.
// On success returns 1, stores the buffer in *pdst and the number of bytes
// written (not counting the terminator) in *pdstlen.
// On allocation failure returns 0 with *pdst=NULL and *pdstlen=0.
//
// Handling of malformed input:
//  - A low surrogate with no immediately preceding high surrogate is
//    replaced by U+FFFD (EF BF BD).
//  - A high surrogate not immediately followed by a low surrogate is
//    dropped; the size pass reserves no output space for it.
int convert_utf16_to_utf8(const WCHAR *src, int srclen,
 char **pdst, int *pdstlen)
{
	unsigned char *dst;
	int dpos;
	int i;
	int c;
	int pending_char = 0;   // most recent high surrogate, if have_pending
	int have_pending = 0;   // nonzero while a high surrogate awaits its partner
	int codept;
	int memneeded;

	*pdst = NULL;
	*pdstlen = 0;

	// The size pass reserves 4 bytes per low surrogate and 1-3 bytes per
	// other non-surrogate unit; the extra bytes leave room for the terminator.
	memneeded = utf16_to_utf8_count_bytes(src,srclen);
	dst = (unsigned char*)malloc(memneeded+10);
	if(!dst) return 0;

	dpos=0;
	for(i=0;i<srclen;i++) {
		c = (int)src[i];

		if(c>=0xdc00 && c<=0xdfff) {
			// second word of a surrogate pair
			if(have_pending) {
				// Valid pair => 4-byte utf8 character.
				codept = (pending_char & 0x03ff) << 10;
				codept |= (c & 0x03ff);
				codept += 0x10000;
				dst[dpos++] = (unsigned char)(0xf0 | (codept>>18));
				dst[dpos++] = (unsigned char)(0x80 | ((codept>>12)&0x3f));
				dst[dpos++] = (unsigned char)(0x80 | ((codept>>6)&0x3f));
				dst[dpos++] = (unsigned char)(0x80 | (codept&0x3f));
			}
			else {
				// Unpaired low surrogate: emit U+FFFD. This uses 3 of the
				// 4 bytes the size pass reserved for this unit.
				dst[dpos++] = 0xef;
				dst[dpos++] = 0xbf;
				dst[dpos++] = 0xbd;
			}
			have_pending = 0;
			continue;
		}

		// Any other unit ends a pending high surrogate without a partner.
		have_pending = 0;

		if(c>=0xd800 && c<=0xdbff) {
			// first word of a surrogate pair
			pending_char = c;
			have_pending = 1;
		}
		else if(c<=0x7f) { // 1-byte utf8 character
			dst[dpos++] = (unsigned char)c;
		}
		else if(c<=0x07ff) { // 2-byte utf8 character
			dst[dpos++] = (unsigned char)(0xc0 | (c>>6));
			dst[dpos++] = (unsigned char)(0x80 | (c&0x3f));
		}
		else { // 3-byte utf8 character
			dst[dpos++] = (unsigned char)(0xe0 | (c>>12));
			dst[dpos++] = (unsigned char)(0x80 | ((c>>6)&0x3f));
			dst[dpos++] = (unsigned char)(0x80 | (c&0x3f));
		}
	}

	dst[dpos]='\0';
	*pdstlen = dpos;
	*pdst = (char*)dst;
	return 1;
}
#endif
Jump to Line
Something went wrong with that request. Please try again.