Skip to content

Commit

Permalink
correct handling of double-byte in C and C++ compilers
Browse files Browse the repository at this point in the history
  • Loading branch information
jmalak committed Dec 15, 2023
1 parent d2d37a6 commit 934654b
Show file tree
Hide file tree
Showing 14 changed files with 58 additions and 39 deletions.
1 change: 0 additions & 1 deletion bld/cc/c/cmodel.c
Original file line number Diff line number Diff line change
Expand Up @@ -335,7 +335,6 @@ void InitModInfo( void )
CompFlags.emit_library_names = true;
CompFlags.emit_dependencies = true;
CompFlags.emit_targimp_symbols = true;
CompFlags.use_unicode = true;
CompFlags.no_debug_type_names = false;
CompFlags.auto_agg_inits = false;
CompFlags.no_check_inits = false;
Expand Down
16 changes: 7 additions & 9 deletions bld/cc/c/coptions.c
Original file line number Diff line number Diff line change
Expand Up @@ -215,29 +215,27 @@ static void SetCharacterEncoding( void )
LoadUnicodeTable( unicode_CP );
break;
case ENC_ZK0U:
CompFlags.use_unicode = false;
SetDBChar( 0 ); /* set double-byte char type */
CompFlags.jis_to_unicode = true;
break;
/* fall through */
case ENC_ZK:
case ENC_ZK0:
CompFlags.use_unicode = false;
CompFlags.use_double_byte = true;
SetDBChar( 0 ); /* set double-byte char type */
break;
case ENC_ZK1:
CompFlags.use_unicode = false;
CompFlags.use_double_byte = true;
SetDBChar( 1 ); /* set double-byte char type */
break;
case ENC_ZK2:
CompFlags.use_unicode = false;
CompFlags.use_double_byte = true;
SetDBChar( 2 ); /* set double-byte char type */
break;
case ENC_ZK3:
CompFlags.use_unicode = false;
CompFlags.use_double_byte = true;
SetDBChar( 3 ); /* set double-byte char type */
break;
case ENC_ZKL:
CompFlags.use_unicode = false;
CompFlags.use_double_byte = true;
SetDBChar( -1 ); /* set double-byte char type to default */
break;
}
Expand Down Expand Up @@ -791,7 +789,7 @@ static void MacroDefs( void )
if( CompFlags.zc_switch_used ) {
DefSwitchMacro( "ZC" );
}
if( !CompFlags.use_unicode ) {
if( CompFlags.use_double_byte ) {
DefSwitchMacro( "ZK" );
}
#if _INTEL_CPU
Expand Down
19 changes: 14 additions & 5 deletions bld/cc/c/cscan.c
Original file line number Diff line number Diff line change
Expand Up @@ -1416,6 +1416,19 @@ int ESCChar( int c, escinp_fn ifn, msg_codes *perr_msg, escout_fn ofn )
return( n );
}

int EncodeWchar( int c )
/***********************
 * Convert a source character code into the value used for
 * a wide character, as selected by the character set flags.
 *
 * - double-byte encoding off: the code is translated through
 *   the UniCode table
 * - double-byte encoding on with JIS-to-Unicode requested:
 *   the code is passed through JIS2Unicode
 * - double-byte encoding on otherwise: the code is returned
 *   unchanged
 */
{
    if( !CompFlags.use_double_byte ) {
        /* c is used directly as the table index */
        return( UniCode[c] );
    }
    if( CompFlags.jis_to_unicode ) {
        return( JIS2Unicode( c ) );
    }
    return( c );
}

static TOKEN doScanCharConst( DATA_TYPE char_type )
/**************************************************
* TokenLen is always lower than BUF_SIZE that
Expand Down Expand Up @@ -1508,11 +1521,7 @@ static TOKEN doScanCharConst( DATA_TYPE char_type )
Buffer[TokenLen++] = CurrChar;
NextChar();
} else if( char_type == TYP_WCHAR ) {
if( CompFlags.use_unicode ) {
c = UniCode[c];
} else if( CompFlags.jis_to_unicode ) {
c = JIS2Unicode( c );
}
c = EncodeWchar( c );
++i;
value = (value << 8) + ((c & 0xFF00) >> 8);
c &= 0x00FF;
Expand Down
6 changes: 1 addition & 5 deletions bld/cc/c/cstring.c
Original file line number Diff line number Diff line change
Expand Up @@ -172,11 +172,7 @@ static target_size RemoveEscapes( char *buf, const char *inbuf, target_size ilen
c = read_inp();
}
} else if( CompFlags.wide_char_string ) {
if( CompFlags.use_unicode ) {
c = UniCode[c];
} else if( CompFlags.jis_to_unicode ) {
c = JIS2Unicode( c );
}
c = EncodeWchar( c );
WRITE_BYTE( c );
c = c >> 8;
#if _CPU == 370
Expand Down
2 changes: 1 addition & 1 deletion bld/cc/h/ctypes.h
Original file line number Diff line number Diff line change
Expand Up @@ -616,7 +616,7 @@ typedef struct comp_flags {
boolbit make_enums_an_int : 1; /* force all enums to be int */
boolbit original_enum_setting : 1; /* reset value if pragma used*/
boolbit zc_switch_used : 1; /* -zc switch specified */
boolbit use_unicode : 1; /* use unicode for L"abc" */
boolbit use_double_byte : 1; /* use double-byte encoding for L"abc" */
boolbit op_switch_used : 1; /* -op force floats to mem */
boolbit no_debug_type_names : 1; /* -d2~ switch specified */

Expand Down
6 changes: 4 additions & 2 deletions bld/cc/h/scan.h
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
*
* Open Watcom Project
*
* Copyright (c) 2002-2020 The Open Watcom Contributors. All Rights Reserved.
* Copyright (c) 2002-2023 The Open Watcom Contributors. All Rights Reserved.
* Portions Copyright (c) 1983-2002 Sybase, Inc. All Rights Reserved.
*
* ========================================================================
Expand Down Expand Up @@ -74,4 +74,6 @@ typedef enum charset_flags {
#define HEX_BASE 'a'
#endif

extern charset_flags CharSet[LCHR_MAX];
extern charset_flags CharSet[LCHR_MAX];

extern int EncodeWchar( int c );
5 changes: 4 additions & 1 deletion bld/plusplus/c/cmdlnany.c
Original file line number Diff line number Diff line change
Expand Up @@ -627,22 +627,25 @@ static void analyseAnyTargetOptions( OPT_STORAGE *data )
}
switch( data->char_set ) {
case OPT_ENUM_char_set_zku:
CompFlags.use_unicode = true;
loadUnicodeTable( data->zku_value );
break;
case OPT_ENUM_char_set_zk0u:
CompFlags.jis_to_unicode = true;
/* fall through */
case OPT_ENUM_char_set_zk0:
CompFlags.use_double_byte = true;
SetDBChar( 0 );
break;
case OPT_ENUM_char_set_zk1:
CompFlags.use_double_byte = true;
SetDBChar( 1 );
break;
case OPT_ENUM_char_set_zk2:
CompFlags.use_double_byte = true;
SetDBChar( 2 );
break;
case OPT_ENUM_char_set_zkl:
CompFlags.use_double_byte = true;
SetDBChar( -1 );
break;
}
Expand Down
2 changes: 1 addition & 1 deletion bld/plusplus/c/cmdlnrsc.c
Original file line number Diff line number Diff line change
Expand Up @@ -225,7 +225,7 @@ static void macroDefs( void )
if( CompFlags.emit_names ) {
DefSwitchMacro( "EN" );
}
if( CompFlags.use_unicode ) {
if( CompFlags.use_double_byte ) {
DefSwitchMacro( "ZK" );
}
if( CompFlags.bm_switch_used ) {
Expand Down
2 changes: 1 addition & 1 deletion bld/plusplus/c/cmdlnx86.c
Original file line number Diff line number Diff line change
Expand Up @@ -928,7 +928,7 @@ static void macroDefs( void )
if( CompFlags.zc_switch_used ) {
DefSwitchMacro( "ZC" );
}
if( CompFlags.use_unicode ) {
if( CompFlags.use_double_byte ) {
DefSwitchMacro( "ZK" );
}
if( CompFlags.sg_switch_used ) {
Expand Down
19 changes: 14 additions & 5 deletions bld/plusplus/c/cscan.c
Original file line number Diff line number Diff line change
Expand Up @@ -354,6 +354,19 @@ static int doESCChar( int c, bool expanding, type_id char_type )
return( n );
}

int EncodeWchar( int c )
/**********************/
// Produce the wide character value for source character code "c":
//  - without double-byte encoding, translate it through the UniCode table
//  - with double-byte encoding and JIS-to-Unicode requested, pass it
//    through JIS2Unicode
//  - with double-byte encoding alone, return it unchanged
{
    int wide_chr;

    wide_chr = c;
    if( !CompFlags.use_double_byte ) {
        // c is used directly as the table index
        wide_chr = UniCode[c];
    } else if( CompFlags.jis_to_unicode ) {
        wide_chr = JIS2Unicode( c );
    }
    return( wide_chr );
}

static TOKEN doScanCharConst( type_id char_type, bool expanding )
{
int c;
Expand Down Expand Up @@ -402,11 +415,7 @@ static TOKEN doScanCharConst( type_id char_type, bool expanding )
c &= 0x00FF;
flag.double_byte_char = true;
} else if( char_type == TYP_WCHAR ) {
if( CompFlags.use_unicode ) {
c = UniCode[c];
} else if( CompFlags.jis_to_unicode ) {
c = JIS2Unicode( c );
}
c = EncodeWchar( c );
++i;
value = (value << 8) + ((c & 0xFF00) >> 8);
c &= 0x00FF;
Expand Down
8 changes: 2 additions & 6 deletions bld/plusplus/c/stringl.c
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
*
* Open Watcom Project
*
* Copyright (c) 2002-2020 The Open Watcom Contributors. All Rights Reserved.
* Copyright (c) 2002-2023 The Open Watcom Contributors. All Rights Reserved.
* Portions Copyright (c) 1983-2002 Sybase, Inc. All Rights Reserved.
*
* ========================================================================
Expand Down Expand Up @@ -217,11 +217,7 @@ static size_t compressLiteral( char *tgt, const char *s, size_t len, bool wide )
}
--len;
} else if( wide ) {
if( CompFlags.use_unicode ) {
chr = UniCode[chr];
} else if( CompFlags.jis_to_unicode ) {
chr = JIS2Unicode( chr );
}
chr = EncodeWchar( chr );
STORE_WCHAR( tgt, chr, new_len );
} else {
// _ASCIIOUT( chr );
Expand Down
2 changes: 1 addition & 1 deletion bld/plusplus/h/compflag.h
Original file line number Diff line number Diff line change
Expand Up @@ -126,7 +126,7 @@ typedef struct comp_flags {
boolbit no_debug_type_names : 1;
boolbit emit_names : 1;
boolbit warnings_cause_bad_exit : 1;
boolbit use_unicode : 1;
boolbit use_double_byte : 1;
boolbit unique_functions : 1;
boolbit jis_to_unicode : 1;
boolbit emit_dependencies : 1;
Expand Down
3 changes: 2 additions & 1 deletion bld/plusplus/h/scan.h
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
*
* Open Watcom Project
*
* Copyright (c) 2002-2020 The Open Watcom Contributors. All Rights Reserved.
* Copyright (c) 2002-2023 The Open Watcom Contributors. All Rights Reserved.
* Portions Copyright (c) 1983-2002 Sybase, Inc. All Rights Reserved.
*
* ========================================================================
Expand Down Expand Up @@ -84,5 +84,6 @@ typedef enum charset_flags {
extern charset_flags CharSet[LCHR_MAX]; // character characterizations
extern unsigned JIS2Unicode( unsigned );
extern const unsigned char TokValue[];
extern int EncodeWchar( int c );

#endif // __SCAN_H__
6 changes: 6 additions & 0 deletions docs/doc/cg/cpopts.gml
Original file line number Diff line number Diff line change
Expand Up @@ -5589,6 +5589,12 @@ execution time.
.np
This group of options deals with compile-time aspects of character
sets used in the source code.
.np
By default, the compiler encodes wide characters as Unicode and assumes
that the source code uses code page 437 (US-ASCII).
.if &e'&$SWzku ne 0 .do begin
This setting is equivalent to using the -zku=437 option.
.do end
.*
:OPTLIST.
.*
Expand Down

0 comments on commit 934654b

Please sign in to comment.