Skip to content

Commit

Permalink
Add support for [string totitle]
Browse files Browse the repository at this point in the history
Signed-off-by: Steve Bennett <steveb@workware.net.au>
  • Loading branch information
msteveb committed Nov 28, 2011
1 parent 1e6e0d0 commit fbc62c2
Show file tree
Hide file tree
Showing 5 changed files with 100 additions and 31 deletions.
75 changes: 51 additions & 24 deletions jim.c
Original file line number Diff line number Diff line change
Expand Up @@ -2531,43 +2531,66 @@ Jim_Obj *Jim_StringRangeObj(Jim_Interp *interp,
#endif
}

/**
 * Copies the NUL-terminated string 'str' into 'dest' one character at a
 * time, mapping each character to upper case when 'uc' is non-zero and to
 * lower case otherwise.
 *
 * 'dest' is NUL-terminated on return. No bounds are checked here, so the
 * caller is responsible for supplying a large enough buffer.
 */
static void JimStrCopyUpperLower(char *dest, const char *str, int uc)
{
    const char *src = str;
    char *out = dest;

    for (;;) {
        int ch;
        int mapped;

        if (*src == '\0') {
            break;
        }

        /* Decode one character and step past it */
        src += utf8_tounicode(src, &ch);

        if (uc) {
            mapped = utf8_upper(ch);
        }
        else {
            mapped = utf8_lower(ch);
        }

        /* Encode the mapped character and step past it */
        out += utf8_fromunicode(out, mapped);
    }

    *out = '\0';
}

/**
 * Returns a new string object containing the lower-case form of the string
 * held by 'strObjPtr'.
 *
 * SetStringFromAny() is applied to the source object first. The converted
 * text is written to a buffer obtained from Jim_Alloc(), which is then
 * passed to Jim_NewStringObjNoAlloc().
 */
static Jim_Obj *JimStringToLower(Jim_Interp *interp, Jim_Obj *strObjPtr)
{
    char *buf;
    int len;
    const char *str;

    SetStringFromAny(interp, strObjPtr);

    str = Jim_GetString(strObjPtr, &len);

    /* Allocate exactly once: the shared helper performs the conversion and
     * NUL-terminates the result, so no separate inline loop is needed. */
    buf = Jim_Alloc(len + 1);
    JimStrCopyUpperLower(buf, str, 0);

    /* NOTE(review): this assumes case mapping never changes the encoded byte
     * length of the string -- confirm for multi-byte UTF-8 input. */
    return Jim_NewStringObjNoAlloc(interp, buf, len);
}

/**
 * Returns a new string object containing the upper-case form of the string
 * held by 'strObjPtr'.
 *
 * SetStringFromAny() is applied to the source object first. The converted
 * text is written to a buffer obtained from Jim_Alloc(), which is then
 * passed to Jim_NewStringObjNoAlloc().
 */
static Jim_Obj *JimStringToUpper(Jim_Interp *interp, Jim_Obj *strObjPtr)
{
    char *buf;
    int len;
    const char *str;

    SetStringFromAny(interp, strObjPtr);

    str = Jim_GetString(strObjPtr, &len);

    /* Allocate exactly once: the shared helper performs the conversion and
     * NUL-terminates the result, so no separate inline loop is needed. */
    buf = Jim_Alloc(len + 1);
    JimStrCopyUpperLower(buf, str, 1);

    /* NOTE(review): this assumes case mapping never changes the encoded byte
     * length of the string -- confirm for multi-byte UTF-8 input. */
    return Jim_NewStringObjNoAlloc(interp, buf, len);
}

/**
 * Returns the title-case form of the string held by 'strObjPtr': the first
 * character is mapped with utf8_title() and every following character is
 * mapped to lower case.
 *
 * An empty string is returned as-is (the same object, not a copy).
 * Otherwise the result is a new object built around a buffer obtained from
 * Jim_Alloc().
 */
static Jim_Obj *JimStringToTitle(Jim_Interp *interp, Jim_Obj *strObjPtr)
{
    char *buf, *p;
    int len;
    int c;
    const char *str;

    str = Jim_GetString(strObjPtr, &len);
    if (len == 0) {
        return strObjPtr;
    }

    /* 'p' must not be written through before this point: it has no valid
     * target until the buffer has been allocated. */
    buf = p = Jim_Alloc(len + 1);

    /* Title-case the first character only */
    str += utf8_tounicode(str, &c);
    p += utf8_fromunicode(p, utf8_title(c));

    /* Lower-case the remainder; the helper also NUL-terminates the result */
    JimStrCopyUpperLower(p, str, 0);

    /* NOTE(review): this assumes case mapping never changes the encoded byte
     * length of the string -- confirm for multi-byte UTF-8 input. */
    return Jim_NewStringObjNoAlloc(interp, buf, len);
}

Expand Down Expand Up @@ -12639,15 +12662,15 @@ static int Jim_StringCoreCommand(Jim_Interp *interp, int argc, Jim_Obj *const *a
int opt_case = 1;
int option;
static const char * const options[] = {
"bytelength", "length", "compare", "match", "equal", "is", "byterange", "range", "map",
"repeat", "reverse", "index", "first", "last",
"trim", "trimleft", "trimright", "tolower", "toupper", NULL
"bytelength", "length", "compare", "match", "equal", "is", "byterange", "range",
"map", "repeat", "reverse", "index", "first", "last",
"trim", "trimleft", "trimright", "tolower", "toupper", "totitle", NULL
};
enum
{
OPT_BYTELENGTH, OPT_LENGTH, OPT_COMPARE, OPT_MATCH, OPT_EQUAL, OPT_IS, OPT_BYTERANGE, OPT_RANGE, OPT_MAP,
OPT_REPEAT, OPT_REVERSE, OPT_INDEX, OPT_FIRST, OPT_LAST,
OPT_TRIM, OPT_TRIMLEFT, OPT_TRIMRIGHT, OPT_TOLOWER, OPT_TOUPPER
OPT_BYTELENGTH, OPT_LENGTH, OPT_COMPARE, OPT_MATCH, OPT_EQUAL, OPT_IS, OPT_BYTERANGE, OPT_RANGE,
OPT_MAP, OPT_REPEAT, OPT_REVERSE, OPT_INDEX, OPT_FIRST, OPT_LAST,
OPT_TRIM, OPT_TRIMLEFT, OPT_TRIMRIGHT, OPT_TOLOWER, OPT_TOUPPER, OPT_TOTITLE
};
static const char * const nocase_options[] = {
"-nocase", NULL
Expand Down Expand Up @@ -12894,16 +12917,20 @@ static int Jim_StringCoreCommand(Jim_Interp *interp, int argc, Jim_Obj *const *a

case OPT_TOLOWER:
case OPT_TOUPPER:
case OPT_TOTITLE:
if (argc != 3) {
Jim_WrongNumArgs(interp, 2, argv, "string");
return JIM_ERR;
}
if (option == OPT_TOLOWER) {
Jim_SetResult(interp, JimStringToLower(interp, argv[2]));
}
else {
else if (option == OPT_TOUPPER) {
Jim_SetResult(interp, JimStringToUpper(interp, argv[2]));
}
else {
Jim_SetResult(interp, JimStringToTitle(interp, argv[2]));
}
return JIM_OK;

case OPT_IS:
Expand Down
24 changes: 17 additions & 7 deletions parse-unidata.tcl
Original file line number Diff line number Diff line change
Expand Up @@ -11,30 +11,40 @@
# to generate case mapping tables
set map(lower) {}
set map(upper) {}
set map(title) {}

# Read the data file named by the first command line argument (presumably
# UnicodeData.txt -- confirm) and collect the upper, lower and title case
# mappings of letters in the range 0x80..0xffff.
set f [open [lindex $argv 0]]
while {[gets $f buf] >= 0} {
    set title ""
    set lower ""
    set upper ""
    foreach {code name class x x x x x x x x x upper lower title} [split $buf ";"] break

    # Keep $code exactly as read so that it can be compared with $title below;
    # $codex is the 0x-prefixed, lower-case form used for numeric comparisons
    # and for output.
    set codex [string tolower 0x$code]
    if {$codex <= 0x7f} {
        continue
    }
    if {$codex > 0xffff} {
        break
    }
    # Only classes starting with "L" are mapped
    if {![string match L* $class]} {
        continue
    }
    if {$upper ne ""} {
        lappend map(upper) $codex [string tolower 0x$upper]
    }
    if {$lower ne ""} {
        lappend map(lower) $codex [string tolower 0x$lower]
    }
    # A title mapping is stored only when it differs from the upper mapping
    if {$title ne "" && $title ne $upper} {
        if {$title eq $code} {
            # The character is its own title case form: record this as 0
            set title 0
        }
        lappend map(title) $codex [string tolower 0x$title]
    }
}
close $f

foreach type {upper lower} {
foreach type {upper lower title} {
puts "static const struct casemap unicode_case_mapping_$type\[\] = \{"
foreach {code alt} $map($type) {
puts "\t{ $code, $alt },"
Expand Down
13 changes: 13 additions & 0 deletions tests/string.test
Original file line number Diff line number Diff line change
Expand Up @@ -770,6 +770,19 @@ test string-16.6 {string toupper} {
string toupper {123#$&*()}
} {123#$&*()}

# string totitle: the first character of the string is title-cased and all
# remaining characters are lower-cased.

# Calling without a string argument is a usage error
test string-17.1 {string totitle} -body {
string totitle
} -returnCodes error -match glob -result {wrong # args: should be "string totitle string*}
# Mixed-case input: only the first character ends up capitalised
test string-17.3 {string totitle} {
string totitle abCDEf
} {Abcdef}
# Only the first character of the whole string is capitalised, not each word
test string-17.4 {string totitle} {
string totitle "abc xYz"
} {Abc xyz}
# Characters without case are passed through unchanged
test string-17.5 {string totitle} {
string totitle {123#$&*()}
} {123#$&*()}

test string-18.1 {string trim} {
list [catch {string trim} msg]
} {1}
Expand Down
9 changes: 9 additions & 0 deletions utf8.c
Original file line number Diff line number Diff line change
Expand Up @@ -180,4 +180,13 @@ int utf8_lower(int ch)
return utf8_map_case(unicode_case_mapping_lower, ARRAYSIZE(unicode_case_mapping_lower), ch);
}

/**
 * Returns the title-case variant of the code point 'ch'.
 *
 * The title-case table is consulted first. A table result of 0 means the
 * code point is its own title-case form. If the lookup returns 'ch' itself
 * (presumably meaning there is no table entry -- confirm against
 * utf8_map_case()), the upper-case variant is returned instead.
 */
int utf8_title(int ch)
{
    int mapped = utf8_map_case(unicode_case_mapping_title, ARRAYSIZE(unicode_case_mapping_title), ch);

    if (mapped == ch) {
        /* Nothing specific for title case: use the upper-case mapping */
        return utf8_upper(ch);
    }
    if (mapped == 0) {
        /* Explicitly recorded as unchanged in title case */
        return ch;
    }
    return mapped;
}

#endif /* JIM_BOOTSTRAP */
10 changes: 10 additions & 0 deletions utf8.h
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ int utf8_fromunicode(char *p, unsigned short uc);
#define utf8_strlen(S, B) ((B) < 0 ? strlen(S) : (B))
#define utf8_tounicode(S, CP) (*(CP) = (unsigned char)*(S), 1)
#define utf8_upper(C) toupper(C)
#define utf8_title(C) toupper(C)
#define utf8_lower(C) tolower(C)
#define utf8_index(C, I) (I)
#define utf8_charlen(C) 1
Expand Down Expand Up @@ -95,6 +96,15 @@ int utf8_prev_len(const char *str, int len);
*/
int utf8_upper(int uc);

/**
 * Returns the title-case variant of the given unicode codepoint.
 *
 * A codepoint recorded as being its own title-case form is returned
 * unchanged. If there is no specific title-case mapping, the result of
 * utf8_upper() is returned instead.
 *
 * Unicode code points > \uffff are returned unchanged.
 */
int utf8_title(int uc);

/**
* Returns the lower-case variant of the given unicode codepoint.
*
Expand Down

0 comments on commit fbc62c2

Please sign in to comment.