Skip to content

Commit

Permalink
Merge 8d62969 into 032da94
Browse files (browse the repository at this point in the history)
  • Loading branch information
johnd0e committed Jan 16, 2021
2 parents 032da94 + 8d62969 commit 6d43e4b
Showing 1 changed file with 33 additions and 32 deletions.
65 changes: 33 additions & 32 deletions lutf8lib.c
Original file line number | Diff line number | Diff line change
Expand Up @@ -16,6 +16,7 @@
/* Largest 31-bit value; presumably the upper bound of the extended
 * (pre-RFC 3629) UTF-8 encoding range -- TODO confirm against its users. */
#define UTF8_MAX 0x7FFFFFFFu
/* Highest Unicode code point (U+10FFFF); Lutf8_char rejects arguments
 * above this with "value out of range". */
#define UTF8_MAXCP 0x10FFFFu
/* True when the byte at p has the bit pattern 10xxxxxx, i.e. it is a
 * UTF-8 continuation byte rather than the start of a sequence. */
#define iscont(p) ((*(p) & 0xC0) == 0x80)
/* Explicit cast helper: converts expr to type tp. Used in this file to
 * spell out conversions from lua_Integer / size_t to int or utfint. */
#define CAST(tp,expr) ((tp)(expr))

#ifndef LUA_QL
# define LUA_QL(x) "'" x "'"
Expand Down Expand Up @@ -107,8 +108,8 @@ static const char *utf8_relat (const char *s, const char *e, int idx) {
}

static int utf8_range(const char *s, const char *e, lua_Integer *i, lua_Integer *j) {
const char *ps = utf8_relat(s, e, *i);
const char *pe = utf8_relat(s, e, *j);
const char *ps = utf8_relat(s, e, CAST(int, *i));
const char *pe = utf8_relat(s, e, CAST(int, *j));
*i = (ps ? ps : (*i > 0 ? e : s)) - s;
*j = (pe ? utf8_next(pe, e) : (*j > 0 ? e : s)) - s;
return *i < *j;
Expand Down Expand Up @@ -143,7 +144,7 @@ static int find_in_range (range_table *t, size_t size, utfint ch) {
end = size;

while (begin < end) {
int mid = (begin + end) / 2;
size_t mid = (begin + end) / 2;
if (t[mid].last < ch)
begin = mid + 1;
else if (t[mid].first > ch)
Expand All @@ -162,7 +163,7 @@ static int convert_char (conv_table *t, size_t size, utfint ch) {
end = size;

while (begin < end) {
int mid = (begin + end) / 2;
size_t mid = (begin + end) / 2;
if (t[mid].last < ch)
begin = mid + 1;
else if (t[mid].first > ch)
Expand Down Expand Up @@ -295,6 +296,7 @@ static int Lutf8_sub (lua_State *L) {
static int Lutf8_reverse (lua_State *L) {
luaL_Buffer b;
const char *prev, *pprev, *ends, *e, *s = check_utf8(L, 1, &e);
(void) ends;
int lax = lua_toboolean(L, 2);
luaL_buffinit(L, &b);
if (lax) {
Expand All @@ -304,7 +306,7 @@ static int Lutf8_reverse (lua_State *L) {
}
} else {
for (prev = e; s < prev; prev = pprev) {
utfint code;
utfint code = 0;
ends = utf8_safe_decode(L, pprev = utf8_prev(s, prev), &code);
assert(ends == prev);
if (utf8_invalid(code))
Expand All @@ -326,12 +328,12 @@ static int Lutf8_byte (lua_State *L) {
lua_Integer pose = luaL_optinteger(L, 3, posi);
if (utf8_range(s, e, &posi, &pose)) {
for (e = s + pose, s = s + posi; s < e; ++n) {
utfint ch;
utfint ch = 0;
s = utf8_safe_decode(L, s, &ch);
lua_pushinteger(L, ch);
}
}
return n;
return CAST(int, n);
}

static int Lutf8_codepoint (lua_State *L) {
Expand All @@ -352,7 +354,7 @@ static int Lutf8_codepoint (lua_State *L) {
n = 0; /* count the number of returns */
se = s + pose; /* string end */
for (n = 0, s += posi - 1; s < se;) {
utfint code;
utfint code = 0;
s = utf8_safe_decode(L, s, &code);
if (!lax && utf8_invalid(code))
return luaL_error(L, "invalid UTF-8 code");
Expand All @@ -369,7 +371,7 @@ static int Lutf8_char (lua_State *L) {
for (i = 1; i <= n; ++i) {
lua_Integer code = luaL_checkinteger(L, i);
luaL_argcheck(L, code <= UTF8_MAXCP, i, "value out of range");
add_utf8char(&b, code);
add_utf8char(&b, CAST(utfint, code));
}
luaL_pushresult(&b);
return 1;
Expand All @@ -379,13 +381,13 @@ static int Lutf8_char (lua_State *L) {
static int Lutf8_##name (lua_State *L) { \
int t = lua_type(L, 1); \
if (t == LUA_TNUMBER) \
lua_pushinteger(L, utf8_to##name(lua_tointeger(L, 1))); \
lua_pushinteger(L, utf8_to##name(CAST(utfint, lua_tointeger(L, 1)))); \
else if (t == LUA_TSTRING) { \
luaL_Buffer b; \
const char *e, *s = to_utf8(L, 1, &e); \
luaL_buffinit(L, &b); \
while (s < e) { \
utfint ch; \
utfint ch = 0; \
s = utf8_safe_decode(L, s, &ch); \
add_utf8char(&b, utf8_to##name(ch)); \
} \
Expand Down Expand Up @@ -424,7 +426,7 @@ static int Lutf8_escape (lua_State *L) {
luaL_Buffer b;
luaL_buffinit(L, &b);
while (s < e) {
utfint ch;
utfint ch = 0;
s = utf8_safe_decode(L, s, &ch);
if (ch == '%') {
int hex = 0;
Expand Down Expand Up @@ -562,10 +564,10 @@ static int Lutf8_next (lua_State *L) {

static int iter_aux (lua_State *L, int strict) {
const char *e, *s = check_utf8(L, 1, &e);
int n = lua_tointeger(L, 2);
int n = CAST(int, lua_tointeger(L, 2));
const char *p = n <= 0 ? s : utf8_next(s+n-1, e);
if (p < e) {
utfint code;
utfint code = 0;
utf8_safe_decode(L, p, &code);
if (strict && utf8_invalid(code))
return luaL_error(L, "invalid UTF-8 code");
Expand All @@ -591,9 +593,9 @@ static int Lutf8_codes (lua_State *L) {
static int Lutf8_width (lua_State *L) {
int t = lua_type(L, 1);
int ambi_is_single = !lua_toboolean(L, 2);
int default_width = luaL_optinteger(L, 3, 0);
int default_width = CAST(int, luaL_optinteger(L, 3, 0));
if (t == LUA_TNUMBER) {
size_t chwidth = utf8_width(lua_tointeger(L, 1), ambi_is_single);
size_t chwidth = utf8_width(CAST(utfint, lua_tointeger(L, 1)), ambi_is_single);
if (chwidth == 0) chwidth = default_width;
lua_pushinteger(L, (lua_Integer)chwidth);
} else if (t != LUA_TSTRING)
Expand All @@ -602,7 +604,7 @@ static int Lutf8_width (lua_State *L) {
const char *e, *s = to_utf8(L, 1, &e);
int width = 0;
while (s < e) {
utfint ch;
utfint ch = 0;
int chwidth;
s = utf8_safe_decode(L, s, &ch);
chwidth = utf8_width(ch, ambi_is_single);
Expand All @@ -615,17 +617,17 @@ static int Lutf8_width (lua_State *L) {

static int Lutf8_widthindex (lua_State *L) {
const char *e, *s = check_utf8(L, 1, &e);
int width = luaL_checkinteger(L, 2);
int width = CAST(int, luaL_checkinteger(L, 2));
int ambi_is_single = !lua_toboolean(L, 3);
int default_width = luaL_optinteger(L, 4, 0);
int default_width = CAST(int, luaL_optinteger(L, 4, 0));
size_t idx = 1;
while (s < e) {
utfint ch;
utfint ch = 0;
size_t chwidth;
s = utf8_safe_decode(L, s, &ch);
chwidth = utf8_width(ch, ambi_is_single);
if (chwidth == 0) chwidth = default_width;
width -= chwidth;
width -= CAST(int, chwidth);
if (width <= 0) {
lua_pushinteger(L, idx);
lua_pushinteger(L, width + chwidth);
Expand Down Expand Up @@ -712,7 +714,7 @@ static int capture_to_close (MatchState *ms) {
}

static const char *classend (MatchState *ms, const char *p) {
utfint ch;
utfint ch = 0;
p = utf8_safe_decode(ms->L, p, &ch);
switch (ch) {
case L_ESC: {
Expand Down Expand Up @@ -758,14 +760,14 @@ static int matchbracketclass (MatchState *ms, utfint c, const char *p, const cha
p++; /* skip the `^' */
}
while (p < ec) {
utfint ch;
utfint ch = 0;
p = utf8_safe_decode(ms->L, p, &ch);
if (ch == L_ESC) {
p = utf8_safe_decode(ms->L, p, &ch);
if (match_class(c, ch))
return sig;
} else {
utfint next;
utfint next = 0;
const char *np = utf8_safe_decode(ms->L, p, &next);
if (next == '-' && np < ec) {
p = utf8_safe_decode(ms->L, np, &next);
Expand All @@ -782,7 +784,7 @@ static int singlematch (MatchState *ms, const char *s, const char *p, const char
if (s >= ms->src_end)
return 0;
else {
utfint ch, pch;
utfint ch=0, pch=0;
utf8_safe_decode(ms->L, s, &ch);
p = utf8_safe_decode(ms->L, p, &pch);
switch (pch) {
Expand All @@ -796,7 +798,7 @@ static int singlematch (MatchState *ms, const char *s, const char *p, const char
}

static const char *matchbalance (MatchState *ms, const char *s, const char **p) {
utfint ch, begin, end;
utfint ch=0, begin=0, end=0;
*p = utf8_safe_decode(ms->L, *p, &begin);
if (*p >= ms->p_end)
luaL_error(ms->L, "malformed pattern "
Expand Down Expand Up @@ -879,7 +881,7 @@ static const char *match (MatchState *ms, const char *s, const char *p) {
luaL_error(ms->L, "pattern too complex");
init: /* using goto's to optimize tail recursion */
if (p != ms->p_end) { /* end of pattern? */
utfint ch;
utfint ch = 0;
utf8_safe_decode(ms->L, p, &ch);
switch (ch) {
case '(': { /* start capture */
Expand Down Expand Up @@ -1052,7 +1054,7 @@ static int find_aux (lua_State *L, int find) {
lua_Integer idx = luaL_optinteger(L, 3, 1);
const char *init;
if (!idx) idx = 1;
init = utf8_relat(s, es, idx);
init = utf8_relat(s, es, CAST(int, idx));
if (init == NULL) {
if (idx > 0) {
lua_pushnil(L); /* cannot find anything */
Expand Down Expand Up @@ -1145,7 +1147,7 @@ static int Lutf8_gmatch (lua_State *L) {
static void add_s (MatchState *ms, luaL_Buffer *b, const char *s, const char *e) {
const char *new_end, *news = to_utf8(ms->L, 3, &new_end);
while (news < new_end) {
utfint ch;
utfint ch = 0;
news = utf8_safe_decode(ms->L, news, &ch);
if (ch != L_ESC)
add_utf8char(b, ch);
Expand Down Expand Up @@ -1225,7 +1227,7 @@ static int Lutf8_gsub (lua_State *L) {
if (e && e > s) /* non empty match? */
s = e; /* skip it */
else if (s < es) {
utfint ch;
utfint ch = 0;
s = utf8_safe_decode(L, s, &ch);
add_utf8char(&b, ch);
} else break;
Expand Down Expand Up @@ -1281,8 +1283,7 @@ LUALIB_API int luaopen_utf8 (lua_State *L) {
#if LUA_VERSION_NUM >= 502
luaL_newlib(L, libs);
#else
lua_createtable(L, 0, sizeof(libs)/sizeof(libs[0]));
luaL_register(L, NULL, libs);
luaL_register(L, "utf8", libs);
#endif

lua_pushlstring(L, UTF8PATT, sizeof(UTF8PATT)-1);
Expand Down

0 comments on commit 6d43e4b

Please sign in to comment.