Skip to content

Commit

Permalink
Apply changes from LuaFar project
Browse files Browse the repository at this point in the history
  • Loading branch information
johndoe committed Jan 15, 2021
1 parent f26fad0 commit be89281
Showing 1 changed file with 33 additions and 32 deletions.
65 changes: 33 additions & 32 deletions lutf8lib.c
Expand Up @@ -17,6 +17,7 @@ typedef unsigned int utfint;
#define UTF8_MAX 0x7FFFFFFFu
#define UTF8_MAXCP 0x10FFFFu
#define iscont(p) ((*(p) & 0xC0) == 0x80)
#define CAST(tp,expr) ((tp)(expr))

#ifndef LUA_QL
# define LUA_QL(x) "'" x "'"
Expand Down Expand Up @@ -103,8 +104,8 @@ static const char *utf8_relat (const char *s, const char *e, int idx) {
}

static int utf8_range(const char *s, const char *e, lua_Integer *i, lua_Integer *j) {
const char *ps = utf8_relat(s, e, *i);
const char *pe = utf8_relat(s, e, *j);
const char *ps = utf8_relat(s, e, CAST(int, *i));
const char *pe = utf8_relat(s, e, CAST(int, *j));
*i = (ps ? ps : (*i > 0 ? e : s)) - s;
*j = (pe ? utf8_next(pe, e) : (*j > 0 ? e : s)) - s;
return *i < *j;
Expand Down Expand Up @@ -141,7 +142,7 @@ static int find_in_range (range_table *t, size_t size, utfint ch) {
end = size;

while (begin < end) {
int mid = (begin + end) / 2;
size_t mid = (begin + end) / 2;
if (t[mid].last < ch)
begin = mid + 1;
else if (t[mid].first > ch)
Expand All @@ -160,7 +161,7 @@ static int convert_char (conv_table *t, size_t size, utfint ch) {
end = size;

while (begin < end) {
int mid = (begin + end) / 2;
size_t mid = (begin + end) / 2;
if (t[mid].last < ch)
begin = mid + 1;
else if (t[mid].first > ch)
Expand Down Expand Up @@ -295,6 +296,7 @@ static int Lutf8_sub (lua_State *L) {
static int Lutf8_reverse (lua_State *L) {
luaL_Buffer b;
const char *prev, *pprev, *ends, *e, *s = check_utf8(L, 1, &e);
(void) ends;
int lax = lua_toboolean(L, 2);
luaL_buffinit(L, &b);
if (lax) {
Expand All @@ -303,8 +305,8 @@ static int Lutf8_reverse (lua_State *L) {
luaL_addlstring(&b, prev, e-prev);
}
} else {
utfint code = 0;
for (prev = e; s < prev; prev = pprev) {
utfint code;
ends = utf8_safe_decode(L, pprev = utf8_prev(s, prev), &code);
assert(ends == prev);
if (utf8_invalid(code))
Expand All @@ -325,13 +327,13 @@ static int Lutf8_byte (lua_State *L) {
lua_Integer posi = luaL_optinteger(L, 2, 1);
lua_Integer pose = luaL_optinteger(L, 3, posi);
if (utf8_range(s, e, &posi, &pose)) {
utfint ch = 0;
for (e = s + pose, s = s + posi; s < e; ++n) {
utfint ch;
s = utf8_safe_decode(L, s, &ch);
lua_pushinteger(L, ch);
}
}
return n;
return CAST(int, n);
}

static int Lutf8_codepoint (lua_State *L) {
Expand All @@ -342,6 +344,7 @@ static int Lutf8_codepoint (lua_State *L) {
int lax = lua_toboolean(L, 4);
int n;
const char *se;
utfint code = 0;
luaL_argcheck(L, posi >= 1, 2, "out of range");
luaL_argcheck(L, pose <= (lua_Integer)len, 3, "out of range");
if (posi > pose) return 0; /* empty interval; return no values */
Expand All @@ -352,7 +355,6 @@ static int Lutf8_codepoint (lua_State *L) {
n = 0; /* count the number of returns */
se = s + pose; /* string end */
for (n = 0, s += posi - 1; s < se;) {
utfint code;
s = utf8_safe_decode(L, s, &code);
if (!lax && utf8_invalid(code))
return luaL_error(L, "invalid UTF-8 code");
Expand All @@ -369,7 +371,7 @@ static int Lutf8_char (lua_State *L) {
for (i = 1; i <= n; ++i) {
lua_Integer code = luaL_checkinteger(L, i);
luaL_argcheck(L, code <= UTF8_MAXCP, i, "value out of range");
add_utf8char(&b, code);
add_utf8char(&b, CAST(utfint, code));
}
luaL_pushresult(&b);
return 1;
Expand All @@ -379,13 +381,13 @@ static int Lutf8_char (lua_State *L) {
static int Lutf8_##name (lua_State *L) { \
int t = lua_type(L, 1); \
if (t == LUA_TNUMBER) \
lua_pushinteger(L, utf8_to##name(lua_tointeger(L, 1))); \
lua_pushinteger(L, utf8_to##name(CAST(utfint, lua_tointeger(L, 1)))); \
else if (t == LUA_TSTRING) { \
utfint ch = 0; \
luaL_Buffer b; \
const char *e, *s = to_utf8(L, 1, &e); \
luaL_buffinit(L, &b); \
while (s < e) { \
utfint ch; \
s = utf8_safe_decode(L, s, &ch); \
add_utf8char(&b, utf8_to##name(ch)); \
} \
Expand Down Expand Up @@ -421,10 +423,10 @@ static const char *parse_escape (lua_State *L, const char *s, const char *e, int

static int Lutf8_escape (lua_State *L) {
const char *e, *s = check_utf8(L, 1, &e);
utfint ch = 0;
luaL_Buffer b;
luaL_buffinit(L, &b);
while (s < e) {
utfint ch;
s = utf8_safe_decode(L, s, &ch);
if (ch == '%') {
int hex = 0;
Expand Down Expand Up @@ -562,10 +564,10 @@ static int Lutf8_next (lua_State *L) {

static int iter_aux (lua_State *L, int strict) {
const char *e, *s = check_utf8(L, 1, &e);
int n = lua_tointeger(L, 2);
int n = CAST(int, lua_tointeger(L, 2));
const char *p = n <= 0 ? s : utf8_next(s+n-1, e);
if (p < e) {
utfint code;
utfint code = 0;
utf8_safe_decode(L, p, &code);
if (strict && utf8_invalid(code))
return luaL_error(L, "invalid UTF-8 code");
Expand All @@ -591,18 +593,18 @@ static int Lutf8_codes (lua_State *L) {
static int Lutf8_width (lua_State *L) {
int t = lua_type(L, 1);
int ambi_is_single = !lua_toboolean(L, 2);
int default_width = luaL_optinteger(L, 3, 0);
int default_width = CAST(int, luaL_optinteger(L, 3, 0));
if (t == LUA_TNUMBER) {
size_t chwidth = utf8_width(lua_tointeger(L, 1), ambi_is_single);
size_t chwidth = utf8_width(CAST(utfint, lua_tointeger(L, 1)), ambi_is_single);
if (chwidth == 0) chwidth = default_width;
lua_pushinteger(L, (lua_Integer)chwidth);
} else if (t != LUA_TSTRING)
return typeerror(L, 1, "number/string");
else {
const char *e, *s = to_utf8(L, 1, &e);
int width = 0;
utfint ch = 0;
while (s < e) {
utfint ch;
int chwidth;
s = utf8_safe_decode(L, s, &ch);
chwidth = utf8_width(ch, ambi_is_single);
Expand All @@ -615,17 +617,17 @@ static int Lutf8_width (lua_State *L) {

static int Lutf8_widthindex (lua_State *L) {
const char *e, *s = check_utf8(L, 1, &e);
int width = luaL_checkinteger(L, 2);
int width = CAST(int, luaL_checkinteger(L, 2));
int ambi_is_single = !lua_toboolean(L, 3);
int default_width = luaL_optinteger(L, 4, 0);
int default_width = CAST(int, luaL_optinteger(L, 4, 0));
size_t idx = 1;
utfint ch = 0;
while (s < e) {
utfint ch;
size_t chwidth;
s = utf8_safe_decode(L, s, &ch);
chwidth = utf8_width(ch, ambi_is_single);
if (chwidth == 0) chwidth = default_width;
width -= chwidth;
width -= CAST(int, chwidth);
if (width <= 0) {
lua_pushinteger(L, idx);
lua_pushinteger(L, width + chwidth);
Expand Down Expand Up @@ -712,7 +714,7 @@ static int capture_to_close (MatchState *ms) {
}

static const char *classend (MatchState *ms, const char *p) {
utfint ch;
utfint ch = 0;
p = utf8_safe_decode(ms->L, p, &ch);
switch (ch) {
case L_ESC: {
Expand Down Expand Up @@ -752,20 +754,20 @@ static int match_class (utfint c, utfint cl) {

static int matchbracketclass (MatchState *ms, utfint c, const char *p, const char *ec) {
int sig = 1;
utfint ch = 0;
utfint next = 0;
assert(*p == '[');
if (*++p == '^') {
sig = 0;
p++; /* skip the `^' */
}
while (p < ec) {
utfint ch;
p = utf8_safe_decode(ms->L, p, &ch);
if (ch == L_ESC) {
p = utf8_safe_decode(ms->L, p, &ch);
if (match_class(c, ch))
return sig;
} else {
utfint next;
const char *np = utf8_safe_decode(ms->L, p, &next);
if (next == '-' && np < ec) {
p = utf8_safe_decode(ms->L, np, &next);
Expand All @@ -782,7 +784,7 @@ static int singlematch (MatchState *ms, const char *s, const char *p, const char
if (s >= ms->src_end)
return 0;
else {
utfint ch, pch;
utfint ch=0, pch=0;
utf8_safe_decode(ms->L, s, &ch);
p = utf8_safe_decode(ms->L, p, &pch);
switch (pch) {
Expand All @@ -796,7 +798,7 @@ static int singlematch (MatchState *ms, const char *s, const char *p, const char
}

static const char *matchbalance (MatchState *ms, const char *s, const char **p) {
utfint ch, begin, end;
utfint ch=0, begin=0, end=0;
*p = utf8_safe_decode(ms->L, *p, &begin);
if (*p >= ms->p_end)
luaL_error(ms->L, "malformed pattern "
Expand Down Expand Up @@ -879,7 +881,7 @@ static const char *match (MatchState *ms, const char *s, const char *p) {
luaL_error(ms->L, "pattern too complex");
init: /* using goto's to optimize tail recursion */
if (p != ms->p_end) { /* end of pattern? */
utfint ch;
utfint ch = 0;
utf8_safe_decode(ms->L, p, &ch);
switch (ch) {
case '(': { /* start capture */
Expand Down Expand Up @@ -1051,7 +1053,7 @@ static int find_aux (lua_State *L, int find) {
const char *ep, *p = check_utf8(L, 2, &ep);
lua_Integer idx = luaL_optinteger(L, 3, 1);
const char *init;
init = utf8_relat(s, es, idx);
init = utf8_relat(s, es, CAST(int, idx));
if (init == NULL) {
if (idx > 0) {
lua_pushnil(L); /* cannot find anything */
Expand Down Expand Up @@ -1143,8 +1145,8 @@ static int Lutf8_gmatch (lua_State *L) {

static void add_s (MatchState *ms, luaL_Buffer *b, const char *s, const char *e) {
const char *new_end, *news = to_utf8(ms->L, 3, &new_end);
utfint ch = 0;
while (news < new_end) {
utfint ch;
news = utf8_safe_decode(ms->L, news, &ch);
if (ch != L_ESC)
add_utf8char(b, ch);
Expand Down Expand Up @@ -1202,6 +1204,7 @@ static int Lutf8_gsub (lua_State *L) {
lua_Integer n = 0;
MatchState ms;
luaL_Buffer b;
utfint ch = 0;
luaL_argcheck(L, tr == LUA_TNUMBER || tr == LUA_TSTRING ||
tr == LUA_TFUNCTION || tr == LUA_TTABLE, 3,
"string/function/table expected");
Expand All @@ -1224,7 +1227,6 @@ static int Lutf8_gsub (lua_State *L) {
if (e && e > s) /* non empty match? */
s = e; /* skip it */
else if (s < es) {
utfint ch;
s = utf8_safe_decode(L, s, &ch);
add_utf8char(&b, ch);
} else break;
Expand Down Expand Up @@ -1280,8 +1282,7 @@ LUALIB_API int luaopen_utf8 (lua_State *L) {
#if LUA_VERSION_NUM >= 502
luaL_newlib(L, libs);
#else
lua_createtable(L, 0, sizeof(libs)/sizeof(libs[0]));
luaL_register(L, NULL, libs);
luaL_register(L, "utf8", libs);
#endif

lua_pushlstring(L, UTF8PATT, sizeof(UTF8PATT)-1);
Expand Down

0 comments on commit be89281

Please sign in to comment.