Skip to content

Commit

Permalink
sql: modify TRIM() function signature
Browse files Browse the repository at this point in the history
According to the ANSI standard, ltrim, rtrim and trim should
be merged into one unified TRIM() function. The kind of trimming
(left, right or both sides) and the characters to trim are
determined by the arguments of this function.

Closes #3879
  • Loading branch information
romanhabibov committed Apr 4, 2019
1 parent 3f42ef0 commit 762bd4d
Show file tree
Hide file tree
Showing 7 changed files with 158 additions and 67 deletions.
5 changes: 5 additions & 0 deletions extra/mkkeywordhash.c
Expand Up @@ -91,6 +91,7 @@ struct Keyword {
# define CTE 0x00040000
#endif
# define RESERVED 0x00000001
# define FUNCTION 0x00080000
/*
** These are the keywords
*/
Expand Down Expand Up @@ -202,6 +203,7 @@ static Keyword aKeywordTable[] = {
{ "TO", "TK_TO", ALWAYS, true },
{ "TRANSACTION", "TK_TRANSACTION", ALWAYS, true },
{ "TRIGGER", "TK_TRIGGER", TRIGGER, true },
{ "TRIM", "TK_TRIM", FUNCTION, true },
{ "UNION", "TK_UNION", COMPOUND, true },
{ "UNIQUE", "TK_UNIQUE", ALWAYS, true },
{ "UPDATE", "TK_UPDATE", ALWAYS, true },
Expand Down Expand Up @@ -278,6 +280,9 @@ static Keyword aKeywordTable[] = {
{ "WHILE", "TK_STANDARD", RESERVED, true },
{ "TEXT", "TK_TEXT", RESERVED, true },
{ "TRUNCATE", "TK_TRUNCATE", ALWAYS, true },
{ "LEADING", "TK_LEADING", ALWAYS, true },
{ "TRAILING", "TK_TRAILING", ALWAYS, true },
{ "BOTH", "TK_BOTH", ALWAYS, true },
};

/* Number of keywords */
Expand Down
46 changes: 29 additions & 17 deletions src/box/sql/func.c
Expand Up @@ -1207,41 +1207,57 @@ replaceFunc(sql_context * context, int argc, sql_value ** argv)
}

/*
* Implementation of the TRIM(), LTRIM(), and RTRIM() functions.
* The userdata is 0x1 for left trim, 0x2 for right trim, 0x3 for both.
* Implementation of the TRIM() function.
*/
static void
trimFunc(sql_context * context, int argc, sql_value ** argv)
{
const unsigned char *zIn; /* Input string */
const unsigned char *zCharSet; /* Set of characters to trim */
int nIn; /* Number of bytes in input */
int flags; /* 1: trimleft 2: trimright 3: trim */
int i; /* Loop counter */
unsigned char *aLen = 0; /* Length of each character in zCharSet */
unsigned char **azChar = 0; /* Individual characters in zCharSet */
int nChar; /* Number of characters in zCharSet */
/* The index of trim source in the argv array.*/
int source_index = argc - 1;
/* True if character set has been passed, false if has't been. */
bool set = true;
/* 1: if it's left side.
* 2: if it's right side.
* 3: if it's both sides. */
int trim_side = 3;

/* If we have 2 agrs, the first can be trimiing side or character set.
* If we have 3 agrs, the first can be triiming side only, i.e. number. */
if (argc == 2 && sql_value_type(argv[0]) == SQL_INTEGER) {
trim_side = sql_value_int(argv[0]);
set = false;
} else if (argc == 3) {
trim_side = sql_value_int(argv[0]);
}

if (sql_value_type(argv[0]) == SQL_NULL) {
if (sql_value_type(argv[source_index]) == SQL_NULL) {
return;
}
zIn = sql_value_text(argv[0]);

zIn = sql_value_text(argv[source_index]);
if (zIn == 0)
return;
nIn = sql_value_bytes(argv[0]);
assert(zIn == sql_value_text(argv[0]));
if (argc == 1) {
nIn = sql_value_bytes(argv[source_index]);
assert(zIn == sql_value_text(argv[source_index]));
if (source_index == 0 || set == false ) {
static const unsigned char lenOne[] = { 1 };
static unsigned char *const azOne[] = { (u8 *) " " };
nChar = 1;
aLen = (u8 *) lenOne;
azChar = (unsigned char **)azOne;
zCharSet = 0;
} else if ((zCharSet = sql_value_text(argv[1])) == 0) {
} else if ((zCharSet = sql_value_text(argv[source_index - 1])) == 0) {
return;
} else {
const unsigned char *z = zCharSet;
int trim_set_sz = sql_value_bytes(argv[1]);
int trim_set_sz = sql_value_bytes(argv[source_index - 1]);
/*
* Count the number of UTF-8 characters passing
* through the entire char set, but not up
Expand Down Expand Up @@ -1272,8 +1288,7 @@ trimFunc(sql_context * context, int argc, sql_value ** argv)
}
}
if (nChar > 0) {
flags = SQL_PTR_TO_INT(sql_user_data(context));
if (flags & 1) {
if (trim_side & 1) {
while (nIn > 0) {
int len = 0;
for (i = 0; i < nChar; i++) {
Expand All @@ -1288,7 +1303,7 @@ trimFunc(sql_context * context, int argc, sql_value ** argv)
nIn -= len;
}
}
if (flags & 2) {
if (trim_side & 2) {
while (nIn > 0) {
int len = 0;
for (i = 0; i < nChar; i++) {
Expand Down Expand Up @@ -1738,12 +1753,9 @@ sqlRegisterBuiltinFunctions(void)
FIELD_TYPE_INTEGER),
FUNCTION2(likely, 1, 0, 0, noopFunc, SQL_FUNC_UNLIKELY,
FIELD_TYPE_INTEGER),
FUNCTION_COLL(ltrim, 1, 1, 0, trimFunc),
FUNCTION_COLL(ltrim, 2, 1, 0, trimFunc),
FUNCTION_COLL(rtrim, 1, 2, 0, trimFunc),
FUNCTION_COLL(rtrim, 2, 2, 0, trimFunc),
FUNCTION_COLL(trim, 1, 3, 0, trimFunc),
FUNCTION_COLL(trim, 2, 3, 0, trimFunc),
FUNCTION_COLL(trim, 3, 3, 0, trimFunc),
FUNCTION(min, -1, 0, 1, minmaxFunc, FIELD_TYPE_SCALAR),
FUNCTION(min, 0, 0, 1, 0, FIELD_TYPE_SCALAR),
AGGREGATE2(min, 1, 0, 1, minmaxStep, minMaxFinalize,
Expand Down
6 changes: 4 additions & 2 deletions src/box/sql/global.c
Expand Up @@ -223,11 +223,13 @@ SQL_WSD struct sqlConfig sqlConfig = {
FuncDefHash sqlBuiltinFunctions;

/*
* Constant tokens for values 0 and 1.
* Constant tokens for necessary integer values.
*/
const Token sqlIntTokens[] = {
{"0", 1, false},
{"1", 1, false}
{"1", 1, false},
{"2", 1, false},
{"3", 1, false}
};

/*
Expand Down
41 changes: 41 additions & 0 deletions src/box/sql/parse.y
Expand Up @@ -937,6 +937,47 @@ expr(A) ::= CAST(X) LP expr(E) AS typedef(T) RP(Y). {
sqlExprAttachSubtrees(pParse->db, A.pExpr, E.pExpr, 0);
}
%endif SQL_OMIT_CAST

expr(A) ::= TRIM(X) LP trim_operands(Y) RP(E). {
A.pExpr = sqlExprFunction(pParse, Y, &X);
spanSet(&A, &X, &E);
}

%type trim_operands {ExprList*}
%destructor trim_operands {sql_expr_list_delete(pParse->db, $$);}
/*
 * TRIM operands that include a FROM clause: the trim source Y is
 * appended after whatever from_clause collected in F (the optional
 * trimming side and/or the optional character set).
 *
 * F is NULL when the clause was a bare FROM. It is passed straight
 * through as the list argument, the same way the single-operand
 * rule passes NULL to sql_expr_list_append(). The result is always
 * stored in A, so the LHS is never read before it is assigned and
 * F is never dropped without being handed on.
 */
trim_operands(A) ::= from_clause(F) expr(Y). {
  A = sql_expr_list_append(pParse->db, F, Y.pExpr);
}
trim_operands(A) ::= expr(Y). {
A = sql_expr_list_append(pParse->db, NULL, Y.pExpr);
}

%type from_clause {ExprList*}
%destructor from_clause {sql_expr_list_delete(pParse->db, $$);}

/*
 * from_clause collects everything that precedes FROM inside
 * TRIM(...) into an expression list. The four alternatives are:
 *   <trim_specification> <expr> FROM  -> [side, character set]
 *   <trim_specification> FROM         -> [side]
 *   <expr> FROM                       -> [character set]
 *   FROM                              -> no list (A = 0)
 * The trimming side is stored as a TK_INTEGER expression built
 * from sqlIntTokens[N], where N is the value produced by
 * trim_specification.
 */
from_clause(A) ::= trim_specification(N) expr(Y) FROM. {
/* Side first, then the character set expression. */
struct Expr* p = sqlExprAlloc(pParse->db, TK_INTEGER, &sqlIntTokens[N], 1);
A = sql_expr_list_append(pParse->db, NULL, p);
A = sql_expr_list_append(pParse->db, A, Y.pExpr);
}
from_clause(A) ::= trim_specification(N) FROM. {
/* Only the side was given; the list holds the single integer. */
struct Expr* p = sqlExprAlloc(pParse->db, TK_INTEGER, &sqlIntTokens[N], 1);
A = sql_expr_list_append(pParse->db, NULL, p);
}
from_clause(A) ::= expr(Y) FROM. {
/* Only the character set was given. */
A = sql_expr_list_append(pParse->db, NULL, Y.pExpr);
}
/* Bare FROM: nothing to collect, the list stays empty (0). */
from_clause(A) ::= FROM. {A = 0;}

%type trim_specification {int}

trim_specification(A) ::= LEADING. { A = 1; }
trim_specification(A) ::= TRAILING. { A = 2; }
trim_specification(A) ::= BOTH. { A = 3; }

expr(A) ::= id(X) LP distinct(D) exprlist(Y) RP(E). {
if( Y && Y->nExpr>pParse->db->aLimit[SQL_LIMIT_FUNCTION_ARG] ){
const char *err =
Expand Down
14 changes: 7 additions & 7 deletions test/sql-tap/badutf1.test.lua
Expand Up @@ -302,7 +302,7 @@ test:do_test(
test:do_test(
"badutf-4.1",
function()
return test:execsql2("SELECT hex(trim('\x80\x80\x80\xf0\x80\x80\x80\xff','\x80\xff')) AS x")
return test:execsql2("SELECT hex(trim('\x80\xff' FROM '\x80\x80\x80\xf0\x80\x80\x80\xff')) AS x")
end, {
-- <badutf-4.1>
"X", "F0"
Expand All @@ -312,7 +312,7 @@ test:do_test(
test:do_test(
"badutf-4.2",
function()
return test:execsql2("SELECT hex(ltrim('\x80\x80\x80\xf0\x80\x80\x80\xff','\x80\xff')) AS x")
return test:execsql2("SELECT hex(trim(LEADING '\x80\xff' FROM '\x80\x80\x80\xf0\x80\x80\x80\xff')) AS x")
end, {
-- <badutf-4.2>
"X", "F0808080FF"
Expand All @@ -322,7 +322,7 @@ test:do_test(
test:do_test(
"badutf-4.3",
function()
return test:execsql2("SELECT hex(rtrim('\x80\x80\x80\xf0\x80\x80\x80\xff','\x80\xff')) AS x")
return test:execsql2("SELECT hex(trim(TRAILING '\x80\xff' FROM '\x80\x80\x80\xf0\x80\x80\x80\xff')) AS x")
end, {
-- <badutf-4.3>
"X", "808080F0"
Expand All @@ -332,7 +332,7 @@ test:do_test(
test:do_test(
"badutf-4.4",
function()
return test:execsql2("SELECT hex(trim('\x80\x80\x80\xf0\x80\x80\x80\xff','\xff\x80')) AS x")
return test:execsql2("SELECT hex(trim('\xff\x80' FROM '\x80\x80\x80\xf0\x80\x80\x80\xff')) AS x")
end, {
-- <badutf-4.4>
"X", "808080F0808080FF"
Expand All @@ -342,7 +342,7 @@ test:do_test(
test:do_test(
"badutf-4.5",
function()
return test:execsql2("SELECT hex(trim('\xff\x80\x80\xf0\x80\x80\x80\xff','\xff\x80')) AS x")
return test:execsql2("SELECT hex(trim('\xff\x80' FROM '\xff\x80\x80\xf0\x80\x80\x80\xff')) AS x")
end, {
-- <badutf-4.5>
"X", "80F0808080FF"
Expand All @@ -352,7 +352,7 @@ test:do_test(
test:do_test(
"badutf-4.6",
function()
return test:execsql2("SELECT hex(trim('\xff\x80\xf0\x80\x80\x80\xff','\xff\x80')) AS x")
return test:execsql2("SELECT hex(trim('\xff\x80' FROM '\xff\x80\xf0\x80\x80\x80\xff')) AS x")
end, {
-- <badutf-4.6>
"X", "F0808080FF"
Expand All @@ -362,7 +362,7 @@ test:do_test(
test:do_test(
"badutf-4.7",
function()
return test:execsql2("SELECT hex(trim('\xff\x80\xf0\x80\x80\x80\xff','\xff\x80\x80')) AS x")
return test:execsql2("SELECT hex(trim('\xff\x80\x80' FROM '\xff\x80\xf0\x80\x80\x80\xff')) AS x")
end, {
-- <badutf-4.7>
"X", "FF80F0808080FF"
Expand Down

0 comments on commit 762bd4d

Please sign in to comment.