From b1cd95b620c7201fae621946427d163af809384e Mon Sep 17 00:00:00 2001 From: Roman Khabibov Date: Thu, 28 Mar 2019 14:01:33 +0300 Subject: [PATCH] sql: modify TRIM() function signature According to the ANSI standart, ltrim, rtrim and trim should be merged into one unified TRIM() function. The specialization of trimming (left, right or both and trimming charcters) determined in arguments of this function. Closes #3879 --- extra/mkkeywordhash.c | 5 ++ src/box/sql/func.c | 46 ++++++++------ src/box/sql/global.c | 6 +- src/box/sql/parse.y | 39 ++++++++++++ test/sql-tap/badutf1.test.lua | 14 ++--- test/sql-tap/func.test.lua | 111 ++++++++++++++++++++++------------ test/sql-tap/with1.test.lua | 2 +- 7 files changed, 156 insertions(+), 67 deletions(-) diff --git a/extra/mkkeywordhash.c b/extra/mkkeywordhash.c index be7bd5545260..94a768323ecc 100644 --- a/extra/mkkeywordhash.c +++ b/extra/mkkeywordhash.c @@ -91,6 +91,7 @@ struct Keyword { # define CTE 0x00040000 #endif # define RESERVED 0x00000001 +# define FUNCTION 0x00080000 /* ** These are the keywords */ @@ -202,6 +203,7 @@ static Keyword aKeywordTable[] = { { "TO", "TK_TO", ALWAYS, true }, { "TRANSACTION", "TK_TRANSACTION", ALWAYS, true }, { "TRIGGER", "TK_TRIGGER", TRIGGER, true }, + { "TRIM", "TK_TRIM", FUNCTION, true }, { "UNION", "TK_UNION", COMPOUND, true }, { "UNIQUE", "TK_UNIQUE", ALWAYS, true }, { "UPDATE", "TK_UPDATE", ALWAYS, true }, @@ -278,6 +280,9 @@ static Keyword aKeywordTable[] = { { "WHILE", "TK_STANDARD", RESERVED, true }, { "TEXT", "TK_TEXT", RESERVED, true }, { "TRUNCATE", "TK_TRUNCATE", ALWAYS, true }, + { "LEADING", "TK_LEADING", ALWAYS, true }, + { "TRAILING", "TK_TRAILING", ALWAYS, true }, + { "BOTH", "TK_BOTH", ALWAYS, true }, }; /* Number of keywords */ diff --git a/src/box/sql/func.c b/src/box/sql/func.c index a750e52a1b5f..07d3cd25d4dd 100644 --- a/src/box/sql/func.c +++ b/src/box/sql/func.c @@ -1207,8 +1207,7 @@ replaceFunc(sql_context * context, int argc, sql_value ** argv) } /* - * Implementation of the TRIM(), LTRIM(), and RTRIM() functions. - * The userdata is 0x1 for left trim, 0x2 for right trim, 0x3 for both. + * Implementation of the TRIM() function. */ static void trimFunc(sql_context * context, int argc, sql_value ** argv) @@ -1216,32 +1215,49 @@ trimFunc(sql_context * context, int argc, sql_value ** argv) const unsigned char *zIn; /* Input string */ const unsigned char *zCharSet; /* Set of characters to trim */ int nIn; /* Number of bytes in input */ - int flags; /* 1: trimleft 2: trimright 3: trim */ int i; /* Loop counter */ unsigned char *aLen = 0; /* Length of each character in zCharSet */ unsigned char **azChar = 0; /* Individual characters in zCharSet */ int nChar; /* Number of characters in zCharSet */ + /* The index of trim source in the argv array.*/ + int source_index = argc - 1; + /* True if character set has been passed, false if has't been. */ + bool set = true; + /* 1: if it's left side. + * 2: if it's right side. + * 3: if it's both sides. */ + int trim_side = 3; + + /* If we have 2 agrs, the first can be trimiing side or character set. + * If we have 3 agrs, the first can be triiming side only, i.e. number. */ + if (argc == 2 && sql_value_type(argv[0]) == SQL_INTEGER) { + trim_side = sql_value_int(argv[0]); + set = false; + } else if (argc == 3) { + trim_side = sql_value_int(argv[0]); + } - if (sql_value_type(argv[0]) == SQL_NULL) { + if (sql_value_type(argv[source_index]) == SQL_NULL) { return; } - zIn = sql_value_text(argv[0]); + + zIn = sql_value_text(argv[source_index]); if (zIn == 0) return; - nIn = sql_value_bytes(argv[0]); - assert(zIn == sql_value_text(argv[0])); - if (argc == 1) { + nIn = sql_value_bytes(argv[source_index]); + assert(zIn == sql_value_text(argv[source_index])); + if (source_index == 0 || set == false ) { static const unsigned char lenOne[] = { 1 }; static unsigned char *const azOne[] = { (u8 *) " " }; nChar = 1; aLen = (u8 *) lenOne; azChar = (unsigned char **)azOne; zCharSet = 0; - } else if ((zCharSet = sql_value_text(argv[1])) == 0) { + } else if ((zCharSet = sql_value_text(argv[source_index - 1])) == 0) { return; } else { const unsigned char *z = zCharSet; - int trim_set_sz = sql_value_bytes(argv[1]); + int trim_set_sz = sql_value_bytes(argv[source_index - 1]); /* * Count the number of UTF-8 characters passing * through the entire char set, but not up @@ -1272,8 +1288,7 @@ trimFunc(sql_context * context, int argc, sql_value ** argv) } } if (nChar > 0) { - flags = SQL_PTR_TO_INT(sql_user_data(context)); - if (flags & 1) { + if (trim_side & 1) { while (nIn > 0) { int len = 0; for (i = 0; i < nChar; i++) { @@ -1288,7 +1303,7 @@ trimFunc(sql_context * context, int argc, sql_value ** argv) nIn -= len; } } - if (flags & 2) { + if (trim_side & 2) { while (nIn > 0) { int len = 0; for (i = 0; i < nChar; i++) { @@ -1738,12 +1753,9 @@ sqlRegisterBuiltinFunctions(void) FIELD_TYPE_INTEGER), FUNCTION2(likely, 1, 0, 0, noopFunc, SQL_FUNC_UNLIKELY, FIELD_TYPE_INTEGER), - FUNCTION_COLL(ltrim, 1, 1, 0, trimFunc), - FUNCTION_COLL(ltrim, 2, 1, 0, trimFunc), - FUNCTION_COLL(rtrim, 1, 2, 0, trimFunc), - FUNCTION_COLL(rtrim, 2, 2, 0, trimFunc), FUNCTION_COLL(trim, 1, 3, 0, trimFunc), FUNCTION_COLL(trim, 2, 3, 0, trimFunc), + FUNCTION_COLL(trim, 3, 3, 0, trimFunc), FUNCTION(min, -1, 0, 1, minmaxFunc, FIELD_TYPE_SCALAR), FUNCTION(min, 0, 0, 1, 0, FIELD_TYPE_SCALAR), AGGREGATE2(min, 1, 0, 1, minmaxStep, minMaxFinalize, diff --git a/src/box/sql/global.c b/src/box/sql/global.c index 95ad71c38e8e..fccf74100d8c 100644 --- a/src/box/sql/global.c +++ b/src/box/sql/global.c @@ -223,11 +223,13 @@ SQL_WSD struct sqlConfig sqlConfig = { FuncDefHash sqlBuiltinFunctions; /* - * Constant tokens for values 0 and 1. + * Constant tokens for necessary integer values. */ const Token sqlIntTokens[] = { {"0", 1, false}, - {"1", 1, false} + {"1", 1, false}, + {"2", 1, false}, + {"3", 1, false} }; /* diff --git a/src/box/sql/parse.y b/src/box/sql/parse.y index d2614d9b0add..83d79374fbf1 100644 --- a/src/box/sql/parse.y +++ b/src/box/sql/parse.y @@ -937,6 +937,45 @@ expr(A) ::= CAST(X) LP expr(E) AS typedef(T) RP(Y). { sqlExprAttachSubtrees(pParse->db, A.pExpr, E.pExpr, 0); } %endif SQL_OMIT_CAST + +expr(A) ::= TRIM(X) LP trim_operands(Y) RP(E). { + A.pExpr = sqlExprFunction(pParse, Y, &X); + spanSet(&A, &X, &E); +} + +%type trim_operands {ExprList*} +%destructor trim_operands {sql_expr_list_delete(pParse->db, $$);} +trim_operands(A) ::= from_clause(F) expr(Y). { + A = F ? sql_expr_list_append(pParse->db, F, Y.pExpr) : + sql_expr_list_append(pParse->db,A,Y.pExpr); +} +trim_operands(A) ::= expr(Y). { + A = sql_expr_list_append(pParse->db, NULL, Y.pExpr); +} + +%type from_clause {ExprList*} +%destructor from_clause {sql_expr_list_delete(pParse->db, $$);} + +from_clause(A) ::= trim_specification(N) expr(Y) FROM. { + struct Expr* p = sqlExprAlloc(pParse->db, TK_INTEGER, &sqlIntTokens[N], 1); + A = sql_expr_list_append(pParse->db, NULL, p); + A = sql_expr_list_append(pParse->db, A, Y.pExpr); +} +from_clause(A) ::= trim_specification(N) FROM. { + struct Expr* p = sqlExprAlloc(pParse->db, TK_INTEGER, &sqlIntTokens[N], 1); + A = sql_expr_list_append(pParse->db, NULL, p); +} +from_clause(A) ::= expr(Y) FROM. { + A = sql_expr_list_append(pParse->db, NULL, Y.pExpr); +} +from_clause(A) ::= FROM. {A = 0;} + +%type trim_specification {int} + +trim_specification(A) ::= LEADING. { A = 1; } +trim_specification(A) ::= TRAILING. { A = 2; } +trim_specification(A) ::= BOTH. { A = 3; } + expr(A) ::= id(X) LP distinct(D) exprlist(Y) RP(E). { if( Y && Y->nExpr>pParse->db->aLimit[SQL_LIMIT_FUNCTION_ARG] ){ const char *err = diff --git a/test/sql-tap/badutf1.test.lua b/test/sql-tap/badutf1.test.lua index d104efaa980d..d32bafae0e4b 100755 --- a/test/sql-tap/badutf1.test.lua +++ b/test/sql-tap/badutf1.test.lua @@ -302,7 +302,7 @@ test:do_test( test:do_test( "badutf-4.1", function() - return test:execsql2("SELECT hex(trim('\x80\x80\x80\xf0\x80\x80\x80\xff','\x80\xff')) AS x") + return test:execsql2("SELECT hex(trim('\x80\xff' FROM '\x80\x80\x80\xf0\x80\x80\x80\xff')) AS x") end, { -- "X", "F0" @@ -312,7 +312,7 @@ test:do_test( test:do_test( "badutf-4.2", function() - return test:execsql2("SELECT hex(ltrim('\x80\x80\x80\xf0\x80\x80\x80\xff','\x80\xff')) AS x") + return test:execsql2("SELECT hex(trim(LEADING '\x80\xff' FROM '\x80\x80\x80\xf0\x80\x80\x80\xff')) AS x") end, { -- "X", "F0808080FF" @@ -322,7 +322,7 @@ test:do_test( test:do_test( "badutf-4.3", function() - return test:execsql2("SELECT hex(rtrim('\x80\x80\x80\xf0\x80\x80\x80\xff','\x80\xff')) AS x") + return test:execsql2("SELECT hex(trim(TRAILING '\x80\xff' FROM '\x80\x80\x80\xf0\x80\x80\x80\xff')) AS x") end, { -- "X", "808080F0" @@ -332,7 +332,7 @@ test:do_test( test:do_test( "badutf-4.4", function() - return test:execsql2("SELECT hex(trim('\x80\x80\x80\xf0\x80\x80\x80\xff','\xff\x80')) AS x") + return test:execsql2("SELECT hex(trim('\xff\x80' FROM '\x80\x80\x80\xf0\x80\x80\x80\xff')) AS x") end, { -- "X", "808080F0808080FF" @@ -342,7 +342,7 @@ test:do_test( test:do_test( "badutf-4.5", function() - return test:execsql2("SELECT hex(trim('\xff\x80\x80\xf0\x80\x80\x80\xff','\xff\x80')) AS x") + return test:execsql2("SELECT hex(trim('\xff\x80' FROM '\xff\x80\x80\xf0\x80\x80\x80\xff')) AS x") end, { -- "X", "80F0808080FF" @@ -352,7 +352,7 @@ test:do_test( test:do_test( "badutf-4.6", function() - return test:execsql2("SELECT hex(trim('\xff\x80\xf0\x80\x80\x80\xff','\xff\x80')) AS x") + return test:execsql2("SELECT hex(trim('\xff\x80' FROM '\xff\x80\xf0\x80\x80\x80\xff')) AS x") end, { -- "X", "F0808080FF" @@ -362,7 +362,7 @@ test:do_test( test:do_test( "badutf-4.7", function() - return test:execsql2("SELECT hex(trim('\xff\x80\xf0\x80\x80\x80\xff','\xff\x80\x80')) AS x") + return test:execsql2("SELECT hex(trim('\xff\x80\x80' FROM '\xff\x80\xf0\x80\x80\x80\xff')) AS x") end, { -- "X", "FF80F0808080FF" diff --git a/test/sql-tap/func.test.lua b/test/sql-tap/func.test.lua index 889fc5867e5d..d9c96c5bd0e6 100755 --- a/test/sql-tap/func.test.lua +++ b/test/sql-tap/func.test.lua @@ -1,6 +1,6 @@ #!/usr/bin/env tarantool test = require("sqltester") -test:plan(14586) +test:plan(14589) --!./tcltestrunner.lua -- 2001 September 15 @@ -1912,37 +1912,37 @@ test:do_test( test:do_catchsql_test( "func-22.1", [[ - SELECT trim(1,2,3) + SELECT TRIM(1,2,3) ]], { -- - 1, "wrong number of arguments to function TRIM()" + 1, "Syntax error near ','" -- }) test:do_catchsql_test( "func-22.2", [[ - SELECT ltrim(1,2,3) + SELECT LTRIM(1,2,3) ]], { -- - 1, "wrong number of arguments to function LTRIM()" + 1, "Function 'LTRIM' does not exist" -- }) test:do_catchsql_test( "func-22.3", [[ - SELECT rtrim(1,2,3) + SELECT RTRIM(1,2,3) ]], { -- - 1, "wrong number of arguments to function RTRIM()" + 1, "Function 'RTRIM' does not exist" -- }) test:do_execsql_test( "func-22.4", [[ - SELECT trim(' hi '); + SELECT TRIM(' hi '); ]], { -- "hi" @@ -1952,7 +1952,7 @@ test:do_execsql_test( test:do_execsql_test( "func-22.5", [[ - SELECT ltrim(' hi '); + SELECT TRIM(LEADING FROM ' hi '); ]], { -- "hi " @@ -1962,7 +1962,7 @@ test:do_execsql_test( test:do_execsql_test( "func-22.6", [[ - SELECT rtrim(' hi '); + SELECT TRIM(TRAILING FROM ' hi '); ]], { -- " hi" @@ -1972,7 +1972,7 @@ test:do_execsql_test( test:do_execsql_test( "func-22.7", [[ - SELECT trim(' hi ','xyz'); + SELECT TRIM('xyz' FROM ' hi '); ]], { -- " hi " @@ -1982,7 +1982,7 @@ test:do_execsql_test( test:do_execsql_test( "func-22.8", [[ - SELECT ltrim(' hi ','xyz'); + SELECT TRIM(LEADING 'xyz' FROM ' hi '); ]], { -- " hi " @@ -1992,7 +1992,7 @@ test:do_execsql_test( test:do_execsql_test( "func-22.9", [[ - SELECT rtrim(' hi ','xyz'); + SELECT TRIM(TRAILING 'xyz' FROM ' hi '); ]], { -- " hi " @@ -2002,7 +2002,7 @@ test:do_execsql_test( test:do_execsql_test( "func-22.10", [[ - SELECT trim('xyxzy hi zzzy','xyz'); + SELECT TRIM('xyz' FROM 'xyxzy hi zzzy'); ]], { -- " hi " @@ -2012,7 +2012,7 @@ test:do_execsql_test( test:do_execsql_test( "func-22.11", [[ - SELECT ltrim('xyxzy hi zzzy','xyz'); + SELECT TRIM(LEADING 'xyz' FROM 'xyxzy hi zzzy'); ]], { -- " hi zzzy" @@ -2022,7 +2022,7 @@ test:do_execsql_test( test:do_execsql_test( "func-22.12", [[ - SELECT rtrim('xyxzy hi zzzy','xyz'); + SELECT TRIM(TRAILING 'xyz' FROM 'xyxzy hi zzzy'); ]], { -- "xyxzy hi " @@ -2032,7 +2032,7 @@ test:do_execsql_test( test:do_execsql_test( "func-22.13", [[ - SELECT trim(' hi ',''); + SELECT TRIM('' FROM ' hi '); ]], { -- " hi " @@ -2043,7 +2043,7 @@ test:do_execsql_test( test:do_execsql_test( "func-22.14", [[ - SELECT hex(trim(x'c280e1bfbff48fbfbf6869',x'6162e1bfbfc280')) + SELECT hex(TRIM(x'6162e1bfbfc280' FROM x'c280e1bfbff48fbfbf6869')) ]], { -- "F48FBFBF6869" @@ -2052,8 +2052,8 @@ test:do_execsql_test( test:do_execsql_test( "func-22.15", - [[SELECT hex(trim(x'6869c280e1bfbff48fbfbf61', - x'6162e1bfbfc280f48fbfbf'))]], { + [[SELECT hex(TRIM(x'6162e1bfbfc280f48fbfbf' + FROM x'6869c280e1bfbff48fbfbf61'))]], { -- "6869" -- @@ -2062,7 +2062,7 @@ test:do_execsql_test( test:do_execsql_test( "func-22.16", [[ - SELECT hex(trim(x'ceb1ceb2ceb3',x'ceb1')); + SELECT hex(TRIM(x'ceb1' FROM x'ceb1ceb2ceb3')); ]], { -- "CEB2CEB3" @@ -2073,7 +2073,7 @@ test:do_execsql_test( test:do_execsql_test( "func-22.20", [[ - SELECT typeof(trim(NULL)); + SELECT typeof(TRIM(NULL)); ]], { -- "null" @@ -2083,7 +2083,7 @@ test:do_execsql_test( test:do_execsql_test( "func-22.21", [[ - SELECT typeof(trim(NULL,'xyz')); + SELECT typeof(TRIM('xyz' FROM NULL)); ]], { -- "null" @@ -2093,7 +2093,7 @@ test:do_execsql_test( test:do_execsql_test( "func-22.22", [[ - SELECT typeof(trim('hello',NULL)); + SELECT typeof(TRIM(NULL FROM 'hello')); ]], { -- "null" @@ -2105,7 +2105,7 @@ test:do_execsql_test( test:do_execsql_test( "func-22.23", [[ - SELECT TRIM(X'004100', X'00'); + SELECT TRIM(X'00' FROM X'004100'); ]], { -- "A" @@ -2115,7 +2115,7 @@ test:do_execsql_test( test:do_execsql_test( "func-22.24", [[ - SELECT TRIM(X'004100', X'0000'); + SELECT TRIM(X'0000' FROM X'004100'); ]], { -- "A" @@ -2125,7 +2125,7 @@ test:do_execsql_test( test:do_execsql_test( "func-22.25", [[ - SELECT TRIM(X'004100', X'0042'); + SELECT TRIM(X'0042' FROM X'004100'); ]], { -- "A" @@ -2135,7 +2135,7 @@ test:do_execsql_test( test:do_execsql_test( "func-22.26", [[ - SELECT TRIM(X'00004100420000', X'00'); + SELECT TRIM(X'00' FROM X'00004100420000'); ]], { -- "A\0B" @@ -2145,7 +2145,7 @@ test:do_execsql_test( test:do_execsql_test( "func-22.27", [[ - SELECT LTRIM(X'004100', X'00'); + SELECT TRIM(LEADING X'00' FROM X'004100'); ]], { -- "A\0" @@ -2155,7 +2155,7 @@ test:do_execsql_test( test:do_execsql_test( "func-22.28", [[ - SELECT LTRIM(X'004100', X'0000'); + SELECT TRIM(LEADING X'0000' FROM X'004100'); ]], { -- "A\0" @@ -2165,7 +2165,7 @@ test:do_execsql_test( test:do_execsql_test( "func-22.29", [[ - SELECT LTRIM(X'004100', X'0042'); + SELECT TRIM(LEADING X'0042' FROM X'004100'); ]], { -- "A\0" @@ -2175,7 +2175,7 @@ test:do_execsql_test( test:do_execsql_test( "func-22.30", [[ - SELECT LTRIM(X'00004100420000', X'00'); + SELECT TRIM(LEADING X'00' FROM X'00004100420000'); ]], { -- "A\0B\0\0" @@ -2185,7 +2185,7 @@ test:do_execsql_test( test:do_execsql_test( "func-22.31", [[ - SELECT RTRIM(X'004100', X'00'); + SELECT TRIM(TRAILING X'00' FROM X'004100'); ]], { -- "\0A" @@ -2195,7 +2195,7 @@ test:do_execsql_test( test:do_execsql_test( "func-22.32", [[ - SELECT RTRIM(X'004100', X'0000'); + SELECT TRIM(TRAILING X'0000' FROM X'004100'); ]], { -- "\0A" @@ -2205,7 +2205,7 @@ test:do_execsql_test( test:do_execsql_test( "func-22.33", [[ - SELECT RTRIM(X'004100', X'0042'); + SELECT TRIM(TRAILING X'0042' FROM X'004100'); ]], { -- "\0A" @@ -2215,13 +2215,44 @@ test:do_execsql_test( test:do_execsql_test( "func-22.34", [[ - SELECT RTRIM(X'00004100420000', X'00'); + SELECT TRIM(TRAILING X'00' FROM X'00004100420000'); ]], { -- "\0\0A\0B" -- }) +-- gh-3879 Check BOTH. + +test:do_execsql_test( + "func-22.35", + [[ + SELECT TRIM(BOTH FROM ' hi '); + ]], { + -- + "hi" + -- + }) +test:do_execsql_test( + "func-22.36", + [[ + SELECT TRIM(BOTH 'xyz' FROM ' hi '); + ]], { + -- + " hi " + -- + }) + +test:do_execsql_test( + "func-22.37", + [[ + SELECT TRIM(BOTH 'xyz' FROM 'xyxzy hi zzzy'); + ]], { + -- + " hi " + -- + }) + -- This is to test the deprecated sql_aggregate_count() API. -- --test:do_test( @@ -2838,16 +2869,16 @@ test:do_execsql_test( "SELECT TRIM(CHAR(32,00,32,00,32));", {string.char(00,32,00)}) --- LTRIM +-- LEFT TRIM test:do_execsql_test( "func-70", - "SELECT LTRIM(CHAR(32,00,32,00,32));", + "SELECT TRIM(LEADING FROM CHAR(32,00,32,00,32));", {string.char(00,32,00,32)}) --- RTRIM +-- RIGHT TRIM test:do_execsql_test( "func-71", - "SELECT RTRIM(CHAR(32,00,32,00,32));", + "SELECT TRIM(TRAILING FROM CHAR(32,00,32,00,32));", {string.char(32,00,32,00)}) -- GROUP_CONCAT diff --git a/test/sql-tap/with1.test.lua b/test/sql-tap/with1.test.lua index f1a1699637d1..5f26f1141d3a 100755 --- a/test/sql-tap/with1.test.lua +++ b/test/sql-tap/with1.test.lua @@ -550,7 +550,7 @@ test:do_execsql_test("8.1-mandelbrot", [[ SELECT group_concat( substr(' .+*#', 1+min(iter/7,4), 1), '') FROM m2 GROUP BY cy ) - SELECT group_concat(rtrim(t),x'0a') FROM a; + SELECT group_concat(trim(TRAILING FROM t),x'0a') FROM a; ]], { -- <8.1-mandelbrot> [[ ....#