Skip to content

Commit

Permalink
sql: modify TRIM() function signature
Browse files Browse the repository at this point in the history
According to the ANSI standard, ltrim, rtrim and trim should
be merged into one unified TRIM() function. The kind of trimming
(left, right or both sides, and the set of characters to trim) is
determined by the arguments of this function.

Closes #3879
  • Loading branch information
romanhabibov committed Apr 20, 2019
1 parent a001697 commit 9ae7a84
Show file tree
Hide file tree
Showing 9 changed files with 313 additions and 146 deletions.
4 changes: 4 additions & 0 deletions extra/mkkeywordhash.c
Expand Up @@ -278,6 +278,10 @@ static Keyword aKeywordTable[] = {
{ "WHILE", "TK_STANDARD", RESERVED, true },
{ "TEXT", "TK_TEXT", RESERVED, true },
{ "TRUNCATE", "TK_TRUNCATE", ALWAYS, true },
{ "TRIM", "TK_TRIM", ALWAYS, true },
{ "LEADING", "TK_LEADING", ALWAYS, true },
{ "TRAILING", "TK_TRAILING", ALWAYS, true },
{ "BOTH", "TK_BOTH", ALWAYS, true },
};

/* Number of keywords */
Expand Down
254 changes: 158 additions & 96 deletions src/box/sql/func.c
Expand Up @@ -1286,108 +1286,173 @@ replaceFunc(sql_context * context, int argc, sql_value ** argv)
sql_result_text(context, (char *)zOut, j, sql_free);
}

/*
* Implementation of the TRIM(), LTRIM(), and RTRIM() functions.
* The userdata is 0x1 for left trim, 0x2 for right trim, 0x3 for both.
/**
* Remove characters included in @a trim_set from @a input_str
* until a character that doesn't belong to @a trim_set is met.
* Characters are removed from the side(s) specified by @a flags.
* @param context SQL context.
* @param flags Trim specification: left, right or both.
* @param trim_set The set of characters for trimming.
* @param trim_set_sz Character set size in bytes.
* @param input_str Input string for trimming.
* @param input_str_sz Input string size in bytes.
*/
static void
trimFunc(sql_context * context, int argc, sql_value ** argv)
trim_procedure(struct sql_context *context, enum trim_side_mask flags,
const unsigned char *trim_set, int trim_set_sz,
const unsigned char *input_str, int input_str_sz)
{
const unsigned char *zIn; /* Input string */
const unsigned char *zCharSet; /* Set of characters to trim */
int nIn; /* Number of bytes in input */
int flags; /* 1: trimleft 2: trimright 3: trim */
int i; /* Loop counter */
unsigned char *aLen = 0; /* Length of each character in zCharSet */
unsigned char **azChar = 0; /* Individual characters in zCharSet */
int nChar; /* Number of characters in zCharSet */

if (sql_value_type(argv[0]) == SQL_NULL) {
const unsigned char *z = trim_set;
/*
* Count the number of UTF-8 characters by walking the entire
* character set according to its byte size, rather than
* stopping at the first '\0' or X'00' character. This makes
* it possible to handle a trimming set containing such
* characters.
*/
int char_cnt = sql_utf8_char_count(z, trim_set_sz);
if (char_cnt == 0)
goto result;
/* Individual characters in the character set. */
unsigned char **ind_chars =
contextMalloc(context,
char_cnt * (sizeof(unsigned char *) + 1));
if (ind_chars == NULL)
return;
/* Length of each character in the character set. */
uint8_t *char_len = (uint8_t *)&ind_chars[char_cnt];
z = trim_set;
int i = 0;
char_cnt = 0;
int handled_bytes_cnt = trim_set_sz;
while(handled_bytes_cnt > 0) {
ind_chars[char_cnt] = (unsigned char *)(z + i);
SQL_UTF8_FWD_1(z, i, trim_set_sz);
char_len[char_cnt] = z + i - ind_chars[char_cnt];
handled_bytes_cnt -= char_len[char_cnt];
char_cnt++;
}
zIn = sql_value_text(argv[0]);
if (zIn == 0)
return;
nIn = sql_value_bytes(argv[0]);
assert(zIn == sql_value_text(argv[0]));
if (argc == 1) {
static const unsigned char lenOne[] = { 1 };
static unsigned char *const azOne[] = { (u8 *) " " };
nChar = 1;
aLen = (u8 *) lenOne;
azChar = (unsigned char **)azOne;
zCharSet = 0;
} else if ((zCharSet = sql_value_text(argv[1])) == 0) {
return;
} else {
const unsigned char *z = zCharSet;
int trim_set_sz = sql_value_bytes(argv[1]);
/*
* Count the number of UTF-8 characters passing
* through the entire char set, but not up
* to the '\0' or X'00' character. This allows
* to handle trimming set containing such
* characters.
*/
nChar = sql_utf8_char_count(z, trim_set_sz);
if (nChar > 0) {
azChar =
contextMalloc(context,
((i64) nChar) * (sizeof(char *) + 1));
if (azChar == 0) {
return;
}
aLen = (unsigned char *)&azChar[nChar];
z = zCharSet;
i = 0;
nChar = 0;
int handled_bytes_cnt = trim_set_sz;
while(handled_bytes_cnt > 0) {
azChar[nChar] = (unsigned char *)(z + i);
SQL_UTF8_FWD_1(z, i, trim_set_sz);
aLen[nChar] = (u8) (z + i - azChar[nChar]);
handled_bytes_cnt -= aLen[nChar];
nChar++;
}
}
}
if (nChar > 0) {
flags = SQL_PTR_TO_INT(sql_user_data(context));
if (flags & 1) {
while (nIn > 0) {
int len = 0;
for (i = 0; i < nChar; i++) {
len = aLen[i];
if (len <= nIn
&& memcmp(zIn, azChar[i], len) == 0)
break;
}
if (i >= nChar)

if (char_cnt == 0)
goto result;
if ((flags & TRIM_LEADING) != 0) {
while (input_str_sz > 0) {
int len = 0;
for (i = 0; i < char_cnt; i++) {
len = char_len[i];
if (len <= input_str_sz
&& memcmp(input_str,
ind_chars[i], len) == 0)
break;
zIn += len;
nIn -= len;
}
if (i >= char_cnt)
break;
input_str += len;
input_str_sz -= len;
}
if (flags & 2) {
while (nIn > 0) {
int len = 0;
for (i = 0; i < nChar; i++) {
len = aLen[i];
if (len <= nIn
&& memcmp(&zIn[nIn - len],
azChar[i], len) == 0)
break;
}
if (i >= nChar)
}
if ((flags & TRIM_TRAILING) != 0) {
while (input_str_sz > 0) {
int len = 0;
for (i = 0; i < char_cnt; i++) {
len = char_len[i];
if (len <= input_str_sz
&& memcmp(&input_str[input_str_sz - len],
ind_chars[i], len) == 0)
break;
nIn -= len;
}
}
if (zCharSet) {
sql_free(azChar);
if (i >= char_cnt)
break;
input_str_sz -= len;
}
}
sql_result_text(context, (char *)zIn, nIn, SQL_TRANSIENT);

if (trim_set_sz != 0)
sql_free(ind_chars);

result: sql_result_text(context, (char *)input_str, input_str_sz,
SQL_TRANSIENT);
}

/**
 * TRIM() entry point for the single-argument form.
 *
 * Case: TRIM(<str>)
 * The trimming procedure is invoked with TRIM_BOTH as the side
 * mask and the one-byte string " " as the trimming set.
 *
 * If the argument has no text representation (sql_value_text()
 * yields NULL), the function returns without calling the trimming
 * procedure.
 *
 * @param context SQL context forwarded to the trimming procedure.
 * @param argc Number of arguments; must be 1.
 * @param argv Arguments: argv[0] is the string to trim.
 */
static void
trim_func_one_arg(struct sql_context *context, int argc, sql_value **argv)
{
	assert(argc == 1);
	(void) argc;

	const unsigned char *str = sql_value_text(argv[0]);
	if (str == NULL)
		return;

	/* The byte length is requested only after the text pointer. */
	int str_sz = sql_value_bytes(argv[0]);
	trim_procedure(context, TRIM_BOTH, (const unsigned char *) " ", 1,
		       str, str_sz);
}

/**
 * TRIM() entry point for the two-argument forms. The string to
 * trim is always argv[1]; the meaning of argv[0] is decided by
 * its value type.
 *
 * Case: TRIM(LEADING/TRAILING/BOTH FROM <str>)
 * argv[0] is an integer: it is used as the side mask and " " is
 * used as the trimming set.
 *
 * Case: TRIM(<character_set> FROM <str>)
 * argv[0] is anything else: its text is used as the trimming set
 * and both sides are trimmed (TRIM_BOTH).
 *
 * If either the string or the character set has no text
 * representation, the function returns without calling the
 * trimming procedure.
 *
 * @param context SQL context forwarded to the trimming procedure.
 * @param argc Number of arguments; must be 2.
 * @param argv Arguments: side mask or character set, then string.
 */
static void
trim_func_two_args(struct sql_context *context, int argc, sql_value **argv)
{
	assert(argc == 2);
	(void) argc;

	const unsigned char *str = sql_value_text(argv[1]);
	if (str == NULL)
		return;
	int str_sz = sql_value_bytes(argv[1]);

	if (sql_value_type(argv[0]) == SQL_INTEGER) {
		/* Side keyword only: trim spaces from that side. */
		trim_procedure(context, sql_value_int(argv[0]),
			       (const unsigned char *) " ", 1,
			       str, str_sz);
		return;
	}

	/* Character set only: trim it from both sides. */
	const unsigned char *chars = sql_value_text(argv[0]);
	if (chars == NULL)
		return;
	int chars_sz = sql_value_bytes(argv[0]);
	trim_procedure(context, TRIM_BOTH, chars, chars_sz, str, str_sz);
}

/**
 * TRIM() entry point for the three-argument form.
 *
 * Case: TRIM(LEADING/TRAILING/BOTH <character_set> FROM <str>)
 * argv[0] carries the side mask as an integer, argv[1] the
 * trimming set and argv[2] the string to trim; all three are
 * forwarded to the trimming procedure.
 *
 * If the string or the character set has no text representation,
 * the function returns without calling the trimming procedure.
 *
 * @param context SQL context forwarded to the trimming procedure.
 * @param argc Number of arguments; must be 3.
 * @param argv Arguments: side mask, character set, string.
 */
static void
trim_func_three_args(struct sql_context *context, int argc, sql_value **argv)
{
	assert(argc == 3);
	(void) argc;

	assert(sql_value_type(argv[0]) == SQL_INTEGER);

	/* The string is fetched first, then the character set. */
	const unsigned char *str = sql_value_text(argv[2]);
	if (str == NULL)
		return;
	const unsigned char *chars = sql_value_text(argv[1]);
	if (chars == NULL)
		return;

	int chars_sz = sql_value_bytes(argv[1]);
	int str_sz = sql_value_bytes(argv[2]);
	trim_procedure(context, sql_value_int(argv[0]), chars, chars_sz,
		       str, str_sz);
}

#ifdef SQL_ENABLE_UNKNOWN_SQL_FUNCTION
Expand Down Expand Up @@ -1818,12 +1883,9 @@ sqlRegisterBuiltinFunctions(void)
FIELD_TYPE_INTEGER),
FUNCTION2(likely, 1, 0, 0, noopFunc, SQL_FUNC_UNLIKELY,
FIELD_TYPE_INTEGER),
FUNCTION_COLL(ltrim, 1, 1, 0, trimFunc),
FUNCTION_COLL(ltrim, 2, 1, 0, trimFunc),
FUNCTION_COLL(rtrim, 1, 2, 0, trimFunc),
FUNCTION_COLL(rtrim, 2, 2, 0, trimFunc),
FUNCTION_COLL(trim, 1, 3, 0, trimFunc),
FUNCTION_COLL(trim, 2, 3, 0, trimFunc),
FUNCTION_COLL(trim, 1, 3, 0, trim_func_one_arg),
FUNCTION_COLL(trim, 2, 3, 0, trim_func_two_args),
FUNCTION_COLL(trim, 3, 3, 0, trim_func_three_args),
FUNCTION(min, -1, 0, 1, minmaxFunc, FIELD_TYPE_SCALAR),
FUNCTION(min, 0, 0, 1, 0, FIELD_TYPE_SCALAR),
AGGREGATE2(min, 1, 0, 1, minmaxStep, minMaxFinalize,
Expand Down
55 changes: 50 additions & 5 deletions src/box/sql/parse.y
Expand Up @@ -1032,6 +1032,55 @@ expr(A) ::= CAST(X) LP expr(E) AS typedef(T) RP(Y). {
sqlExprAttachSubtrees(pParse->db, A.pExpr, E.pExpr, 0);
}
%endif SQL_OMIT_CAST

/*
 * TRIM(...) call expression. The operand list Y produced by
 * trim_operands is passed to sqlExprFunction() together with the
 * TRIM keyword token X, and spanSet() is called with X and the
 * closing parenthesis token E, i.e. the tokens that delimit the
 * whole call.
 */
expr(A) ::= TRIM(X) LP trim_operands(Y) RP(E). {
A.pExpr = sqlExprFunction(pParse, Y, &X);
spanSet(&A, &X, &E);
}

/*
 * Operands of TRIM(): an optional FROM clause followed by the
 * expression to be trimmed. The value is an expression list; the
 * destructor below deletes it with sql_expr_list_delete().
 */
%type trim_operands {struct ExprList *}
%destructor trim_operands {sql_expr_list_delete(pParse->db, $$);}

/*
 * Form with a FROM clause: the expression to trim is appended to
 * the list F built by trim_from_clause, so it becomes the last
 * element of the list.
 */
trim_operands(A) ::= trim_from_clause(F) expr(Y). {
A = sql_expr_list_append(pParse->db, F, Y.pExpr);
}

/*
 * Form without a FROM clause: the expression to trim is appended
 * with NULL passed as the list argument, so it is the only operand.
 */
trim_operands(A) ::= expr(Y). {
A = sql_expr_list_append(pParse->db, NULL, Y.pExpr);
}

/*
 * The part of the TRIM() operands that precedes the expression to
 * trim, up to and including the FROM keyword. The value is an
 * expression list; the destructor below deletes it with
 * sql_expr_list_delete().
 */
%type trim_from_clause {struct ExprList *}
%destructor trim_from_clause {sql_expr_list_delete(pParse->db, $$);}

/*
 * The following two rules cover the three allowed combinations of
 * the side keyword (LEADING/TRAILING/BOTH) and the
 * <trim_character_set>:
 *   - <trim_character_set> FROM
 *   - keyword FROM
 *   - keyword <trim_character_set> FROM
 * A bare FROM, with both of them absent, is not accepted.
 */
/* Character set only: the list holds just that expression. */
trim_from_clause(A) ::= expr(Y) FROM. {
A = sql_expr_list_append(pParse->db, NULL, Y.pExpr);
}

/*
 * Side keyword, optionally followed by a character set. The keyword
 * is turned into a TK_INTEGER expression built from the token
 * sqlIntTokens[N], so the value N of trim_specification is used as
 * an index into that token table. The integer expression goes first
 * in the list; the character set, when present, goes second.
 */
trim_from_clause(A) ::= trim_specification(N) expr_optional(Y) FROM. {
struct Expr *p = sql_expr_new_dequoted(pParse->db, TK_INTEGER,
&sqlIntTokens[N]);
A = sql_expr_list_append(pParse->db, NULL, p);
if (Y != NULL)
A = sql_expr_list_append(pParse->db, A, Y);
}

/*
 * An expression that may be omitted. The value is NULL when the
 * expression is absent, otherwise the pExpr of the parsed
 * expression. The destructor deletes it with sql_expr_delete().
 */
%type expr_optional {struct Expr *}
%destructor expr_optional {sql_expr_delete(pParse->db, $$, false);}

/* Empty alternative: no expression was given. */
expr_optional(A) ::= . { A = NULL; }
/* Non-empty alternative: take the expression tree of X. */
expr_optional(A) ::= expr(X). { A = X.pExpr; }

/*
 * Side keyword of TRIM(): each keyword is mapped to the
 * corresponding enum trim_side_mask constant.
 */
%type trim_specification {enum trim_side_mask}

trim_specification(A) ::= LEADING. { A = TRIM_LEADING; }
trim_specification(A) ::= TRAILING. { A = TRIM_TRAILING; }
trim_specification(A) ::= BOTH. { A = TRIM_BOTH; }

expr(A) ::= id(X) LP distinct(D) exprlist(Y) RP(E). {
if( Y && Y->nExpr>pParse->db->aLimit[SQL_LIMIT_FUNCTION_ARG] ){
const char *err =
Expand Down Expand Up @@ -1294,7 +1343,7 @@ expr(A) ::= EXISTS(B) LP select(Y) RP(E). {
}

/* CASE expressions */
expr(A) ::= CASE(C) case_operand(X) case_exprlist(Y) case_else(Z) END(E). {
expr(A) ::= CASE(C) expr_optional(X) case_exprlist(Y) case_else(Z) END(E). {
spanSet(&A,&C,&E); /*A-overwrites-C*/
A.pExpr = sqlPExpr(pParse, TK_CASE, X, 0);
if( A.pExpr ){
Expand All @@ -1319,10 +1368,6 @@ case_exprlist(A) ::= WHEN expr(Y) THEN expr(Z). {
%destructor case_else {sql_expr_delete(pParse->db, $$, false);}
case_else(A) ::= ELSE expr(X). {A = X.pExpr;}
case_else(A) ::= . {A = 0;}
%type case_operand {Expr*}
%destructor case_operand {sql_expr_delete(pParse->db, $$, false);}
case_operand(A) ::= expr(X). {A = X.pExpr; /*A-overwrites-X*/}
case_operand(A) ::= . {A = 0;}

%type exprlist {ExprList*}
%destructor exprlist {sql_expr_list_delete(pParse->db, $$);}
Expand Down
4 changes: 3 additions & 1 deletion src/box/sql/parse_def.c
Expand Up @@ -34,7 +34,9 @@

const struct Token sqlIntTokens[] = {
{"0", 1, false},
{"1", 1, false}
{"1", 1, false},
{"2", 1, false},
{"3", 1, false},
};

void
Expand Down
2 changes: 1 addition & 1 deletion src/box/sql/parse_def.h
Expand Up @@ -87,7 +87,7 @@ struct Token {
bool isReserved;
};

/** Constant tokens for values 0 and 1. */
/** Constant tokens for integer values. */
extern const struct Token sqlIntTokens[];

/** Generate a Token object from a string. */
Expand Down

0 comments on commit 9ae7a84

Please sign in to comment.