Skip to content

Commit

Permalink
sql: modify TRIM() function signature
Browse files Browse the repository at this point in the history
According to the ANSI standard, ltrim, rtrim and trim should
be merged into one unified TRIM() function. The kind of trimming
(leading, trailing or both) and the characters to trim are
determined by the arguments of this function.

Closes #3879
  • Loading branch information
romanhabibov committed Apr 15, 2019
1 parent 3f42ef0 commit c36211d
Show file tree
Hide file tree
Showing 7 changed files with 328 additions and 127 deletions.
4 changes: 4 additions & 0 deletions extra/mkkeywordhash.c
Expand Up @@ -277,7 +277,11 @@ static Keyword aKeywordTable[] = {
{ "WHENEVER", "TK_STANDARD", RESERVED, true },
{ "WHILE", "TK_STANDARD", RESERVED, true },
{ "TEXT", "TK_TEXT", RESERVED, true },
{ "TRIM", "TK_TRIM", ALWAYS, true },
{ "TRUNCATE", "TK_TRUNCATE", ALWAYS, true },
{ "LEADING", "TK_LEADING", ALWAYS, true },
{ "TRAILING", "TK_TRAILING", ALWAYS, true },
{ "BOTH", "TK_BOTH", ALWAYS, true },
};

/* Number of keywords */
Expand Down
263 changes: 186 additions & 77 deletions src/box/sql/func.c
Expand Up @@ -1206,108 +1206,220 @@ replaceFunc(sql_context * context, int argc, sql_value ** argv)
sql_result_text(context, (char *)zOut, j, sql_free);
}

/*
* Implementation of the TRIM(), LTRIM(), and RTRIM() functions.
* The userdata is 0x1 for left trim, 0x2 for right trim, 0x3 for both.
enum trim_specification {
LEADING = 1,
TRAILING = 2,
BOTH = 3
};

/**
* Trim the characters contained in @a collation from the start and/or
* the end of @a input_str, as selected by @a flags.
* @param context SQL context.
* @param flags Trim specification: left, right or both.
* @param collation Character set.
* @param coll_sz Character set size in bytes.
* @param input_str Input string for trimming.
* @param input_str_sz Input string size in bytes.
*/
static void
trimFunc(sql_context * context, int argc, sql_value ** argv)
trim_procedure(sql_context * context, enum trim_specification flags,
const unsigned char *collation, int coll_sz,
const unsigned char *input_str, int input_str_sz)
{
const unsigned char *zIn; /* Input string */
const unsigned char *zCharSet; /* Set of characters to trim */
int nIn; /* Number of bytes in input */
int flags; /* 1: trimleft 2: trimright 3: trim */
int i; /* Loop counter */
unsigned char *aLen = 0; /* Length of each character in zCharSet */
unsigned char **azChar = 0; /* Individual characters in zCharSet */
int nChar; /* Number of characters in zCharSet */
int i;
/*
* Length of each character in collation.
*/
unsigned char *aLen = 0;
/*
* Individual characters in collation.
*/
unsigned char **azChar = 0;
/*
* Number of characters in collation.
*/
int nChar;

if (sql_value_type(argv[0]) == SQL_NULL) {
return;
}
zIn = sql_value_text(argv[0]);
if (zIn == 0)
return;
nIn = sql_value_bytes(argv[0]);
assert(zIn == sql_value_text(argv[0]));
if (argc == 1) {
static const unsigned char lenOne[] = { 1 };
static unsigned char *const azOne[] = { (u8 *) " " };
nChar = 1;
aLen = (u8 *) lenOne;
azChar = (unsigned char **)azOne;
zCharSet = 0;
} else if ((zCharSet = sql_value_text(argv[1])) == 0) {
return;
} else {
const unsigned char *z = zCharSet;
int trim_set_sz = sql_value_bytes(argv[1]);
/*
* Count the number of UTF-8 characters passing
* through the entire char set, but not up
* to the '\0' or X'00' character. This allows
* to handle trimming set containing such
* characters.
*/
nChar = sql_utf8_char_count(z, trim_set_sz);
if (nChar > 0) {
azChar =
contextMalloc(context,
((i64) nChar) * (sizeof(char *) + 1));
if (azChar == 0) {
return;
}
aLen = (unsigned char *)&azChar[nChar];
z = zCharSet;
i = 0;
nChar = 0;
int handled_bytes_cnt = trim_set_sz;
while(handled_bytes_cnt > 0) {
azChar[nChar] = (unsigned char *)(z + i);
SQL_UTF8_FWD_1(z, i, trim_set_sz);
aLen[nChar] = (u8) (z + i - azChar[nChar]);
handled_bytes_cnt -= aLen[nChar];
nChar++;
}
const unsigned char *z = collation;
/*
* Count the number of UTF-8 characters passing
* through the entire char set, but not up
* to the '\0' or X'00' character. This allows
* to handle trimming set containing such
* characters.
*/
nChar = sql_utf8_char_count(z, coll_sz);
if (nChar > 0) {
azChar =
contextMalloc(context,
((i64) nChar) * (sizeof(char *) + 1));
if (azChar == 0) {
return;
}
aLen = (unsigned char *)&azChar[nChar];
z = collation;
i = 0;
nChar = 0;
int handled_bytes_cnt = coll_sz;
while(handled_bytes_cnt > 0) {
azChar[nChar] = (unsigned char *)(z + i);
SQL_UTF8_FWD_1(z, i, coll_sz);
aLen[nChar] = (u8) (z + i - azChar[nChar]);
handled_bytes_cnt -= aLen[nChar];
nChar++;
}
}
if (nChar > 0) {
flags = SQL_PTR_TO_INT(sql_user_data(context));
if (flags & 1) {
while (nIn > 0) {
while (input_str_sz > 0) {
int len = 0;
for (i = 0; i < nChar; i++) {
len = aLen[i];
if (len <= nIn
&& memcmp(zIn, azChar[i], len) == 0)
if (len <= input_str_sz
&& memcmp(input_str,
azChar[i], len) == 0)
break;
}
if (i >= nChar)
break;
zIn += len;
nIn -= len;
input_str += len;
input_str_sz -= len;
}
}
if (flags & 2) {
while (nIn > 0) {
while (input_str_sz > 0) {
int len = 0;
for (i = 0; i < nChar; i++) {
len = aLen[i];
if (len <= nIn
&& memcmp(&zIn[nIn - len],
if (len <= input_str_sz
&& memcmp(&input_str[input_str_sz - len],
azChar[i], len) == 0)
break;
}
if (i >= nChar)
break;
nIn -= len;
input_str_sz -= len;
}
}
if (zCharSet) {
if (collation) {
sql_free(azChar);
}
}
sql_result_text(context, (char *)zIn, nIn, SQL_TRANSIENT);
sql_result_text(context, (char *)input_str,input_str_sz,
SQL_TRANSIENT);
}

/**
 * TRIM() entry point for the single-argument form.
 *
 * Case: TRIM(<str>)
 * The trimming procedure is invoked with BOTH as the side and " " as
 * the set of characters to trim. The function returns without setting
 * a result when <str> is NULL or its text cannot be obtained.
 *
 * @param context SQL context.
 * @param argc Number of args, expected to be 1.
 * @param argv Args array; argv[0] is the string to trim.
 */
static void
trim_func_one_arg(sql_context * context, int argc, sql_value **argv)
{
	assert(argc == 1);

	sql_value *subject = argv[0];
	if (sql_value_type(subject) == SQL_NULL)
		return;

	const unsigned char *text = sql_value_text(subject);
	if (text == NULL)
		return;

	int text_len = sql_value_bytes(subject);
	/* Asking for the size must not have changed the text pointer. */
	assert(text == sql_value_text(subject));

	trim_procedure(context, BOTH, (const unsigned char *) " ", 1,
		       text, text_len);
}

/**
 * TRIM() entry point for the two-argument forms.
 *
 * Case: TRIM(<trim_collation> FROM <str>)
 * Only the characters to trim are given: the trimming procedure is
 * called with BOTH as the side and those characters as the set.
 *
 * Case: TRIM(LEADING/TRAILING/BOTH FROM <str>)
 * Only the side keyword is given: the trimming procedure is called
 * with that side and " " as the set of characters to trim.
 *
 * The two cases are told apart by the type of argv[0]: an integer is
 * taken as the side, any other value is read as text. The function
 * returns without setting a result when <str> is NULL or when the
 * text of either argument cannot be obtained.
 *
 * NOTE(review): an integer argv[0] is forwarded as the side without
 * a range check - confirm that only the values 1..3 can get here.
 *
 * @param context SQL context.
 * @param argc Number of args, expected to be 2.
 * @param argv Args array; argv[0] is the side or the characters to
 *             trim, argv[1] is the string to trim.
 */
static void
trim_func_two_arg(sql_context * context, int argc, sql_value **argv)
{
	assert(argc == 2);

	sql_value *subject = argv[1];
	if (sql_value_type(subject) == SQL_NULL)
		return;

	const unsigned char *text = sql_value_text(subject);
	if (text == NULL)
		return;

	int text_len = sql_value_bytes(subject);
	/* Asking for the size must not have changed the text pointer. */
	assert(text == sql_value_text(subject));

	if (sql_value_type(argv[0]) == SQL_INTEGER) {
		/* Side keyword only: trim spaces from the given side. */
		trim_procedure(context, sql_value_int(argv[0]),
			       (const unsigned char *) " ", 1,
			       text, text_len);
		return;
	}

	/* Characters only: trim them from both sides. */
	const unsigned char *trim_set = sql_value_text(argv[0]);
	if (trim_set == NULL)
		return;

	int trim_set_len = sql_value_bytes(argv[0]);
	trim_procedure(context, BOTH, trim_set, trim_set_len, text,
		       text_len);
}

/**
 * TRIM() entry point for the three-argument form.
 *
 * Case: TRIM(LEADING/TRAILING/BOTH <trim_collation> FROM <str>)
 * Both the side keyword and the characters to trim are given; the
 * trimming procedure is called with exactly those values. The
 * function returns without setting a result when <str> is NULL or
 * when the text of <str> or of <trim_collation> cannot be obtained.
 *
 * @param context SQL context.
 * @param argc Number of args, expected to be 3.
 * @param argv Args array; argv[0] is the side (asserted to be an
 *             integer), argv[1] is the characters to trim, argv[2]
 *             is the string to trim.
 */
static void
trim_func_three_arg(sql_context * context, int argc, sql_value **argv)
{
	assert(argc == 3);

	sql_value *subject = argv[2];
	if (sql_value_type(subject) == SQL_NULL)
		return;

	const unsigned char *text = sql_value_text(subject);
	if (text == NULL)
		return;

	int text_len = sql_value_bytes(subject);
	/* Asking for the size must not have changed the text pointer. */
	assert(text == sql_value_text(subject));

	assert(sql_value_type(argv[0]) == SQL_INTEGER);

	const unsigned char *trim_set = sql_value_text(argv[1]);
	if (trim_set == NULL)
		return;

	int trim_set_len = sql_value_bytes(argv[1]);
	trim_procedure(context, sql_value_int(argv[0]), trim_set,
		       trim_set_len, text, text_len);
}

#ifdef SQL_ENABLE_UNKNOWN_SQL_FUNCTION
Expand Down Expand Up @@ -1738,12 +1850,9 @@ sqlRegisterBuiltinFunctions(void)
FIELD_TYPE_INTEGER),
FUNCTION2(likely, 1, 0, 0, noopFunc, SQL_FUNC_UNLIKELY,
FIELD_TYPE_INTEGER),
FUNCTION_COLL(ltrim, 1, 1, 0, trimFunc),
FUNCTION_COLL(ltrim, 2, 1, 0, trimFunc),
FUNCTION_COLL(rtrim, 1, 2, 0, trimFunc),
FUNCTION_COLL(rtrim, 2, 2, 0, trimFunc),
FUNCTION_COLL(trim, 1, 3, 0, trimFunc),
FUNCTION_COLL(trim, 2, 3, 0, trimFunc),
FUNCTION_COLL(trim, 1, 3, 0, trim_func_one_arg),
FUNCTION_COLL(trim, 2, 3, 0, trim_func_two_arg),
FUNCTION_COLL(trim, 3, 3, 0, trim_func_three_arg),
FUNCTION(min, -1, 0, 1, minmaxFunc, FIELD_TYPE_SCALAR),
FUNCTION(min, 0, 0, 1, 0, FIELD_TYPE_SCALAR),
AGGREGATE2(min, 1, 0, 1, minmaxStep, minMaxFinalize,
Expand Down
6 changes: 4 additions & 2 deletions src/box/sql/global.c
Expand Up @@ -223,11 +223,13 @@ SQL_WSD struct sqlConfig sqlConfig = {
FuncDefHash sqlBuiltinFunctions;

/*
* Constant tokens for values 0 and 1.
* Constant tokens for necessary integer values.
*/
const Token sqlIntTokens[] = {
{"0", 1, false},
{"1", 1, false}
{"1", 1, false},
{"2", 1, false},
{"3", 1, false}
};

/*
Expand Down
44 changes: 44 additions & 0 deletions src/box/sql/parse.y
Expand Up @@ -937,6 +937,50 @@ expr(A) ::= CAST(X) LP expr(E) AS typedef(T) RP(Y). {
sqlExprAttachSubtrees(pParse->db, A.pExpr, E.pExpr, 0);
}
%endif SQL_OMIT_CAST

/*
 * TRIM(...) expression. The operand list collected by trim_operands is
 * handed to sqlExprFunction() together with the TRIM token, and the span
 * of the result runs from the TRIM token to the closing parenthesis.
 */
expr(A) ::= TRIM(X) LP trim_operands(Y) RP(E). {
A.pExpr = sqlExprFunction(pParse, Y, &X);
spanSet(&A, &X, &E);
}

/*
 * Operands of TRIM(): an optional "... FROM" clause followed by the
 * expression to trim. The expression to trim is always appended last.
 */
%type trim_operands {struct ExprList *}
/* The operand list is released with sql_expr_list_delete(). */
%destructor trim_operands { sql_expr_list_delete(pParse->db, $$); }

/* With a FROM clause: append the expression to trim to the clause's list. */
trim_operands(A) ::= trim_from_clause(F) expr(Y). {
A = sql_expr_list_append(pParse->db, F, Y.pExpr);
}

/*
 * Without a FROM clause: append the expression to trim to a NULL list
 * (presumably this allocates a new list - confirm in
 * sql_expr_list_append()).
 */
trim_operands(A) ::= expr(Y). {
A = sql_expr_list_append(pParse->db, NULL, Y.pExpr);
}

/*
 * The part of the TRIM() operands that precedes and includes FROM.
 * Yields an expression list holding the leading operands.
 */
%type trim_from_clause {struct ExprList *}
/* The list is released with sql_expr_list_delete(). */
%destructor trim_from_clause { sql_expr_list_delete(pParse->db, $$); }

/* "<expr> FROM": the list holds that single expression. */
trim_from_clause(A) ::= expr(Y) FROM. {
A = sql_expr_list_append(pParse->db, NULL, Y.pExpr);
}

/*
 * "LEADING/TRAILING/BOTH [<expr>] FROM": the list starts with an integer
 * expression encoding the side, followed by the optional expression.
 */
trim_from_clause(A) ::= trim_specification(N) trim_character(Y) FROM. {
/* N is 1, 2 or 3 (see trim_specification) and indexes sqlIntTokens. */
struct Expr *p = sqlExprAlloc(pParse->db, TK_INTEGER, &sqlIntTokens[N], 1);
A = sql_expr_list_append(pParse->db, NULL, p);
/* The expression after the side keyword may be omitted. */
if (Y != NULL) {
A = sql_expr_list_append(pParse->db, A, Y);
}
}

/*
 * Optional expression that follows the side keyword in a TRIM() FROM
 * clause. NULL when it is omitted.
 */
%type trim_character {struct Expr *}
/* The expression is released with sql_expr_delete(). */
%destructor trim_character {sql_expr_delete(pParse->db, $$, false);}

/* Omitted. */
trim_character(A) ::= . { A = NULL; }
/* Given explicitly. */
trim_character(A) ::= expr(X). { A = X.pExpr; }

/*
 * Side keyword of TRIM(), encoded as an integer: LEADING is 1, TRAILING
 * is 2 and BOTH is 3. The value is used as an index into sqlIntTokens
 * by the trim_from_clause rule.
 */
%type trim_specification {int}

trim_specification(A) ::= LEADING. {A = 1;}
trim_specification(A) ::= TRAILING. {A = 2;}
trim_specification(A) ::= BOTH. {A = 3;}

expr(A) ::= id(X) LP distinct(D) exprlist(Y) RP(E). {
if( Y && Y->nExpr>pParse->db->aLimit[SQL_LIMIT_FUNCTION_ARG] ){
const char *err =
Expand Down

0 comments on commit c36211d

Please sign in to comment.