sql: modify TRIM() function signature

According to the ANSI standard, ltrim, rtrim and trim should be merged into one unified TRIM() function. The kind of trimming (leading, trailing or both) and the characters to trim are determined by the arguments of this function. Closes #3879
tarantool · Apr 15, 2019 · c36211d · c36211d
1 parent 3f42ef0
commit c36211d
Show file tree

Hide file tree

Showing 7 changed files with 328 additions and 127 deletions.
diff --git a/extra/mkkeywordhash.c b/extra/mkkeywordhash.c
@@ -277,7 +277,11 @@ static Keyword aKeywordTable[] = {
   { "WHENEVER",               "TK_STANDARD",    RESERVED,         true  },
   { "WHILE",                  "TK_STANDARD",    RESERVED,         true  },
   { "TEXT",                   "TK_TEXT",        RESERVED,         true  },
+  { "TRIM",                   "TK_TRIM",        ALWAYS,           true  },
   { "TRUNCATE",               "TK_TRUNCATE",    ALWAYS,           true  },
+  { "LEADING",                "TK_LEADING",     ALWAYS,           true  },
+  { "TRAILING",               "TK_TRAILING",    ALWAYS,           true  },
+  { "BOTH",                   "TK_BOTH",        ALWAYS,           true  },
 };
 
 /* Number of keywords */

diff --git a/src/box/sql/func.c b/src/box/sql/func.c
@@ -1206,108 +1206,220 @@ replaceFunc(sql_context * context, int argc, sql_value ** argv)
 	sql_result_text(context, (char *)zOut, j, sql_free);
 }
 
-/*
- * Implementation of the TRIM(), LTRIM(), and RTRIM() functions.
- * The userdata is 0x1 for left trim, 0x2 for right trim, 0x3 for both.
+enum trim_specification {
+	LEADING = 1,
+	TRAILING = 2,
+	BOTH = 3
+};
+
+/**
+ * Remove the characters contained in @a collation from @a input_str.
+ * @param context SQL context; the trimmed string is set as its result.
+ * @param flags Trim specification: LEADING, TRAILING or BOTH.
+ * @param collation Set of characters to trim (UTF-8 encoded).
+ * @param coll_sz Character set size in bytes.
+ * @param input_str Input string for trimming.
+ * @param input_str_sz Input string size in bytes.
  */
 static void
-trimFunc(sql_context * context, int argc, sql_value ** argv)
+trim_procedure(sql_context * context, enum trim_specification flags,
+	     const unsigned char *collation, int coll_sz,
+	     const unsigned char *input_str, int input_str_sz)
 {
-	const unsigned char *zIn;	/* Input string */
-	const unsigned char *zCharSet;	/* Set of characters to trim */
-	int nIn;		/* Number of bytes in input */
-	int flags;		/* 1: trimleft  2: trimright  3: trim */
-	int i;			/* Loop counter */
-	unsigned char *aLen = 0;	/* Length of each character in zCharSet */
-	unsigned char **azChar = 0;	/* Individual characters in zCharSet */
-	int nChar;		/* Number of characters in zCharSet */
+	int i;
+	/*
+	 * Length in bytes of each character in collation.
+	 */
+	unsigned char *aLen = 0;
+	/*
+	 * Individual characters in collation.
+	 */
+	unsigned char **azChar = 0;
+	/*
+	 * Number of characters in collation.
+	 */
+	int nChar;
 
-	if (sql_value_type(argv[0]) == SQL_NULL) {
-		return;
-	}
-	zIn = sql_value_text(argv[0]);
-	if (zIn == 0)
-		return;
-	nIn = sql_value_bytes(argv[0]);
-	assert(zIn == sql_value_text(argv[0]));
-	if (argc == 1) {
-		static const unsigned char lenOne[] = { 1 };
-		static unsigned char *const azOne[] = { (u8 *) " " };
-		nChar = 1;
-		aLen = (u8 *) lenOne;
-		azChar = (unsigned char **)azOne;
-		zCharSet = 0;
-	} else if ((zCharSet = sql_value_text(argv[1])) == 0) {
-		return;
-	} else {
-		const unsigned char *z = zCharSet;
-		int trim_set_sz = sql_value_bytes(argv[1]);
-		/*
-		* Count the number of UTF-8 characters passing
-		* through the entire char set, but not up
-		* to the '\0' or X'00' character. This allows
-		* to handle trimming set containing such
-		* characters.
-		*/
-		nChar = sql_utf8_char_count(z, trim_set_sz);
-		if (nChar > 0) {
-			azChar =
-			    contextMalloc(context,
-					  ((i64) nChar) * (sizeof(char *) + 1));
-			if (azChar == 0) {
-				return;
-			}
-			aLen = (unsigned char *)&azChar[nChar];
-			z = zCharSet;
-			i = 0;
-			nChar = 0;
-			int handled_bytes_cnt = trim_set_sz;
-			while(handled_bytes_cnt > 0) {
-				azChar[nChar] = (unsigned char *)(z + i);
-				SQL_UTF8_FWD_1(z, i, trim_set_sz);
-				aLen[nChar] = (u8) (z + i - azChar[nChar]);
-				handled_bytes_cnt -= aLen[nChar];
-				nChar++;
-			}
+	const unsigned char *z = collation;
+	/*
+	 * Count the UTF-8 characters over the whole
+	 * character set, using its size in bytes instead
+	 * of stopping at the first '\0' (X'00') byte.
+	 * This makes it possible to handle a trimming
+	 * set that contains such characters.
+	 */
+	nChar = sql_utf8_char_count(z, coll_sz);
+	if (nChar > 0) {
+		azChar =
+		    contextMalloc(context,
+				  ((i64) nChar) * (sizeof(char *) + 1));
+		if (azChar == 0) {
+			return;
+		}
+		aLen = (unsigned char *)&azChar[nChar];
+		z = collation;
+		i = 0;
+		nChar = 0;
+		int handled_bytes_cnt = coll_sz;
+		while(handled_bytes_cnt > 0) {
+			azChar[nChar] = (unsigned char *)(z + i);
+			SQL_UTF8_FWD_1(z, i, coll_sz);
+			aLen[nChar] = (u8) (z + i - azChar[nChar]);
+			handled_bytes_cnt -= aLen[nChar];
+			nChar++;
 		}
 	}
 	if (nChar > 0) {
-		flags = SQL_PTR_TO_INT(sql_user_data(context));
 		if (flags & 1) {
-			while (nIn > 0) {
+			while (input_str_sz > 0) {
 				int len = 0;
 				for (i = 0; i < nChar; i++) {
 					len = aLen[i];
-					if (len <= nIn
-					    && memcmp(zIn, azChar[i], len) == 0)
+					if (len <= input_str_sz
+					    && memcmp(input_str,
+						      azChar[i], len) == 0)
 						break;
 				}
 				if (i >= nChar)
 					break;
-				zIn += len;
-				nIn -= len;
+				input_str += len;
+				input_str_sz -= len;
 			}
 		}
 		if (flags & 2) {
-			while (nIn > 0) {
+			while (input_str_sz > 0) {
 				int len = 0;
 				for (i = 0; i < nChar; i++) {
 					len = aLen[i];
-					if (len <= nIn
-					    && memcmp(&zIn[nIn - len],
+					if (len <= input_str_sz
+					    && memcmp(&input_str[input_str_sz - len],
 						      azChar[i], len) == 0)
 						break;
 				}
 				if (i >= nChar)
 					break;
-				nIn -= len;
+				input_str_sz -= len;
 			}
 		}
-		if (zCharSet) {
+		if (collation) {
 			sql_free(azChar);
 		}
 	}
-	sql_result_text(context, (char *)zIn, nIn, SQL_TRANSIENT);
+	sql_result_text(context, (char *)input_str,input_str_sz,
+			SQL_TRANSIENT);
+}
+
+/**
+ * TRIM() implementation for the one-argument form: TRIM(<str>).
+ *
+ * Calls trim_procedure() with BOTH as the flags and the single space
+ * character " " as the set of characters to remove.
+ * Returns without calling it when argv[0] is SQL NULL or has no text.
+ * @param context SQL context, passed through to trim_procedure().
+ * @param argc Number of args; asserted to be 1.
+ * @param argv Args array; argv[0] is the string to trim.
+ */
+static void
+trim_func_one_arg(sql_context * context, int argc, sql_value **argv)
+{
+	const unsigned char *input_str;
+	assert(argc == 1);
+
+	if (sql_value_type(argv[0]) == SQL_NULL) {
+		return;
+	}
+	if ((input_str = sql_value_text(argv[0])) == NULL) {
+		return;
+	}
+
+	int input_str_sz = sql_value_bytes(argv[0]);
+	assert(input_str == sql_value_text(argv[0]));
+
+	trim_procedure(context, BOTH, (const unsigned char *) " ",
+		       1, input_str, input_str_sz);
+}
+
+/**
+ * TRIM() implementation for the two-argument forms; argv[1] is <str>.
+ *
+ * Case: TRIM(<trim_collation> FROM <str>)
+ * argv[0] is not an integer: call trim_procedure() with BOTH as the
+ * flags and the text of argv[0] as the set of characters to remove.
+ *
+ * Case: TRIM(LEADING/TRAILING/BOTH FROM <str>)
+ * argv[0] is an integer: call trim_procedure() with that integer as
+ * the flags and " " as the set of characters to remove.
+ * NOTE(review): any integer argv[0] is taken as a side code - confirm.
+ * @param context SQL context, passed through to trim_procedure().
+ * @param argc Number of args; asserted to be 2.
+ * @param argv Args array: [0] side code or trim characters, [1] <str>.
+ */
+static void
+trim_func_two_arg(sql_context * context, int argc, sql_value **argv)
+{
+	const unsigned char *input_str;
+	assert(argc == 2);
+
+	if (sql_value_type(argv[1]) == SQL_NULL) {
+		return;
+	}
+	if ((input_str = sql_value_text(argv[1])) == NULL) {
+		return;
+	}
+
+	int input_str_sz = sql_value_bytes(argv[1]);
+	assert(input_str == sql_value_text(argv[1]));
+
+	const unsigned char *collation;
+	if (sql_value_type(argv[0]) == SQL_INTEGER) {
+		trim_procedure(context, sql_value_int(argv[0]),
+			       (const unsigned char *) " ", 1,
+			       input_str, input_str_sz);
+	} else if ((collation = sql_value_text(argv[0])) == NULL) {
+		return;
+	} else {
+		int coll_sz = sql_value_bytes(argv[0]);
+		trim_procedure(context, BOTH, collation, coll_sz, input_str,
+			       input_str_sz);
+	}
+}
+
+/**
+ * TRIM() implementation for the three-argument form.
+ *
+ * Case: TRIM(LEADING/TRAILING/BOTH <trim_collation> FROM <str>)
+ * Calls trim_procedure() with argv[0] as the flags and argv[1] as the
+ * characters to remove; returns early if <str> or argv[1] has no text.
+ *
+ * @param context SQL context, passed through to trim_procedure().
+ * @param argc Number of args; asserted to be 3.
+ * @param argv Args array: [0] side code (integer), [1] chars, [2] <str>.
+ */
+static void
+trim_func_three_arg(sql_context * context, int argc, sql_value **argv)
+{
+	const unsigned char *input_str;
+	assert(argc == 3);
+
+	if (sql_value_type(argv[2]) == SQL_NULL) {
+		return;
+	}
+	if ((input_str = sql_value_text(argv[2])) == NULL) {
+		return;
+	}
+
+	int input_str_sz = sql_value_bytes(argv[2]);
+	assert(input_str == sql_value_text(argv[2]));
+
+	const unsigned char *collation;
+	assert(sql_value_type(argv[0]) == SQL_INTEGER);
+	if ((collation = sql_value_text(argv[1])) != 0) {
+		int coll_sz = sql_value_bytes(argv[1]);
+		trim_procedure(context, sql_value_int(argv[0]), collation,
+			       coll_sz, input_str, input_str_sz);
+	} else {
+		return;
+	}
 }
 
 #ifdef SQL_ENABLE_UNKNOWN_SQL_FUNCTION
@@ -1738,12 +1850,9 @@ sqlRegisterBuiltinFunctions(void)
 			  FIELD_TYPE_INTEGER),
 		FUNCTION2(likely, 1, 0, 0, noopFunc, SQL_FUNC_UNLIKELY,
 			  FIELD_TYPE_INTEGER),
-		FUNCTION_COLL(ltrim, 1, 1, 0, trimFunc),
-		FUNCTION_COLL(ltrim, 2, 1, 0, trimFunc),
-		FUNCTION_COLL(rtrim, 1, 2, 0, trimFunc),
-		FUNCTION_COLL(rtrim, 2, 2, 0, trimFunc),
-		FUNCTION_COLL(trim, 1, 3, 0, trimFunc),
-		FUNCTION_COLL(trim, 2, 3, 0, trimFunc),
+		FUNCTION_COLL(trim, 1, 3, 0, trim_func_one_arg),
+		FUNCTION_COLL(trim, 2, 3, 0, trim_func_two_arg),
+		FUNCTION_COLL(trim, 3, 3, 0, trim_func_three_arg),
 		FUNCTION(min, -1, 0, 1, minmaxFunc, FIELD_TYPE_SCALAR),
 		FUNCTION(min, 0, 0, 1, 0, FIELD_TYPE_SCALAR),
 		AGGREGATE2(min, 1, 0, 1, minmaxStep, minMaxFinalize,

diff --git a/src/box/sql/global.c b/src/box/sql/global.c
@@ -223,11 +223,13 @@ SQL_WSD struct sqlConfig sqlConfig = {
 FuncDefHash sqlBuiltinFunctions;
 
 /*
- * Constant tokens for values 0 and 1.
+ * Constant tokens for the integer values 0..3; element i is the token for i.
  */
 const Token sqlIntTokens[] = {
 	{"0", 1, false},
-	{"1", 1, false}
+	{"1", 1, false},
+	{"2", 1, false},
+	{"3", 1, false}
 };
 
 /*

diff --git a/src/box/sql/parse.y b/src/box/sql/parse.y
@@ -937,6 +937,50 @@ expr(A) ::= CAST(X) LP expr(E) AS typedef(T) RP(Y). {
   sqlExprAttachSubtrees(pParse->db, A.pExpr, E.pExpr, 0);
 }
 %endif  SQL_OMIT_CAST
+
+expr(A) ::= TRIM(X) LP trim_operands(Y) RP(E). {
+    A.pExpr = sqlExprFunction(pParse, Y, &X);
+    spanSet(&A, &X, &E);
+  }
+
+%type trim_operands {struct ExprList *}
+%destructor trim_operands { sql_expr_list_delete(pParse->db, $$); }
+/* TRIM() argument list: optional trim_from_clause items followed by <str>. */
+trim_operands(A) ::= trim_from_clause(F) expr(Y). {
+    A = sql_expr_list_append(pParse->db, F, Y.pExpr);
+}
+
+trim_operands(A) ::= expr(Y). {
+    A = sql_expr_list_append(pParse->db, NULL, Y.pExpr);
+}
+
+%type trim_from_clause {struct ExprList *}
+%destructor trim_from_clause { sql_expr_list_delete(pParse->db, $$); }
+/* Items before FROM: an expr, or a side keyword with an optional expr. */
+trim_from_clause(A) ::= expr(Y) FROM. {
+    A = sql_expr_list_append(pParse->db, NULL, Y.pExpr);
+}
+
+trim_from_clause(A) ::= trim_specification(N) trim_character(Y) FROM. {
+  struct Expr *p = sqlExprAlloc(pParse->db, TK_INTEGER, &sqlIntTokens[N], 1);
+  A = sql_expr_list_append(pParse->db, NULL, p);
+  if (Y != NULL) {
+    A = sql_expr_list_append(pParse->db, A, Y);
+  }
+}
+
+%type trim_character {struct Expr *}
+%destructor trim_character {sql_expr_delete(pParse->db, $$, false);}
+/* Optional trim character expression; NULL when it is omitted. */
+trim_character(A) ::= . { A = NULL; }
+trim_character(A) ::= expr(X). { A = X.pExpr; }
+
+%type trim_specification {int}
+/* Side codes 1..3 are used above as an index into sqlIntTokens[]. */
+trim_specification(A) ::= LEADING.  {A = 1;}
+trim_specification(A) ::= TRAILING. {A = 2;}
+trim_specification(A) ::= BOTH.     {A = 3;}
+
 expr(A) ::= id(X) LP distinct(D) exprlist(Y) RP(E). {
   if( Y && Y->nExpr>pParse->db->aLimit[SQL_LIMIT_FUNCTION_ARG] ){
     const char *err =