Skip to content

Commit

Permalink
feat: support all utf8 characters as alias
Browse files Browse the repository at this point in the history
  • Loading branch information
wangjiaming0909 committed Oct 27, 2023
1 parent e7e7748 commit cf158ab
Show file tree
Hide file tree
Showing 5 changed files with 4,186 additions and 4,020 deletions.
229 changes: 115 additions & 114 deletions include/common/ttokendef.h
Original file line number Diff line number Diff line change
Expand Up @@ -246,120 +246,121 @@
#define TK_INSERT 227
#define TK_NULL 228
#define TK_NK_QUESTION 229
#define TK_NK_ARROW 230
#define TK_ROWTS 231
#define TK_QSTART 232
#define TK_QEND 233
#define TK_QDURATION 234
#define TK_WSTART 235
#define TK_WEND 236
#define TK_WDURATION 237
#define TK_IROWTS 238
#define TK_ISFILLED 239
#define TK_CAST 240
#define TK_NOW 241
#define TK_TODAY 242
#define TK_TIMEZONE 243
#define TK_CLIENT_VERSION 244
#define TK_SERVER_VERSION 245
#define TK_SERVER_STATUS 246
#define TK_CURRENT_USER 247
#define TK_CASE 248
#define TK_WHEN 249
#define TK_THEN 250
#define TK_ELSE 251
#define TK_BETWEEN 252
#define TK_IS 253
#define TK_NK_LT 254
#define TK_NK_GT 255
#define TK_NK_LE 256
#define TK_NK_GE 257
#define TK_NK_NE 258
#define TK_MATCH 259
#define TK_NMATCH 260
#define TK_CONTAINS 261
#define TK_IN 262
#define TK_JOIN 263
#define TK_INNER 264
#define TK_SELECT 265
#define TK_NK_HINT 266
#define TK_DISTINCT 267
#define TK_WHERE 268
#define TK_PARTITION 269
#define TK_BY 270
#define TK_SESSION 271
#define TK_STATE_WINDOW 272
#define TK_EVENT_WINDOW 273
#define TK_SLIDING 274
#define TK_FILL 275
#define TK_VALUE 276
#define TK_VALUE_F 277
#define TK_NONE 278
#define TK_PREV 279
#define TK_NULL_F 280
#define TK_LINEAR 281
#define TK_NEXT 282
#define TK_HAVING 283
#define TK_RANGE 284
#define TK_EVERY 285
#define TK_ORDER 286
#define TK_SLIMIT 287
#define TK_SOFFSET 288
#define TK_LIMIT 289
#define TK_OFFSET 290
#define TK_ASC 291
#define TK_NULLS 292
#define TK_ABORT 293
#define TK_AFTER 294
#define TK_ATTACH 295
#define TK_BEFORE 296
#define TK_BEGIN 297
#define TK_BITAND 298
#define TK_BITNOT 299
#define TK_BITOR 300
#define TK_BLOCKS 301
#define TK_CHANGE 302
#define TK_COMMA 303
#define TK_CONCAT 304
#define TK_CONFLICT 305
#define TK_COPY 306
#define TK_DEFERRED 307
#define TK_DELIMITERS 308
#define TK_DETACH 309
#define TK_DIVIDE 310
#define TK_DOT 311
#define TK_EACH 312
#define TK_FAIL 313
#define TK_FILE 314
#define TK_FOR 315
#define TK_GLOB 316
#define TK_ID 317
#define TK_IMMEDIATE 318
#define TK_IMPORT 319
#define TK_INITIALLY 320
#define TK_INSTEAD 321
#define TK_ISNULL 322
#define TK_KEY 323
#define TK_MODULES 324
#define TK_NK_BITNOT 325
#define TK_NK_SEMI 326
#define TK_NOTNULL 327
#define TK_OF 328
#define TK_PLUS 329
#define TK_PRIVILEGE 330
#define TK_RAISE 331
#define TK_RESTRICT 332
#define TK_ROW 333
#define TK_SEMI 334
#define TK_STAR 335
#define TK_STATEMENT 336
#define TK_STRICT 337
#define TK_STRING 338
#define TK_TIMES 339
#define TK_VALUES 340
#define TK_VARIABLE 341
#define TK_VIEW 342
#define TK_WAL 343
#define TK_NK_ALIAS 230
#define TK_NK_ARROW 231
#define TK_ROWTS 232
#define TK_QSTART 233
#define TK_QEND 234
#define TK_QDURATION 235
#define TK_WSTART 236
#define TK_WEND 237
#define TK_WDURATION 238
#define TK_IROWTS 239
#define TK_ISFILLED 240
#define TK_CAST 241
#define TK_NOW 242
#define TK_TODAY 243
#define TK_TIMEZONE 244
#define TK_CLIENT_VERSION 245
#define TK_SERVER_VERSION 246
#define TK_SERVER_STATUS 247
#define TK_CURRENT_USER 248
#define TK_CASE 249
#define TK_WHEN 250
#define TK_THEN 251
#define TK_ELSE 252
#define TK_BETWEEN 253
#define TK_IS 254
#define TK_NK_LT 255
#define TK_NK_GT 256
#define TK_NK_LE 257
#define TK_NK_GE 258
#define TK_NK_NE 259
#define TK_MATCH 260
#define TK_NMATCH 261
#define TK_CONTAINS 262
#define TK_IN 263
#define TK_JOIN 264
#define TK_INNER 265
#define TK_SELECT 266
#define TK_NK_HINT 267
#define TK_DISTINCT 268
#define TK_WHERE 269
#define TK_PARTITION 270
#define TK_BY 271
#define TK_SESSION 272
#define TK_STATE_WINDOW 273
#define TK_EVENT_WINDOW 274
#define TK_SLIDING 275
#define TK_FILL 276
#define TK_VALUE 277
#define TK_VALUE_F 278
#define TK_NONE 279
#define TK_PREV 280
#define TK_NULL_F 281
#define TK_LINEAR 282
#define TK_NEXT 283
#define TK_HAVING 284
#define TK_RANGE 285
#define TK_EVERY 286
#define TK_ORDER 287
#define TK_SLIMIT 288
#define TK_SOFFSET 289
#define TK_LIMIT 290
#define TK_OFFSET 291
#define TK_ASC 292
#define TK_NULLS 293
#define TK_ABORT 294
#define TK_AFTER 295
#define TK_ATTACH 296
#define TK_BEFORE 297
#define TK_BEGIN 298
#define TK_BITAND 299
#define TK_BITNOT 300
#define TK_BITOR 301
#define TK_BLOCKS 302
#define TK_CHANGE 303
#define TK_COMMA 304
#define TK_CONCAT 305
#define TK_CONFLICT 306
#define TK_COPY 307
#define TK_DEFERRED 308
#define TK_DELIMITERS 309
#define TK_DETACH 310
#define TK_DIVIDE 311
#define TK_DOT 312
#define TK_EACH 313
#define TK_FAIL 314
#define TK_FILE 315
#define TK_FOR 316
#define TK_GLOB 317
#define TK_ID 318
#define TK_IMMEDIATE 319
#define TK_IMPORT 320
#define TK_INITIALLY 321
#define TK_INSTEAD 322
#define TK_ISNULL 323
#define TK_KEY 324
#define TK_MODULES 325
#define TK_NK_BITNOT 326
#define TK_NK_SEMI 327
#define TK_NOTNULL 328
#define TK_OF 329
#define TK_PLUS 330
#define TK_PRIVILEGE 331
#define TK_RAISE 332
#define TK_RESTRICT 333
#define TK_ROW 334
#define TK_SEMI 335
#define TK_STAR 336
#define TK_STATEMENT 337
#define TK_STRICT 338
#define TK_STRING 339
#define TK_TIMES 340
#define TK_VALUES 341
#define TK_VARIABLE 342
#define TK_VIEW 343
#define TK_WAL 344



Expand Down
3 changes: 3 additions & 0 deletions source/libs/parser/inc/sql.y
Original file line number Diff line number Diff line change
Expand Up @@ -787,6 +787,7 @@ table_alias(A) ::= NK_ID(B).
%type column_alias { SToken }
%destructor column_alias { }
column_alias(A) ::= NK_ID(B). { A = B; }
column_alias(A) ::= NK_ALIAS(B). { A = B; }

%type user_name { SToken }
%destructor user_name { }
Expand Down Expand Up @@ -873,6 +874,8 @@ expression_list(A) ::= expression_list(B) NK_COMMA expr_or_subquery(C).

column_reference(A) ::= column_name(B). { A = createRawExprNode(pCxt, &B, createColumnNode(pCxt, NULL, &B)); }
column_reference(A) ::= table_name(B) NK_DOT column_name(C). { A = createRawExprNodeExt(pCxt, &B, &C, createColumnNode(pCxt, &B, &C)); }
column_reference(A) ::= NK_ALIAS(B). { A = createRawExprNode(pCxt, &B, createColumnNode(pCxt, NULL, &B)); }
column_reference(A) ::= table_name(B) NK_DOT NK_ALIAS(C). { A = createRawExprNodeExt(pCxt, &B, &C, createColumnNode(pCxt, &B, &C)); }

pseudo_column(A) ::= ROWTS(B). { A = createRawExprNode(pCxt, &B, createFunctionNode(pCxt, &B, NULL)); }
pseudo_column(A) ::= TBNAME(B). { A = createRawExprNode(pCxt, &B, createFunctionNode(pCxt, &B, NULL)); }
Expand Down
31 changes: 27 additions & 4 deletions source/libs/parser/src/parTokenizer.c
Original file line number Diff line number Diff line change
Expand Up @@ -627,9 +627,21 @@ uint32_t tGetToken(const char* z, uint32_t* tokenId) {
case 't':
case 'F':
case 'f': {
for (i = 1; ((z[i] & 0x80) == 0) && isIdChar[(uint8_t)z[i]]; i++) {
bool hasNonAsciiChars = false;
for (i = 1;; i++) {
if ((z[i] & 0x80) != 0) {
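// A byte with the high bit set belongs to a multi-byte UTF-8 sequence.
// Non-ASCII characters are currently supported only in aliases, so a token
// containing any such byte is reported as TK_NK_ALIAS below.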
hasNonAsciiChars = true;
} else if (isIdChar[(uint8_t)z[i]]) {
} else {
break;
}
}
if (hasNonAsciiChars) {
*tokenId = TK_NK_ALIAS; // must be alias
return i;
}

if ((i == 4 && strncasecmp(z, "true", 4) == 0) || (i == 5 && strncasecmp(z, "false", 5) == 0)) {
*tokenId = TK_NK_BOOL;
return i;
Expand All @@ -638,10 +650,21 @@ uint32_t tGetToken(const char* z, uint32_t* tokenId) {
return i;
}
default: {
if (((*z & 0x80) != 0) || !isIdChar[(uint8_t)*z]) {
if ((*z & 0x80) == 0 && !isIdChar[(uint8_t)*z]) {
break;
}
for (i = 1; ((z[i] & 0x80) == 0) && isIdChar[(uint8_t)z[i]]; i++) {
bool hasNonAsciiChars = false;
for (i = 1; ; i++) {
if ((z[i] & 0x80) != 0) {
hasNonAsciiChars = true;
} else if (isIdChar[(uint8_t)z[i]]){
} else {
break;
}
}
if (hasNonAsciiChars) {
*tokenId = TK_NK_ALIAS;
return i;
}
*tokenId = tKeywordCode(z, i);
return i;
Expand Down

0 comments on commit cf158ab

Please sign in to comment.