Skip to content

Commit

Permalink
PS-5674: gen_lex_token generator reworked. Percona tokens move to the…
Browse files Browse the repository at this point in the history
… end of the list, because they do not fit into spare area provided by upstream anymore.
  • Loading branch information
kamil-holubicki authored and dutow committed Nov 29, 2019
1 parent 7cd7064 commit 214212a
Show file tree
Hide file tree
Showing 4 changed files with 138 additions and 36 deletions.
112 changes: 91 additions & 21 deletions sql/gen_lex_token.cc
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,8 @@
- [908 .. 999] reserved for sql_yacc.yy new tokens
- [1000 .. 1017] non terminal tokens from sql_hints.yy
- [1018 .. 1099] reserved for sql_hints.yy new tokens
- [1100 .. 1111] non terminal tokens for digests
- [1100 .. 1299] non terminal tokens for digests
- [1300 .. ] Percona tokens from sql_yacc.yy
Should gen_lex_token fail when tokens are exhausted
(maybe you are reading this comment because of a fprintf(stderr) below),
Expand Down Expand Up @@ -85,8 +86,11 @@
- likewise for sql/sql_hints.yy
*/

int start_token_range_for_sql_hints = 1005; // MERGETODO
int start_token_range_for_sql_hints = 1000;
int start_token_range_for_digests = 1100;
int start_token_range_for_sql_percona = 1300;
int percona_tokens = 0;

/*
This is a tool used during build only,
so MY_MAX_TOKEN does not need to be exact,
Expand All @@ -99,19 +103,21 @@ int start_token_range_for_digests = 1100;
- DIGEST special tokens.
See also YYMAXUTOK.
*/
#define MY_MAX_TOKEN 1200
#define MY_MAX_TOKEN 1400
/** Generated token. */
struct gen_lex_token_string {
const char *m_token_string;
int m_token_length;
bool m_append_space;
bool m_start_expr;
bool m_percona_token;
};

gen_lex_token_string compiled_token_array[MY_MAX_TOKEN];
int max_token_seen = 0;
int max_token_seen_in_sql_yacc = 0;
int max_token_seen_in_sql_hints = 0;
int max_token_seen_in_special_tokens = 0;

char char_tokens[256];

Expand All @@ -136,8 +142,9 @@ int tok_unused = 0;
of separate parsers may interfere.
*/
int tok_hint_adjust = 0;
int tok_percona_adjust = 0;

static void set_token(int tok, const char *str) {
static void set_token(int tok, const char *str, bool percona_token = false) {
if (tok <= 0) {
fprintf(stderr, "Bad token found\n");
exit(1);
Expand All @@ -156,12 +163,34 @@ static void set_token(int tok, const char *str) {
compiled_token_array[tok].m_token_length = strlen(str);
compiled_token_array[tok].m_append_space = true;
compiled_token_array[tok].m_start_expr = false;
compiled_token_array[tok].m_percona_token = percona_token;
}

/*
  Flag the given token as one that can start an expression.
  NOTE(review): the consumers of m_start_expr are not visible in this file;
  presumably the digest code uses it when folding literal lists — confirm
  against the reader of lex_token_array.
*/
static void set_start_expr_token(int tok) {
compiled_token_array[tok].m_start_expr = true;
}

/*
  Register all Percona-specific tokens from sql_yacc.yy.

  Percona tokens are relocated to the dedicated range starting at
  start_token_range_for_sql_percona (1300), because they no longer fit in
  the spare area left by upstream. The shift applied to every Percona
  token is published through tok_percona_adjust (emitted later as
  TOK_PERCONA_ADJUST).

  Side effects: sets tok_percona_adjust, fills compiled_token_array via
  set_token(..., true), and counts the registered tokens in percona_tokens.
*/
static void add_percona_tokens()
{
const size_t symbol_count = sizeof(symbols) / sizeof(symbols[0]);

/* First pass: find the smallest Percona token value in the main parser. */
int tok_percona_min = INT_MAX;
for (size_t i = 0; i < symbol_count; i++) {
if (!(symbols[i].percona_symbol)) continue;
if (!(symbols[i].group & SG_MAIN_PARSER)) continue;
if (static_cast<int>(symbols[i].tok) < tok_percona_min)
tok_percona_min = symbols[i].tok;
}

/*
  No Percona symbol found: leave tok_percona_adjust at its default (0)
  instead of computing a garbage shift from INT_MAX, and register nothing.
*/
if (tok_percona_min == INT_MAX) return;

/* Shift so the lowest Percona token lands exactly at the range start. */
tok_percona_adjust = start_token_range_for_sql_percona - tok_percona_min;

/* Second pass: register every Percona token at its shifted slot. */
for (size_t i = 0; i < symbol_count; i++) {
if (!(symbols[i].percona_symbol)) continue;
if (!(symbols[i].group & SG_MAIN_PARSER)) continue;
set_token(symbols[i].tok + tok_percona_adjust, symbols[i].name, true);
percona_tokens++;
}
}

static void compute_tokens() {
int tok;
unsigned int i;
Expand All @@ -175,6 +204,7 @@ static void compute_tokens() {
compiled_token_array[tok].m_token_length = 9;
compiled_token_array[tok].m_append_space = true;
compiled_token_array[tok].m_start_expr = false;
compiled_token_array[tok].m_percona_token = false;
}

/*
Expand Down Expand Up @@ -235,6 +265,7 @@ static void compute_tokens() {
See symbols[] in sql/lex.h
*/
for (i = 0; i < sizeof(symbols) / sizeof(symbols[0]); i++) {
if (symbols[i].percona_symbol) continue;
if (!(symbols[i].group & SG_MAIN_PARSER)) continue;
set_token(symbols[i].tok, symbols[i].name);
}
Expand Down Expand Up @@ -325,9 +356,14 @@ static void compute_tokens() {
tok_in_generic_value_expression = max_token_seen++;
set_token(tok_in_generic_value_expression, "IN (...)");

max_token_seen_in_special_tokens = max_token_seen;

/* Percona tokens */
add_percona_tokens();

/* Add new digest tokens here */

tok_unused = max_token_seen++;
tok_unused = start_token_range_for_sql_percona + percona_tokens;
set_token(tok_unused, "UNUSED");

/*
Expand Down Expand Up @@ -400,68 +436,100 @@ static void print_tokens() {
int tok;

printf("#ifdef LEX_TOKEN_WITH_DEFINITION\n");
printf("lex_token_string lex_token_array[]=\n");
printf("static lex_token_string lex_token_array[]=\n");
printf("{\n");
printf("/* PART 1: character tokens. */\n");

for (tok = 0; tok < 256; tok++) {
printf("/* %03d */ { \"\\x%02x\", 1, %s, %s},\n", tok, tok,
printf("/* %03d */ { \"\\x%02x\", 1, %s, %s, %d},\n", tok, tok,
compiled_token_array[tok].m_append_space ? "true" : "false",
compiled_token_array[tok].m_start_expr ? "true" : "false");
compiled_token_array[tok].m_start_expr ? "true" : "false",
compiled_token_array[tok].m_percona_token);
}

printf("/* PART 2: named tokens from sql/sql_yacc.yy. */\n");

for (tok = 256; tok <= max_token_seen_in_sql_yacc; tok++) {
printf("/* %03d */ { \"%s\", %d, %s, %s},\n", tok,
printf("/* %03d */ { \"%s\", %d, %s, %s, %d},\n", tok,
compiled_token_array[tok].m_token_string,
compiled_token_array[tok].m_token_length,
compiled_token_array[tok].m_append_space ? "true" : "false",
compiled_token_array[tok].m_start_expr ? "true" : "false");
compiled_token_array[tok].m_start_expr ? "true" : "false",
compiled_token_array[tok].m_percona_token);
}

printf("/* PART 3: padding reserved for sql/sql_yacc.yy extensions. */\n");

for (tok = max_token_seen_in_sql_yacc + 1;
tok < start_token_range_for_sql_hints; tok++) {
printf(
"/* reserved %03d for sql/sql_yacc.yy */ { \"\", 0, false, false},\n",
tok);
"/* reserved %03d for sql/sql_yacc.yy */ { \"\", 0, false, false, %d},\n",
tok, compiled_token_array[tok].m_percona_token);
}

printf("/* PART 4: named tokens from sql/sql_hints.yy. */\n");

for (tok = start_token_range_for_sql_hints;
tok <= max_token_seen_in_sql_hints; tok++) {
printf("/* %03d */ { \"%s\", %d, %s, %s},\n", tok,
printf("/* %03d */ { \"%s\", %d, %s, %s, %d},\n", tok,
compiled_token_array[tok].m_token_string,
compiled_token_array[tok].m_token_length,
compiled_token_array[tok].m_append_space ? "true" : "false",
compiled_token_array[tok].m_start_expr ? "true" : "false");
compiled_token_array[tok].m_start_expr ? "true" : "false",
compiled_token_array[tok].m_percona_token);
}

printf("/* PART 5: padding reserved for sql/sql_hints.yy extensions. */\n");

for (tok = max_token_seen_in_sql_hints + 1;
tok < start_token_range_for_digests; tok++) {
printf(
"/* reserved %03d for sql/sql_hints.yy */ { \"\", 0, false, false},\n",
tok);
"/* reserved %03d for sql/sql_hints.yy */ { \"\", 0, false, false, %d},\n",
tok, compiled_token_array[tok].m_percona_token);
}

printf("/* PART 6: Digest special tokens. */\n");

for (tok = start_token_range_for_digests; tok < max_token_seen; tok++) {
printf("/* %03d */ { \"%s\", %d, %s, %s},\n", tok,
for (tok = start_token_range_for_digests; tok < max_token_seen_in_special_tokens; tok++) {
printf("/* %03d */ { \"%s\", %d, %s, %s, %d},\n", tok,
compiled_token_array[tok].m_token_string,
compiled_token_array[tok].m_token_length,
compiled_token_array[tok].m_append_space ? "true" : "false",
compiled_token_array[tok].m_start_expr ? "true" : "false",
compiled_token_array[tok].m_percona_token);
}

printf("/* PART 7: padding reserved for digest special tokens. */\n");

for (tok = max_token_seen_in_special_tokens + 1;
tok < start_token_range_for_sql_percona; tok++) {
printf(
"/* reserved %03d for digest special tokens */ { \"\", 0, false, false, %d},\n",
tok, compiled_token_array[tok].m_percona_token);
}

printf("/* PART 8: Percona tokens. */\n");

for (tok = start_token_range_for_sql_percona; tok < start_token_range_for_sql_percona + percona_tokens; tok++) {
printf("/* %03d */ { \"%s\", %d, %s, %s, %d},\n", tok,
compiled_token_array[tok].m_token_string,
compiled_token_array[tok].m_token_length,
compiled_token_array[tok].m_append_space ? "true" : "false",
compiled_token_array[tok].m_start_expr ? "true" : "false");
compiled_token_array[tok].m_start_expr ? "true" : "false",
compiled_token_array[tok].m_percona_token);
}

printf("/* PART 7: End of token list. */\n");
printf("/* PART 9: UNUSED token. */\n");
printf("/* %03d */ { \"%s\", %d, %s, %s, %d},\n", tok_unused,
compiled_token_array[tok_unused].m_token_string,
compiled_token_array[tok_unused].m_token_length,
compiled_token_array[tok_unused].m_append_space ? "true" : "false",
compiled_token_array[tok_unused].m_start_expr ? "true" : "false",
compiled_token_array[tok_unused].m_percona_token);

printf("/* PART 10: End of token list. */\n");

printf("/* DUMMY */ { \"\", 0, false, false}\n");
printf("/* DUMMY */ { \"\", 0, false, false, %d}\n", 0);
printf("};\n");
printf("#endif /* LEX_TOKEN_WITH_DEFINITION */\n");

Expand All @@ -480,6 +548,7 @@ static void print_tokens() {
printf("#define TOK_IN_GENERIC_VALUE_EXPRESSION %d\n",
tok_in_generic_value_expression);
printf("#define TOK_HINT_ADJUST(x) ((x) + %d)\n", tok_hint_adjust);
printf("#define TOK_PERCONA_ADJUST(x) ((x) + %d)\n", tok_percona_adjust);
printf("#define TOK_UNUSED %d\n", tok_unused);
}

Expand Down Expand Up @@ -518,6 +587,7 @@ int main(int, char **) {
printf(" int m_token_length;\n");
printf(" bool m_append_space;\n");
printf(" bool m_start_expr;\n");
printf(" bool m_percona_token;\n");
printf("};\n");
printf("typedef struct lex_token_string lex_token_string;\n");

Expand Down
46 changes: 32 additions & 14 deletions sql/lex.h
Original file line number Diff line number Diff line change
Expand Up @@ -42,10 +42,28 @@
#define HINT_COMMENT_STARTER "/*+"
#define HINT_COMMENT_TERMINATOR "*/"

#define SYM(T, A) STRING_WITH_LEN(T), SYM_OR_NULL(A), SG_KEYWORDS
#define SYM_FN(T, A) STRING_WITH_LEN(T), SYM_OR_NULL(A), SG_FUNCTIONS
#define SYM_HK(T, A) STRING_WITH_LEN(T), SYM_OR_NULL(A), SG_HINTABLE_KEYWORDS
#define SYM_H(T, A) STRING_WITH_LEN(T), SYM_OR_NULL(A), SG_HINTS
#define SYM(T, A) STRING_WITH_LEN(T), SYM_OR_NULL(A), SG_KEYWORDS, false
#define SYM_FN(T, A) STRING_WITH_LEN(T), SYM_OR_NULL(A), SG_FUNCTIONS, false
#define SYM_HK(T, A) STRING_WITH_LEN(T), SYM_OR_NULL(A), SG_HINTABLE_KEYWORDS, false
#define SYM_H(T, A) STRING_WITH_LEN(T), SYM_OR_NULL(A), SG_HINTS, false

/*
* Percona defined tokens are located together with upstream tokens
* in sql_yacc.yy. However we put them at the end of the token list, after
hint tokens (sql_hints.yy). When we add a Percona token to the digest
* generator input buffer, we need to adjust its value (shift it up)
to not clash with adjusted (shifted up) hint tokens.
* That is why we need to detect Percona tokens (following macro)
*
* Example:
* EFFECTIVE_SYM in sql_yacc.h is 1001.
* But hint tag RESOURCE_GROUP after applying shift in Hint_scanner::add_hint_token_digest()
* is 1001 as well. So these 2 would result with the same token in digest
* generator input. To prevent this we detect Percona token and adjust its value
* before adding to digest generator input (Lex_input_stream::add_digest_token())
* Read comments in gen_lex_token.cc for additional info.
*/
#define SYM_PERCONA(T, A) STRING_WITH_LEN(T), SYM_OR_NULL(A), SG_KEYWORDS, true

/*
Symbols are broken into separated arrays to allow field names with
Expand Down Expand Up @@ -121,7 +139,7 @@ static const SYMBOL symbols[] = {
{SYM("CHAIN", CHAIN_SYM)},
{SYM("CHANGE", CHANGE)},
{SYM("CHANGED", CHANGED)},
{SYM("CHANGED_PAGE_BITMAPS", CHANGED_PAGE_BITMAPS_SYM)},
{SYM_PERCONA("CHANGED_PAGE_BITMAPS", CHANGED_PAGE_BITMAPS_SYM)},
{SYM("CHANNEL", CHANNEL_SYM)},
{SYM("CHAR", CHAR_SYM)},
{SYM("CHARACTER", CHAR_SYM)},
Expand All @@ -130,10 +148,10 @@ static const SYMBOL symbols[] = {
{SYM("CIPHER", CIPHER_SYM)},
{SYM("CLASS_ORIGIN", CLASS_ORIGIN_SYM)},
{SYM("CLIENT", CLIENT_SYM)},
{SYM("CLIENT_STATISTICS", CLIENT_STATS_SYM)},
{SYM_PERCONA("CLIENT_STATISTICS", CLIENT_STATS_SYM)},
{SYM("CLONE", CLONE_SYM)},
{SYM("CLOSE", CLOSE_SYM)},
{SYM("CLUSTERING", CLUSTERING_SYM)},
{SYM_PERCONA("CLUSTERING", CLUSTERING_SYM)},
{SYM("COALESCE", COALESCE)},
{SYM("CODE", CODE_SYM)},
{SYM("COLLATE", COLLATE_SYM)},
Expand All @@ -150,9 +168,9 @@ static const SYMBOL symbols[] = {
{SYM("COMPONENT", COMPONENT_SYM)},
{SYM("COMPRESSION", COMPRESSION_SYM)},
{SYM("COMPRESSED", COMPRESSED_SYM)},
{SYM("COMPRESSION_DICTIONARY", COMPRESSION_DICTIONARY_SYM)},
{SYM_PERCONA("COMPRESSION_DICTIONARY", COMPRESSION_DICTIONARY_SYM)},
{SYM("ENCRYPTION", ENCRYPTION_SYM)},
{SYM("ENCRYPTION_KEY_ID", ENCRYPTION_KEY_ID_SYM)},
{SYM_PERCONA("ENCRYPTION_KEY_ID", ENCRYPTION_KEY_ID_SYM)},
{SYM("CONCURRENT", CONCURRENT)},
{SYM("CONDITION", CONDITION_SYM)},
{SYM("CONNECTION", CONNECTION_SYM)},
Expand Down Expand Up @@ -218,7 +236,7 @@ static const SYMBOL symbols[] = {
{SYM("DUPLICATE", DUPLICATE_SYM)},
{SYM("DYNAMIC", DYNAMIC_SYM)},
{SYM("EACH", EACH_SYM)},
{SYM("EFFECTIVE", EFFECTIVE_SYM)},
{SYM_PERCONA("EFFECTIVE", EFFECTIVE_SYM)},
{SYM("ELSE", ELSE)},
{SYM("ELSEIF", ELSEIF_SYM)},
{SYM("EMPTY", EMPTY_SYM)},
Expand Down Expand Up @@ -311,7 +329,7 @@ static const SYMBOL symbols[] = {
{SYM("IN", IN_SYM)},
{SYM("INACTIVE", INACTIVE_SYM)},
{SYM("INDEX", INDEX_SYM)},
{SYM("INDEX_STATISTICS", INDEX_STATS_SYM)},
{SYM_PERCONA("INDEX_STATISTICS", INDEX_STATS_SYM)},
{SYM("INDEXES", INDEXES)},
{SYM("INFILE", INFILE)},
{SYM("INITIAL_SIZE", INITIAL_SIZE_SYM)},
Expand Down Expand Up @@ -679,15 +697,15 @@ static const SYMBOL symbols[] = {
{SYM("TABLES", TABLES)},
{SYM("TABLESPACE", TABLESPACE_SYM)},
{SYM("TABLE_CHECKSUM", TABLE_CHECKSUM_SYM)},
{SYM("TABLE_STATISTICS", TABLE_STATS_SYM)},
{SYM_PERCONA("TABLE_STATISTICS", TABLE_STATS_SYM)},
{SYM("TEMPORARY", TEMPORARY)},
{SYM("TEMPTABLE", TEMPTABLE_SYM)},
{SYM("TERMINATED", TERMINATED)},
{SYM("TEXT", TEXT_SYM)},
{SYM("THAN", THAN_SYM)},
{SYM("THEN", THEN_SYM)},
{SYM("THREAD_PRIORITY", THREAD_PRIORITY_SYM)},
{SYM("THREAD_STATISTICS", THREAD_STATS_SYM)},
{SYM_PERCONA("THREAD_STATISTICS", THREAD_STATS_SYM)},
{SYM("TIES", TIES_SYM)},
{SYM("TIME", TIME_SYM)},
{SYM("TIMESTAMP", TIMESTAMP_SYM)},
Expand Down Expand Up @@ -724,7 +742,7 @@ static const SYMBOL symbols[] = {
{SYM("USE", USE_SYM)},
{SYM("USER", USER)},
{SYM("USER_RESOURCES", RESOURCES)},
{SYM("USER_STATISTICS", USER_STATS_SYM)},
{SYM_PERCONA("USER_STATISTICS", USER_STATS_SYM)},
{SYM("USE_FRM", USE_FRM)},
{SYM("USING", USING)},
{SYM("UTC_DATE", UTC_DATE_SYM)},
Expand Down
1 change: 1 addition & 0 deletions sql/lex_symbol.h
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ struct SYMBOL {
const unsigned int tok;
/** group mask, see SYM_GROUP enum for bits. */
int group;
bool percona_symbol;
};

struct LEX_SYMBOL {
Expand Down
Loading

0 comments on commit 214212a

Please sign in to comment.