Permalink
Browse files

Change lexer definition of high bytes.

flex does not(*) support UTF-8, therefore
alpha	[a-z_\.À-゚]
has always been interpreted by flex as
alpha	[a-z_\.\xC3\x80-\xEF\xBE\x9F]

I assume this is not what was intended, and the only reason it worked is that C3 (195), BE (190) and 9F (159) are already covered by 80-EF (128-239). Incidentally, this range would also cover the whole Unicode BMP in UTF-8.
This change should make it more obvious.

(*) There were some patches in 2012 and 2014 but they don't seem to have been merged.
  • Loading branch information...
dmoagx committed Feb 1, 2015
1 parent c25bb06 commit 60a5d64518f09af80059e2e293849ff6b0d6be75
Showing with 4 additions and 4 deletions.
  1. +3 −3 Source/SPEditorTokens.l
  2. +1 −1 Source/SPSQLTokenizer.l
@@ -56,11 +56,11 @@ size_t yyuoffset, yyuleng;
%option case-insensitive
s [ \t\n\r]+
alpha [a-z_\.À-゚]
alpha [a-z_\.\x80-\xEF]
numeric ([+-]?(([0-9]+\.[0-9]+)|([0-9]*\.[0-9]+)|([0-9]+))(e[+-]?[0-9]+)?)
ops "+"|"-"|"*"|"/"
word [a-z_\.0-9À-゚@]
variable @{1,2}[a-z_\.0-9À-゚$]+
word [a-z_\.0-9\x80-\xEF@]
variable @{1,2}[a-z_\.0-9\x80-\xEF$]+
keyworda (G(R(OUP{s}BY|ANT(S)?)|E(NERAL|T_FORMAT|OMETRY(COLLECTION)?)|LOBAL)|B(Y(TE)?|TREE|I(GINT|N(LOG|ARY)|T)|O(TH|OL(EAN)?)|E(GIN|TWEEN|FORE)|LOB|ACKUP{s}TABLE)|H(IGH_PRIORITY|O(ST(S)?|UR(_(MI(NUTE|CROSECOND)|SECOND))?)|ELP|A(SH|NDLER|VING))|C(R(OSS|EATE)|H(ECK(SUM)?|A(R(SET|ACTER)?|NGE(D)?|IN))|IPHER|O(M(M(IT(TED)?|ENT)|P(RESSED|LETION|ACT))|N(S(TRAINT(_(SCHEMA|NAME|CATALOG))?|ISTENT)|NECTION|CURRENT|T(RIBUTORS|INUE|AINS)|DITION|VERT)|DE|L(UMN(S|_(NAME|FORMAT))?|LATE)|ALESCE{s}PARTITION)|U(R(RENT_(TIME(STAMP)?|DATE|USER)|SOR(_NAME)?)|BE)|L(IENT|OSE|ASS_ORIGIN)|A(S(CADE(D)?|E)|CHE{s}INDEX|TALOG_NAME|LL))|I(GNORE(_SERVER_IDS)?|MPORT{s}TABLESPACE|S(SUER|OLATION)?|N(S(TALL({s}PLUGIN)?|E(RT(_METHOD)?|NSITIVE))|N(O(BASE|DB)|ER)|T(1|2|8|3|O({s}(DUMP|OUT)FILE)?|4|E(RVAL|GER))?|ITIAL_SIZE|OUT|DEX(ES)?|VOKER|FILE)?|TERATE|O_THREAD|DENTIFIED|F)|D(ROP|YNAMIC|I(RECTORY|S(CARD{s}TABLESPACE|TINCT(ROW)?|K|ABLE{s}KEYS)|V)|O(UBLE)?|U(MPFILE|PLICATE|AL)|E(S(C(RIBE)?|_KEY_FILE)|C(IMAL|LARE)?|TERMINISTIC|F(INER|AULT)|L(ETE|AY(_KEY_WRITE|ED))|ALLOCATE)|A(Y(_(MI(NUTE|CROSECOND)|SECOND|HOUR))?|T(E(TIME)?|A(BASE(S)?|FILE)?)))|JOIN|E(RRORS|X(TEN(T_SIZE|DED)|I(STS|T)|P(LAIN|ANSION)|ECUTE)|SCAPE(D{s}BY)?|N(GINE(S)?|CLOSED{s}BY|D(S)?|UM|ABLE{s}KEYS)|VE(RY|NT)|LSE(IF)?|ACH)|K(ILL({s}(CONNECTION|QUERY))?|EY(S|_BLOCK_SIZE)?)|F(R(OM|AC_SECOND)|I(RST|XED|LE)|O(R(CE|EIGN)?|UND)|U(NCTION|LL(TEXT)?)|ETCH|L(OAT(8|4)?|USH)|A(ST|LSE))|A(G(GREGATE|AINST)|S(C(II)?|ENSITIVE)?|N(Y|D|ALYZE)|C(CESSIBLE|TION)|T|DD|UT(HORS|O(_INCREMENT|EXTEND_SIZE))|VG(_ROW_LENGTH)?|FTER|L(GORITHM|TER|L)))
keywordl (R(TREE|IGHT|O(UTINE|W(S|_FORMAT)?|LL(BACK|UP))|E(GEXP|MOVE{s}PARTITIONING|BUILD{s}PARTITION|S(T(RICT|ORE{s}TABLE)|IGNAL|UME|ET)|NAME|COVER|TURN(S)?|ORGANIZE{s}PARTITION|D(O(_BUFFER_SIZE|FILE)|UNDANT)|P(EAT(ABLE)?|L(ICATION|ACE)|AIR)|VOKE|QUIRE|FERENCES|L(OAD|EASE|AY_(THREAD|LOG_(POS|FILE)))|A(D(S|_(ONLY|WRITE))?|L))|LIKE|ANGE)|M(YSQL_ERRNO|I(GRATE|N(_ROWS|UTE(_(MICROSECOND|SECOND))?)|CROSECOND|DDLEINT)|O(NTH|D(IF(Y|IES)|E)?)|U(TEX|LTI(PO(INT|LYGON)|LINESTRING))|E(RGE|MORY|SSAGE_TEXT|DIUM(BLOB|TEXT|INT)?)|A(X(_(ROWS|SIZE|CONNECTIONS_PER_HOUR|U(SER_CONNECTIONS|PDATES_PER_HOUR)|QUERIES_PER_HOUR)|VALUE)|STER(_(S(SL(_(C(IPHER|ERT|A(PATH)?)|VERIFY_SERVER_CERT|KEY))?|ERVER_ID)|H(OST|EARTBEAT_PERIOD)|CONNECT_RETRY|USER|P(ORT|ASSWORD)|LOG_(POS|FILE)))?|TCH))|N(CHAR|O(NE|_W(RITE_TO_BINLOG|AIT)|T|DEGROUP)?|DB(CLUSTER)?|U(MERIC|LL)|E(XT|W)|VARCHAR|A(ME(S)?|T(IONAL|URAL)))|O(R(DER{s}BY)?|N(({s}DUPLICATE{s}KEY{s}UPDATE)?|E(_SHOT)?|LINE)?|UT(ER|FILE)?|P(TI(MIZE|ON(S|ALLY)?)|EN)|FF(SET|LINE)|WNER|LD_PASSWORD)|P(R(I(MARY|VILEGES)|OCE(SS|DURE{s}(ANALYSE)?)|E(SERVE|CISION|PARE|V))|HASE|O(RT|INT|LYGON)|URGE|A(R(SER|TI(TION(S|ING)?|AL))|SSWORD|CK_KEYS))|QU(ICK|ERY|ARTER)|L(I(MIT|ST|NE(S(TRING)?|AR)|KE)|O(G(S|FILE({s}GROUP))|NG(BLOB|TEXT)?|C(K(S)?|AL(TIME(STAMP)?)?)|OP|W_PRIORITY|AD{s}(DATA|INDEX{s}INTO{s}CACHE|XML))|E(SS|VEL|FT|A(DING|VE(S)?))|A(ST|NGUAGE)))
keywords (X(OR|509|A)|S(MALLINT|SL|H(OW({s}(E(NGINE(S)?|RRORS)|M(ASTER|UTEX)|BINLOG|GRANTS|INNODB|P(RIVILEGES|ROFILE(S)?|ROCEDURE{s}CODE)|SLAVE{s}(HOSTS|STATUS)|TRIGGERS|VARIABLES|WARNINGS|(FULL{s})?PROCESSLIST|FIELDS|PLUGIN(S)?|STORAGE{s}ENGINES|TABLE{s}TYPES|CO(LUMNS|LLATION)|BINLOG{s}EVENTS))?|UTDOWN|ARE)|NAPSHOT|CHE(MA(S|_NAME)?|DULE(R)?)|T(R(ING|AIGHT_JOIN)|O(RAGE|P)|A(RT(S|ING{s}BY)?|TUS))|I(GN(ED|AL)|MPLE)|O(ME|NAME|CKET|UNDS)|U(B(CLASS_ORIGIN|JECT|PARTITION(S)?)|SPEND|PER)|P(ECIFIC|ATIAL)|E(R(IAL(IZABLE)?|VER)|SSION|NSITIVE|C(OND(_MICROSECOND)?|URITY)|T({s}(PASSWORD|NAMES|ONE_SHOT))?|PARATOR|LECT)|QL(STATE|_(MAX_JOIN_SIZE|B(IG_(RESULT|SELECTS|TABLES)|UFFER_RESULT)|S(MALL_RESULT|ELECT_LIMIT|LAVE_SKIP_COUNTER|AFE_UPDATES)|NO_CACHE|CA(CHE|LC_FOUND_ROWS)|T(SI_(M(INUTE|ONTH)|SECOND|HOUR|YEAR|DAY|QUARTER|FRAC_SECOND|WEEK)|HREAD)|QUOTE_SHOW_CREATE|WARNINGS|LO(G_(BIN|OFF|UPDATE)|W_PRIORITY_UPDATES)|AUTO_IS_NULL)|EXCEPTION|WARNING)?|L(OW|AVE)|AVEPOINT)|YEAR(_MONTH)?|T(R(IGGER(S)?|U(NCATE|E)|A(NSACTION|ILING))|H(EN|AN)|YPE|I(ME(STAMP(DIFF|ADD)?)?|NY(BLOB|TEXT|INT))|O|E(RMINATED{s}BY|XT|MP(TABLE|ORARY))|ABLE(S(PACE)?|_NAME)?)|ZEROFILL|U(S(ING|E(R(_RESOURCES)?|_FRM)?|AGE)|N(SIGNED|COMMITTED|TIL|I(NSTALL({s}PLUGIN)?|CODE|ON|QUE)|D(O(_BUFFER_SIZE|FILE)?|EFINED)|KNOWN|LOCK)|TC_(TIME(STAMP)?|DATE)|P(GRADE|DATE))|V(IEW|A(R(BINARY|YING|CHAR(ACTER)?|IABLES)|LUE(S)?))|W(R(ITE|APPER)|H(ILE|E(RE|N))|ITH({s}PARSER)?|ORK|EEK|A(RNINGS|IT)))
@@ -50,7 +50,7 @@ size_t yyuoffset, yyuleng;
s [ \t\n\r]
dkey "delimiter"
scol ";"
dval [!-]
dval [!-\x7E\x80-\xEF]
compstart "begin"{s}
compend {s}"end"
%x comment

0 comments on commit 60a5d64

Please sign in to comment.