31
31
#import " SPSQLParser.h"
32
32
#import " RegexKitLite.h"
33
33
34
- /* *
35
- * Include all the extern variables and prototypes required for flex (used for syntax highlighting)
36
- */
37
- #import " SPSQLTokenizer.h"
38
- extern NSInteger tolex ();
39
- extern NSInteger yyuoffset, yyuleng;
40
- typedef struct to_buffer_state *TO_BUFFER_STATE;
41
- void to_switch_to_buffer (TO_BUFFER_STATE);
42
- TO_BUFFER_STATE to_scan_string (const char *);
43
-
44
34
/**
 * Class extension on SPSQLParser declaring two underscore-prefixed helper
 * methods used by the implementation. Their bodies are not visible in this
 * excerpt, so the notes below describe only what the declarations and the
 * visible call sites show.
 */
@interface SPSQLParser ()
45
35
46
36
// Returns the unichar for the supplied index. The string-walking methods
// (e.g. -endIndexOfStringQuotedByCharacter:startingAtIndex:) call this through
// a cached IMP to read characters of the string being parsed.
// NOTE(review): presumably served from the character cache sized by
// CHARACTER_CACHE_LENGTH - confirm against the implementation.
- (unichar ) _charAtIndex : (NSInteger )index ;
47
37
// Takes no arguments and returns nothing.
// NOTE(review): by its name this invalidates the character cache - confirm.
- (void ) _clearCharCache ;
48
38
49
39
@end
50
40
41
+ /* *
42
+ * Define the length of the character cache to use when parsing instead of accessing
43
+ * via characterAtIndex:. There is a balance here between updating the cache very
44
+ * often and access penalties; 1500 appears a reasonable compromise.
45
+ */
46
+ #define CHARACTER_CACHE_LENGTH 1500
47
+
48
// Character constants for the quote characters, the backslash and the line
// ending characters. The parsing loops compare individual unichars against
// these (see the switch statements on currentCharacter and the escape-counting
// loops) instead of repeating the raw character literals.
+ #define CHAR_SQUOTE ' \' '
49
+ #define CHAR_DQUOTE ' "'
50
+ #define CHAR_BTICK ' `'
51
+ #define CHAR_BS ' \\ '
52
+ #define CHAR_CR ' \r '
53
+ #define CHAR_LF ' \n '
54
+
55
// NSString counterparts used as search and replacement strings.
// These must stay plain string literals: -unquotedString builds two-character
// search strings from them by literal adjacency, e.g. (STRING_BS STRING_DQUOTE).
+ #define STRING_SQUOTE @" '"
56
+ #define STRING_DQUOTE @" \" "
57
+ #define STRING_BS @" \\ "
58
+ #define STRING_LF @" \n "
51
59
52
60
/* *
53
61
* Please see the header files for a general description of the purpose of this class,
@@ -91,6 +99,11 @@ - (void) setDelimiterSupport:(BOOL)shouldSupportDelimiters
91
99
supportDelimiters = shouldSupportDelimiters;
92
100
}
93
101
102
/**
 * Stores the supplied flag in the noBackslashEscapes instance variable.
 *
 * While the flag is YES, -unquotedString skips its backslash unescaping step
 * and -endIndexOfStringQuotedByCharacter:startingAtIndex: does not count
 * preceding backslashes when deciding whether a quote character ends the
 * string. -initSQLExtensions resets the flag to NO.
 * NOTE(review): presumably intended to mirror the server's
 * NO_BACKSLASH_ESCAPES SQL mode - confirm with the callers.
 *
 * @param ignoreBackslashEscapes YES to stop treating backslash as an escape character.
 */
+ - (void ) setNoBackslashEscapes : (BOOL )ignoreBackslashEscapes
103
+ {
104
+ noBackslashEscapes = ignoreBackslashEscapes;
105
+ }
106
+
94
107
#pragma mark -
95
108
#pragma mark SQL-aware utility methods
96
109
@@ -109,9 +122,9 @@ - (void) deleteComments
109
122
switch (currentCharacter) {
110
123
111
124
// When quote characters are encountered walk to the end of the quoted string.
112
- case ' \' ' :
113
- case ' " ' :
114
- case ' ` ' :
125
+ case CHAR_SQUOTE :
126
+ case CHAR_DQUOTE :
127
+ case CHAR_BTICK :
115
128
quotedStringEndIndex = [self endIndexOfStringQuotedByCharacter: currentCharacter startingAtIndex: currentStringIndex+1 ];
116
129
if (quotedStringEndIndex == NSNotFound ) {
117
130
return ;
@@ -169,7 +182,7 @@ - (NSString *) unquotedString
169
182
170
183
// If the first character is not a quote character, return the entire string.
171
184
quoteCharacter = CFStringGetCharacterAtIndex ((CFStringRef)string, 0 );
172
- if (quoteCharacter != ' ` ' && quoteCharacter != ' " ' && quoteCharacter != ' \' ' ) {
185
+ if (quoteCharacter != CHAR_BTICK && quoteCharacter != CHAR_DQUOTE && quoteCharacter != CHAR_SQUOTE ) {
173
186
return [NSString stringWithString: string];
174
187
}
175
188
@@ -183,15 +196,16 @@ - (NSString *) unquotedString
183
196
returnString = [NSMutableString stringWithString: [string substringWithRange: NSMakeRange (1 , stringEndIndex-1 )]];
184
197
185
198
// Remove escaped characters and escaped strings as appropriate
186
- if (quoteCharacter == ' `' || quoteCharacter == ' "' || quoteCharacter == ' \' ' ) {
187
- [returnString replaceOccurrencesOfString: [NSString stringWithFormat: @" %C%C " , quoteCharacter, quoteCharacter] withString: [NSString stringWithFormat: @" %C " , quoteCharacter] options: 0 range: NSMakeRange (0 , [returnString length ])];
188
- }
189
- if (quoteCharacter == ' "' ) {
190
- [returnString replaceOccurrencesOfString: @" \\\" " withString: @" \" " options: 0 range: NSMakeRange (0 , [returnString length ])];
191
- [returnString replaceOccurrencesOfString: @" \\\\ " withString: @" \\ " options: 0 range: NSMakeRange (0 , [returnString length ])];
192
- } else if (quoteCharacter == ' \' ' ) {
193
- [returnString replaceOccurrencesOfString: @" \\ '" withString: @" '" options: 0 range: NSMakeRange (0 , [returnString length ])];
194
- [returnString replaceOccurrencesOfString: @" \\\\ " withString: @" \\ " options: 0 range: NSMakeRange (0 , [returnString length ])];
199
+ [returnString replaceOccurrencesOfString: [NSString stringWithFormat: @" %C%C " , quoteCharacter, quoteCharacter] withString: [NSString stringWithFormat: @" %C " , quoteCharacter] options: 0 range: NSMakeRange (0 , [returnString length ])];
200
+
201
+ if (!noBackslashEscapes) {
202
+ if (quoteCharacter == CHAR_DQUOTE) {
203
+ [returnString replaceOccurrencesOfString: (STRING_BS STRING_DQUOTE) withString: STRING_DQUOTE options: 0 range: NSMakeRange (0 , [returnString length ])];
204
+ [returnString replaceOccurrencesOfString: (STRING_BS STRING_BS) withString: STRING_BS options: 0 range: NSMakeRange (0 , [returnString length ])];
205
+ } else if (quoteCharacter == CHAR_SQUOTE) {
206
+ [returnString replaceOccurrencesOfString: (STRING_BS STRING_SQUOTE) withString: STRING_SQUOTE options: 0 range: NSMakeRange (0 , [returnString length ])];
207
+ [returnString replaceOccurrencesOfString: (STRING_BS STRING_BS) withString: STRING_BS options: 0 range: NSMakeRange (0 , [returnString length ])];
208
+ }
195
209
}
196
210
197
211
return returnString;
@@ -202,6 +216,11 @@ - (NSString *) unquotedString
202
216
* ends, and ensures line endings which aren't in quotes are LF.
203
217
*/
204
218
+ (NSString *) normaliseQueryForExecution : (NSString *)queryString
219
+ {
220
// Convenience entry point: forwards to
// +normaliseQueryForExecution:noBackslashEscapes: passing NO, so backslashes
// are still counted as escape characters when locating the end of a quoted
// string. Returns whatever that method returns.
+ return [self normaliseQueryForExecution: queryString noBackslashEscapes: NO ];
221
+ }
222
+
223
+ + (NSString *) normaliseQueryForExecution : (NSString *)queryString noBackslashEscapes : (BOOL )noBackslashEscapes
205
224
{
206
225
NSUInteger stringLength = [queryString length ];
207
226
NSCharacterSet *trimCharset = [NSCharacterSet whitespaceAndNewlineCharacterSet ];
@@ -222,42 +241,45 @@ + (NSString *) normaliseQueryForExecution:(NSString *)queryString
222
241
223
242
// Check for carriage returns in the string
224
243
NSMutableArray *carriageReturnPositions = [NSMutableArray array ];
225
- NSUInteger currentStringIndex, innerStringIndex, i, quotedStringLength;
226
- unichar currentCharacter, innerCharacter;
227
- BOOL characterIsEscaped;
228
- for (currentStringIndex = 0 ; currentStringIndex < stringLength; currentStringIndex++) {
229
- currentCharacter = CFStringGetCharacterAtIndex ((CFStringRef)queryString, currentStringIndex);
244
+ for (NSUInteger currentStringIndex = 0 ; currentStringIndex < stringLength; currentStringIndex++) {
245
+ unichar currentCharacter = CFStringGetCharacterAtIndex ((CFStringRef)queryString, currentStringIndex);
230
246
switch (currentCharacter) {
231
247
232
248
// When quote characters are encountered walk to the end of the quoted string.
233
- case ' \' ' :
234
- case ' "' :
235
- case ' `' :
236
- for (innerStringIndex = currentStringIndex+1 ; innerStringIndex < stringLength; innerStringIndex++) {
237
- innerCharacter = CFStringGetCharacterAtIndex ((CFStringRef)queryString, innerStringIndex);
249
+ case CHAR_SQUOTE:
250
+ case CHAR_DQUOTE:
251
+ case CHAR_BTICK:
252
+ {
253
+ #warning duplicate code with -endIndexOfStringQuotedByCharacter:startingAtIndex:
254
+ NSUInteger innerStringIndex;
255
+ for (innerStringIndex = currentStringIndex + 1 ; innerStringIndex < stringLength; innerStringIndex++) {
256
+ unichar innerCharacter = CFStringGetCharacterAtIndex ((CFStringRef) queryString, innerStringIndex);
238
257
239
258
// If the string end is a backtick and one has been encountered, treat it as end of string
240
- if (innerCharacter == ' ` ' && currentCharacter == ' ` ' ) {
241
-
259
+ if (innerCharacter == CHAR_BTICK && currentCharacter == CHAR_BTICK ) {
260
+
242
261
// ...as long as the next character isn't also a backtick, in which case it's being quoted. Skip both.
243
- if ((innerStringIndex + 1 ) < stringLength && CFStringGetCharacterAtIndex ((CFStringRef)queryString, innerStringIndex+ 1 ) == ' ` ' ) {
262
+ if ((innerStringIndex + 1 ) < stringLength && CFStringGetCharacterAtIndex ((CFStringRef) queryString, innerStringIndex + 1 ) == CHAR_BTICK ) {
244
263
innerStringIndex++;
245
264
continue ;
246
265
}
247
266
248
267
currentStringIndex = innerStringIndex;
249
268
break ;
250
269
270
+ }
251
271
// Otherwise, prepare to treat the string as ended when meeting the correct boundary character....
252
- } else if (innerCharacter == currentCharacter) {
272
+ else if (innerCharacter == currentCharacter) {
253
273
254
274
// ...but only if the string end isn't escaped with an *odd* number of escaping characters...
255
- characterIsEscaped = NO ;
256
- i = 1 ;
257
- quotedStringLength = innerStringIndex - 1 ;
258
- while ((quotedStringLength - i) > 0 && CFStringGetCharacterAtIndex ((CFStringRef)queryString, innerStringIndex - i) == ' \\ ' ) {
259
- characterIsEscaped = !characterIsEscaped;
260
- i++;
275
+ BOOL characterIsEscaped = NO ;
276
+ if (!noBackslashEscapes) {
277
+ NSUInteger i = 1 ;
278
+ NSUInteger quotedStringLength = innerStringIndex - 1 ;
279
+ while ((quotedStringLength - i) > 0 && CFStringGetCharacterAtIndex ((CFStringRef) queryString, innerStringIndex - i) == CHAR_BS) {
280
+ characterIsEscaped = !characterIsEscaped;
281
+ i++;
282
+ }
261
283
}
262
284
263
285
// If an even number have been found, it may be the end of the string - as long as the subsequent character
@@ -278,9 +300,10 @@ + (NSString *) normaliseQueryForExecution:(NSString *)queryString
278
300
// The quoted string has been left open - end processing.
279
301
currentStringIndex = innerStringIndex;
280
302
break ;
303
+ }
281
304
282
- case ' \r ' :
283
- [carriageReturnPositions addObject: [ NSNumber numberWithUnsignedInteger: currentStringIndex] ];
305
+ case CHAR_CR :
306
+ [carriageReturnPositions addObject: @( currentStringIndex) ];
284
307
break ;
285
308
}
286
309
}
@@ -289,20 +312,18 @@ + (NSString *) normaliseQueryForExecution:(NSString *)queryString
289
312
NSUInteger carriageReturnCount = [carriageReturnPositions count ];
290
313
if (carriageReturnCount) {
291
314
NSMutableString *normalisedString = [NSMutableString stringWithString: queryString];
292
- BOOL isCRLF;
293
- NSUInteger CRLocation;
294
315
while ( carriageReturnCount-- ) {
295
- CRLocation = [[carriageReturnPositions objectAtIndex: carriageReturnCount] unsignedIntegerValue ];
316
+ NSUInteger CRLocation = [[carriageReturnPositions objectAtIndex: carriageReturnCount] unsignedIntegerValue ];
296
317
297
318
// Check whether it's a CRLF or just a CR
298
- isCRLF = NO ;
299
- if ([normalisedString length ] > CRLocation + 1 && CFStringGetCharacterAtIndex ((CFStringRef)normalisedString, CRLocation + 1 ) == ' \n ' ) isCRLF = YES ;
319
+ BOOL isCRLF = NO ;
320
+ if ([normalisedString length ] > CRLocation + 1 && CFStringGetCharacterAtIndex ((CFStringRef)normalisedString, CRLocation + 1 ) == CHAR_LF ) isCRLF = YES ;
300
321
301
322
// Normalise the line endings
302
323
if (isCRLF) {
303
324
[normalisedString deleteCharactersInRange: NSMakeRange (CRLocation, 1 )];
304
325
} else {
305
- [normalisedString replaceCharactersInRange: NSMakeRange (CRLocation, 1 ) withString: @" \n " ];
326
+ [normalisedString replaceCharactersInRange: NSMakeRange (CRLocation, 1 ) withString: STRING_LF ];
306
327
}
307
328
}
308
329
queryString = normalisedString;
@@ -706,9 +727,9 @@ - (NSUInteger) firstOccurrenceOfCharacter:(unichar)character afterIndex:(NSInteg
706
727
switch (currentCharacter) {
707
728
708
729
// When quote characters are encountered and strings are not being ignored, walk to the end of the quoted string.
709
- case ' \' ' :
710
- case ' " ' :
711
- case ' ` ' :
730
+ case CHAR_SQUOTE :
731
+ case CHAR_DQUOTE :
732
+ case CHAR_BTICK :
712
733
if (!ignoreQuotedStrings) break ;
713
734
quotedStringEndIndex = (NSUInteger )(*endIndex)(self, @selector (endIndexOfStringQuotedByCharacter:startingAtIndex: ), currentCharacter, currentStringIndex+1 );
714
735
if (quotedStringEndIndex == NSNotFound ) {
@@ -750,7 +771,7 @@ - (NSUInteger) firstOccurrenceOfCharacter:(unichar)character afterIndex:(NSInteg
750
771
break ;
751
772
752
773
// Capture whether carriage returns are encountered
753
- case ' \r ' :
774
+ case CHAR_CR :
754
775
if (!containsCRs) containsCRs = YES ;
755
776
break ;
756
777
@@ -820,42 +841,43 @@ - (NSUInteger) firstOccurrenceOfCharacter:(unichar)character afterIndex:(NSInteg
820
841
*/
821
842
- (NSUInteger ) endIndexOfStringQuotedByCharacter : (unichar )quoteCharacter startingAtIndex : (NSInteger )startIndex
822
843
{
823
- NSInteger currentStringIndex, stringLength;
824
- NSUInteger i, quotedStringLength;
825
- BOOL characterIsEscaped;
826
- unichar currentCharacter;
827
-
828
844
// Cache the charAtIndex selector, avoiding dynamic binding overhead
829
845
IMP charAtIndex = [self methodForSelector: @selector (_charAtIndex: )];
830
846
SEL charAtIndexSEL = @selector (_charAtIndex: );
831
847
832
- stringLength = [string length ];
848
+ NSInteger stringLength = [string length ];
833
849
834
850
// Walk the string looking for the string end
835
- for ( currentStringIndex = startIndex; currentStringIndex < stringLength; currentStringIndex++) {
836
- currentCharacter = (unichar )(long )(*charAtIndex)(self, charAtIndexSEL, currentStringIndex);
851
+ for (NSInteger currentStringIndex = startIndex; currentStringIndex < stringLength; currentStringIndex++) {
852
+ unichar currentCharacter = (unichar )(long )(*charAtIndex)(self, charAtIndexSEL, currentStringIndex);
837
853
838
854
// If the string end is a backtick and one has been encountered, treat it as end of string
839
- if (quoteCharacter == ' ` ' && currentCharacter == ' ` ' ) {
855
+ if (quoteCharacter == CHAR_BTICK && currentCharacter == CHAR_BTICK ) {
840
856
841
857
// ...as long as the next character isn't also a backtick, in which case it's being quoted. Skip both.
842
- if ((currentStringIndex + 1 ) < stringLength && (unichar )(long )(*charAtIndex)(self, charAtIndexSEL, currentStringIndex+1 ) == ' ` ' ) {
858
+ if ((currentStringIndex + 1 ) < stringLength && (unichar )(long )(*charAtIndex)(self, charAtIndexSEL, currentStringIndex+1 ) == CHAR_BTICK ) {
843
859
currentStringIndex++;
844
860
continue ;
845
861
}
862
+
863
+ // Note: backslash+backtick is not an escape sequence inside a backtick string!
864
+ // i.e. »select `abc\`;« is a syntactically valid query. Some versions of the mysql CLI client
865
+ // have a bug though and will interpret \` as an escaped backtick.
846
866
847
867
return currentStringIndex;
848
-
868
+ }
849
869
// Otherwise, prepare to treat the string as ended when meeting the correct boundary character....
850
- } else if (currentCharacter == quoteCharacter) {
870
+ else if (currentCharacter == quoteCharacter) {
851
871
852
872
// ...but only if the string end isn't escaped with an *odd* number of escaping characters...
853
- characterIsEscaped = NO ;
854
- i = 1 ;
855
- quotedStringLength = currentStringIndex - 1 ;
856
- while ((quotedStringLength - i) > 0 && (unichar )(long )(*charAtIndex)(self, charAtIndexSEL, currentStringIndex - i) == ' \\ ' ) {
857
- characterIsEscaped = !characterIsEscaped;
858
- i++;
873
+ BOOL characterIsEscaped = NO ;
874
+ if (!noBackslashEscapes) {
875
+ NSUInteger i = 1 ;
876
+ NSUInteger quotedStringLength = currentStringIndex - 1 ;
877
+ while ((quotedStringLength - i) > 0 && (unichar ) (long ) (*charAtIndex)(self, charAtIndexSEL, currentStringIndex - i) == CHAR_BS) {
878
+ characterIsEscaped = !characterIsEscaped;
879
+ i++;
880
+ }
859
881
}
860
882
861
883
// If an even number have been found, it may be the end of the string - as long as the subsequent character
@@ -899,8 +921,8 @@ - (NSUInteger) endIndexOfCommentOfType:(SPCommentType)commentType startingAtInde
899
921
anIndex++;
900
922
for ( ; anIndex < stringLength; anIndex++ ) {
901
923
currentCharacter = (unichar )(long )(*charAtIndex)(self, charAtIndexSEL, anIndex);
902
- if (currentCharacter == ' \r ' ) containsCRs = YES ;
903
- if (currentCharacter == ' \r ' || currentCharacter == ' \n ' ) {
924
+ if (currentCharacter == CHAR_CR ) containsCRs = YES ;
925
+ if (currentCharacter == CHAR_CR || currentCharacter == CHAR_LF ) {
904
926
return anIndex-1 ;
905
927
}
906
928
}
@@ -1004,6 +1026,7 @@ - (void) initSQLExtensions {
1004
1026
delimiterLengthMinusOne = 0 ;
1005
1027
lastMatchIsDelimiter = NO ;
1006
1028
containsCRs = NO ;
1029
+ noBackslashEscapes = NO ;
1007
1030
}
1008
1031
- (NSUInteger ) length {
1009
1032
return [string length ];
0 commit comments