Skip to content

Commit 75781e0

Browse files
committed
Parser support for NO_BACKSLASH_ESCAPES (part of #3005)
1 parent 3e93a12 commit 75781e0

File tree

2 files changed

+120
-85
lines changed

2 files changed

+120
-85
lines changed

Source/SPSQLParser.h

Lines changed: 22 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -28,14 +28,7 @@
2828
//
2929
// More info at <https://github.com/sequelpro/sequelpro>
3030

31-
/*
32-
* Define the length of the character cache to use when parsing instead of accessing
33-
* via characterAtIndex:. There is a balance here between updating the cache very
34-
* often and access penalties; 1500 appears a reasonable compromise.
35-
*/
36-
#define CHARACTER_CACHE_LENGTH 1500
37-
38-
/*
31+
/**
3932
* This class provides a string class intended for SQL parsing. It extends NSMutableString,
4033
* with the intention that as a string is parsed the parsed content is removed. This also
4134
* allows parsing to occur in "streaming" mode, with parseable content being pulled off the
@@ -57,8 +50,9 @@
5750
*
5851
* It is anticipated that characterAtIndex: is currently the parsing weak point, and that in future
5952
* this class could be further optimised by working with the underlying object/characters directly.
53+
*
54+
* This class is NOT thread safe!
6055
*/
61-
6256
@interface SPSQLParser : NSMutableString
6357
{
6458
NSMutableString *string;
@@ -69,14 +63,15 @@
6963
NSInteger charCacheEnd;
7064
BOOL ignoreCommentStrings;
7165
BOOL containsCRs;
66+
BOOL noBackslashEscapes;
7267

7368
BOOL supportDelimiters;
7469
NSString *delimiter;
7570
NSUInteger delimiterLengthMinusOne;
7671
BOOL lastMatchIsDelimiter;
7772
}
7873

79-
typedef enum _SPCommentTypes {
74+
typedef enum {
8075
SPHashComment = 0,
8176
SPDoubleDashComment = 1,
8277
SPCStyleComment = 2
@@ -107,6 +102,15 @@ typedef enum _SPCommentTypes {
107102
*/
108103
- (void) setDelimiterSupport:(BOOL)shouldSupportDelimiters;
109104

105+
/**
106+
* This setting controls the parser equivalent of the NO_BACKSLASH_ESCAPES
107+
* SQL mode.
108+
* If set to YES, the backslash character will not have any special meaning in strings
109+
* and will be treated as a regular character. This also includes the self escape sequence ("\\").
110+
* Escaping single quotes/double quotes/backticks by doubling them is not affected by this.
111+
*/
112+
- (void) setNoBackslashEscapes:(BOOL)ignoreBackslashEscapes;
113+
110114
/**
111115
* Removes comments within the current string, trimming "#", "--[\s]", and "⁄* *⁄" style strings.
112116
*/
@@ -123,6 +127,14 @@ typedef enum _SPCommentTypes {
123127
/**
124128
* Normalise a string, readying it for queries - trims whitespace from both
125129
* ends, and ensures line endings which aren't in quotes are LF.
130+
* The setting of noBackslashEscapes controls whether backslash characters can function
131+
* as escape characters for single or double quotes in strings.
132+
*/
133+
+ (NSString *) normaliseQueryForExecution:(NSString *)queryString noBackslashEscapes:(BOOL)noBackslashEscapes;
134+
135+
/**
136+
* Convenience overload of the method above with noBackslashEscapes set to NO (i.e.
137+
* backslashes can be used for escaping quotes in strings)
126138
*/
127139
+ (NSString *) normaliseQueryForExecution:(NSString *)queryString;
128140

Source/SPSQLParser.m

Lines changed: 98 additions & 75 deletions
Original file line numberDiff line numberDiff line change
@@ -31,23 +31,31 @@
3131
#import "SPSQLParser.h"
3232
#import "RegexKitLite.h"
3333

34-
/**
35-
* Include all the extern variables and prototypes required for flex (used for syntax highlighting)
36-
*/
37-
#import "SPSQLTokenizer.h"
38-
extern NSInteger tolex();
39-
extern NSInteger yyuoffset, yyuleng;
40-
typedef struct to_buffer_state *TO_BUFFER_STATE;
41-
void to_switch_to_buffer(TO_BUFFER_STATE);
42-
TO_BUFFER_STATE to_scan_string (const char *);
43-
4434
@interface SPSQLParser ()
4535

4636
- (unichar) _charAtIndex:(NSInteger)index;
4737
- (void) _clearCharCache;
4838

4939
@end
5040

41+
/**
42+
* Define the length of the character cache to use when parsing instead of accessing
43+
* via characterAtIndex:. There is a balance here between updating the cache very
44+
* often and access penalties; 1500 appears a reasonable compromise.
45+
*/
46+
#define CHARACTER_CACHE_LENGTH 1500
47+
48+
#define CHAR_SQUOTE '\''
49+
#define CHAR_DQUOTE '"'
50+
#define CHAR_BTICK '`'
51+
#define CHAR_BS '\\'
52+
#define CHAR_CR '\r'
53+
#define CHAR_LF '\n'
54+
55+
#define STRING_SQUOTE @"'"
56+
#define STRING_DQUOTE @"\""
57+
#define STRING_BS @"\\"
58+
#define STRING_LF @"\n"
5159

5260
/**
5361
* Please see the header files for a general description of the purpose of this class,
@@ -91,6 +99,11 @@ - (void) setDelimiterSupport:(BOOL)shouldSupportDelimiters
9199
supportDelimiters = shouldSupportDelimiters;
92100
}
93101

102+
- (void) setNoBackslashEscapes:(BOOL)ignoreBackslashEscapes
103+
{
104+
noBackslashEscapes = ignoreBackslashEscapes;
105+
}
106+
94107
#pragma mark -
95108
#pragma mark SQL-aware utility methods
96109

@@ -109,9 +122,9 @@ - (void) deleteComments
109122
switch (currentCharacter) {
110123

111124
// When quote characters are encountered walk to the end of the quoted string.
112-
case '\'':
113-
case '"':
114-
case '`':
125+
case CHAR_SQUOTE:
126+
case CHAR_DQUOTE:
127+
case CHAR_BTICK:
115128
quotedStringEndIndex = [self endIndexOfStringQuotedByCharacter:currentCharacter startingAtIndex:currentStringIndex+1];
116129
if (quotedStringEndIndex == NSNotFound) {
117130
return;
@@ -169,7 +182,7 @@ - (NSString *) unquotedString
169182

170183
// If the first character is not a quote character, return the entire string.
171184
quoteCharacter = CFStringGetCharacterAtIndex((CFStringRef)string, 0);
172-
if (quoteCharacter != '`' && quoteCharacter != '"' && quoteCharacter != '\'') {
185+
if (quoteCharacter != CHAR_BTICK && quoteCharacter != CHAR_DQUOTE && quoteCharacter != CHAR_SQUOTE) {
173186
return [NSString stringWithString:string];
174187
}
175188

@@ -183,15 +196,16 @@ - (NSString *) unquotedString
183196
returnString = [NSMutableString stringWithString:[string substringWithRange:NSMakeRange(1, stringEndIndex-1)]];
184197

185198
// Remove escaped characters and escaped strings as appropriate
186-
if (quoteCharacter == '`' || quoteCharacter == '"' || quoteCharacter == '\'') {
187-
[returnString replaceOccurrencesOfString:[NSString stringWithFormat:@"%C%C", quoteCharacter, quoteCharacter] withString:[NSString stringWithFormat:@"%C", quoteCharacter] options:0 range:NSMakeRange(0, [returnString length])];
188-
}
189-
if (quoteCharacter == '"') {
190-
[returnString replaceOccurrencesOfString:@"\\\"" withString:@"\"" options:0 range:NSMakeRange(0, [returnString length])];
191-
[returnString replaceOccurrencesOfString:@"\\\\" withString:@"\\" options:0 range:NSMakeRange(0, [returnString length])];
192-
} else if (quoteCharacter == '\'') {
193-
[returnString replaceOccurrencesOfString:@"\\'" withString:@"'" options:0 range:NSMakeRange(0, [returnString length])];
194-
[returnString replaceOccurrencesOfString:@"\\\\" withString:@"\\" options:0 range:NSMakeRange(0, [returnString length])];
199+
[returnString replaceOccurrencesOfString:[NSString stringWithFormat:@"%C%C", quoteCharacter, quoteCharacter] withString:[NSString stringWithFormat:@"%C", quoteCharacter] options:0 range:NSMakeRange(0, [returnString length])];
200+
201+
if(!noBackslashEscapes) {
202+
if (quoteCharacter == CHAR_DQUOTE) {
203+
[returnString replaceOccurrencesOfString:(STRING_BS STRING_DQUOTE) withString:STRING_DQUOTE options:0 range:NSMakeRange(0, [returnString length])];
204+
[returnString replaceOccurrencesOfString:(STRING_BS STRING_BS) withString:STRING_BS options:0 range:NSMakeRange(0, [returnString length])];
205+
} else if (quoteCharacter == CHAR_SQUOTE) {
206+
[returnString replaceOccurrencesOfString:(STRING_BS STRING_SQUOTE) withString:STRING_SQUOTE options:0 range:NSMakeRange(0, [returnString length])];
207+
[returnString replaceOccurrencesOfString:(STRING_BS STRING_BS) withString:STRING_BS options:0 range:NSMakeRange(0, [returnString length])];
208+
}
195209
}
196210

197211
return returnString;
@@ -202,6 +216,11 @@ - (NSString *) unquotedString
202216
* ends, and ensures line endings which aren't in quotes are LF.
203217
*/
204218
+ (NSString *) normaliseQueryForExecution:(NSString *)queryString
219+
{
220+
return [self normaliseQueryForExecution:queryString noBackslashEscapes:NO];
221+
}
222+
223+
+ (NSString *) normaliseQueryForExecution:(NSString *)queryString noBackslashEscapes:(BOOL)noBackslashEscapes
205224
{
206225
NSUInteger stringLength = [queryString length];
207226
NSCharacterSet *trimCharset = [NSCharacterSet whitespaceAndNewlineCharacterSet];
@@ -222,42 +241,45 @@ + (NSString *) normaliseQueryForExecution:(NSString *)queryString
222241

223242
// Check for carriage returns in the string
224243
NSMutableArray *carriageReturnPositions = [NSMutableArray array];
225-
NSUInteger currentStringIndex, innerStringIndex, i, quotedStringLength;
226-
unichar currentCharacter, innerCharacter;
227-
BOOL characterIsEscaped;
228-
for (currentStringIndex = 0; currentStringIndex < stringLength; currentStringIndex++) {
229-
currentCharacter = CFStringGetCharacterAtIndex((CFStringRef)queryString, currentStringIndex);
244+
for (NSUInteger currentStringIndex = 0; currentStringIndex < stringLength; currentStringIndex++) {
245+
unichar currentCharacter = CFStringGetCharacterAtIndex((CFStringRef)queryString, currentStringIndex);
230246
switch (currentCharacter) {
231247

232248
// When quote characters are encountered walk to the end of the quoted string.
233-
case '\'':
234-
case '"':
235-
case '`':
236-
for (innerStringIndex = currentStringIndex+1; innerStringIndex < stringLength; innerStringIndex++) {
237-
innerCharacter = CFStringGetCharacterAtIndex((CFStringRef)queryString, innerStringIndex);
249+
case CHAR_SQUOTE:
250+
case CHAR_DQUOTE:
251+
case CHAR_BTICK:
252+
{
253+
#warning duplicate code with -endIndexOfStringQuotedByCharacter:startingAtIndex:
254+
NSUInteger innerStringIndex;
255+
for (innerStringIndex = currentStringIndex + 1; innerStringIndex < stringLength; innerStringIndex++) {
256+
unichar innerCharacter = CFStringGetCharacterAtIndex((CFStringRef) queryString, innerStringIndex);
238257

239258
// If the string end is a backtick and one has been encountered, treat it as end of string
240-
if (innerCharacter == '`' && currentCharacter == '`') {
241-
259+
if (innerCharacter == CHAR_BTICK && currentCharacter == CHAR_BTICK) {
260+
242261
// ...as long as the next character isn't also a backtick, in which case it's being quoted. Skip both.
243-
if ((innerStringIndex + 1) < stringLength && CFStringGetCharacterAtIndex((CFStringRef)queryString, innerStringIndex+1) == '`') {
262+
if ((innerStringIndex + 1) < stringLength && CFStringGetCharacterAtIndex((CFStringRef) queryString, innerStringIndex + 1) == CHAR_BTICK) {
244263
innerStringIndex++;
245264
continue;
246265
}
247266

248267
currentStringIndex = innerStringIndex;
249268
break;
250269

270+
}
251271
// Otherwise, prepare to treat the string as ended when meeting the correct boundary character....
252-
} else if (innerCharacter == currentCharacter) {
272+
else if (innerCharacter == currentCharacter) {
253273

254274
// ...but only if the string end isn't escaped with an *odd* number of escaping characters...
255-
characterIsEscaped = NO;
256-
i = 1;
257-
quotedStringLength = innerStringIndex - 1;
258-
while ((quotedStringLength - i) > 0 && CFStringGetCharacterAtIndex((CFStringRef)queryString, innerStringIndex - i) == '\\') {
259-
characterIsEscaped = !characterIsEscaped;
260-
i++;
275+
BOOL characterIsEscaped = NO;
276+
if (!noBackslashEscapes) {
277+
NSUInteger i = 1;
278+
NSUInteger quotedStringLength = innerStringIndex - 1;
279+
while ((quotedStringLength - i) > 0 && CFStringGetCharacterAtIndex((CFStringRef) queryString, innerStringIndex - i) == CHAR_BS) {
280+
characterIsEscaped = !characterIsEscaped;
281+
i++;
282+
}
261283
}
262284

263285
// If an even number has been found, it may be the end of the string - as long as the subsequent character
@@ -278,9 +300,10 @@ + (NSString *) normaliseQueryForExecution:(NSString *)queryString
278300
// The quoted string has been left open - end processing.
279301
currentStringIndex = innerStringIndex;
280302
break;
303+
}
281304

282-
case '\r':
283-
[carriageReturnPositions addObject:[NSNumber numberWithUnsignedInteger:currentStringIndex]];
305+
case CHAR_CR:
306+
[carriageReturnPositions addObject:@(currentStringIndex)];
284307
break;
285308
}
286309
}
@@ -289,20 +312,18 @@ + (NSString *) normaliseQueryForExecution:(NSString *)queryString
289312
NSUInteger carriageReturnCount = [carriageReturnPositions count];
290313
if (carriageReturnCount) {
291314
NSMutableString *normalisedString = [NSMutableString stringWithString:queryString];
292-
BOOL isCRLF;
293-
NSUInteger CRLocation;
294315
while ( carriageReturnCount-- ) {
295-
CRLocation = [[carriageReturnPositions objectAtIndex:carriageReturnCount] unsignedIntegerValue];
316+
NSUInteger CRLocation = [[carriageReturnPositions objectAtIndex:carriageReturnCount] unsignedIntegerValue];
296317

297318
// Check whether it's a CRLF or just a CR
298-
isCRLF = NO;
299-
if ([normalisedString length] > CRLocation + 1 && CFStringGetCharacterAtIndex((CFStringRef)normalisedString, CRLocation + 1) == '\n') isCRLF = YES;
319+
BOOL isCRLF = NO;
320+
if ([normalisedString length] > CRLocation + 1 && CFStringGetCharacterAtIndex((CFStringRef)normalisedString, CRLocation + 1) == CHAR_LF) isCRLF = YES;
300321

301322
// Normalise the line endings
302323
if (isCRLF) {
303324
[normalisedString deleteCharactersInRange:NSMakeRange(CRLocation, 1)];
304325
} else {
305-
[normalisedString replaceCharactersInRange:NSMakeRange(CRLocation, 1) withString:@"\n"];
326+
[normalisedString replaceCharactersInRange:NSMakeRange(CRLocation, 1) withString:STRING_LF];
306327
}
307328
}
308329
queryString = normalisedString;
@@ -706,9 +727,9 @@ - (NSUInteger) firstOccurrenceOfCharacter:(unichar)character afterIndex:(NSInteg
706727
switch (currentCharacter) {
707728

708729
// When quote characters are encountered and quoted strings are being ignored, walk to the end of the quoted string.
709-
case '\'':
710-
case '"':
711-
case '`':
730+
case CHAR_SQUOTE:
731+
case CHAR_DQUOTE:
732+
case CHAR_BTICK:
712733
if (!ignoreQuotedStrings) break;
713734
quotedStringEndIndex = (NSUInteger)(*endIndex)(self, @selector(endIndexOfStringQuotedByCharacter:startingAtIndex:), currentCharacter, currentStringIndex+1);
714735
if (quotedStringEndIndex == NSNotFound) {
@@ -750,7 +771,7 @@ - (NSUInteger) firstOccurrenceOfCharacter:(unichar)character afterIndex:(NSInteg
750771
break;
751772

752773
// Capture whether carriage returns are encountered
753-
case '\r':
774+
case CHAR_CR:
754775
if (!containsCRs) containsCRs = YES;
755776
break;
756777

@@ -820,42 +841,43 @@ - (NSUInteger) firstOccurrenceOfCharacter:(unichar)character afterIndex:(NSInteg
820841
*/
821842
- (NSUInteger) endIndexOfStringQuotedByCharacter:(unichar)quoteCharacter startingAtIndex:(NSInteger)startIndex
822843
{
823-
NSInteger currentStringIndex, stringLength;
824-
NSUInteger i, quotedStringLength;
825-
BOOL characterIsEscaped;
826-
unichar currentCharacter;
827-
828844
// Cache the charAtIndex selector, avoiding dynamic binding overhead
829845
IMP charAtIndex = [self methodForSelector:@selector(_charAtIndex:)];
830846
SEL charAtIndexSEL = @selector(_charAtIndex:);
831847

832-
stringLength = [string length];
848+
NSInteger stringLength = [string length];
833849

834850
// Walk the string looking for the string end
835-
for ( currentStringIndex = startIndex; currentStringIndex < stringLength; currentStringIndex++) {
836-
currentCharacter = (unichar)(long)(*charAtIndex)(self, charAtIndexSEL, currentStringIndex);
851+
for (NSInteger currentStringIndex = startIndex; currentStringIndex < stringLength; currentStringIndex++) {
852+
unichar currentCharacter = (unichar)(long)(*charAtIndex)(self, charAtIndexSEL, currentStringIndex);
837853

838854
// If the string end is a backtick and one has been encountered, treat it as end of string
839-
if (quoteCharacter == '`' && currentCharacter == '`') {
855+
if (quoteCharacter == CHAR_BTICK && currentCharacter == CHAR_BTICK) {
840856

841857
// ...as long as the next character isn't also a backtick, in which case it's being quoted. Skip both.
842-
if ((currentStringIndex + 1) < stringLength && (unichar)(long)(*charAtIndex)(self, charAtIndexSEL, currentStringIndex+1) == '`') {
858+
if ((currentStringIndex + 1) < stringLength && (unichar)(long)(*charAtIndex)(self, charAtIndexSEL, currentStringIndex+1) == CHAR_BTICK) {
843859
currentStringIndex++;
844860
continue;
845861
}
862+
863+
// Note: backslash+backtick is not an escape sequence inside a backtick string!
864+
// i.e. »select `abc\`;« is a syntactically valid query. Some versions of the mysql CLI client
865+
// have a bug though and will interpret \` as an escaped backtick.
846866

847867
return currentStringIndex;
848-
868+
}
849869
// Otherwise, prepare to treat the string as ended when meeting the correct boundary character....
850-
} else if (currentCharacter == quoteCharacter) {
870+
else if (currentCharacter == quoteCharacter) {
851871

852872
// ...but only if the string end isn't escaped with an *odd* number of escaping characters...
853-
characterIsEscaped = NO;
854-
i = 1;
855-
quotedStringLength = currentStringIndex - 1;
856-
while ((quotedStringLength - i) > 0 && (unichar)(long)(*charAtIndex)(self, charAtIndexSEL, currentStringIndex - i) == '\\') {
857-
characterIsEscaped = !characterIsEscaped;
858-
i++;
873+
BOOL characterIsEscaped = NO;
874+
if(!noBackslashEscapes) {
875+
NSUInteger i = 1;
876+
NSUInteger quotedStringLength = currentStringIndex - 1;
877+
while ((quotedStringLength - i) > 0 && (unichar) (long) (*charAtIndex)(self, charAtIndexSEL, currentStringIndex - i) == CHAR_BS) {
878+
characterIsEscaped = !characterIsEscaped;
879+
i++;
880+
}
859881
}
860882

861883
// If an even number has been found, it may be the end of the string - as long as the subsequent character
@@ -899,8 +921,8 @@ - (NSUInteger) endIndexOfCommentOfType:(SPCommentType)commentType startingAtInde
899921
anIndex++;
900922
for ( ; anIndex < stringLength; anIndex++ ) {
901923
currentCharacter = (unichar)(long)(*charAtIndex)(self, charAtIndexSEL, anIndex);
902-
if (currentCharacter == '\r') containsCRs = YES;
903-
if (currentCharacter == '\r' || currentCharacter == '\n') {
924+
if (currentCharacter == CHAR_CR) containsCRs = YES;
925+
if (currentCharacter == CHAR_CR || currentCharacter == CHAR_LF) {
904926
return anIndex-1;
905927
}
906928
}
@@ -1004,6 +1026,7 @@ - (void) initSQLExtensions {
10041026
delimiterLengthMinusOne = 0;
10051027
lastMatchIsDelimiter = NO;
10061028
containsCRs = NO;
1029+
noBackslashEscapes = NO;
10071030
}
10081031
- (NSUInteger) length {
10091032
return [string length];

0 commit comments

Comments
 (0)