Permalink
Browse files

Parser support for NO_BACKSLASH_ESCAPES (part of #3005)

  • Loading branch information...
dmoagx committed Mar 21, 2018
1 parent 3e93a12 commit 75781e0d7c4bafeedee9f1f98b105a260d47a308
Showing with 120 additions and 85 deletions.
  1. +22 −10 Source/SPSQLParser.h
  2. +98 −75 Source/SPSQLParser.m
@@ -28,14 +28,7 @@
//
// More info at <https://github.com/sequelpro/sequelpro>
/*
* Define the length of the character cache to use when parsing instead of accessing
* via characterAtIndex:. There is a balance here between updating the cache very
* often and access penalties; 1500 appears a reasonable compromise.
*/
#define CHARACTER_CACHE_LENGTH 1500
/*
/**
* This class provides a string class intended for SQL parsing. It extends NSMutableString,
* with the intention that as a string is parsed the parsed content is removed. This also
* allows parsing to occur in "streaming" mode, with parseable content being pulled off the
@@ -57,8 +50,9 @@
*
* It is anticipated that characterAtIndex: is currently the parsing weak point, and that in future
* this class could be further optimised by working with the underlying object/characters directly.
*
* This class is NOT thread safe!
*/
@interface SPSQLParser : NSMutableString
{
NSMutableString *string;
@@ -69,14 +63,15 @@
NSInteger charCacheEnd;
BOOL ignoreCommentStrings;
BOOL containsCRs;
BOOL noBackslashEscapes;
BOOL supportDelimiters;
NSString *delimiter;
NSUInteger delimiterLengthMinusOne;
BOOL lastMatchIsDelimiter;
}
typedef enum _SPCommentTypes {
typedef enum {
SPHashComment = 0,
SPDoubleDashComment = 1,
SPCStyleComment = 2
@@ -107,6 +102,15 @@ typedef enum _SPCommentTypes {
*/
- (void) setDelimiterSupport:(BOOL)shouldSupportDelimiters;
/**
* This setting controls the parser equivalent of the NO_BACKSLASH_ESCAPES
* SQL mode.
* If set to YES, the backslash character has no special meaning inside strings
* and is treated as a regular character. This also applies to the escaped backslash
* sequence ("\\"), which is then left as two literal backslashes.
* Escaping single quotes/double quotes/backticks by doubling them is not affected by this.
*/
- (void) setNoBackslashEscapes:(BOOL)ignoreBackslashEscapes;
/**
* Removes comments within the current string, trimming "#", "--[/s]", and "⁄* *⁄" style strings.
*/
@@ -123,6 +127,14 @@ typedef enum _SPCommentTypes {
/**
* Normalise a string, readying it for queries - trims whitespace from both
* ends, and ensures line endings which aren't in quotes are LF.
* The setting of noBackslashEscapes controls whether backslash characters can function
* as escape characters for single or double quotes in strings.
*/
+ (NSString *) normaliseQueryForExecution:(NSString *)queryString noBackslashEscapes:(BOOL)noBackslashEscapes;
/**
* Convenience variant of the method above with noBackslashEscapes set to NO (i.e.
* backslashes can be used for escaping quotes in strings).
*/
+ (NSString *) normaliseQueryForExecution:(NSString *)queryString;
@@ -31,23 +31,31 @@
#import "SPSQLParser.h"
#import "RegexKitLite.h"
/**
* Include all the extern variables and prototypes required for flex (used for syntax highlighting)
*/
#import "SPSQLTokenizer.h"
extern NSInteger tolex();
extern NSInteger yyuoffset, yyuleng;
typedef struct to_buffer_state *TO_BUFFER_STATE;
void to_switch_to_buffer(TO_BUFFER_STATE);
TO_BUFFER_STATE to_scan_string (const char *);
// Private helper methods, declared in a class extension in the implementation file.
@interface SPSQLParser ()
// Fetches a single character by index. The parsing loops in this file call it
// through a cached IMP instead of a regular message send.
// NOTE(review): presumably served from the character cache sized by
// CHARACTER_CACHE_LENGTH - confirm against the implementation.
- (unichar) _charAtIndex:(NSInteger)index;
// NOTE(review): presumably invalidates that character cache; the implementation
// is not visible here - confirm.
- (void) _clearCharCache;
@end
/**
* Define the length of the character cache to use when parsing instead of accessing
* via characterAtIndex:. There is a balance here between updating the cache very
* often and access penalties; 1500 appears a reasonable compromise.
*/
#define CHARACTER_CACHE_LENGTH 1500
// Named constants for the quote, escape and line-ending characters handled by the
// parser. They are deliberately preprocessor macros rather than typed constants:
// the CHAR_* values are used as `case` labels, and the STRING_* values are joined
// by compile-time literal concatenation, e.g. (STRING_BS STRING_DQUOTE).

// Single characters, compared against unichar values while walking a string.
#define CHAR_SQUOTE '\''
#define CHAR_DQUOTE '"'
#define CHAR_BTICK '`'
#define CHAR_BS '\\'
#define CHAR_CR '\r'
#define CHAR_LF '\n'
// NSString literal counterparts, used for search-and-replace operations.
#define STRING_SQUOTE @"'"
#define STRING_DQUOTE @"\""
#define STRING_BS @"\\"
#define STRING_LF @"\n"
/**
* Please see the header files for a general description of the purpose of this class,
@@ -91,6 +99,11 @@ - (void) setDelimiterSupport:(BOOL)shouldSupportDelimiters
supportDelimiters = shouldSupportDelimiters;
}
/**
 * Switches the parser equivalent of the NO_BACKSLASH_ESCAPES SQL mode on or off.
 * Pass YES to have backslashes treated as ordinary characters inside strings,
 * or NO to let them act as escape characters.
 */
- (void) setNoBackslashEscapes:(BOOL)ignoreBackslashEscapes
{
	// Only the flag is stored here; the string-walking methods consult it later.
	self->noBackslashEscapes = ignoreBackslashEscapes;
}
#pragma mark -
#pragma mark SQL-aware utility methods
@@ -109,9 +122,9 @@ - (void) deleteComments
switch (currentCharacter) {
// When quote characters are encountered walk to the end of the quoted string.
case '\'':
case '"':
case '`':
case CHAR_SQUOTE:
case CHAR_DQUOTE:
case CHAR_BTICK:
quotedStringEndIndex = [self endIndexOfStringQuotedByCharacter:currentCharacter startingAtIndex:currentStringIndex+1];
if (quotedStringEndIndex == NSNotFound) {
return;
@@ -169,7 +182,7 @@ - (NSString *) unquotedString
// If the first character is not a quote character, return the entire string.
quoteCharacter = CFStringGetCharacterAtIndex((CFStringRef)string, 0);
if (quoteCharacter != '`' && quoteCharacter != '"' && quoteCharacter != '\'') {
if (quoteCharacter != CHAR_BTICK && quoteCharacter != CHAR_DQUOTE && quoteCharacter != CHAR_SQUOTE) {
return [NSString stringWithString:string];
}
@@ -183,15 +196,16 @@ - (NSString *) unquotedString
returnString = [NSMutableString stringWithString:[string substringWithRange:NSMakeRange(1, stringEndIndex-1)]];
// Remove escaped characters and escaped strings as appropriate
if (quoteCharacter == '`' || quoteCharacter == '"' || quoteCharacter == '\'') {
[returnString replaceOccurrencesOfString:[NSString stringWithFormat:@"%C%C", quoteCharacter, quoteCharacter] withString:[NSString stringWithFormat:@"%C", quoteCharacter] options:0 range:NSMakeRange(0, [returnString length])];
}
if (quoteCharacter == '"') {
[returnString replaceOccurrencesOfString:@"\\\"" withString:@"\"" options:0 range:NSMakeRange(0, [returnString length])];
[returnString replaceOccurrencesOfString:@"\\\\" withString:@"\\" options:0 range:NSMakeRange(0, [returnString length])];
} else if (quoteCharacter == '\'') {
[returnString replaceOccurrencesOfString:@"\\'" withString:@"'" options:0 range:NSMakeRange(0, [returnString length])];
[returnString replaceOccurrencesOfString:@"\\\\" withString:@"\\" options:0 range:NSMakeRange(0, [returnString length])];
[returnString replaceOccurrencesOfString:[NSString stringWithFormat:@"%C%C", quoteCharacter, quoteCharacter] withString:[NSString stringWithFormat:@"%C", quoteCharacter] options:0 range:NSMakeRange(0, [returnString length])];
if(!noBackslashEscapes) {
if (quoteCharacter == CHAR_DQUOTE) {
[returnString replaceOccurrencesOfString:(STRING_BS STRING_DQUOTE) withString:STRING_DQUOTE options:0 range:NSMakeRange(0, [returnString length])];
[returnString replaceOccurrencesOfString:(STRING_BS STRING_BS) withString:STRING_BS options:0 range:NSMakeRange(0, [returnString length])];
} else if (quoteCharacter == CHAR_SQUOTE) {
[returnString replaceOccurrencesOfString:(STRING_BS STRING_SQUOTE) withString:STRING_SQUOTE options:0 range:NSMakeRange(0, [returnString length])];
[returnString replaceOccurrencesOfString:(STRING_BS STRING_BS) withString:STRING_BS options:0 range:NSMakeRange(0, [returnString length])];
}
}
return returnString;
@@ -202,6 +216,11 @@ - (NSString *) unquotedString
* ends, and ensures line endings which aren't in quotes are LF.
*/
+ (NSString *) normaliseQueryForExecution:(NSString *)queryString
{
	// Convenience entry point: backslash escapes stay enabled (noBackslashEscapes = NO)
	// and the two-argument variant does all of the work.
	NSString *normalisedQuery = [self normaliseQueryForExecution:queryString noBackslashEscapes:NO];

	return normalisedQuery;
}
+ (NSString *) normaliseQueryForExecution:(NSString *)queryString noBackslashEscapes:(BOOL)noBackslashEscapes
{
NSUInteger stringLength = [queryString length];
NSCharacterSet *trimCharset = [NSCharacterSet whitespaceAndNewlineCharacterSet];
@@ -222,42 +241,45 @@ + (NSString *) normaliseQueryForExecution:(NSString *)queryString
// Check for carriage returns in the string
NSMutableArray *carriageReturnPositions = [NSMutableArray array];
NSUInteger currentStringIndex, innerStringIndex, i, quotedStringLength;
unichar currentCharacter, innerCharacter;
BOOL characterIsEscaped;
for (currentStringIndex = 0; currentStringIndex < stringLength; currentStringIndex++) {
currentCharacter = CFStringGetCharacterAtIndex((CFStringRef)queryString, currentStringIndex);
for (NSUInteger currentStringIndex = 0; currentStringIndex < stringLength; currentStringIndex++) {
unichar currentCharacter = CFStringGetCharacterAtIndex((CFStringRef)queryString, currentStringIndex);
switch (currentCharacter) {
// When quote characters are encountered walk to the end of the quoted string.
case '\'':
case '"':
case '`':
for (innerStringIndex = currentStringIndex+1; innerStringIndex < stringLength; innerStringIndex++) {
innerCharacter = CFStringGetCharacterAtIndex((CFStringRef)queryString, innerStringIndex);
case CHAR_SQUOTE:
case CHAR_DQUOTE:
case CHAR_BTICK:
{
#warning duplicate code with -endIndexOfStringQuotedByCharacter:startingIndex:
NSUInteger innerStringIndex;
for (innerStringIndex = currentStringIndex + 1; innerStringIndex < stringLength; innerStringIndex++) {
unichar innerCharacter = CFStringGetCharacterAtIndex((CFStringRef) queryString, innerStringIndex);
// If the string end is a backtick and one has been encountered, treat it as end of string
if (innerCharacter == '`' && currentCharacter == '`') {
if (innerCharacter == CHAR_BTICK && currentCharacter == CHAR_BTICK) {
// ...as long as the next character isn't also a backtick, in which case it's being quoted. Skip both.
if ((innerStringIndex + 1) < stringLength && CFStringGetCharacterAtIndex((CFStringRef)queryString, innerStringIndex+1) == '`') {
if ((innerStringIndex + 1) < stringLength && CFStringGetCharacterAtIndex((CFStringRef) queryString, innerStringIndex + 1) == CHAR_BTICK) {
innerStringIndex++;
continue;
}
currentStringIndex = innerStringIndex;
break;
}
// Otherwise, prepare to treat the string as ended when meeting the correct boundary character....
} else if (innerCharacter == currentCharacter) {
else if (innerCharacter == currentCharacter) {
// ...but only if the string end isn't escaped with an *odd* number of escaping characters...
characterIsEscaped = NO;
i = 1;
quotedStringLength = innerStringIndex - 1;
while ((quotedStringLength - i) > 0 && CFStringGetCharacterAtIndex((CFStringRef)queryString, innerStringIndex - i) == '\\') {
characterIsEscaped = !characterIsEscaped;
i++;
BOOL characterIsEscaped = NO;
if (!noBackslashEscapes) {
NSUInteger i = 1;
NSUInteger quotedStringLength = innerStringIndex - 1;
while ((quotedStringLength - i) > 0 && CFStringGetCharacterAtIndex((CFStringRef) queryString, innerStringIndex - i) == CHAR_BS) {
characterIsEscaped = !characterIsEscaped;
i++;
}
}
// If an even number have been found, it may be the end of the string - as long as the subsequent character
@@ -278,9 +300,10 @@ + (NSString *) normaliseQueryForExecution:(NSString *)queryString
// The quoted string has been left open - end processing.
currentStringIndex = innerStringIndex;
break;
}
case '\r':
[carriageReturnPositions addObject:[NSNumber numberWithUnsignedInteger:currentStringIndex]];
case CHAR_CR:
[carriageReturnPositions addObject:@(currentStringIndex)];
break;
}
}
@@ -289,20 +312,18 @@ + (NSString *) normaliseQueryForExecution:(NSString *)queryString
NSUInteger carriageReturnCount = [carriageReturnPositions count];
if (carriageReturnCount) {
NSMutableString *normalisedString = [NSMutableString stringWithString:queryString];
BOOL isCRLF;
NSUInteger CRLocation;
while ( carriageReturnCount-- ) {
CRLocation = [[carriageReturnPositions objectAtIndex:carriageReturnCount] unsignedIntegerValue];
NSUInteger CRLocation = [[carriageReturnPositions objectAtIndex:carriageReturnCount] unsignedIntegerValue];
// Check whether it's a CRLF or just a CR
isCRLF = NO;
if ([normalisedString length] > CRLocation + 1 && CFStringGetCharacterAtIndex((CFStringRef)normalisedString, CRLocation + 1) == '\n') isCRLF = YES;
BOOL isCRLF = NO;
if ([normalisedString length] > CRLocation + 1 && CFStringGetCharacterAtIndex((CFStringRef)normalisedString, CRLocation + 1) == CHAR_LF) isCRLF = YES;
// Normalise the line endings
if (isCRLF) {
[normalisedString deleteCharactersInRange:NSMakeRange(CRLocation, 1)];
} else {
[normalisedString replaceCharactersInRange:NSMakeRange(CRLocation, 1) withString:@"\n"];
[normalisedString replaceCharactersInRange:NSMakeRange(CRLocation, 1) withString:STRING_LF];
}
}
queryString = normalisedString;
@@ -706,9 +727,9 @@ - (NSUInteger) firstOccurrenceOfCharacter:(unichar)character afterIndex:(NSInteg
switch (currentCharacter) {
// When quote characters are encountered and strings are not being ignored, walk to the end of the quoted string.
case '\'':
case '"':
case '`':
case CHAR_SQUOTE:
case CHAR_DQUOTE:
case CHAR_BTICK:
if (!ignoreQuotedStrings) break;
quotedStringEndIndex = (NSUInteger)(*endIndex)(self, @selector(endIndexOfStringQuotedByCharacter:startingAtIndex:), currentCharacter, currentStringIndex+1);
if (quotedStringEndIndex == NSNotFound) {
@@ -750,7 +771,7 @@ - (NSUInteger) firstOccurrenceOfCharacter:(unichar)character afterIndex:(NSInteg
break;
// Capture whether carriage returns are encountered
case '\r':
case CHAR_CR:
if (!containsCRs) containsCRs = YES;
break;
@@ -820,42 +841,43 @@ - (NSUInteger) firstOccurrenceOfCharacter:(unichar)character afterIndex:(NSInteg
*/
- (NSUInteger) endIndexOfStringQuotedByCharacter:(unichar)quoteCharacter startingAtIndex:(NSInteger)startIndex
{
NSInteger currentStringIndex, stringLength;
NSUInteger i, quotedStringLength;
BOOL characterIsEscaped;
unichar currentCharacter;
// Cache the charAtIndex selector, avoiding dynamic binding overhead
IMP charAtIndex = [self methodForSelector:@selector(_charAtIndex:)];
SEL charAtIndexSEL = @selector(_charAtIndex:);
stringLength = [string length];
NSInteger stringLength = [string length];
// Walk the string looking for the string end
for ( currentStringIndex = startIndex; currentStringIndex < stringLength; currentStringIndex++) {
currentCharacter = (unichar)(long)(*charAtIndex)(self, charAtIndexSEL, currentStringIndex);
for (NSInteger currentStringIndex = startIndex; currentStringIndex < stringLength; currentStringIndex++) {
unichar currentCharacter = (unichar)(long)(*charAtIndex)(self, charAtIndexSEL, currentStringIndex);
// If the string end is a backtick and one has been encountered, treat it as end of string
if (quoteCharacter == '`' && currentCharacter == '`') {
if (quoteCharacter == CHAR_BTICK && currentCharacter == CHAR_BTICK) {
// ...as long as the next character isn't also a backtick, in which case it's being quoted. Skip both.
if ((currentStringIndex + 1) < stringLength && (unichar)(long)(*charAtIndex)(self, charAtIndexSEL, currentStringIndex+1) == '`') {
if ((currentStringIndex + 1) < stringLength && (unichar)(long)(*charAtIndex)(self, charAtIndexSEL, currentStringIndex+1) == CHAR_BTICK) {
currentStringIndex++;
continue;
}
// Note: backslash+backtick is not an escape sequence inside a backtick string!
// i.e. »select `abc\`;« is a syntactically valid query. Some versions of the mysql CLI client
// have a bug though and will interpret \` as an escaped backtick.
return currentStringIndex;
}
// Otherwise, prepare to treat the string as ended when meeting the correct boundary character....
} else if (currentCharacter == quoteCharacter) {
else if (currentCharacter == quoteCharacter) {
// ...but only if the string end isn't escaped with an *odd* number of escaping characters...
characterIsEscaped = NO;
i = 1;
quotedStringLength = currentStringIndex - 1;
while ((quotedStringLength - i) > 0 && (unichar)(long)(*charAtIndex)(self, charAtIndexSEL, currentStringIndex - i) == '\\') {
characterIsEscaped = !characterIsEscaped;
i++;
BOOL characterIsEscaped = NO;
if(!noBackslashEscapes) {
NSUInteger i = 1;
NSUInteger quotedStringLength = currentStringIndex - 1;
while ((quotedStringLength - i) > 0 && (unichar) (long) (*charAtIndex)(self, charAtIndexSEL, currentStringIndex - i) == CHAR_BS) {
characterIsEscaped = !characterIsEscaped;
i++;
}
}
// If an even number have been found, it may be the end of the string - as long as the subsequent character
@@ -899,8 +921,8 @@ - (NSUInteger) endIndexOfCommentOfType:(SPCommentType)commentType startingAtInde
anIndex++;
for ( ; anIndex < stringLength; anIndex++ ) {
currentCharacter = (unichar)(long)(*charAtIndex)(self, charAtIndexSEL, anIndex);
if (currentCharacter == '\r') containsCRs = YES;
if (currentCharacter == '\r' || currentCharacter == '\n') {
if (currentCharacter == CHAR_CR) containsCRs = YES;
if (currentCharacter == CHAR_CR || currentCharacter == CHAR_LF) {
return anIndex-1;
}
}
@@ -1004,6 +1026,7 @@ - (void) initSQLExtensions {
delimiterLengthMinusOne = 0;
lastMatchIsDelimiter = NO;
containsCRs = NO;
noBackslashEscapes = NO;
}
- (NSUInteger) length {
return [string length];

0 comments on commit 75781e0

Please sign in to comment.