Skip to content

Commit

Permalink
adding Ben Barnett's patch for quote escaping
Browse files Browse the repository at this point in the history
  • Loading branch information
Todd Ditchendorf committed Apr 29, 2010
1 parent b586658 commit 75054d6
Show file tree
Hide file tree
Showing 4 changed files with 125 additions and 24 deletions.
7 changes: 7 additions & 0 deletions include/ParseKit/PKQuoteState.h
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
@interface PKQuoteState : PKTokenizerState {
BOOL allowsEOFTerminatedQuotes;
BOOL balancesEOFTerminatedQuotes;
BOOL usesCSVStyleEscaping;
}

/*!
Expand All @@ -36,4 +37,10 @@
@brief if YES, this state will append a matching quote char (<tt>'</tt> or <tt>"</tt>) to strings terminated by EOF. Default is NO.
*/
@property (nonatomic) BOOL balancesEOFTerminatedQuotes;

/*!
@property usesCSVStyleEscaping
@brief if NO, this state will use backslash-style escaping (<tt>\'</tt> or <tt>\"</tt>). If YES, it will use CSV-style escaping, by doubling the quote character (<tt>''</tt> or <tt>""</tt>). The default is NO (backslash-style).
*/
@property (nonatomic) BOOL usesCSVStyleEscaping;
@end
16 changes: 14 additions & 2 deletions src/PKQuoteState.m
Original file line number Diff line number Diff line change
Expand Up @@ -61,10 +61,21 @@ - (PKToken *)nextTokenFromReader:(PKReader *)r startingWith:(PKUniChar)cin token
[r unread:[[self bufferedString] length]];
return [[self nextTokenizerStateFor:cin tokenizer:t] nextTokenFromReader:r startingWith:cin tokenizer:t];
}
} else {
} else if ((!usesCSVStyleEscaping && c == '\\') || (usesCSVStyleEscaping && c == cin)) {
PKUniChar peek = [r read];
if (peek == cin) {
[self append:c];
[self append:peek];
c = PKEOF; // Just to get past the while() condition
} else {
if (peek != PKEOF) {
[r unread:1];
}
[self append:c];
}
} else {
[self append:c];
}

} while (c != cin);

PKToken *tok = [PKToken tokenWithTokenType:PKTokenTypeQuotedString stringValue:[self bufferedString] floatValue:0.0];
Expand All @@ -74,4 +85,5 @@ - (PKToken *)nextTokenFromReader:(PKReader *)r startingWith:(PKUniChar)cin token

@synthesize allowsEOFTerminatedQuotes;
@synthesize balancesEOFTerminatedQuotes;
@synthesize usesCSVStyleEscaping;
@end
1 change: 1 addition & 0 deletions test/TDQuoteStateTest.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@

@interface TDQuoteStateTest : SenTestCase {
PKQuoteState *quoteState;
PKTokenizer *t;
PKReader *r;
NSString *s;
}
Expand Down
125 changes: 103 additions & 22 deletions test/TDQuoteStateTest.m
Original file line number Diff line number Diff line change
Expand Up @@ -18,91 +18,172 @@
@implementation TDQuoteStateTest

// Builds the fixtures shared by every test: a PKReader (r), a PKTokenizer (t)
// and the quote state under test (quoteState).
// NOTE(review): quoteState is assigned twice below — first a freshly allocated
// PKQuoteState, then the tokenizer's own t.quoteState, which overwrites it.
// This looks like diff residue (old and new lines both present); confirm which
// assignment is current.
- (void)setUp {
quoteState = [[PKQuoteState alloc] init];
r = [[PKReader alloc] init];
// t comes from a convenience constructor and is not retained here.
t = [PKTokenizer tokenizer];
quoteState = t.quoteState;
}


// Releases the fixtures created in setUp.
// NOTE(review): if quoteState is the instance borrowed from t.quoteState (as in
// the last assignment in setUp) rather than one this test allocated, this
// release may be unbalanced — confirm ownership against the current setUp.
- (void)tearDown {
[quoteState release];
[r release];
}


// A simple single-quoted string: the token's string value is expected to equal
// the whole input, quotes included.
// NOTE(review): tok is declared twice (a PKReader-driven call, then a
// PKTokenizer-driven call) — looks like diff residue; confirm which pair is current.
- (void)testQuotedString {
s = @"'stuff'";
r.string = s;
PKToken *tok = [quoteState nextTokenFromReader:r startingWith:[r read] tokenizer:nil];
t.string = s;
PKToken *tok = [t nextToken];
TDEqualObjects(s, tok.stringValue);
}


// With quoteState left as configured by setUp, a backslash-escaped quote inside
// the string must not end the token: the whole input comes back as one token.
- (void)testQuotedStringEscaped {
    s = @"'it\\'s'";
    [t setString:s];

    PKToken *token = [t nextToken];
    TDEqualObjects(s, [token stringValue]);
}


// With CSV-style escaping switched on, the same backslash input is expected to
// split: the quoted string ends at the quote following the backslash, the
// remainder (s') is returned as a word token, and then EOF follows.
- (void)testQuotedStringEscaped2 {
    s = @"'it\\'s'";
    [t setString:s];
    [quoteState setUsesCSVStyleEscaping:YES];

    // First token: the quoted string up to and including the quote after the backslash.
    PKToken *token = [t nextToken];
    TDEqualObjects(@"'it\\'", [token stringValue]);

    // Second token: the leftover text, expected to be classified as a word.
    token = [t nextToken];
    TDEqualObjects(@"s'", [token stringValue]);
    TDTrue([token isWord]);

    // Nothing else remains.
    token = [t nextToken];
    TDEquals([PKToken EOFToken], token);
}


// Without CSV-style escaping enabled in this test, a doubled quote is expected
// to be read as the end of one quoted string followed by the start of another:
// two quoted tokens, then EOF.
- (void)testQuotedStringEscapedDouble {
    s = @"'it''s'";
    [t setString:s];

    PKToken *token = [t nextToken];
    TDEqualObjects(@"'it'", [token stringValue]);

    token = [t nextToken];
    TDEqualObjects(@"'s'", [token stringValue]);

    token = [t nextToken];
    TDEquals([PKToken EOFToken], token);
}


// With CSV-style escaping on, a doubled quote is expected to be treated as an
// escaped quote: the whole input is a single token, followed by EOF.
- (void)testQuotedStringEscapedDouble2 {
    s = @"'it''s'";
    [t setString:s];
    [quoteState setUsesCSVStyleEscaping:YES];

    PKToken *token = [t nextToken];
    TDEqualObjects(s, [token stringValue]);

    token = [t nextToken];
    TDEquals([PKToken EOFToken], token);
}


// CSV-style escaping with trailing text after a space: the quoted string
// (including its doubled quote) is expected as the first token, then "cool",
// then EOF.
- (void)testQuotedStringEscapedDouble3 {
    s = @"'it''s' cool";
    [t setString:s];
    [quoteState setUsesCSVStyleEscaping:YES];

    PKToken *token = [t nextToken];
    TDEqualObjects(@"'it''s'", [token stringValue]);

    token = [t nextToken];
    TDEqualObjects(@"cool", [token stringValue]);

    token = [t nextToken];
    TDEquals([PKToken EOFToken], token);
}


// CSV-style escaping with text immediately after the closing quote (no
// separator): the quoted string is still expected to end at its closing quote,
// followed by "cool" and then EOF.
- (void)testQuotedStringEscapedDouble4 {
    s = @"'it''s'cool";
    [t setString:s];
    [quoteState setUsesCSVStyleEscaping:YES];

    PKToken *token = [t nextToken];
    TDEqualObjects(@"'it''s'", [token stringValue]);

    token = [t nextToken];
    TDEqualObjects(@"cool", [token stringValue]);

    token = [t nextToken];
    TDEquals([PKToken EOFToken], token);
}


// An opening quote with no closing quote before end of input: the token's
// string value is expected to equal the unterminated input unchanged.
// NOTE(review): tok is declared twice (a PKReader-driven call, then a
// PKTokenizer-driven call) — looks like diff residue; confirm which pair is current.
- (void)testQuotedStringEOFTerminated {
s = @"'stuff";
r.string = s;
PKToken *tok = [quoteState nextTokenFromReader:r startingWith:[r read] tokenizer:nil];
t.string = s;
PKToken *tok = [t nextToken];
TDEqualObjects(s, tok.stringValue);
}


// With balancesEOFTerminatedQuotes enabled, an unterminated quoted string is
// expected to come back with the matching closing quote appended.
// NOTE(review): the input is assigned to both r and t, and tok is declared twice
// (PKReader-driven call, then PKTokenizer-driven call) — looks like diff residue;
// confirm which lines are current.
- (void)testQuotedStringRepairEOFTerminated {
s = @"'stuff";
r.string = s;
t.string = s;
quoteState.balancesEOFTerminatedQuotes = YES;
PKToken *tok = [quoteState nextTokenFromReader:r startingWith:[r read] tokenizer:nil];
PKToken *tok = [t nextToken];
TDEqualObjects(@"'stuff'", tok.stringValue);
}


// A quoted string followed by more text: the first token is expected to stop at
// the closing quote and exclude the trailing " more".
// NOTE(review): tok is declared twice (a PKReader-driven call, then a
// PKTokenizer-driven call) — looks like diff residue; confirm which pair is current.
- (void)testQuotedStringPlus {
s = @"'a quote here' more";
r.string = s;
PKToken *tok = [quoteState nextTokenFromReader:r startingWith:[r read] tokenizer:nil];
t.string = s;
PKToken *tok = [t nextToken];
TDEqualObjects(@"'a quote here'", tok.stringValue);
}


// A quoted string with 14 characters between the quotes: expects the full input
// back as one token flagged as a quoted string.
// NOTE(review): tok is declared twice (a PKReader-driven call, then a
// PKTokenizer-driven call) — looks like diff residue; confirm which pair is current.
- (void)test14CharQuotedString {
s = @"'123456789abcef'";
r.string = s;
PKToken *tok = [quoteState nextTokenFromReader:r startingWith:[r read] tokenizer:nil];
t.string = s;
PKToken *tok = [t nextToken];
TDEqualObjects(s, tok.stringValue);
TDTrue(tok.isQuotedString);
}


// A quoted string with 15 characters between the quotes: expects the full input
// back as one token flagged as a quoted string.
// NOTE(review): tok is declared twice (a PKReader-driven call, then a
// PKTokenizer-driven call) — looks like diff residue; confirm which pair is current.
- (void)test15CharQuotedString {
s = @"'123456789abcefg'";
r.string = s;
PKToken *tok = [quoteState nextTokenFromReader:r startingWith:[r read] tokenizer:nil];
t.string = s;
PKToken *tok = [t nextToken];
TDEqualObjects(s, tok.stringValue);
TDTrue(tok.isQuotedString);
}


// A quoted string with 16 characters between the quotes: expects the full input
// back as one token flagged as a quoted string.
// NOTE(review): tok is declared twice (a PKReader-driven call, then a
// PKTokenizer-driven call) — looks like diff residue; confirm which pair is current.
- (void)test16CharQuotedString {
s = @"'123456789abcefgh'";
r.string = s;
PKToken *tok = [quoteState nextTokenFromReader:r startingWith:[r read] tokenizer:nil];
t.string = s;
PKToken *tok = [t nextToken];
TDEqualObjects(s, tok.stringValue);
TDTrue(tok.isQuotedString);
}


// A quoted string with 31 characters between the quotes: expects the full input
// back as one token flagged as a quoted string.
// NOTE(review): tok is declared twice (a PKReader-driven call, then a
// PKTokenizer-driven call) — looks like diff residue; confirm which pair is current.
- (void)test31CharQuotedString {
s = @"'123456789abcefgh123456789abcefg'";
r.string = s;
PKToken *tok = [quoteState nextTokenFromReader:r startingWith:[r read] tokenizer:nil];
t.string = s;
PKToken *tok = [t nextToken];
TDEqualObjects(s, tok.stringValue);
TDTrue(tok.isQuotedString);
}


// A quoted string with 32 characters between the quotes: expects the full input
// back as one token flagged as a quoted string.
// NOTE(review): tok is declared twice (a PKReader-driven call, then a
// PKTokenizer-driven call) — looks like diff residue; confirm which pair is current.
- (void)test32CharQuotedString {
s = @"'123456789abcefgh123456789abcefgh'";
r.string = s;
PKToken *tok = [quoteState nextTokenFromReader:r startingWith:[r read] tokenizer:nil];
t.string = s;
PKToken *tok = [t nextToken];
TDEqualObjects(s, tok.stringValue);
TDTrue(tok.isQuotedString);
}
Expand Down

0 comments on commit 75054d6

Please sign in to comment.