Permalink
Browse files

Implemented basic comments parsing and tests.

  • Loading branch information...
1 parent 1d947fb commit 47e2c012cbea5c705d320c3b45dbc6903b2da97f @tomaz committed Jul 31, 2010
@@ -9,6 +9,7 @@
#ifdef __OBJC__
#import <Foundation/Foundation.h>
#import "NSObject+GBObject.h"
+ #import "NSString+GBString.h"
#import "NSException+GBException.h"
#import "NSFileManager+GBFileManager.h"
#import "GBLog.h"
@@ -0,0 +1,24 @@
+//
+// NSString+GBString.h
+// appledoc
+//
+// Created by Tomaz Kragelj on 31.7.10.
+// Copyright (C) 2010, Gentle Bytes. All rights reserved.
+//
+
+#import <Foundation/Foundation.h>
+
+/** Provides string extensions that make the rest of parsing code simpler. */
+@interface NSString (GBString)
+
+/** Trims all characters from the given set from the string end.
+
+ Works the same way as `[NSString stringByTrimmingCharactersInSet:]` except it trims from end only; characters at the
+ start of the string are never removed, even if they are members of the given set.
+
+ @param set The set of characters to trim.
+ @return Returns trimmed string.
+ @exception NSException Thrown (through a parameter assertion) if the given set is `nil`.
+ */
+- (NSString *)stringByTrimmingCharactersInSetFromEnd:(NSCharacterSet *)set;
+
+@end
@@ -0,0 +1,46 @@
+//
+// NSString+GBString.m
+// appledoc
+//
+// Created by Tomaz Kragelj on 31.7.10.
+// Copyright (C) 2010, Gentle Bytes. All rights reserved.
+//
+
+#import "NSString+GBString.h"
+
+@interface NSString (GBPrivateAPI)
+
+/** Returns the last character of the string.
+
+ The receiver must contain at least one character; no length check is made before indexing.
+
+ @return Returns the last character of the string.
+ @exception NSRangeException Thrown if the string is empty.
+ */
+- (unichar)lastCharacter;
+
+@end
+
+#pragma mark -
+
+@implementation NSString (GBString)
+
+// Returns a copy of the receiver with every trailing character that is a member of `set` removed. Leading and
+// inner characters are never touched. Raises (via NSParameterAssert) if `set` is nil.
+- (NSString *)stringByTrimmingCharactersInSetFromEnd:(NSCharacterSet *)set {
+    NSParameterAssert(set != nil);
+
+    // Find the length of the prefix to keep by walking backwards over trailing members of the set. Doing this on
+    // the receiver itself avoids the owned mutable copy the previous implementation created (which was never
+    // released and was handed to callers as a mutable instance) and the repeated single-character deletions.
+    NSUInteger keepLength = [self length];
+    while (keepLength > 0 && [set characterIsMember:[self characterAtIndex:keepLength - 1]]) {
+        keepLength--;
+    }
+
+    // keepLength is always within [0, length], so this cannot raise; 0 yields an empty string.
+    return [self substringToIndex:keepLength];
+}
+
+@end
+
+#pragma mark -
+
+@implementation NSString (GBPrivateAPI)
+
+// Returns the final UTF-16 unit of the receiver. There is intentionally no emptiness check: for an empty string
+// the computed index wraps around and `characterAtIndex:` raises NSRangeException, as documented in the interface.
+- (unichar)lastCharacter {
+    NSUInteger lastIndex = [self length] - 1;
+    return [self characterAtIndex:lastIndex];
+}
+
+@end
+
Binary file not shown.
@@ -13,7 +13,10 @@
/** Implements Objective-C source code parser.
- The main responsibility of this class is encapsulation of Objective-C source code parsing into in-memory representation.
+ The main responsibility of this class is encapsulation of Objective-C source code parsing into in-memory representation. As we're only
+ parsing a small subset of Objective-C and even then we don't need to handle much specifics beyond recognizing different classes, variables,
+ methods etc., overall the parsing process is quite simple. Basically we use ParseKit's `PKTokenizer` to split given input string into tokens
+ and then traverse the list of tokens to get the data we need.
*/
@interface GBObjectiveCParser : NSObject
View
@@ -23,7 +23,16 @@
[tokenizer consume:1];
}
- This example simply iterates over all tokens and prints each one to the log.
+ This example simply iterates over all tokens and prints each one to the log. If you want to parse a block of input with known start
+ and/or end token, you can use one of the block consuming methods instead.
+
+ To make comments parsing simpler, `GBTokenizer` automatically enables comment reporting to the underlying `PKTokenizer`, however to
+ prevent higher level parsers dealing with complexity of comments, any lookahead and consume method doesn't report them. Instead these
+ methods skip all comment tokens, however they do make them accessible through properties, so if the client wants to check whether
+ there's any comment associated with current token, it can simply ask by sending `lastCommentString`. This value is automatically
+ cleared when another non-comment token is consumed, so make sure to read it before consuming any further token! `GBTokenizer` goes
+ even further when dealing with comments - it automatically groups single line comments into a single comment group and removes all
+ prefixes and suffixes.
*/
@interface GBTokenizer : NSObject
@@ -54,36 +63,46 @@
///---------------------------------------------------------------------------------------
/** Returns the current token.
+
+ @see consume
+ @see lookahead
*/
- (PKToken *)currentToken;
/** Returns the token by looking ahead the given number of tokens from current position.
- If offset "points" within a valid token, the token is returned, otherwise EOF token is returned.
+ If offset "points" within a valid token, the token is returned, otherwise EOF token is returned. Note that this method automatically
+ skips any comment tokens and only counts actual language tokens.
@param offset The offset from the current position.
@return Returns the token at the given offset or EOF token if offset points after EOF.
+ @see consume
*/
- (PKToken *)lookahead:(NSUInteger)offset;
/** Consumes the given amount of tokens, starting at the current position.
This effectively "moves" `currentToken` to the new position. If EOF is reached before consuming the given amount of tokens,
- consuming stops at the end of stream and `currentToken` returns EOF token.
+ consuming stops at the end of stream and `currentToken` returns EOF token. If comment tokens are detected while consuming, they
+ are not counted and consuming count continues with actual language tokens. However if there is a comment just before the next
+ current token (i.e. after the last consumed token), the comment data is saved and is available through `lastCommentString`.
+ Otherwise last comment data is cleared, even if a comment was detected in between.
@param count The number of tokens to consume.
+ @see lastCommentString
*/
- (void)consume:(NSUInteger)count;
/** Enumerates and consumes all tokens starting at current token up until the given end token is detected.
For each token, the given block is called which gives client a chance to inspect and handle tokens. End token is not reported and
- is automatically consumed after all previous tokens are reported. Sending this message is equivalent to sending
- `consumeFrom:to:usingBlock:` and passing `nil` for start token.
+ is automatically consumed after all previous tokens are reported. Sending this message is equivalent to sending `consumeFrom:to:usingBlock:`
+ and passing `nil` for start token. Also read `consume:` documentation to understand how comments are dealt with.
@param end Ending token.
@param block The block to be called for each token.
@exception NSException Thrown if the given end token is `nil`.
+ @see lastCommentString
*/
- (void)consumeTo:(NSString *)end usingBlock:(void (^)(PKToken *token, BOOL *consume, BOOL *stop))block;
@@ -92,12 +111,13 @@
For each token, the given block is called which gives client a chance to inspect and handle tokens. If start token is given
and current token matches it, the token is consumed without reporting it to block. However if the token doesn't match, the
method returns immediately without doing anything. End token is also not reported and is also automatically consumed after
- all previous tokens are reported.
+ all previous tokens are reported. Also read `consume:` documentation to understand how comments are dealt with.
@param start Optional starting token or `nil`.
@param end Ending token.
@param block The block to be called for each token.
@exception NSException Thrown if the given end token is `nil`.
+ @see lastCommentString
*/
- (void)consumeFrom:(NSString *)start to:(NSString *)end usingBlock:(void (^)(PKToken *token, BOOL *consume, BOOL *stop))block;
@@ -107,4 +127,20 @@
*/
- (BOOL)eof;
+///---------------------------------------------------------------------------------------
+/// @name Comments handling
+///---------------------------------------------------------------------------------------
+
+/** Returns the last comment string or `nil` if comment is not available.
+
+ This returns the whole last comment string, without prefixes or suffixes. To optimize things a bit, the actual comment string value
+ is prepared on the fly, as you send the message, so it's only handled if needed. However you should cache returned value if possible
+ to avoid any overhead.
+
+ If there's no comment available for current token, `nil` is returned.
+
+ @return Returns comment string or `nil` if no comment is available.
+ */
+@property (readonly) NSString *lastCommentString;
+
@end
View
@@ -11,9 +11,11 @@
@interface GBTokenizer ()
+- (BOOL)consumeComments;
- (NSArray *)allTokensFromTokenizer:(PKTokenizer *)tokenizer;
@property (retain) NSArray *tokens;
@property (assign) NSUInteger tokenIndex;
+@property (retain) NSMutableString *lastComment;
@end
@@ -32,25 +34,45 @@ - (id)initWithSourceTokenizer:(PKTokenizer *)tokenizer {
GBLogDebug(@"Initializing tokenizer using %@...", tokenizer);
self = [super init];
if (self) {
- self.tokens = [self allTokensFromTokenizer:tokenizer];
self.tokenIndex = 0;
+ self.lastComment = [NSMutableString string];
+ self.tokens = [self allTokensFromTokenizer:tokenizer];
+ [self consumeComments];
}
return self;
}
#pragma mark Tokenizing handling
- (PKToken *)lookahead:(NSUInteger)offset {
- if (self.tokenIndex + offset >= [self.tokens count]) return [PKToken EOFToken];
- return [self.tokens objectAtIndex:self.tokenIndex + offset];
+ NSUInteger delta = 0;
+ NSUInteger counter = 0;
+ while (counter <= offset) {
+ NSUInteger index = self.tokenIndex + delta;
+ if (index >= [self.tokens count]) return [PKToken EOFToken];
+ if ([[self.tokens objectAtIndex:index] isComment]) {
+ delta++;
+ continue;
+ }
+ delta++;
+ counter++;
+ }
+ return [self.tokens objectAtIndex:self.tokenIndex + delta - 1];
}
- (PKToken *)currentToken {
- return [self lookahead:0];
+ if ([self eof]) return [PKToken EOFToken];
+ return [self.tokens objectAtIndex:self.tokenIndex];
}
- (void)consume:(NSUInteger)count {
- self.tokenIndex += count;
+ if (count == 0) return;
+ while (count > 0 && ![self eof]) {
+ self.tokenIndex++;
+ if (![self consumeComments]) {
+ }
+ count--;
+ }
}
- (void)consumeTo:(NSString *)end usingBlock:(void (^)(PKToken *token, BOOL *consume, BOOL *stop))block {
@@ -81,20 +103,78 @@ - (BOOL)eof {
return (self.tokenIndex >= [self.tokens count]);
}
+#pragma mark Comments handling
+
+- (BOOL)consumeComments {
+    // Skips over all comment tokens starting at the current token, so that the current index ends up on the first
+    // non-comment token (or at EOF), and collects the text of the skipped comments into lastComment. Returns NO,
+    // without moving the index, if the current token is not a comment or EOF is already reached; returns YES if
+    // at least one comment token was consumed. Any previously collected comment text is always discarded first.
+    // This is also where comment delimiters are stripped and consecutive single line comments are grouped.
+    [self.lastComment setString:@""];
+    if ([self eof]) return NO;
+    if (![[self currentToken] isComment]) return NO;
+    NSUInteger previousSingleLineEndOffset = 0;
+    while (![self eof] && [[self currentToken] isComment]) {
+        PKToken *token = [self currentToken];
+        NSString *value = [[token stringValue] stringByTrimmingCharactersInSet:[NSCharacterSet whitespaceCharacterSet]];
+
+        // A `///` comment continues the previous one only if it starts exactly one character (presumably the line
+        // break) after the end of the previous single line comment. Anything else starts a new comment, which
+        // replaces whatever was collected so far.
+        BOOL isSingleLiner = [value hasPrefix:@"///"];
+        BOOL isContinuingPreviousSingleLiner = (isSingleLiner && [token offset] == previousSingleLineEndOffset + 1);
+        if (!isContinuingPreviousSingleLiner) [self.lastComment setString:@""];
+
+        // Strip comment delimiters. The two comment kinds are handled exclusively so that delimiter-like text inside
+        // the comment body (for example `/// /** sample */`) is preserved instead of being stripped a second time,
+        // and `*/` is only treated as a suffix for block comments.
+        if (isSingleLiner) {
+            value = [value substringFromIndex:([value hasPrefix:@"/// "] ? 4 : 3)];
+        } else if ([value hasPrefix:@"/**"]) {
+            NSUInteger bodyStart = [value hasPrefix:@"/** "] ? 4 : 3;
+            NSUInteger bodyEnd = [value length];
+            if ([value hasSuffix:@"*/"]) bodyEnd -= 2;
+            // Prefix and suffix overlap for degenerate comments such as `/**/`; treat those as empty.
+            value = (bodyEnd > bodyStart) ? [value substringWithRange:NSMakeRange(bodyStart, bodyEnd - bodyStart)] : @"";
+        }
+        value = [value stringByTrimmingCharactersInSetFromEnd:[NSCharacterSet whitespaceCharacterSet]];
+
+        // Append comment string and new line if we're continuing previous single line comment.
+        if (isContinuingPreviousSingleLiner) [self.lastComment appendString:@"\n"];
+        [self.lastComment appendString:value];
+
+        // Remember where this single line comment ends (using the untrimmed token text) so the next iteration can
+        // detect a directly following single line comment.
+        if (isSingleLiner) previousSingleLineEndOffset = [token offset] + [[token stringValue] length];
+
+        // Proceed with next token.
+        self.tokenIndex++;
+    }
+    return YES;
+}
+
+- (NSString *)lastCommentString {
+    // Builds the public comment string on demand from the collected comment text: the text is split on newline
+    // characters and the pieces are joined back with "\n". A separator is only emitted once some text has already
+    // been written, so leading empty lines do not produce leading line breaks. Returns nil when nothing was collected.
+    NSString *collected = self.lastComment;
+    if ([collected length] == 0) return nil;
+
+    NSCharacterSet *newlines = [NSCharacterSet newlineCharacterSet];
+    NSMutableString *joined = [NSMutableString stringWithCapacity:[collected length]];
+    for (NSString *piece in [collected componentsSeparatedByCharactersInSet:newlines]) {
+        if ([joined length] != 0) {
+            [joined appendString:@"\n"];
+        }
+        [joined appendString:piece];
+    }
+    return joined;
+}
+
#pragma mark Helper methods
- (NSArray *)allTokensFromTokenizer:(PKTokenizer *)tokenizer {
- PKToken *token;
+ // Return all appledoc comments too, but ignore ordinary C comments!
+ BOOL reportsComments = tokenizer.commentState.reportsCommentTokens;
+ tokenizer.commentState.reportsCommentTokens = YES;
NSMutableArray *result = [NSMutableArray array];
+ PKToken *token;
while ((token = [tokenizer nextToken]) != [PKToken EOFToken]) {
+ if ([token isComment] && ![token isAppledocComment]) continue;
[result addObject:token];
}
+ tokenizer.commentState.reportsCommentTokens = reportsComments;
return result;
}
#pragma mark Properties
@synthesize tokens;
@synthesize tokenIndex;
+@synthesize lastComment;
@end
@@ -26,4 +26,13 @@
*/
- (BOOL)contains:(NSString *)string;
+/** Determines whether this token is an appledoc comment.
+
+ The method returns `YES` if the token is a comment and it has special appledoc comment prefix which for single line comments is composed
+ of three slashes and for multiple line comments from a single slash and two stars.
+
+ @return Returns `YES` if the token represents appledoc comment, `NO` otherwise.
+ */
+- (BOOL)isAppledocComment;
+
@end
@@ -18,4 +18,12 @@ - (BOOL)contains:(NSString *)string {
return ([[self stringValue] rangeOfString:string].location != NSNotFound);
}
+- (BOOL)isAppledocComment {
+    // Only comment tokens qualify; among those, appledoc comments are recognized by their opening delimiter:
+    // three slashes for single line comments, slash and two stars for multiple line comments.
+    if (![self isComment]) return NO;
+    NSString *text = [self stringValue];
+    return ([text hasPrefix:@"///"] || [text hasPrefix:@"/**"]);
+}
+
@end
Oops, something went wrong.

0 comments on commit 47e2c01

Please sign in to comment.