Skip to content
This repository
Fetching contributors…

Octocat-spinner-32-eaf2f5

Cannot retrieve contributors at this time

file 295 lines (253 sloc) 11.414 kb
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294
//
// GBTokenizer.m
// appledoc
//
// Created by Tomaz Kragelj on 25.7.10.
// Copyright (C) 2010, Gentle Bytes. All rights reserved.
//

#import "RegexKitLite.h"
#import "PKToken+GBToken.h"
#import "GBSourceInfo.h"
#import "GBComment.h"
#import "GBTokenizer.h"

// Private interface of GBTokenizer: parsing state plus the helpers used for comment extraction.
@interface GBTokenizer ()

// Last path component of the source file name and the raw source string taken from the tokenizer.
@property (retain) NSString *filename;
@property (retain) NSString *input;

// All collected tokens and the index of the current token within them.
@property (retain) NSArray *tokens;
@property (assign) NSUInteger tokenIndex;

// Text and source info of the last comment found before the current token.
@property (retain) NSMutableString *lastCommentBuilder;
@property (retain) GBSourceInfo *lastCommentSourceInfo;

// Text and source info of the comment found just before the last one.
@property (retain) NSMutableString *previousCommentBuilder;
@property (retain) GBSourceInfo *previousCommentSourceInfo;

// Regular expressions used for extracting and cleaning up comment text.
@property (retain) NSString *singleLineCommentRegex;
@property (retain) NSString *multiLineCommentRegex;
@property (retain) NSString *commentDelimiterRegex;

// Private helpers.
- (NSArray *)allTokensFromTokenizer:(PKTokenizer *)tokenizer;
- (BOOL)consumeComments;
- (NSString *)commentValueFromString:(NSString *)value;

@end

#pragma mark -

@implementation GBTokenizer

#pragma mark Initialization & disposal

// Convenience factory: returns an autoreleased tokenizer initialized with the given source tokenizer
// and file name. See initWithSourceTokenizer:filename: for the parameter requirements.
+ (id)tokenizerWithSource:(PKTokenizer *)tokenizer filename:(NSString *)filename {
    id instance = [[self alloc] initWithSourceTokenizer:tokenizer filename:filename];
    return [instance autorelease];
}

// Initializes the tokenizer by reading every token from the given source tokenizer up front.
//
// tokenizer: the source tokenizer; must not be nil.
// filename: name of the file being tokenized; must not be nil or empty. Only its last path component is kept.
//
// After the tokens are collected, any comments at the very start are consumed, so the current token is
// positioned on the first non-comment token.
- (id)initWithSourceTokenizer:(PKTokenizer *)tokenizer filename:(NSString *)filename {
    NSParameterAssert(tokenizer != nil);
    NSParameterAssert(filename != nil);
    NSParameterAssert([filename length] > 0);
    GBLogDebug(@"Initializing tokenizer using %@...", tokenizer);

    self = [super init];
    if (!self) return nil;

    // Patterns used when extracting and cleaning up comment text.
    self.singleLineCommentRegex = @"(?m-s:\\s*///(.*)$)";
    self.multiLineCommentRegex = @"(?s:/\\*\\*(.*)\\*/)";
    self.commentDelimiterRegex = @"^[!@#$%^&*()_=+`~,<.>/?;:'\"-]{3,}";

    // Comment state starts out empty.
    self.lastCommentBuilder = [NSMutableString string];
    self.previousCommentBuilder = [NSMutableString string];

    // Source description and the complete token stream.
    self.filename = [filename lastPathComponent];
    self.input = tokenizer.string;
    self.tokens = [self allTokensFromTokenizer:tokenizer];
    self.tokenIndex = 0;

    // Must run last: it relies on the tokens, regexes and builders set up above.
    [self consumeComments];
    return self;
}

#pragma mark Tokenizing handling

// Returns the non-comment token that lies `offset` non-comment tokens ahead of the current position
// without changing the current position. An offset of 0 yields the first non-comment token at or after
// the current index. Comment tokens are skipped and do not count towards the offset. If the end of the
// token list is reached first, the EOF token is returned.
- (PKToken *)lookahead:(NSUInteger)offset {
    NSUInteger total = [self.tokens count];
    NSUInteger remaining = offset;
    for (NSUInteger index = self.tokenIndex; index < total; index++) {
        PKToken *candidate = [self.tokens objectAtIndex:index];
        if ([candidate isComment]) continue;
        if (remaining == 0) return candidate;
        remaining--;
    }
    return [PKToken EOFToken];
}

// Returns the token at the current index, or the EOF token once all tokens have been consumed.
- (PKToken *)currentToken {
    return [self eof] ? [PKToken EOFToken] : [self.tokens objectAtIndex:self.tokenIndex];
}

// Advances past up to `count` tokens, stopping early at the end of the token list. After each step any
// comment tokens at the new position are consumed as well, so the current token is never a comment.
- (void)consume:(NSUInteger)count {
    for (NSUInteger remaining = count; remaining > 0; remaining--) {
        if ([self eof]) break;
        self.tokenIndex++;
        [self consumeComments];
    }
}

// Reports tokens to the block until the `end` token or EOF is found. This is the variant of
// consumeFrom:to:usingBlock: without a starting token, so no nesting levels are tracked.
- (void)consumeTo:(NSString *)end usingBlock:(void (^)(PKToken *token, BOOL *consume, BOOL *stop))block {
    NSString *noStartToken = nil;
    [self consumeFrom:noStartToken to:end usingBlock:block];
}

// Reports every token between `start` and `end` to the given block.
//
// start: optional opening token. When given, the current token must match it, otherwise the method
//        returns without doing anything. The opening token itself is skipped and not reported.
// end:   closing token. It is not reported, and it is consumed if found at the end.
// block: invoked for each token in between. Setting *consume to NO leaves the token in place (the block
//        is then responsible for advancing); setting *stop to YES ends the enumeration.
//
// When `start` is given, nested start/end pairs are tracked and enumeration ends at the `end` token
// closing the outermost level. Without `start`, the first `end` token ends enumeration.
- (void)consumeFrom:(NSString *)start to:(NSString *)end usingBlock:(void (^)(PKToken *token, BOOL *consume, BOOL *stop))block {
    BOOL tracksNesting = (start != nil);

    // Skip the opening token, bailing out if we are not positioned on it.
    if (tracksNesting) {
        if (![[self currentToken] matches:start]) return;
        [self consume:1];
    }

    NSUInteger depth = 1;
    while (![self eof]) {
        PKToken *token = [self currentToken];

        // Keep track of nested start/end pairs.
        if (tracksNesting && [token matches:start]) depth++;
        if ([token matches:end]) {
            if (!tracksNesting) break;
            depth--;
            if (depth == 0) break;
        }

        // Hand the token over to the caller and act on its decision.
        BOOL shouldConsume = YES;
        BOOL shouldStop = NO;
        block(token, &shouldConsume, &shouldStop);
        if (shouldConsume) [self consume:1];
        if (shouldStop) break;
    }

    // Step over the closing token if that is where we ended up.
    if ([[self currentToken] matches:end]) [self consume:1];
}

// Returns YES when the current index has moved past the last collected token.
- (BOOL)eof {
    NSUInteger total = [self.tokens count];
    return (self.tokenIndex >= total);
}

#pragma mark Token information handling

// Returns source info (file name and line number) for the current token.
- (GBSourceInfo *)sourceInfoForCurrentToken {
    PKToken *token = [self currentToken];
    return [self sourceInfoForToken:token];
}

// Returns source info for the given token: the tokenizer's file name plus a line number computed from
// the portion of the input string that precedes the token's offset.
//
// token: the token to describe; must not be nil.
//
// NOTE(review): numberOfLinesInRange: is declared outside this file - confirm whether the resulting line
// number is zero or one based.
- (GBSourceInfo *)sourceInfoForToken:(PKToken *)token {
    NSParameterAssert(token != nil);
    NSRange precedingRange = NSMakeRange(0, [token offset]);
    NSUInteger lineNumber = [self.input numberOfLinesInRange:precedingRange];
    return [GBSourceInfo infoWithFilename:self.filename lineNumber:lineNumber];
}

#pragma mark Comments handling

// Consumes all consecutive comment tokens starting at the current token and captures their text.
//
// The last/previous comment builders and source infos are always reset first, even when the current
// token is not a comment. If the current token is a comment, all comment tokens up to the first
// non-comment token (or EOF) are consumed, leaving the current index on that non-comment token. The
// text of the last comment encountered ends up in lastCommentBuilder and the one before it in
// previousCommentBuilder; consecutive single line comments are grouped into one comment. This is also
// where initial comment handling such as removing the starting and ending chars happens.
//
// Returns YES if at least one comment token was consumed, NO if the current token is not a comment or
// EOF was already reached.
- (BOOL)consumeComments {
    [self.previousCommentBuilder setString:@""];
    [self.lastCommentBuilder setString:@""];
    self.previousCommentSourceInfo = nil;
    self.lastCommentSourceInfo = nil;
    if ([self eof]) return NO;
    if (![[self currentToken] isComment]) return NO;

    PKToken *startingPreviousToken = nil;
    PKToken *startingLastToken = nil;

    // NSNotFound means "no single line comment seen yet". Using 0 here would wrongly treat a first
    // comment at offset 1 as a continuation, giving it a stray leading newline and no source info.
    NSUInteger previousSingleLineEndOffset = NSNotFound;

    while (![self eof] && [[self currentToken] isComment]) {
        PKToken *token = [self currentToken];
        NSString *value = nil;

        // Match single line comments. Note that we can simplify the code with the assumption that there's only one single line comment per match. If the regex finds more (should never happen though), we simply combine them together. Then we check if the comment is a continuation of the previous single liner by testing the string offset. If so we group the values together, otherwise we start a new single line comment. Finally we remember the current comment end offset to allow grouping of the next single line comment. CAUTION: this algorithm won't group comments unless they start at the beginning of the line!
        NSArray *singleLiners = [[token stringValue] componentsMatchedByRegex:self.singleLineCommentRegex capture:1];
        if ([singleLiners count] > 0) {
            value = [singleLiners componentsJoinedByString:@""];
            BOOL isContinuingPreviousSingleLiner = (previousSingleLineEndOffset != NSNotFound && [token offset] == previousSingleLineEndOffset + 1);
            if (isContinuingPreviousSingleLiner) {
                [self.lastCommentBuilder appendString:@"\n"];
            } else {
                [self.previousCommentBuilder setString:self.lastCommentBuilder];
                startingPreviousToken = startingLastToken;
                [self.lastCommentBuilder setString:@""];
                startingLastToken = token;
            }
            previousSingleLineEndOffset = [token offset] + [[token stringValue] length];
        }

        // Match multiple line comments and only process the last match (in reality we should only have one comment in each multiline comment token, but let's handle any strange cases gracefully).
        else {
            NSArray *multiLiners = [[token stringValue] componentsMatchedByRegex:self.multiLineCommentRegex capture:1];
            value = [multiLiners lastObject];
            [self.previousCommentBuilder setString:self.lastCommentBuilder];
            startingPreviousToken = startingLastToken;
            [self.lastCommentBuilder setString:@""];
            startingLastToken = token;
        }

        // Append the string value to the current comment and proceed with the next token. The value is nil when neither regex matched the token; appending nil would raise an exception, so such a token simply contributes no text.
        value = [value stringByTrimmingCharactersInSet:[NSCharacterSet whitespaceCharacterSet]];
        if (value) [self.lastCommentBuilder appendString:value];
        self.tokenIndex++;
    }

    if (startingPreviousToken) self.previousCommentSourceInfo = [self sourceInfoForToken:startingPreviousToken];
    if (startingLastToken) self.lastCommentSourceInfo = [self sourceInfoForToken:startingLastToken];
    return YES;
}

// Converts raw comment text, as collected by consumeComments, into the final comment string.
//
// Processing happens in three passes over the individual lines:
//  1. Leading delimiters (3 or more delimiter chars in any combination) are removed; a line that
//     consists of nothing but a delimiter is discarded.
//  2. It is determined whether every line after the first starts with a `*` prefix (an empty last line is
//     tolerated).
//  3. If so, that prefix is removed from all lines. In any case leading and trailing spaces are trimmed
//     and the lines are joined with newlines.
//
// Returns nil if `value` is empty or if nothing remains after processing.
- (NSString *)commentValueFromString:(NSString *)value {
    if ([value length] == 0) return nil;

    NSCharacterSet *whitespace = [NSCharacterSet whitespaceCharacterSet];
    NSArray *rawLines = [value componentsSeparatedByCharactersInSet:[NSCharacterSet newlineCharacterSet]];
    NSMutableArray *keptLines = [NSMutableArray arrayWithCapacity:[rawLines count]];

    // Pass 1: drop delimiters. Lines without a delimiter are kept untouched (including their whitespace).
    for (NSString *rawLine in rawLines) {
        NSString *trimmed = [rawLine stringByTrimmingCharactersInSet:whitespace];
        NSString *withoutDelimiter = [trimmed stringByReplacingOccurrencesOfRegex:self.commentDelimiterRegex withString:@""];
        BOOL hadDelimiter = ([trimmed length] > [withoutDelimiter length]);
        if (!hadDelimiter) {
            [keptLines addObject:rawLine];
        } else if ([withoutDelimiter length] > 0) {
            [keptLines addObject:withoutDelimiter];
        }
    }

    // Pass 2: decide whether a common `*` prefix should be stripped. The first line is not checked as it
    // can only contain /** and text; the last line is not checked if it is empty.
    NSString *prefixRegex = @"(?m:^\\s*\\*[ ]*)";
    NSUInteger keptCount = [keptLines count];
    BOOL stripPrefix = (keptCount > 1);
    for (NSUInteger idx = 0; stripPrefix && idx < keptCount; idx++) {
        NSString *trimmed = [[keptLines objectAtIndex:idx] stringByTrimmingCharactersInSet:whitespace];
        if (idx == keptCount - 1 && [trimmed length] == 0) continue;
        if (idx > 0 && ![trimmed isMatchedByRegex:prefixRegex]) stripPrefix = NO;
    }

    // Pass 3: remove the prefix if needed, trim spaces and join everything with newlines.
    NSCharacterSet *spaces = [NSCharacterSet characterSetWithCharactersInString:@" "];
    NSMutableString *result = [NSMutableString stringWithCapacity:[value length]];
    for (NSUInteger idx = 0; idx < keptCount; idx++) {
        NSString *line = [keptLines objectAtIndex:idx];
        if (stripPrefix) line = [line stringByReplacingOccurrencesOfRegex:prefixRegex withString:@""];
        line = [line stringByTrimmingCharactersInSet:spaces];
        if (idx > 0) [result appendString:@"\n"];
        [result appendString:line];
    }

    // An empty result means there is no comment at all.
    return ([result length] > 0) ? result : nil;
}

// Returns the comment found immediately before the current token, built from the collected text and
// its source info, or nil if no comment text was collected.
- (GBComment *)lastComment {
    NSMutableString *collected = self.lastCommentBuilder;
    if ([collected length] == 0) return nil;
    NSString *text = [self commentValueFromString:collected];
    GBSourceInfo *info = self.lastCommentSourceInfo;
    return [GBComment commentWithStringValue:text sourceInfo:info];
}

// Returns the comment found just before the last comment, built from the collected text and its
// source info, or nil if no such comment text was collected.
- (GBComment *)previousComment {
    NSMutableString *collected = self.previousCommentBuilder;
    if ([collected length] == 0) return nil;
    NSString *text = [self commentValueFromString:collected];
    GBSourceInfo *info = self.previousCommentSourceInfo;
    return [GBComment commentWithStringValue:text sourceInfo:info];
}

#pragma mark Helper methods

// Reads all tokens from the given tokenizer up to EOF and returns them as an array. Appledoc comments
// are included in the result, ordinary comments are left out. Comment reporting is enabled on the
// tokenizer while reading and the original setting is restored afterwards.
- (NSArray *)allTokensFromTokenizer:(PKTokenizer *)tokenizer {
    // Remember the current setting so the tokenizer is handed back in its original configuration.
    BOOL originalSetting = tokenizer.commentState.reportsCommentTokens;
    tokenizer.commentState.reportsCommentTokens = YES;

    NSMutableArray *collected = [NSMutableArray array];
    for (PKToken *token = [tokenizer nextToken]; token != [PKToken EOFToken]; token = [tokenizer nextToken]) {
        BOOL isOrdinaryComment = [token isComment] && ![token isAppledocComment];
        if (!isOrdinaryComment) [collected addObject:token];
    }

    tokenizer.commentState.reportsCommentTokens = originalSetting;
    return collected;
}

#pragma mark Properties

// Source description and token stream state.
@synthesize filename;
@synthesize input;
@synthesize tokens;
@synthesize tokenIndex;
// Comment state. Note that lastComment and previousComment also have hand-written getters in this
// file which build the comment from the corresponding builder and source info.
// NOTE(review): these two properties are presumably declared in the public header - confirm.
@synthesize lastComment;
@synthesize lastCommentBuilder;
@synthesize lastCommentSourceInfo;
@synthesize previousComment;
@synthesize previousCommentBuilder;
@synthesize previousCommentSourceInfo;
// Regular expressions used for comment extraction; assigned in initWithSourceTokenizer:filename:.
@synthesize singleLineCommentRegex;
@synthesize multiLineCommentRegex;
@synthesize commentDelimiterRegex;

@end
Something went wrong with that request. Please try again.