Skip to content

Commit

Permalink
Updated DTHTMLParser.
Browse files Browse the repository at this point in the history
This fixes a crash when encountering a processing instruction in HTML.
  • Loading branch information
odrobnik committed Aug 26, 2012
1 parent 8415d2f commit f6d5599
Show file tree
Hide file tree
Showing 3 changed files with 57 additions and 16 deletions.
14 changes: 12 additions & 2 deletions Core/Source/DTHTMLParser.h
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,11 @@
// Copyright (c) 2012 Drobnik.com. All rights reserved.
//

#import <Foundation/Foundation.h>

@class DTHTMLParser;
/** The DTHTMLParserDelegate protocol defines the optional methods implemented by delegates of DTHTMLParser objects.
Dependencies: libxml2.dylib
*/
@protocol DTHTMLParserDelegate <NSObject>

Expand Down Expand Up @@ -74,6 +75,15 @@
*/
- (void)parser:(DTHTMLParser *)parser foundCDATA:(NSData *)CDATABlock;

/**
Sent by a parser object to its delegate when it encounters a processing instruction.
@param parser A DTHTMLParser object parsing HTML.
@param target A string representing the target of a processing instruction.
@param data A string representing the data for a processing instruction.
*/
- (void)parser:(DTHTMLParser *)parser foundProcessingInstructionWithTarget:(NSString *)target data:(NSString *)data;

/**
Sent by a parser object to its delegate when it encounters a fatal error.
Expand Down Expand Up @@ -129,7 +139,7 @@
/**
Sets the receiver’s delegate.
@param delegate An object that is the new delegate. It is not retained. The delegate must conform to the `DTHTMLParserDelegate` Protocol protocol.
@param delegate An object that is the new delegate. It is not retained. The delegate must conform to the `DTHTMLParserDelegate` protocol.
@see delegate
*/
Expand Down
39 changes: 28 additions & 11 deletions Core/Source/DTHTMLParser.m
Original file line number Diff line number Diff line change
Expand Up @@ -3,19 +3,17 @@
// DTFoundation
//
// Created by Oliver Drobnik on 1/18/12.
// Copyright (c) 2012 Drobnik.com. All rights reserved.
// Copyright (c) 2012 Cocoanetics. All rights reserved.
//

#import "DTHTMLParser.h"
#import <libxml/HTMLparser.h>

#if !__has_feature(objc_arc)
#error THIS CODE MUST BE COMPILED WITH ARC ENABLED!
#endif

@interface DTHTMLParser()

@property (nonatomic, strong) NSError *parserError;
@property (nonatomic, assign) NSStringEncoding encoding;

@end

Expand All @@ -29,10 +27,8 @@ @interface DTHTMLParser()
void _characters(void *context, const xmlChar *ch, int len);
void _comment(void *context, const xmlChar *value);
void _dterror(void *context, const char *msg, ...);

void _cdataBlock(void *context, const xmlChar *value, int len);

void _ignorableWhitespace (void *context, const xmlChar *ch, int len);
void _processingInstruction (void *context, const xmlChar *target, const xmlChar *data);

#pragma mark Event functions
void _startDocument(void *context)
Expand Down Expand Up @@ -117,7 +113,7 @@ void _characters(void *context, const xmlChar *chars, int len)
{
DTHTMLParser *myself = (__bridge DTHTMLParser *)context;

NSString *string = [[NSString alloc] initWithBytes:chars length:len encoding:NSUTF8StringEncoding];
NSString *string = [[NSString alloc] initWithBytes:chars length:len encoding:myself.encoding];

[myself.delegate parser:myself foundCharacters:string];
}
Expand All @@ -126,7 +122,7 @@ void _comment(void *context, const xmlChar *chars)
{
DTHTMLParser *myself = (__bridge DTHTMLParser *)context;

NSString *string = [NSString stringWithUTF8String:(char *)chars];
NSString *string = [NSString stringWithCString:(const char *)chars encoding:myself.encoding];

[myself.delegate parser:myself foundComment:string];
}
Expand Down Expand Up @@ -159,11 +155,22 @@ void _cdataBlock(void *context, const xmlChar *value, int len)
[myself.delegate parser:myself foundCDATA:data];
}

/**
 SAX callback invoked when the parser encounters a processing instruction.
 Converts the C strings to NSString objects and forwards them to the parser's delegate.

 @param context The DTHTMLParser instance, bridged from the SAX user-data pointer without an ownership change.
 @param target The C string naming the target of the processing instruction.
 @param data The C string holding the instruction's data; NULL when the instruction carries no data.
 */
void _processingInstruction (void *context, const xmlChar *target, const xmlChar *data)
{
	DTHTMLParser *myself = (__bridge DTHTMLParser *)context;
	
	// NOTE(review): libxml2 normally hands SAX callbacks UTF-8 encoded strings; confirm that
	// myself.encoding is the right encoding for decoding them.
	NSStringEncoding encoding = myself.encoding;
	
	// +stringWithCString:encoding: raises on a NULL C string, so guard both pointers.
	// A processing instruction without content (e.g. "<?target?>") is reported with NULL data.
	NSString *targetStr = nil;
	NSString *dataStr = nil;
	
	if (target != NULL)
	{
		targetStr = [NSString stringWithCString:(const char *)target encoding:encoding];
	}
	
	if (data != NULL)
	{
		dataStr = [NSString stringWithCString:(const char *)data encoding:encoding];
	}
	
	[myself.delegate parser:myself foundProcessingInstructionWithTarget:targetStr data:dataStr];
}

@implementation DTHTMLParser
{
htmlSAXHandler _handler;

NSStringEncoding _encoding;
NSData *_data;

__unsafe_unretained id <DTHTMLParserDelegate> _delegate;
Expand Down Expand Up @@ -258,6 +265,7 @@ - (void)abortParsing
_handler.characters = NULL;
_handler.comment = NULL;
_handler.error = NULL;
_handler.processingInstruction = NULL;

// inform delegate
if ([_delegate respondsToSelector:@selector(parser:parseErrorOccurred:)])
Expand Down Expand Up @@ -339,7 +347,7 @@ - (void)setDelegate:(__unsafe_unretained id<DTHTMLParserDelegate>)delegate;
{
_handler.error = NULL;
}

if ([delegate respondsToSelector:@selector(parser:foundCDATA:)])
{
_handler.cdataBlock = _cdataBlock;
Expand All @@ -348,6 +356,15 @@ - (void)setDelegate:(__unsafe_unretained id<DTHTMLParserDelegate>)delegate;
{
_handler.cdataBlock = NULL;
}

if ([delegate respondsToSelector:@selector(parser:foundProcessingInstructionWithTarget:data:)])
{
_handler.processingInstruction = _processingInstruction;
}
else
{
_handler.processingInstruction = NULL;
}
}

- (NSInteger)lineNumber
Expand Down
20 changes: 17 additions & 3 deletions Demo/Resources/CurrentTest.html
Original file line number Diff line number Diff line change
@@ -1,3 +1,17 @@
<p style="font-family:Helvetica"><i>italic</i></p>
<p style="font-size:20px;">东方盖饭</p>
<p style="font-size:20px;font-style:italic;">东方盖饭</p>
<?xml version="1.0" encoding="UTF-8" ?>
<!DOCTYPE html
PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">

<h2>Core Text Bugs</h2>
<h3>Bold for Chinese Glyphs</h3>
<p>This bug was present before iOS 6: the Chinese glyphs in the following line would be bold if the locale was set to Chinese, due to an incorrect entry in the global font cascade table. Filed as rdar://11262229</p>
<p style='font-size:25.0pt'>English and 中文 -- Chinese characters </p>

<h3>Italic for Chinese Glyphs in Fallback</h3>
<p style="font-size:25px;">东方盖饭</p>
<p style="font-size:25px;font-style:italic;">东方盖饭</p>

<h3>Extra space above lines with whitespace glyphs</h3>
<p style="font-family:Helvetica;font-size:30px;">Bla bla Bla bla Bla bla Bla bla Bla bla Bla bla Bla bla Bla bla Bla bla Bla bla Bla bla Bla bla Bla bla Bla bla Bla bla Bla bla Bla bla </p>
<p style="font-family:Helvetica;font-size:30px;">Bla bla Bla bla Bla bla Bla bla Bla bla Bla bla Bla bla Bla bla Bla bla Bla bla Bla bla Bla bla Bla bla Bla bla Bla bla Bla bla Bla bla </p>

0 comments on commit f6d5599

Please sign in to comment.