Skip to content

Commit 673176f

Browse files
committed
Publicize HTMLStringEncodingForLabel() and HTMLInvalidStringEncoding().
1 parent 7e92c34 commit 673176f

File tree

9 files changed

+49
-24
lines changed

9 files changed

+49
-24
lines changed

CHANGELOG.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,9 @@
22

33
## [Unreleased]
44

5+
* Publicize functions `HTMLStringEncodingForLabel()` and `HTMLInvalidStringEncoding()`.
6+
* `HTMLStringEncodingForLabel()` is useful in any context where HTML-style encoding names can occur, such as a form element's `accept-charset` attribute. When the given name does not match any known encoding, the function returns `HTMLInvalidStringEncoding()`.
7+
58
## [2.1.6][]
69

710
* Fix "header missing from umbrella header" build error when using Swift Package Manager.

HTMLReader.xcodeproj/project.pbxproj

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -86,6 +86,11 @@
8686
1C3C5BC41A809C8A0091E7E6 /* HTMLEncoding.m in Sources */ = {isa = PBXBuildFile; fileRef = 1C3C5BC01A809C8A0091E7E6 /* HTMLEncoding.m */; };
8787
1C3C5BC51A8201640091E7E6 /* HTMLEncoding.m in Sources */ = {isa = PBXBuildFile; fileRef = 1C3C5BC01A809C8A0091E7E6 /* HTMLEncoding.m */; };
8888
1C640EB4176BCA1C00919E5C /* HTMLTokenizer.m in Sources */ = {isa = PBXBuildFile; fileRef = 1C640EB3176BCA1C00919E5C /* HTMLTokenizer.m */; };
89+
1C65EDF1265B3BC20095BA29 /* HTMLEncoding.h in Headers */ = {isa = PBXBuildFile; fileRef = 1C65EDF0265B3BC20095BA29 /* HTMLEncoding.h */; settings = {ATTRIBUTES = (Public, ); }; };
90+
1C65EDF2265B3BC20095BA29 /* HTMLEncoding.h in Headers */ = {isa = PBXBuildFile; fileRef = 1C65EDF0265B3BC20095BA29 /* HTMLEncoding.h */; settings = {ATTRIBUTES = (Public, ); }; };
91+
1C65EDF3265B3BC20095BA29 /* HTMLEncoding.h in Headers */ = {isa = PBXBuildFile; fileRef = 1C65EDF0265B3BC20095BA29 /* HTMLEncoding.h */; settings = {ATTRIBUTES = (Public, ); }; };
92+
1C65EDF4265B3BC20095BA29 /* HTMLEncoding.h in Headers */ = {isa = PBXBuildFile; fileRef = 1C65EDF0265B3BC20095BA29 /* HTMLEncoding.h */; settings = {ATTRIBUTES = (Public, ); }; };
93+
1C65EDF5265B3CF90095BA29 /* HTMLEncoding.h in CopyFiles */ = {isa = PBXBuildFile; fileRef = 1C65EDF0265B3BC20095BA29 /* HTMLEncoding.h */; };
8994
1C6C1F651A179D7900236076 /* HTMLComment.h in Headers */ = {isa = PBXBuildFile; fileRef = 1CA5C21418D746D600147FE7 /* HTMLComment.h */; };
9095
1C6C1F661A179D7D00236076 /* HTMLDocument.h in Headers */ = {isa = PBXBuildFile; fileRef = 1C25D3A6177BB78600F7C10D /* HTMLDocument.h */; settings = {ATTRIBUTES = (Public, ); }; };
9196
1C6C1F671A179D8300236076 /* HTMLDocumentType.h in Headers */ = {isa = PBXBuildFile; fileRef = 1CA5C21918D7479C00147FE7 /* HTMLDocumentType.h */; settings = {ATTRIBUTES = (Public, ); }; };
@@ -229,6 +234,7 @@
229234
dstPath = "include/${PRODUCT_NAME}";
230235
dstSubfolderSpec = 16;
231236
files = (
237+
1C65EDF5265B3CF90095BA29 /* HTMLEncoding.h in CopyFiles */,
232238
1CD0C54C1BDDBBFE00C3AC80 /* HTMLTextNode.h in CopyFiles */,
233239
66BD104F1BBF7CAC00B9346B /* HTMLComment.h in CopyFiles */,
234240
66BD104E1BBF7CA500B9346B /* NSString+HTMLEntities.h in CopyFiles */,
@@ -257,10 +263,11 @@
257263
1C25D40817837A8A00F7C10D /* HTMLParser.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = HTMLParser.h; sourceTree = "<group>"; };
258264
1C25D40917837A8A00F7C10D /* HTMLParser.m */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.objc; path = HTMLParser.m; sourceTree = "<group>"; };
259265
1C319BC31C618863000DAA63 /* HTMLReader.framework */ = {isa = PBXFileReference; explicitFileType = wrapper.framework; includeInIndex = 0; path = HTMLReader.framework; sourceTree = BUILT_PRODUCTS_DIR; };
260-
1C3C5BBF1A809C8A0091E7E6 /* HTMLEncoding.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = HTMLEncoding.h; sourceTree = "<group>"; };
266+
1C3C5BBF1A809C8A0091E7E6 /* HTMLEncoding+Private.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = "HTMLEncoding+Private.h"; sourceTree = "<group>"; };
261267
1C3C5BC01A809C8A0091E7E6 /* HTMLEncoding.m */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.objc; path = HTMLEncoding.m; sourceTree = "<group>"; };
262268
1C640EB2176BCA1C00919E5C /* HTMLTokenizer.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = HTMLTokenizer.h; sourceTree = "<group>"; };
263269
1C640EB3176BCA1C00919E5C /* HTMLTokenizer.m */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.objc; path = HTMLTokenizer.m; sourceTree = "<group>"; };
270+
1C65EDF0265B3BC20095BA29 /* HTMLEncoding.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = HTMLEncoding.h; path = include/HTMLEncoding.h; sourceTree = "<group>"; };
264271
1C6C1F4C1A179BF600236076 /* HTMLReader.framework */ = {isa = PBXFileReference; explicitFileType = wrapper.framework; includeInIndex = 0; path = HTMLReader.framework; sourceTree = BUILT_PRODUCTS_DIR; };
265272
1C88293B18369DD70051653C /* HTMLReader.framework */ = {isa = PBXFileReference; explicitFileType = wrapper.framework; includeInIndex = 0; path = HTMLReader.framework; sourceTree = BUILT_PRODUCTS_DIR; };
266273
1C88294418369DD70051653C /* Info.plist */ = {isa = PBXFileReference; lastKnownFileType = text.plist.xml; path = Info.plist; sourceTree = "<group>"; };
@@ -523,7 +530,8 @@
523530
1CB15BBA1A9A4AE500176E73 /* Parser */ = {
524531
isa = PBXGroup;
525532
children = (
526-
1C3C5BBF1A809C8A0091E7E6 /* HTMLEncoding.h */,
533+
1C65EDF0265B3BC20095BA29 /* HTMLEncoding.h */,
534+
1C3C5BBF1A809C8A0091E7E6 /* HTMLEncoding+Private.h */,
527535
1C3C5BC01A809C8A0091E7E6 /* HTMLEncoding.m */,
528536
1C8E10581919F2570010007B /* HTMLEntities.h */,
529537
1C8E10591919F2570010007B /* HTMLEntities.m */,
@@ -561,6 +569,7 @@
561569
0D1077A41C1AC75C00CF9B41 /* HTMLDocumentType.h in Headers */,
562570
0D1077A51C1AC76400CF9B41 /* HTMLElement.h in Headers */,
563571
0D1077A61C1AC76600CF9B41 /* HTMLNamespace.h in Headers */,
572+
1C65EDF3265B3BC20095BA29 /* HTMLEncoding.h in Headers */,
564573
0D1077A71C1AC76800CF9B41 /* HTMLNode.h in Headers */,
565574
0D1077A81C1AC77200CF9B41 /* HTMLQuirksMode.h in Headers */,
566575
0D1077851C1AC36200CF9B41 /* HTMLReader.h in Headers */,
@@ -581,6 +590,7 @@
581590
1C319BD31C618970000DAA63 /* HTMLNode.h in Headers */,
582591
1C319BD61C618970000DAA63 /* HTMLTextNode.h in Headers */,
583592
1C319BCD1C618947000DAA63 /* HTMLDocument.h in Headers */,
593+
1C65EDF4265B3BC20095BA29 /* HTMLEncoding.h in Headers */,
584594
1C319BD71C618970000DAA63 /* HTMLReader.h in Headers */,
585595
1C319BE71C6189B6000DAA63 /* HTMLSelector.h in Headers */,
586596
1C319BD11C618970000DAA63 /* HTMLElement.h in Headers */,
@@ -601,6 +611,7 @@
601611
1C6C1F671A179D8300236076 /* HTMLDocumentType.h in Headers */,
602612
1C6C1F681A179D8B00236076 /* HTMLElement.h in Headers */,
603613
1C6C1FE01A17A05E00236076 /* HTMLNamespace.h in Headers */,
614+
1C65EDF1265B3BC20095BA29 /* HTMLEncoding.h in Headers */,
604615
1C6C1F6A1A179D9900236076 /* HTMLNode.h in Headers */,
605616
1C6C1FE21A17A07200236076 /* HTMLQuirksMode.h in Headers */,
606617
1C6C1F6C1A179DB600236076 /* HTMLReader.h in Headers */,
@@ -621,6 +632,7 @@
621632
1CA5C21B18D7479C00147FE7 /* HTMLDocumentType.h in Headers */,
622633
1CA5C21118D7457400147FE7 /* HTMLElement.h in Headers */,
623634
1C6C1FE11A17A05E00236076 /* HTMLNamespace.h in Headers */,
635+
1C65EDF2265B3BC20095BA29 /* HTMLEncoding.h in Headers */,
624636
1C88296D18369E090051653C /* HTMLNode.h in Headers */,
625637
1C6C1FE31A17A07300236076 /* HTMLQuirksMode.h in Headers */,
626638
1C88296E18369E090051653C /* HTMLReader.h in Headers */,

HTMLReaderTests/HTMLEncodingTests.m

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -101,7 +101,7 @@ + (instancetype)testFromScanner:(DataScanner *)scanner
101101

102102
- (NSStringEncoding)correctEncoding
103103
{
104-
return StringEncodingForLabel(self.correctEncodingLabel);
104+
return HTMLStringEncodingForLabel(self.correctEncodingLabel);
105105
}
106106

107107
- (NSString *)testString
Lines changed: 2 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
1-
// HTMLEncoding.h
1+
// HTMLEncoding+Private.h
22
//
33
// Public domain. https://github.com/nolanw/HTMLReader
44

5-
#import <Foundation/Foundation.h>
5+
#import "HTMLEncoding.h"
66

77
/// Tags a string encoding with a confidence that the parser can use to help determine how to decode bytes into a document.
88
typedef struct {
@@ -24,12 +24,6 @@ typedef struct {
2424
*/
2525
extern HTMLStringEncoding DeterminedStringEncodingForData(NSData *data, NSString *contentType, NSString **outDecodedString);
2626

27-
/// Returns the string encoding labeled according to the WHATWG Encoding Standard. Returns InvalidStringEncoding() if the label is unknown.
28-
extern NSStringEncoding StringEncodingForLabel(NSString *label);
29-
30-
/// An invalid NSStringEncoding. Equal to CFStringConvertEncodingToNSStringEncoding(kCFStringEncodingInvalidId).
31-
extern NSStringEncoding InvalidStringEncoding(void);
32-
3327
/**
3428
Returns YES if encoding "is a single-byte or variable-length encoding in which the bytes 0x09, 0x0A, 0x0C, 0x0D, 0x20 - 0x22, 0x26, 0x27, 0x2C - 0x3F, 0x41 - 0x5A, and 0x61 - 0x7A, ignoring bytes that are the second and later bytes of multibyte sequences, all correspond to single-byte sequences that map to the same Unicode characters as those bytes in Windows-1252".
3529

Sources/HTMLEncoding.m

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
//
33
// Public domain. https://github.com/nolanw/HTMLReader
44

5-
#import "HTMLEncoding.h"
5+
#import "HTMLEncoding+Private.h"
66

77
/**
88
* Returns the name of an encoding given by a label, as specified in the WHATWG Encoding standard, or nil if the label has no associated name.
@@ -12,7 +12,7 @@
1212
static NSString * NamedEncodingForLabel(NSString *label);
1313

1414
/**
15-
* Returns the string encoding given by a name from the WHATWG Encoding Standard, or the result of InvalidStringEncoding() if there is no known encoding given by name.
15+
* Returns the string encoding given by a name from the WHATWG Encoding Standard, or the result of HTMLInvalidStringEncoding() if there is no known encoding given by name.
1616
*/
1717
static NSStringEncoding StringEncodingForName(NSString *name);
1818

@@ -57,8 +57,8 @@ HTMLStringEncoding DeterminedStringEncodingForData(NSData *data, NSString *conte
5757
[scanner scanString:@"\"" intoString:nil];
5858
NSString *encodingLabel;
5959
if ([scanner scanUpToString:@"\"" intoString:&encodingLabel]) {
60-
NSStringEncoding encoding = StringEncodingForLabel(encodingLabel);
61-
if (encoding != InvalidStringEncoding()) {
60+
NSStringEncoding encoding = HTMLStringEncodingForLabel(encodingLabel);
61+
if (encoding != HTMLInvalidStringEncoding()) {
6262
NSString *decodedString = [[NSString alloc] initWithData:data encoding:encoding];
6363
if (decodedString) {
6464
*outDecodedString = decodedString;
@@ -400,23 +400,23 @@ static NSStringEncoding StringEncodingForName(NSString *name)
400400
if (match) {
401401
return CFStringConvertEncodingToNSStringEncoding(match->encoding);
402402
} else {
403-
return InvalidStringEncoding();
403+
return HTMLInvalidStringEncoding();
404404
}
405405
}
406406

407-
NSStringEncoding InvalidStringEncoding(void)
407+
NSStringEncoding HTMLInvalidStringEncoding(void)
408408
{
409409
return CFStringConvertEncodingToNSStringEncoding(kCFStringEncodingInvalidId);
410410
}
411411

412-
NSStringEncoding StringEncodingForLabel(NSString *untrimmedLabel)
412+
NSStringEncoding HTMLStringEncodingForLabel(NSString *untrimmedLabel)
413413
{
414414
NSString *label = [untrimmedLabel stringByTrimmingCharactersInSet:[NSCharacterSet whitespaceAndNewlineCharacterSet]];
415415
NSString *name = NamedEncodingForLabel(label);
416416
if (name) {
417417
return StringEncodingForName(name);
418418
} else {
419-
return InvalidStringEncoding();
419+
return HTMLInvalidStringEncoding();
420420
}
421421
}
422422

Sources/HTMLParser.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
#import <Foundation/Foundation.h>
66
#import "HTMLDocument.h"
77
#import "HTMLElement.h"
8-
#import "HTMLEncoding.h"
8+
#import "HTMLEncoding+Private.h"
99

1010
/**
1111
An HTMLParser turns a string into an HTMLDocument.

Sources/HTMLParser.m

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -449,8 +449,8 @@ - (void)inHeadInsertionModeHandleStartTagToken:(HTMLStartTagToken *)token
449449
if (self.encoding.confidence == Tentative) {
450450
NSString *charset = [token.attributes objectForKey:@"charset"];
451451
if (charset) {
452-
NSStringEncoding encoding = StringEncodingForLabel(charset);
453-
if (encoding != InvalidStringEncoding() && (IsASCIICompatibleEncoding(encoding) || IsUTF16Encoding(encoding))) {
452+
NSStringEncoding encoding = HTMLStringEncodingForLabel(charset);
453+
if (encoding != HTMLInvalidStringEncoding() && (IsASCIICompatibleEncoding(encoding) || IsUTF16Encoding(encoding))) {
454454
[self changeEncoding:encoding];
455455
}
456456
} else if ([token.attributes objectForKey:@"http-equiv"] && [[token.attributes objectForKey:@"http-equiv"] caseInsensitiveCompare:@"Content-Type"] == NSOrderedSame) {
@@ -487,8 +487,8 @@ - (void)inHeadInsertionModeHandleStartTagToken:(HTMLStartTagToken *)token
487487
}
488488

489489
if (encodingLabel) {
490-
NSStringEncoding encoding = StringEncodingForLabel(encodingLabel);
491-
if (encoding != InvalidStringEncoding() && (IsASCIICompatibleEncoding(encoding) || IsUTF16Encoding(encoding))) {
490+
NSStringEncoding encoding = HTMLStringEncodingForLabel(encodingLabel);
491+
if (encoding != HTMLInvalidStringEncoding() && (IsASCIICompatibleEncoding(encoding) || IsUTF16Encoding(encoding))) {
492492
[self changeEncoding:encoding];
493493
}
494494
}

Sources/include/HTMLEncoding.h

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
// HTMLEncoding.h
2+
//
3+
// Public domain. https://github.com/nolanw/HTMLReader
4+
5+
#import <Foundation/Foundation.h>
6+
7+
NS_ASSUME_NONNULL_BEGIN
8+
9+
/// Returns the string encoding that the WHATWG Encoding Standard associates with the given label; leading and trailing whitespace in the label is ignored. Returns HTMLInvalidStringEncoding() if the label is unknown.
10+
extern NSStringEncoding HTMLStringEncodingForLabel(NSString *label);
11+
12+
/// An invalid NSStringEncoding. Equal to CFStringConvertEncodingToNSStringEncoding(kCFStringEncodingInvalidId).
13+
extern NSStringEncoding HTMLInvalidStringEncoding(void);
14+
15+
NS_ASSUME_NONNULL_END

Sources/include/HTMLReader.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
// Public domain. https://github.com/nolanw/HTMLReader
44

55
#import "HTMLDocument.h"
6+
#import "HTMLEncoding.h"
67
#import "HTMLSelector.h"
78
#import "HTMLSerialization.h"
89
#import "HTMLTextNode.h"

0 commit comments

Comments
 (0)