Skip to content

Commit

Permalink
[Standard update] Forbid C0 controls and U+007F in domains
Browse files Browse the repository at this point in the history
  • Loading branch information
karwa committed Mar 19, 2022
1 parent 5da7600 commit 764b09e
Show file tree
Hide file tree
Showing 4 changed files with 46 additions and 16 deletions.
14 changes: 7 additions & 7 deletions Sources/WebURL/Parser/Parser+Host.swift
Expand Up @@ -104,8 +104,8 @@ extension ParsedHost {
hostnameInfo.encodedCount &+= 2
continue
}
if asciiChar.isForbiddenHostCodePoint, asciiChar != .percentSign {
callback.validationError(.hostForbiddenCodePoint)
if asciiChar.isForbiddenHostCodePoint {
callback.validationError(.hostOrDomainForbiddenCodePoint)
return nil
}
if URLEncodeSet.C0Control().shouldPercentEncode(ascii: asciiChar.codePoint) {
Expand All @@ -129,8 +129,8 @@ extension ParsedHost {
// TODO: Handle domains containing Unicode or IDNA labels.
callback.validationError(.domainToASCIIFailure)
return nil
case .forbiddenHostCodePoint:
callback.validationError(.hostForbiddenCodePoint)
case .forbiddenDomainCodePoint:
callback.validationError(.hostOrDomainForbiddenCodePoint)
return nil
case .endsInANumber:
guard let address = IPv4Address(utf8: domain) else {
Expand Down Expand Up @@ -167,8 +167,8 @@ extension ParsedHost {
guard let char = ASCII(domain[i]) else {
return .containsUnicodeOrIDNA
}
if char.isForbiddenHostCodePoint {
return .forbiddenHostCodePoint
if char.isForbiddenDomainCodePoint {
return .forbiddenDomainCodePoint
}
domainInfo.needsLowercasing = domainInfo.needsLowercasing || char.isUppercaseAlpha
domainInfo.decodedCount &+= 1
Expand Down Expand Up @@ -237,7 +237,7 @@ internal enum _DomainParseResult {

/// The given domain contains forbidden domain code-points.
///
case forbiddenHostCodePoint
case forbiddenDomainCodePoint

/// The given domain's final label is a number, according to https://url.spec.whatwg.org/#ends-in-a-number-checker
/// It should be parsed as an IPv4 address, rather than a domain.
Expand Down
25 changes: 22 additions & 3 deletions Sources/WebURL/Parser/Parser+StringUtils.swift
Expand Up @@ -224,20 +224,39 @@ extension ASCII {
/// Returns `true` if this character is a forbidden host code point, otherwise `false`.
///
/// A forbidden host code point is U+0000 NULL, U+0009 TAB, U+000A LF, U+000D CR,
/// U+0020 SPACE, U+0023 (#), U+0025 (%), U+002F (/), U+003A (:), U+003C (<), U+003E (>),
/// U+003F (?), U+0040 (@), U+005B ([), U+005C (\), U+005D (]), U+005E (^), or U+007C (|).
/// U+0020 SPACE, U+0023 (#), U+002F (/), U+003A (:), U+003C (<), U+003E (>), U+003F (?),
/// U+0040 (@), U+005B ([), U+005C (\), U+005D (]), U+005E (^), or U+007C (|).
///
/// https://url.spec.whatwg.org/#host-miscellaneous
///
@inlinable
internal var isForbiddenHostCodePoint: Bool {
// Membership is tested against a 128-bit table split into two 64-bit masks:
// bit `n` of `lo` stands for code point `n`, and bit `n` of `hi` stands for code point `64 + n`.
// The ruler below labels the bit positions within each 16-bit group, most significant bit first.
// FEDCBA98_76543210_FEDCBA98_76543210_FEDCBA98_76543210_FEDCBA98_76543210
// NOTE(review): two declarations of `lo` appear here. They differ only in bit 37 (U+0025 '%'),
// which is set in the first and clear in the second. Presumably the first is the line removed by
// this change and the second is its replacement - confirm against the repository.
let lo: UInt64 = 0b11010100_00000000_10000000_00101001_00000000_00000000_00100110_00000001
let lo: UInt64 = 0b11010100_00000000_10000000_00001001_00000000_00000000_00100110_00000001
// Set bits: 0 (U+0040 '@'), 27-30 (U+005B '[' through U+005E '^'), and 60 (U+007C '|').
let hi: UInt64 = 0b00010000_00000000_00000000_00000000_01111000_00000000_00000000_00000001
if self.codePoint < 64 {
// Code points 0..<64 index directly into the low mask.
return lo & (1 &<< self.codePoint) != 0
} else {
// Remaining code points are rebased by 64 so that they index into the high mask.
return hi & (1 &<< (self.codePoint &- 64)) != 0
}
}

/// Returns `true` if this character is a forbidden domain code point, otherwise `false`.
///
/// The forbidden domain code points are the forbidden host code points, plus the C0 controls,
/// U+0025 (%), and U+007F DELETE.
///
/// https://url.spec.whatwg.org/#host-miscellaneous
///
@inlinable
internal var isForbiddenDomainCodePoint: Bool {
  // A 128-entry membership table stored as two 64-bit masks: bit `n` of `lowerHalf` represents
  // code point `n`, and bit `n` of `upperHalf` represents code point `64 + n`.
  // The ruler labels the bit positions within each 16-bit group, most significant bit first.
  //                         FEDCBA98_76543210_FEDCBA98_76543210_FEDCBA98_76543210_FEDCBA98_76543210
  let lowerHalf: UInt64 = 0b11010100_00000000_10000000_00101001_11111111_11111111_11111111_11111111
  let upperHalf: UInt64 = 0b10010000_00000000_00000000_00000000_01111000_00000000_00000000_00000001

  // Pick the mask covering this code point, and the bit position within that mask.
  let isInLowerHalf = self.codePoint < 64
  let table = isInLowerHalf ? lowerHalf : upperHalf
  let bitIndex = isInLowerHalf ? self.codePoint : self.codePoint &- 64
  return table & (1 &<< bitIndex) != 0
}
}
6 changes: 3 additions & 3 deletions Sources/WebURL/Parser/ValidationError.swift
Expand Up @@ -96,7 +96,7 @@ extension ValidationError {
@inlinable internal static var unclosedIPv6Address: Self { Self(_code: 20) }
@inlinable internal static var domainToASCIIFailure: Self { Self(_code: 21) }
@inlinable internal static var domainToASCIIEmptyDomainFailure: Self { Self(_code: 22) }
@inlinable internal static var hostForbiddenCodePoint: Self { Self(_code: 23) }
@inlinable internal static var hostOrDomainForbiddenCodePoint: Self { Self(_code: 23) }
@inlinable internal static var invalidIPv6Address: Self { Self(_code: 24) }
@inlinable internal static var invalidIPv4Address: Self { Self(_code: 25) }
// This one is not in the standard.
Expand Down Expand Up @@ -261,9 +261,9 @@ extension ValidationError: CustomStringConvertible {
This can be caused by many things, such as the domain consisting only of ignorable code points,
or if the domain is the string "xn--".
"""#
case .hostForbiddenCodePoint:
case .hostOrDomainForbiddenCodePoint:
return #"""
The input’s host contains a forbidden host code point. Note that hosts are percent-decoded before
The input’s host or domain contains a forbidden code point. Note that hosts are percent-decoded before
being processed when the URL's scheme is special, which would result in the following URL having a hostname
of "exa#mple.org" (which contains the forbidden host code point "#").
Expand Down
17 changes: 14 additions & 3 deletions Tests/WebURLTests/OtherUtilitiesTests.swift
Expand Up @@ -111,18 +111,29 @@ extension OtherUtilitiesTests {
XCTAssertTrue(hasNonURLCodePoints(utf8: [0xED, 0xBF, 0xBF])) // DFFF
}

// Checks `isForbiddenHostCodePoint` and `isForbiddenDomainCodePoint` for every value in
// `ASCII.allCharacters`.
// NOTE(review): this view shows two `func` headers, two versions of the host code point list, and
// two versions of the first `case` line; presumably each pair is a removed line followed by its
// replacement - confirm against the repository.
func testForbiddenHostCodePoint() {
func testForbiddenHostAndDomainCodePoints() {
/// A forbidden host code point is U+0000 NULL, U+0009 TAB, U+000A LF, U+000D CR,
/// U+0020 SPACE, U+0023 (#), U+0025 (%), U+002F (/), U+003A (:), U+003C (<), U+003E (>),
/// U+0020 SPACE, U+0023 (#), U+002F (/), U+003A (:), U+003C (<), U+003E (>),
/// U+003F (?), U+0040 (@), U+005B ([), U+005C (\), U+005D (]), U+005E (^), or U+007C (|).

/// A forbidden domain code point is a forbidden host code point, a C0 control, U+0025 (%),
/// or U+007F DELETE.
for char in ASCII.allCharacters {
switch char {
// Characters expected to be forbidden in both hosts and domains. This case is listed before the
// C0-control case below, so any listed character which is also a C0 control is matched here first.
case .null, .horizontalTab, .lineFeed, .carriageReturn, .space, .numberSign, .percentSign, .forwardSlash,
case .null, .horizontalTab, .lineFeed, .carriageReturn, .space, .numberSign, .forwardSlash,
.colon, .lessThanSign, .greaterThanSign, .questionMark, .commercialAt, .leftSquareBracket, .backslash,
.rightSquareBracket, .circumflexAccent, .verticalBar:
XCTAssertTrue(char.isForbiddenHostCodePoint)
XCTAssertTrue(char.isForbiddenDomainCodePoint)
// Expected to be forbidden in domains only.
case .percentSign, .delete:
XCTAssertFalse(char.isForbiddenHostCodePoint)
XCTAssertTrue(char.isForbiddenDomainCodePoint)
// Any remaining character in `ASCII.ranges.c0Control`: expected to be forbidden in domains only.
case _ where ASCII.ranges.c0Control.contains(char):
XCTAssertFalse(char.isForbiddenHostCodePoint)
XCTAssertTrue(char.isForbiddenDomainCodePoint)
// Every other ASCII character is expected to be allowed in both hosts and domains.
default:
XCTAssertFalse(char.isForbiddenHostCodePoint)
XCTAssertFalse(char.isForbiddenDomainCodePoint)
}
}
}
Expand Down

0 comments on commit 764b09e

Please sign in to comment.