Skip to content

Commit

Permalink
Merge pull request apple#23706 from milseman/5_1_gab_barg
Browse files Browse the repository at this point in the history
[5.1][SE-0248] String Gaps
  • Loading branch information
milseman committed Apr 2, 2019
2 parents fbc1377 + 51d158b commit 3c1d559
Show file tree
Hide file tree
Showing 25 changed files with 679 additions and 392 deletions.
24 changes: 20 additions & 4 deletions stdlib/public/Darwin/Foundation/NSRange.swift
Expand Up @@ -175,17 +175,33 @@ extension Range where Bound == Int {
}

extension Range where Bound == String.Index {
public init?(_ range: NSRange, in string: __shared String) {
private init?<S: StringProtocol>(
_ range: NSRange, _genericIn string: __shared S
) {
// Corresponding stdlib version
guard #available(macOS 9999, iOS 9999, tvOS 9999, watchOS 9999, *) else {
fatalError()
}
let u = string.utf16
guard range.location != NSNotFound,
let start = u.index(u.startIndex, offsetBy: range.location, limitedBy: u.endIndex),
let end = u.index(u.startIndex, offsetBy: range.location + range.length, limitedBy: u.endIndex),
let start = u.index(
u.startIndex, offsetBy: range.location, limitedBy: u.endIndex),
let end = u.index(
start, offsetBy: range.length, limitedBy: u.endIndex),
let lowerBound = String.Index(start, within: string),
let upperBound = String.Index(end, within: string)
else { return nil }

self = lowerBound..<upperBound
}

/// Creates a range of string indices in `string` from the given `NSRange`.
///
/// This non-generic overload forwards directly to the private `_genericIn`
/// initializer and is `nil` whenever that initializer is `nil`.
///
/// - Parameters:
///   - range: The range to convert. The shared implementation rejects a
///     location of `NSNotFound` and treats the location and length as offsets
///     into `string.utf16`.
///   - string: The string the resulting indices refer to.
// NOTE(review): the shared implementation calls `fatalError()` when its
// `#available` guard fails, and this overload has no `@available` annotation
// of its own — confirm that guard can never fail for callers of this overload.
public init?(_ range: NSRange, in string: __shared String) {
self.init(range, _genericIn: string)
}
/// Creates a range of string indices in `string` from the given `NSRange`,
/// for any type conforming to `StringProtocol`.
///
/// Forwards directly to the private `_genericIn` initializer and is `nil`
/// whenever that initializer is `nil`.
///
/// - Parameters:
///   - range: The range to convert.
///   - string: The string or other `StringProtocol` value the resulting
///     indices refer to.
@available(macOS 9999, iOS 9999, tvOS 9999, watchOS 9999, *)
public init?<S: StringProtocol>(_ range: NSRange, in string: __shared S) {
self.init(range, _genericIn: string)
}
}

extension NSRange : CustomReflectable {
Expand Down
4 changes: 4 additions & 0 deletions stdlib/public/core/ASCII.swift
Expand Up @@ -23,6 +23,10 @@ extension Unicode.ASCII : Unicode.Encoding {
return EncodedScalar(0x1a) // U+001A SUBSTITUTE; best we can do for ASCII
}

/// Returns a Boolean value indicating whether the given code unit represents
/// an ASCII scalar.
///
/// This forwards to `UTF8.isASCII(_:)`.
///
/// - Parameter x: The code unit to test.
/// - Returns: The result of `UTF8.isASCII(x)`.
@_alwaysEmitIntoClient
public static func isASCII(_ x: CodeUnit) -> Bool { return UTF8.isASCII(x) }

@inline(__always)
@inlinable
public static func _isScalar(_ x: CodeUnit) -> Bool {
Expand Down
1 change: 1 addition & 0 deletions stdlib/public/core/CMakeLists.txt
Expand Up @@ -90,6 +90,7 @@ set(SWIFTLIB_ESSENTIAL
KeyValuePairs.swift
LazyCollection.swift
LazySequence.swift
LegacyABI.swift
LifetimeManager.swift
ManagedBuffer.swift
Map.swift
Expand Down
27 changes: 14 additions & 13 deletions stdlib/public/core/Character.swift
Expand Up @@ -88,25 +88,26 @@ extension Character {
}

extension Character {
@usableFromInline
typealias UTF8View = String.UTF8View
/// A view of a character's contents as a collection of UTF-8 code units. See
/// `String.UTF8View` for more information.
public typealias UTF8View = String.UTF8View

/// A UTF-8 encoding of `self`.
@inlinable
internal var utf8: UTF8View {
return _str.utf8
}
@usableFromInline
typealias UTF16View = String.UTF16View
public var utf8: UTF8View { return _str.utf8 }

/// A view of a character's contents as a collection of UTF-16 code units. See
/// `String.UTF16View` for more information.
public typealias UTF16View = String.UTF16View

/// A UTF-16 encoding of `self`.
@inlinable
internal var utf16: UTF16View {
return _str.utf16
}
public var utf16: UTF16View { return _str.utf16 }

public typealias UnicodeScalarView = String.UnicodeScalarView

@inlinable
public var unicodeScalars: UnicodeScalarView {
return _str.unicodeScalars
}
public var unicodeScalars: UnicodeScalarView { return _str.unicodeScalars }
}

extension Character :
Expand Down
1 change: 1 addition & 0 deletions stdlib/public/core/GroupInfo.json
Expand Up @@ -223,6 +223,7 @@
"Equatable.swift",
"Comparable.swift",
"Codable.swift",
"LegacyABI.swift",
"MigrationSupport.swift"
],
"Result": [
Expand Down
41 changes: 41 additions & 0 deletions stdlib/public/core/LegacyABI.swift
@@ -0,0 +1,41 @@
//===----------------------------------------------------------------------===//
//
// This source file is part of the Swift.org open source project
//
// Copyright (c) 2014 - 2019 Apple Inc. and the Swift project authors
// Licensed under Apache License v2.0 with Runtime Library Exception
//
// See https://swift.org/LICENSE.txt for license information
// See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
//
//===----------------------------------------------------------------------===//

// This file contains non-API (or underscored) declarations that need to be
// kept around for ABI compatibility.

// Legacy underscored spelling of the UTF-16 ASCII check. It is marked
// unavailable, so new source cannot call it; the `renamed:` argument points
// users at `Unicode.UTF16.isASCII`, which the body forwards to.
extension Unicode.UTF16 {
@available(*, unavailable, renamed: "Unicode.UTF16.isASCII")
@inlinable
public static func _isASCII(_ x: CodeUnit) -> Bool {
return Unicode.UTF16.isASCII(x)
}
}

// Legacy free-function spelling of the UTF-8 ASCII check. It is marked
// unavailable in favor of `Unicode.UTF8.isASCII`, and the body forwards to
// that replacement.
@available(*, unavailable, renamed: "Unicode.UTF8.isASCII")
@inlinable
internal func _isASCII(_ x: UInt8) -> Bool {
return Unicode.UTF8.isASCII(x)
}

// Legacy free-function spelling of the UTF-8 continuation-byte check. It is
// marked unavailable in favor of `Unicode.UTF8.isContinuation`, and the body
// forwards to `UTF8.isContinuation`.
@available(*, unavailable, renamed: "Unicode.UTF8.isContinuation")
@inlinable
internal func _isContinuation(_ x: UInt8) -> Bool {
return UTF8.isContinuation(x)
}

// Legacy underscored accessor on `Substring`. It is marked unavailable in
// favor of `Substring.base`, and the getter returns `base`.
extension Substring {
@available(*, unavailable, renamed: "Substring.base")
@inlinable
internal var _wholeString: String { return base }
}

2 changes: 1 addition & 1 deletion stdlib/public/core/StringComparison.swift
Expand Up @@ -239,7 +239,7 @@ private func _findBoundary(
}

// Back up to scalar boundary
while _isContinuation(utf8[_unchecked: idx]) {
while UTF8.isContinuation(utf8[_unchecked: idx]) {
idx &-= 1
}

Expand Down
4 changes: 2 additions & 2 deletions stdlib/public/core/StringCreate.swift
Expand Up @@ -193,8 +193,8 @@ extension String {
internal static func _fromSubstring(
_ substring: __shared Substring
) -> String {
if substring._offsetRange == substring._wholeString._offsetRange {
return substring._wholeString
if substring._offsetRange == substring.base._offsetRange {
return substring.base
}

return String._copying(substring)
Expand Down
67 changes: 59 additions & 8 deletions stdlib/public/core/StringIndexConversions.swift
Expand Up @@ -11,6 +11,60 @@
//===----------------------------------------------------------------------===//

extension String.Index {
/// Shared implementation behind the public `init(_:within:)` initializers.
///
/// Succeeds only when `idx` passes the grapheme-cluster-boundary check on
/// `target._wholeGuts` and lies in `target.startIndex...target.endIndex`;
/// otherwise the result is `nil`.
///
/// - Parameters:
///   - idx: The candidate position.
///   - target: The `StringProtocol` value the resulting index refers to.
private init?<S: StringProtocol>(
_ idx: String.Index, _genericWithin target: S
) {
// NOTE(review): `_wholeGuts` presumably covers the entire underlying string
// rather than just `target` — confirm. If so, the explicit bounds comparison
// is what rejects boundaries that fall outside `target` itself.
guard target._wholeGuts.isOnGraphemeClusterBoundary(idx),
idx >= target.startIndex && idx <= target.endIndex
else {
return nil
}

self = idx
}

/// Creates an index in the given string that corresponds exactly to the
/// specified position.
///
/// If the index passed as `sourcePosition` represents the start of an
/// extended grapheme cluster---the element type of a string---then the
/// initializer succeeds.
///
/// The following example converts the position of the Unicode scalar `"e"`
/// into its corresponding position in the string. The character at that
/// position is the composed `"é"` character.
///
/// let cafe = "Cafe\u{0301}"
/// print(cafe)
/// // Prints "Café"
///
/// let scalarsIndex = cafe.unicodeScalars.firstIndex(of: "e")!
/// let stringIndex = String.Index(scalarsIndex, within: cafe)!
///
/// print(cafe[...stringIndex])
/// // Prints "Café"
///
/// If the index passed as `sourcePosition` doesn't have an exact
/// corresponding position in `target`, the result of the initializer is
/// `nil`. For example, an attempt to convert the position of the combining
/// acute accent (`"\u{0301}"`) fails. Combining Unicode scalars do not have
/// their own position in a string.
///
/// let nextScalarsIndex = cafe.unicodeScalars.index(after: scalarsIndex)
/// let nextStringIndex = String.Index(nextScalarsIndex, within: cafe)
///
/// print(nextStringIndex)
/// // Prints "nil"
///
/// - Parameters:
/// - sourcePosition: A position in a view of the `target` parameter.
/// `sourcePosition` must be a valid index of at least one of the views
/// of `target`.
/// - target: The string referenced by the resulting index.
public init?(_ sourcePosition: String.Index, within target: String) {
// Forward to the shared generic implementation. Besides the grapheme
// cluster boundary check, it also returns `nil` for a position outside
// `target.startIndex...target.endIndex`.
self.init(sourcePosition, _genericWithin: target)
}

/// Creates an index in the given string that corresponds exactly to the
/// specified position.
///
Expand Down Expand Up @@ -49,14 +103,11 @@ extension String.Index {
/// `sourcePosition` must be a valid index of at least one of the views
/// of `target`.
/// - target: The string referenced by the resulting index.
public init?(
_ sourcePosition: String.Index,
within target: String
@available(macOS 9999, iOS 9999, tvOS 9999, watchOS 9999, *)
public init?<S: StringProtocol>(
_ sourcePosition: String.Index, within target: S
) {
guard target._guts.isOnGraphemeClusterBoundary(sourcePosition) else {
return nil
}
self = sourcePosition
self.init(sourcePosition, _genericWithin: target)
}

/// Returns the position in the given UTF-8 view that corresponds exactly to
Expand All @@ -81,7 +132,7 @@ extension String.Index {
/// position of a UTF-16 trailing surrogate returns `nil`.
public func samePosition(
in utf8: String.UTF8View
) -> String.UTF8View.Index? {
) -> String.UTF8View.Index? {
return String.UTF8View.Index(self, within: utf8)
}

Expand Down
4 changes: 2 additions & 2 deletions stdlib/public/core/StringNormalization.swift
Expand Up @@ -108,7 +108,7 @@ extension UnsafeBufferPointer where Element == UInt8 {
if index == 0 || index == count {
return true
}
assert(!_isContinuation(self[_unchecked: index]))
assert(!UTF8.isContinuation(self[_unchecked: index]))

// Sub-300 latiny fast-path
if self[_unchecked: index] < 0xCC { return true }
Expand Down Expand Up @@ -165,7 +165,7 @@ extension UnsafeBufferPointer where Element == UInt8 {
_internalInvariant(index == count)
return true
}
return !_isContinuation(self[index])
return !UTF8.isContinuation(self[index])
}

}
Expand Down
14 changes: 5 additions & 9 deletions stdlib/public/core/StringUTF8Validation.swift
Expand Up @@ -7,19 +7,15 @@ private func _isNotOverlong_F0(_ x: UInt8) -> Bool {
}

private func _isNotOverlong_F4(_ x: UInt8) -> Bool {
return _isContinuation(x) && x <= 0x8F
return UTF8.isContinuation(x) && x <= 0x8F
}

private func _isNotOverlong_E0(_ x: UInt8) -> Bool {
return (0xA0...0xBF).contains(x)
}

private func _isNotOverlong_ED(_ x: UInt8) -> Bool {
return _isContinuation(x) && x <= 0x9F
}

private func _isASCII_cmp(_ x: UInt8) -> Bool {
return x <= 0x7F
return UTF8.isContinuation(x) && x <= 0x9F
}

internal struct UTF8ExtraInfo: Equatable {
Expand Down Expand Up @@ -48,7 +44,7 @@ internal func validateUTF8(_ buf: UnsafeBufferPointer<UInt8>) -> UTF8ValidationR
guard f(cu) else { throw UTF8ValidationError() }
}
@inline(__always) func guaranteeContinuation() throws {
try guaranteeIn(_isContinuation)
try guaranteeIn(UTF8.isContinuation)
}

func _legacyInvalidLengthCalculation(_ _buffer: (_storage: UInt32, ())) -> Int {
Expand Down Expand Up @@ -94,7 +90,7 @@ internal func validateUTF8(_ buf: UnsafeBufferPointer<UInt8>) -> UTF8ValidationR
var endIndex = buf.startIndex
var iter = buf.makeIterator()
_ = iter.next()
while let cu = iter.next(), !_isASCII(cu) && !_isUTF8MultiByteLeading(cu) {
while let cu = iter.next(), UTF8.isContinuation(cu) {
endIndex += 1
}
let illegalRange = Range(buf.startIndex...endIndex)
Expand All @@ -107,7 +103,7 @@ internal func validateUTF8(_ buf: UnsafeBufferPointer<UInt8>) -> UTF8ValidationR
do {
var isASCII = true
while let cu = iter.next() {
if _isASCII(cu) { lastValidIndex &+= 1; continue }
if UTF8.isASCII(cu) { lastValidIndex &+= 1; continue }
isASCII = false
if _slowPath(!_isUTF8MultiByteLeading(cu)) {
throw UTF8ValidationError()
Expand Down
4 changes: 2 additions & 2 deletions stdlib/public/core/StringUTF8View.swift
Expand Up @@ -415,7 +415,7 @@ extension String.UTF8View {

let (scalar, scalarLen) = _guts.foreignErrorCorrectedScalar(
startingAt: i.strippingTranscoding)
let utf8Len = _numUTF8CodeUnits(scalar)
let utf8Len = UTF8.width(scalar)

if utf8Len == 1 {
_internalInvariant(i.transcodedOffset == 0)
Expand All @@ -442,7 +442,7 @@ extension String.UTF8View {

let (scalar, scalarLen) = _guts.foreignErrorCorrectedScalar(
endingAt: i)
let utf8Len = _numUTF8CodeUnits(scalar)
let utf8Len = UTF8.width(scalar)
return i.encoded(offsetBy: -scalarLen).transcoded(withOffset: utf8Len &- 1)
}

Expand Down
4 changes: 2 additions & 2 deletions stdlib/public/core/StringUnicodeScalarView.swift
Expand Up @@ -418,7 +418,7 @@ extension String.UnicodeScalarView {
internal func _foreignIndex(after i: Index) -> Index {
_internalInvariant(_guts.isForeign)
let cu = _guts.foreignErrorCorrectedUTF16CodeUnit(at: i)
let len = _isLeadingSurrogate(cu) ? 2 : 1
let len = UTF16.isLeadSurrogate(cu) ? 2 : 1

return i.encoded(offsetBy: len)
}
Expand All @@ -429,7 +429,7 @@ extension String.UnicodeScalarView {
_internalInvariant(_guts.isForeign)
let priorIdx = i.priorEncoded
let cu = _guts.foreignErrorCorrectedUTF16CodeUnit(at: priorIdx)
let len = _isTrailingSurrogate(cu) ? 2 : 1
let len = UTF16.isTrailSurrogate(cu) ? 2 : 1

return i.encoded(offsetBy: -len)
}
Expand Down

0 comments on commit 3c1d559

Please sign in to comment.