Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 11 additions & 0 deletions stdlib/public/core/StringStorage.swift
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@

internal protocol _AbstractStringStorage: _NSCopying {
var asString: String { get }
var utf16: String.UTF16View { get }
var count: Int { get }
var isASCII: Bool { get }
var start: UnsafePointer<UInt8> { get }
Expand All @@ -26,6 +27,7 @@ internal protocol _AbstractStringStorage: _NSCopying {

internal protocol _AbstractStringStorage {
var asString: String { get }
var utf16: String.UTF16View { get }
var count: Int { get }
var isASCII: Bool { get }
var start: UnsafePointer<UInt8> { get }
Expand Down Expand Up @@ -295,6 +297,10 @@ final internal class __StringStorage
get { String(_StringGuts(self)) }
}

@inline(__always)
final internal var utf16: String.UTF16View {
String.UTF16View(_StringGuts(self))
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Theoretically this should all inline out, but this skips going through the String initializer

}

private init(_doNotCallMe: ()) {
_internalInvariantFailure("Use the create method")
Expand Down Expand Up @@ -721,6 +727,11 @@ final internal class __SharedStringStorage
return String(_StringGuts(self))
}
}

@inline(__always)
final internal var utf16: String.UTF16View {
String.UTF16View(_StringGuts(self))
}

internal init(
_mortal ptr: UnsafePointer<UInt8>,
Expand Down
46 changes: 28 additions & 18 deletions stdlib/public/core/StringStorageBridge.swift
Original file line number Diff line number Diff line change
Expand Up @@ -43,25 +43,17 @@ extension String {

// ObjC interfaces.
extension _AbstractStringStorage {

@inline(__always)
@_effects(releasenone)
internal func _getCharacters(
_ buffer: UnsafeMutablePointer<UInt16>, _ aRange: _SwiftNSRange
) {
_precondition(aRange.location >= 0 && aRange.length >= 0,
"Range out of bounds")
// Note: `count` is counting UTF-8 code units, while `aRange` is measured in
// UTF-16 offsets. This precondition is a necessary, but not sufficient test
// for validity. (More precise checks are done in UTF16View._nativeCopy.)
_precondition(aRange.location + aRange.length <= Int(count),
"Range out of bounds")

let range = unsafe Range(
_uncheckedBounds: (aRange.location, aRange.location+aRange.length))
let str = asString
unsafe str._copyUTF16CodeUnits(
unsafe utf16._nativeCopy(
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Part of the theme of "move all the smarts into StringUTF16View.swift instead of having them scattered in 3-4 different files"

into: UnsafeMutableBufferPointer(start: buffer, count: range.count),
range: range)
offsetRange: range)
}

@inline(__always)
Expand Down Expand Up @@ -116,6 +108,26 @@ extension _AbstractStringStorage {
return _cocoaLengthOfBytesInEncodingTrampoline(self, encoding)
}
}

// The caller info isn't useful here anyway because it's never client code,
// so this makes sure that _character(at:) doesn't have inlined assertion bits
@inline(never)
internal func _characterAtIndexOutOfBounds() -> Never {
_preconditionFailure("String index is out of bounds")
}

@inline(__always)
@_effects(readonly)
internal func _character(at offset: Int) -> UInt16 {
if _fastPath(isASCII) {
if (_fastPath(offset < count && offset >= 0)) {
return unsafe UInt16((start + offset).pointee)
}
_characterAtIndexOutOfBounds()
} else {
return utf16[nativeNonASCIIOffset: offset]
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Part of the theme of "move all the smarts into StringUTF16View.swift instead of having them scattered in 3-4 different files"

}
}

@_effects(readonly)
internal func _nativeIsEqual<T:_AbstractStringStorage>(
Expand Down Expand Up @@ -176,7 +188,7 @@ extension _AbstractStringStorage {
start: utf16Ptr,
count: otherUTF16Length
)
return unsafe asString.utf16.elementsEqual(utf16Buffer) ? 1 : 0
return unsafe utf16.elementsEqual(utf16Buffer) ? 1 : 0
}

/*
Expand All @@ -197,7 +209,7 @@ extension __StringStorage {
if isASCII {
return count
}
return asString.utf16.count
return utf16.count
}
}

Expand All @@ -214,8 +226,7 @@ extension __StringStorage {
@objc(characterAtIndex:)
@_effects(readonly)
final internal func character(at offset: Int) -> UInt16 {
let str = asString
return str.utf16[str._toUTF16Index(offset)]
_character(at: offset)
}

@objc(getCharacters:range:)
Expand Down Expand Up @@ -313,7 +324,7 @@ extension __SharedStringStorage {
if isASCII {
return count
}
return asString.utf16.count
return utf16.count
}
}

Expand All @@ -330,8 +341,7 @@ extension __SharedStringStorage {
@objc(characterAtIndex:)
@_effects(readonly)
final internal func character(at offset: Int) -> UInt16 {
let str = asString
return str.utf16[str._toUTF16Index(offset)]
_character(at: offset)
}

@objc(getCharacters:range:)
Expand Down
45 changes: 38 additions & 7 deletions stdlib/public/core/StringUTF16View.swift
Original file line number Diff line number Diff line change
Expand Up @@ -435,6 +435,22 @@ extension String.UTF16View: BidirectionalCollection {

return _foreignSubscript(position: idx)
}

internal subscript(nativeNonASCIIOffset offset: Int) -> UTF16.CodeUnit {
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is pretty much just pulling the relevant bits out of the code it used to go through

@_effects(releasenone) get {
let threshold = _breadcrumbStride / 2
// Do not use breadcrumbs if directly computing the result is expected
// to be cheaper
let idx = offset < threshold ?
_index(startIndex, offsetBy: offset)._knownUTF8 :
_nativeGetIndex(for: offset)
_precondition(idx._encodedOffset < _guts.count,
"String index is out of bounds")
let scalar = _guts.fastUTF8Scalar(
startingAt: _guts.scalarAlign(idx)._encodedOffset)
return scalar.utf16[idx.transcodedOffset]
}
}
}

extension String.UTF16View {
Expand Down Expand Up @@ -948,6 +964,21 @@ extension String.UTF16View {
fatalError()
}
}

// See _nativeCopy(into:alignedRange:), except this uses un-verified UTF16
// offsets instead of aligned indexes
internal func _nativeCopy(
into buffer: UnsafeMutableBufferPointer<UInt16>,
offsetRange range: Range<Int>
) {
let alignedRange = _indexRange(for: range, from: startIndex)
_precondition(alignedRange.lowerBound._encodedOffset <= _guts.count &&
alignedRange.upperBound._encodedOffset <= _guts.count,
"String index is out of bounds")
unsafe _nativeCopy(
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We should do a fast path in this for blocks of ascii (already have one for the entire thing being known-ascii), but I was having trouble getting it to generate reasonable looking code

into: buffer,
alignedRange: alignedRange.lowerBound ..< alignedRange.upperBound)
}

// Copy (i.e. transcode to UTF-16) our contents into a buffer. `alignedRange`
// means that the indices are part of the UTF16View.indices -- they are either
Expand All @@ -962,16 +993,16 @@ extension String.UTF16View {
range.lowerBound == _utf16AlignNativeIndex(range.lowerBound))
_internalInvariant(
range.upperBound == _utf16AlignNativeIndex(range.upperBound))

if _slowPath(range.isEmpty) { return }

let isASCII = _guts.isASCII
return unsafe _guts.withFastUTF8 { utf8 in
var writeIdx = 0
let writeEnd = buffer.count
var readIdx = range.lowerBound._encodedOffset
let readEnd = range.upperBound._encodedOffset

if isASCII {
_internalInvariant(range.lowerBound.transcodedOffset == 0)
_internalInvariant(range.upperBound.transcodedOffset == 0)
Expand All @@ -984,7 +1015,7 @@ extension String.UTF16View {
}
return
}

// Handle mid-transcoded-scalar initial index
if _slowPath(range.lowerBound.transcodedOffset != 0) {
_internalInvariant(range.lowerBound.transcodedOffset == 1)
Expand All @@ -995,7 +1026,7 @@ extension String.UTF16View {
readIdx &+= len
writeIdx &+= 1
}

// Transcode middle
while readIdx < readEnd {
let (scalar, len) = unsafe _decodeScalar(utf8, startingAt: readIdx)
Expand All @@ -1009,13 +1040,13 @@ extension String.UTF16View {
writeIdx &+= 1
}
}

// Handle mid-transcoded-scalar final index
if _slowPath(range.upperBound.transcodedOffset == 1) {
_internalInvariant(writeIdx < writeEnd)
let (scalar, _) = unsafe _decodeScalar(utf8, startingAt: readIdx)
_internalInvariant(scalar.utf16.count == 2)

// Note: this is intentionally not using the _unchecked subscript.
// (We rely on debug assertions to catch out of bounds access.)
unsafe buffer[writeIdx] = scalar.utf16[0]
Expand Down
4 changes: 2 additions & 2 deletions stdlib/public/core/UnicodeHelpers.swift
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ internal func _decodeUTF8(
return Unicode.Scalar(_unchecked: value)
}

@inlinable
@inlinable @inline(__always)
internal func _decodeScalar(
_ utf8: UnsafeBufferPointer<UInt8>, startingAt i: Int
) -> (Unicode.Scalar, scalarLength: Int) {
Expand Down Expand Up @@ -207,7 +207,7 @@ extension _StringGuts {
}
}

@inlinable
@inlinable @inline(__always)
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It was actually generating a retain-release around this before?? ~18% overhead in my simple test

internal func fastUTF8Scalar(startingAt i: Int) -> Unicode.Scalar {
_internalInvariant(isFastUTF8)
return unsafe self.withFastUTF8 { unsafe _decodeScalar($0, startingAt: i).0 }
Expand Down
12 changes: 6 additions & 6 deletions stdlib/public/core/UnsafeBufferPointer.swift.gyb
Original file line number Diff line number Diff line change
Expand Up @@ -212,7 +212,7 @@ extension Unsafe${Mutable}BufferPointer: @unsafe Sequence {
return (unsafe Iterator(_position: s + n, _end: s + count), n)
}

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

All the remaining changes are kinda unrelated, but I was getting frustrated with trivial pointer operations generating calls

@inlinable
@inlinable @_transparent
@safe
public func withContiguousStorageIfAvailable<R>(
_ body: (UnsafeBufferPointer<Element>) throws -> R
Expand Down Expand Up @@ -780,7 +780,7 @@ extension Unsafe${Mutable}BufferPointer:
return try unsafe body(&self)
}

@inlinable
@inlinable @_transparent
@safe
public mutating func withContiguousMutableStorageIfAvailable<R>(
_ body: (inout UnsafeMutableBufferPointer<Element>) throws -> R
Expand Down Expand Up @@ -825,7 +825,7 @@ extension Unsafe${Mutable}BufferPointer {
/// - `rebased.count == slice.count`
///
/// - Parameter slice: The buffer slice to rebase.
@inlinable // unsafe-performance
@inlinable @_transparent // unsafe-performance
public init(rebasing slice: Slice<UnsafeBufferPointer<Element>>) {
// NOTE: `Slice` does not guarantee that its start/end indices are valid
// in `base` -- it merely ensures that `startIndex <= endIndex`.
Expand Down Expand Up @@ -864,7 +864,7 @@ extension Unsafe${Mutable}BufferPointer {
/// - `rebased.count == slice.count`
///
/// - Parameter slice: The buffer slice to rebase.
@inlinable // unsafe-performance
@inlinable @_transparent // unsafe-performance
public init(rebasing slice: Slice<UnsafeMutableBufferPointer<Element>>) {
let base = unsafe slice.base.baseAddress?.advanced(by: slice.startIndex)
let count = unsafe slice.endIndex &- slice.startIndex
Expand Down Expand Up @@ -993,7 +993,7 @@ extension UnsafeMutableBufferPointer {
/// initialize the buffer's storage.
/// - Returns: The index one past the last element of the buffer initialized
/// by this function.
@_alwaysEmitIntoClient
@_alwaysEmitIntoClient @_transparent
public func initialize(
fromContentsOf source: some Collection<Element>
) -> Index {
Expand Down Expand Up @@ -1418,7 +1418,7 @@ extension Unsafe${Mutable}BufferPointer where Element: ~Copyable {
/// method.
/// - buffer: The buffer temporarily bound to `T`.
/// - Returns: The return value, if any, of the `body` closure parameter.
@_alwaysEmitIntoClient
@_alwaysEmitIntoClient @_transparent
public func withMemoryRebound<T: ~Copyable, E: Error, Result: ~Copyable>(
to type: T.Type,
_ body: (_ buffer: ${Self}<T>) throws(E) -> Result
Expand Down
Loading