From 17228cdb019c2072efa68eb71c916ea2a050c894 Mon Sep 17 00:00:00 2001 From: Michael Ilseman Date: Fri, 1 Mar 2019 11:45:12 -0800 Subject: [PATCH] [SE-0247] Add contiguous string APIs Adds API for querying, enforcing, and using contiguous strings. --- stdlib/public/core/AssertCommon.swift | 6 +- stdlib/public/core/ContiguouslyStored.swift | 30 +------ stdlib/public/core/LegacyABI.swift | 21 +++++ stdlib/public/core/OutputStream.swift | 3 +- stdlib/public/core/String.swift | 3 +- stdlib/public/core/StringCreate.swift | 11 --- stdlib/public/core/StringProtocol.swift | 94 +++++++++++++++++++++ 7 files changed, 127 insertions(+), 41 deletions(-) diff --git a/stdlib/public/core/AssertCommon.swift b/stdlib/public/core/AssertCommon.swift index f4d401ab083d3..b225cfe9c6fd1 100644 --- a/stdlib/public/core/AssertCommon.swift +++ b/stdlib/public/core/AssertCommon.swift @@ -115,7 +115,8 @@ internal func _assertionFailure( ) -> Never { prefix.withUTF8Buffer { (prefix) -> Void in - message._withUnsafeBufferPointerToUTF8 { + var message = message + message.withUTF8 { (messageUTF8) -> Void in file.withUTF8Buffer { (file) -> Void in @@ -145,7 +146,8 @@ internal func _assertionFailure( ) -> Never { prefix.withUTF8Buffer { (prefix) -> Void in - message._withUnsafeBufferPointerToUTF8 { + var message = message + message.withUTF8 { (messageUTF8) -> Void in _swift_stdlib_reportFatalError( prefix.baseAddress!, CInt(prefix.count), diff --git a/stdlib/public/core/ContiguouslyStored.swift b/stdlib/public/core/ContiguouslyStored.swift index 2a6d3ad4192be..55b625ec8701a 100644 --- a/stdlib/public/core/ContiguouslyStored.swift +++ b/stdlib/public/core/ContiguouslyStored.swift @@ -79,23 +79,12 @@ extension String: _HasContiguousBytes { @inline(__always) get { return self._guts.isFastUTF8 } } - @inlinable @inline(__always) - internal func _withUTF8( - _ body: (UnsafeBufferPointer) throws -> R - ) rethrows -> R { - if _fastPath(self._guts.isFastUTF8) { - return try self._guts.withFastUTF8 { - try body($0) - } - } - return try String._copying(self)._guts.withFastUTF8 { try body($0) } - } - @inlinable @inline(__always) internal func withUnsafeBytes( _ body: (UnsafeRawBufferPointer) throws -> R ) rethrows -> R { - return try self._withUTF8 { return try body(UnsafeRawBufferPointer($0)) } + var copy = self + return try copy.withUTF8 { return try body(UnsafeRawBufferPointer($0)) } } } extension Substring: _HasContiguousBytes { @@ -104,22 +93,11 @@ extension Substring: _HasContiguousBytes { @inline(__always) get { return self._wholeGuts.isFastUTF8 } } - @inlinable @inline(__always) - internal func _withUTF8( - _ body: (UnsafeBufferPointer) throws -> R - ) rethrows -> R { - if _fastPath(_wholeGuts.isFastUTF8) { - return try _wholeGuts.withFastUTF8(range: self._offsetRange) { - return try body($0) - } - } - return try String._copying(self)._guts.withFastUTF8 { try body($0) } - } - @inlinable @inline(__always) internal func withUnsafeBytes( _ body: (UnsafeRawBufferPointer) throws -> R ) rethrows -> R { - return try self._withUTF8 { return try body(UnsafeRawBufferPointer($0)) } + var copy = self + return try copy.withUTF8 { return try body(UnsafeRawBufferPointer($0)) } } } diff --git a/stdlib/public/core/LegacyABI.swift b/stdlib/public/core/LegacyABI.swift index 6f64dfd270db8..604a86bda1a97 100644 --- a/stdlib/public/core/LegacyABI.swift +++ b/stdlib/public/core/LegacyABI.swift @@ -39,3 +39,24 @@ extension Substring { internal var _wholeString: String { return base } } +extension String { + @available(*, unavailable, renamed: "String.withUTF8") + @inlinable + internal func _withUTF8( + _ body: (UnsafeBufferPointer) throws -> R + ) rethrows -> R { + var copy = self + return try copy.withUTF8(body) + } +} + +extension Substring { + @available(*, unavailable, renamed: "Substring.withUTF8") + @inlinable + internal func _withUTF8( + _ body: (UnsafeBufferPointer) throws -> R + ) rethrows -> R { + var copy = self + return try copy.withUTF8(body) + } +} diff --git a/stdlib/public/core/OutputStream.swift b/stdlib/public/core/OutputStream.swift index 6303846cfdbb5..a4dbca9dc310e 100644 --- a/stdlib/public/core/OutputStream.swift +++ b/stdlib/public/core/OutputStream.swift @@ -521,7 +521,8 @@ internal struct _Stdout : TextOutputStream { internal mutating func write(_ string: String) { if string.isEmpty { return } - _ = string._withUTF8 { utf8 in + var string = string + _ = string.withUTF8 { utf8 in _swift_stdlib_fwrite_stdout(utf8.baseAddress!, 1, utf8.count) } } diff --git a/stdlib/public/core/String.swift b/stdlib/public/core/String.swift index 270a7ad45c833..658ce7b088ecf 100644 --- a/stdlib/public/core/String.swift +++ b/stdlib/public/core/String.swift @@ -468,7 +468,8 @@ extension String { encodedAs targetEncoding: TargetEncoding.Type, _ body: (UnsafePointer) throws -> Result ) rethrows -> Result { - return try self._withUTF8 { utf8 in + var copy = self + return try copy.withUTF8 { utf8 in var arg = Array() arg.reserveCapacity(1 &+ self._guts.count / 4) let repaired = transcode( diff --git a/stdlib/public/core/StringCreate.swift b/stdlib/public/core/StringCreate.swift index 648420e30ab7d..62d725766927b 100644 --- a/stdlib/public/core/StringCreate.swift +++ b/stdlib/public/core/StringCreate.swift @@ -144,16 +144,6 @@ extension String { return contents.withUnsafeBufferPointer { String._uncheckedFromUTF8($0) } } - internal func _withUnsafeBufferPointerToUTF8( - _ body: (UnsafeBufferPointer) throws -> R - ) rethrows -> R { - return try self.withUnsafeBytes { rawBufPtr in - return try body(UnsafeBufferPointer( - start: rawBufPtr.baseAddress?.assumingMemoryBound(to: UInt8.self), - count: rawBufPtr.count)) - } - } - @usableFromInline @inline(never) // slow-path internal static func _fromCodeUnits< Input: Collection, @@ -218,4 +208,3 @@ extension String { } } } - diff --git a/stdlib/public/core/StringProtocol.swift b/stdlib/public/core/StringProtocol.swift index 4e31396c64254..2b2f3fdd15290 100644 --- a/stdlib/public/core/StringProtocol.swift +++ b/stdlib/public/core/StringProtocol.swift @@ -173,4 +173,98 @@ extension StringProtocol { } } +// Contiguous UTF-8 strings +extension String { + /// Returns whether this string is capable of providing access to + /// validly-encoded UTF-8 contents in contiguous memory in O(1) time. + /// + /// Contiguous strings always operate in O(1) time for withUTF8 and always + /// give a result for String.UTF8View.withContiguousStorageIfAvailable. + /// Contiguous strings also benefit from fast-paths and better optimizations. + /// + @_alwaysEmitIntoClient + public var isContiguousUTF8: Bool { return _guts.isFastUTF8 } + + /// If this string is not contiguous, make it so. If this mutates the string, + /// it will invalidate any pre-existing indices. + /// + /// Complexity: O(n) if non-contiguous, O(1) if already contiguous + /// + @_alwaysEmitIntoClient + public mutating func makeContiguousUTF8() { + if _fastPath(isContiguousUTF8) { return } + self = String._copying(self) + } + + /// Runs `body` over the content of this string in contiguous memory. If this + /// string is not contiguous, this will first make it contiguous, which will + /// also speed up subsequent access. If this mutates the string, + /// it will invalidate any pre-existing indices. + /// + /// Note that it is unsafe to escape the pointer provided to `body`. For + /// example, strings of up to 15 UTF-8 code units in length may be represented + /// in a small-string representation, and thus will be spilled into + /// temporary stack space which is invalid after `withUTF8` finishes + /// execution. + /// + /// Complexity: O(n) if non-contiguous, O(1) if already contiguous + /// + @_alwaysEmitIntoClient + public mutating func withUTF8( + _ body: (UnsafeBufferPointer) throws -> R + ) rethrows -> R { + makeContiguousUTF8() + return try _guts.withFastUTF8(body) + } +} +// Contiguous UTF-8 strings +extension Substring { + /// Returns whether this string is capable of providing access to + /// validly-encoded UTF-8 contents in contiguous memory in O(1) time. + /// + /// Contiguous strings always operate in O(1) time for withUTF8 and always + /// give a result for String.UTF8View.withContiguousStorageIfAvailable. + /// Contiguous strings also benefit from fast-paths and better optimizations. + /// + @_alwaysEmitIntoClient + public var isContiguousUTF8: Bool { return self.base.isContiguousUTF8 } + + /// If this string is not contiguous, make it so. If this mutates the + /// substring, it will invalidate any pre-existing indices. + /// + /// Complexity: O(n) if non-contiguous, O(1) if already contiguous + /// + @_alwaysEmitIntoClient + public mutating func makeContiguousUTF8() { + if _fastPath(isContiguousUTF8) { return } + self = String._copying(self)[...] + } + + /// Runs `body` over the content of this substring in contiguous memory. If + /// this substring is not contiguous, this will first make it contiguous, + /// which will also speed up subsequent access. If this mutates the substring, + /// it will invalidate any pre-existing indices. + /// + /// Note that it is unsafe to escape the pointer provided to `body`. For + /// example, strings of up to 15 UTF-8 code units in length may be represented + /// in a small-string representation, and thus will be spilled into + /// temporary stack space which is invalid after `withUTF8` finishes + /// execution. + /// + /// Complexity: O(n) if non-contiguous, O(1) if already contiguous + /// + @_alwaysEmitIntoClient + public mutating func withUTF8( + _ body: (UnsafeBufferPointer) throws -> R + ) rethrows -> R { + if _fastPath(isContiguousUTF8) { + return try _wholeGuts.withFastUTF8(range: self._offsetRange) { + return try body($0) + } + } + + makeContiguousUTF8() + return try _wholeGuts.withFastUTF8(body) + } +}