diff --git a/stdlib/public/core/CMakeLists.txt b/stdlib/public/core/CMakeLists.txt index ba69e14096a69..287b08cee96d8 100644 --- a/stdlib/public/core/CMakeLists.txt +++ b/stdlib/public/core/CMakeLists.txt @@ -132,6 +132,7 @@ set(SWIFTLIB_ESSENTIAL SetStorage.swift SetVariant.swift ShadowProtocols.swift + SharedString.swift Shims.swift Slice.swift SmallBuffer.swift diff --git a/stdlib/public/core/GroupInfo.json b/stdlib/public/core/GroupInfo.json index 380cdfe83e135..101a870b3301a 100644 --- a/stdlib/public/core/GroupInfo.json +++ b/stdlib/public/core/GroupInfo.json @@ -10,6 +10,7 @@ "CharacterProperties.swift", "ICU.swift", "NormalizedCodeUnitIterator.swift", + "SharedString.swift", "SmallString.swift", "StaticString.swift", "String.swift", diff --git a/stdlib/public/core/SharedString.swift b/stdlib/public/core/SharedString.swift new file mode 100644 index 0000000000000..93a06d196b06c --- /dev/null +++ b/stdlib/public/core/SharedString.swift @@ -0,0 +1,104 @@ +//===----------------------------------------------------------------------===// +// +// This source file is part of the Swift.org open source project +// +// Copyright (c) 2014 - 2019 Apple Inc. and the Swift project authors +// Licensed under Apache License v2.0 with Runtime Library Exception +// +// See https://swift.org/LICENSE.txt for license information +// See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors +// +//===----------------------------------------------------------------------===// + +extension String { + /// Creates a `String` whose backing store is shared with the UTF-8 data + /// referenced by the given buffer pointer. + /// + /// The `owner` argument should manage the lifetime of the shared buffer and + /// its deinitializer is responsible for deallocating `buffer`. The `String` + /// instance created by this initializer retains `owner` so that deallocation + /// occurs after the string is no longer in use. 
The buffer _must not_ be + /// deallocated while there are any strings sharing it. + /// + /// This initializer does not try to repair ill-formed UTF-8 code unit + /// sequences. If any are found, the result of the initializer is `nil`. + /// + /// - Parameters: + /// - buffer: An `UnsafeBufferPointer` containing the UTF-8 bytes that + /// should be shared with the created `String`. + /// - owner: An optional object that owns the memory referenced by `buffer` + /// and is responsible for deallocating it. + public init?( + sharingContent buffer: UnsafeBufferPointer<UInt8>, + owner: AnyObject? + ) { + guard let baseAddress = buffer.baseAddress, + case .success(let extraInfo) = validateUTF8(buffer) + else { + return nil + } + let storage = __SharedStringStorage( + immortal: baseAddress, + countAndFlags: _StringObject.CountAndFlags( + sharedCount: buffer.count, + isASCII: extraInfo.isASCII)) + storage._owner = owner + self.init(String(_StringGuts(storage))) + } + + /// Creates a `String` whose backing store is shared with the UTF-8 data in + /// the given array. + /// + /// The new `String` instance shares ownership of the array's underlying + /// storage, guaranteeing that the `String` remains valid even if the `array` + /// is released. + /// + /// This initializer does not try to repair ill-formed UTF-8 code unit + /// sequences. If any are found, the result of the initializer is `nil`. + /// + /// - Parameter array: An `Array` containing the UTF-8 bytes that should be + /// shared with the created `String`. + public init?(sharing array: [UInt8]) { + guard case (let owner, let rawPtr?) 
= array._cPointerArgs() else { + self = "" + return + } + let baseAddress = rawPtr.assumingMemoryBound(to: UInt8.self) + let buffer = UnsafeBufferPointer(start: baseAddress, count: array.count) + self.init(sharingContent: buffer, owner: owner) + } +} + +extension Substring { + /// Calls the given closure, passing it a `String` whose contents are equal to + /// the substring but which shares the substring's storage instead of copying + /// it. + /// + /// The `String` value passed into the closure is only valid during its + /// execution. + /// + /// - Parameter body: A closure with a `String` parameter that shares the + /// storage of the substring. If `body` has a return value, that value is + /// also used as the return value for the `withSharedString(_:)` method. The + /// argument is valid only for the duration of the closure's execution. + /// - Returns: The return value, if any, of the `body` closure parameter. + public func withSharedString<Result>( + _ body: (String) throws -> Result + ) rethrows -> Result { + guard _wholeGuts.isFastUTF8 else { + // TODO: What's the right way to get the pointer for a non-fast-UTF-8 + // string here? + return try body(String(self)) + } + + return try _wholeGuts.withFastUTF8 { utf8 in + let storage = __SharedStringStorage( + immortal: utf8.baseAddress! 
+ self._offsetRange.lowerBound, + countAndFlags: _StringObject.CountAndFlags( + sharedCount: self._offsetRange.count, + isASCII: _wholeGuts.isASCII)) + let str = String(_StringGuts(storage)) + return try body(str) + } + } +} diff --git a/test/stdlib/SharedString.swift b/test/stdlib/SharedString.swift new file mode 100644 index 0000000000000..560effa4feeb9 --- /dev/null +++ b/test/stdlib/SharedString.swift @@ -0,0 +1,90 @@ +// RUN: %target-run-simple-swift +// REQUIRES: executable_test + +// +// Tests for shared string APIs +// + +import StdlibUnittest + +var SharedStringTests = TestSuite("SharedStringTests") + +func makeValidUTF8Buffer() -> UnsafeBufferPointer<UInt8> { + let ptr = UnsafeMutablePointer<UInt8>.allocate(capacity: 4) + ptr.initialize(repeating: UInt8(ascii: "a"), count: 4) + return UnsafeBufferPointer(start: ptr, count: 4) +} + +func makeInvalidUTF8Buffer() -> UnsafeBufferPointer<UInt8> { + let ptr = UnsafeMutablePointer<UInt8>.allocate(capacity: 1) + ptr.pointee = 0x80 // orphaned continuation byte + return UnsafeBufferPointer(start: ptr, count: 1) +} + +class BufferDeallocator { + let buffer: UnsafeBufferPointer<UInt8> + + init(_ buffer: UnsafeBufferPointer<UInt8>) { + self.buffer = buffer + } + + deinit { + buffer.deallocate() + } +} + +SharedStringTests.test("String.init(sharingContent:owner:)") { + let buf = makeValidUTF8Buffer() + let str = String(sharingContent: buf, owner: BufferDeallocator(buf)) + + expectNotNil(str) + expectEqual("aaaa", str!) + + // Show that the string didn't copy the buffer by modifying it in-place. + UnsafeMutableBufferPointer(mutating: buf)[0] = UInt8(ascii: "b") + expectEqual("baaa", str!) + + // Show that mutating a copy works as expected. + var copy = str! + copy.append("c") + expectEqual("baaac", copy) + expectEqual("baaa", str!) 
+} + +SharedStringTests.test("String.init(sharingContent:owner:) invalid UTF8") { + let buf = makeInvalidUTF8Buffer() + let str = String(sharingContent: buf, owner: BufferDeallocator(buf)) + + expectNil(str) +} + +SharedStringTests.test("Substring.withSharedString(_:)") { + let original = "abcde" + let substr = original.dropFirst().dropLast() + + substr.withSharedString { shared in + expectEqual("bcd", shared) + } +} + +SharedStringTests.test("String.init(sharing:)") { + var array = [UInt8](repeating: UInt8(ascii: "a"), count: 4) + let str = String(sharing: array) + + expectNotNil(str) + expectEqual("aaaa", str!) + + // Show that mutating the array causes CoW and the original string isn't + // modified. + array[0] = UInt8(ascii: "b") + expectEqual("aaaa", str!) +} + +SharedStringTests.test("String.init(sharing:) invalid UTF8") { + let array: [UInt8] = [0x80] // orphaned continuation byte + let str = String(sharing: array) + + expectNil(str) +} + +runAllTests()