diff --git a/stdlib/public/SwiftShims/swift/shims/CoreFoundationShims.h b/stdlib/public/SwiftShims/swift/shims/CoreFoundationShims.h index ccf4146483ab9..76539785be192 100644 --- a/stdlib/public/SwiftShims/swift/shims/CoreFoundationShims.h +++ b/stdlib/public/SwiftShims/swift/shims/CoreFoundationShims.h @@ -71,6 +71,10 @@ _swift_stdlib_NSStringGetCStringTrampoline(id _Nonnull obj, _swift_shims_CFIndex maxLength, unsigned long encoding); +SWIFT_RUNTIME_STDLIB_API +const __swift_uint8_t * _Nullable +_swift_stdlib_NSStringUTF8StringTrampoline(id _Nonnull obj); + SWIFT_RUNTIME_STDLIB_API __swift_uint8_t _swift_stdlib_dyld_is_objc_constant_string(const void * _Nonnull addr); diff --git a/stdlib/public/core/String.swift b/stdlib/public/core/String.swift index b4132341e8985..06f8659b9177c 100644 --- a/stdlib/public/core/String.swift +++ b/stdlib/public/core/String.swift @@ -723,7 +723,8 @@ extension String { encodedAs targetEncoding: TargetEncoding.Type, _ body: (UnsafePointer) throws -> Result ) rethrows -> Result { - if targetEncoding == UTF8.self { + if targetEncoding == UTF8.self || + (targetEncoding == Unicode.ASCII.self && _guts.isASCII) { return try unsafe self.withCString { (cPtr: UnsafePointer) -> Result in _internalInvariant(UInt8.self == TargetEncoding.CodeUnit.self) @@ -801,7 +802,9 @@ extension String: _ExpressibleByBuiltinStringLiteral { self = String(_StringGuts(smol)) return } - unsafe self.init(_StringGuts(bufPtr, isASCII: Bool(isASCII))) + unsafe self.init( + _StringGuts(nullTerminatedImmortal: bufPtr, isASCII: Bool(isASCII)) + ) } } diff --git a/stdlib/public/core/StringBridge.swift b/stdlib/public/core/StringBridge.swift index 11d9e360f8b82..442695be37aa4 100644 --- a/stdlib/public/core/StringBridge.swift +++ b/stdlib/public/core/StringBridge.swift @@ -280,6 +280,12 @@ internal func _cocoaCStringUsingEncodingTrampoline( return unsafe _swift_stdlib_NSStringCStringUsingEncodingTrampoline(string, encoding) } +@_effects(readonly) +internal func 
_cocoaUTF8StringTrampoline(_ string: _CocoaString) + -> UnsafePointer? { + return unsafe _swift_stdlib_NSStringUTF8StringTrampoline(string) +} + @_effects(releasenone) internal func _cocoaGetCStringTrampoline( _ string: _CocoaString, @@ -626,10 +632,14 @@ extension String { // TODO: We'd rather emit a valid ObjC object statically than create a // shared string class instance. let gutsCountAndFlags = _guts._object._countAndFlags + let newCountAndFlags = _StringObject.CountAndFlags( + sharedCount: _guts.count, + isASCII: gutsCountAndFlags.isASCII, + isNullTerminated: gutsCountAndFlags.isNullTerminated + ) return unsafe __SharedStringStorage( immortal: _guts._object.fastUTF8.baseAddress!, - countAndFlags: _StringObject.CountAndFlags( - sharedCount: _guts.count, isASCII: gutsCountAndFlags.isASCII)) + countAndFlags: newCountAndFlags) } _internalInvariant(_guts._object.hasObjCBridgeableObject, @@ -666,6 +676,21 @@ internal func _SwiftCreateBridgedString_DoNotCall( @_spi(Foundation) public func _SwiftCreateImmortalString_ForFoundation( buffer: UnsafeBufferPointer, isASCII: Bool +) -> String? { + switch unsafe validateUTF8(buffer) { + case .success(let extraInfo): + return unsafe String( + _StringGuts(nullTerminatedImmortal: buffer, isASCII: extraInfo.isASCII) + ) + default: + return nil + } +} + +@available(SwiftStdlib 6.2, *) +@_spi(Foundation) public func _SwiftCreateNonTerminatedImmortalString_ForFoundation( + buffer: UnsafeBufferPointer, + isASCII: Bool ) -> String? 
{ switch unsafe validateUTF8(buffer) { case .success(let extraInfo): diff --git a/stdlib/public/core/StringCreate.swift b/stdlib/public/core/StringCreate.swift index dafb24b5f85d2..76e3e28473ef7 100644 --- a/stdlib/public/core/StringCreate.swift +++ b/stdlib/public/core/StringCreate.swift @@ -390,7 +390,8 @@ extension String { isASCII: isASCII, isNFC: isASCII, isNativelyStored: false, - isTailAllocated: false + isTailAllocated: false, + isNullTerminated: false ) ) } diff --git a/stdlib/public/core/StringGuts.swift b/stdlib/public/core/StringGuts.swift index 5d41f3ababcc6..a23a5c7690fff 100644 --- a/stdlib/public/core/StringGuts.swift +++ b/stdlib/public/core/StringGuts.swift @@ -49,6 +49,16 @@ extension _StringGuts { internal init(_ smol: _SmallString) { self.init(_StringObject(smol)) } + + @_alwaysEmitIntoClient @inline(__always) + internal init( + nullTerminatedImmortal bufPtr: UnsafeBufferPointer, + isASCII: Bool + ) { + unsafe self.init( + _StringObject(nullTerminatedImmortal: bufPtr, isASCII: isASCII) + ) + } @inlinable @inline(__always) internal init(_ bufPtr: UnsafeBufferPointer, isASCII: Bool) { @@ -235,7 +245,12 @@ extension _StringGuts { internal func _slowWithCString( _ body: (UnsafePointer) throws -> Result ) rethrows -> Result { - _internalInvariant(!_object.isFastZeroTerminated) + if _object.isFastZeroTerminated { + // This branch looks unreachable, but can be reached via `withCString` + // in binaries that haven't been recompiled since the termination flag + // was added to _StringObject. Retry the fast path if so. 
+ return try unsafe withCString(body) + } return try String(self).utf8CString.withUnsafeBufferPointer { let ptr = unsafe $0.baseAddress._unsafelyUnwrappedUnchecked return try unsafe body(ptr) diff --git a/stdlib/public/core/StringObject.swift b/stdlib/public/core/StringObject.swift index b8b3fceaf04c3..65e61931050a2 100644 --- a/stdlib/public/core/StringObject.swift +++ b/stdlib/public/core/StringObject.swift @@ -719,6 +719,9 @@ extension _StringObject { bit-position of isSmall on the BridgeObject. This allows us to check for native storage without an extra branch guarding against smallness. See `_StringObject.hasNativeStorage` for this usage. + - `isNativelyStored` implies `isNullTerminated` in processes rebuilt since + `isNullTerminated` was introduced + b60: isTailAllocated. contiguous UTF-8 code units starts at address + `nativeBias` - `isNativelyStored` always implies `isTailAllocated`, but not vice versa @@ -736,8 +739,16 @@ extension _StringObject { at any time, but we cannot reuse it for something else -- we need to preserve its current meaning to keep inlined index validation code working. - - b48-58: Reserved for future usage. + + b58: isNullTerminated. Set if the contents of the String are known to have + a terminating 0 byte. If not set, no assumptions should be made about the + String's compatibility with e.g. `strlen`. Exception: `isNativelyStored` + implies this flag, but predates it, so it may not be set by older binaries + on ABI-stable platforms. To avoid being tripped up by this subtlety, + always use the `isFastZeroTerminated` accessor rather than checking the + flag directly. + + b48-57: Reserved for future usage. - Because Swift is ABI stable (on some platforms at least), these bits can only be assigned semantics that don't affect interoperability with code built with previous releases of the Standard Library, from 5.0 onward. 
@@ -786,8 +797,13 @@ extension _StringObject.CountAndFlags { internal static var isForeignUTF8Mask: UInt64 { return 0x0800_0000_0000_0000 } - - // General purpose bottom initializer + + @_alwaysEmitIntoClient // Swift 6.2 + @inline(__always) + internal static var isNullTerminatedMask: UInt64 { + return 0x0400_0000_0000_0000 + } + @inlinable @inline(__always) internal init( count: Int, @@ -795,6 +811,26 @@ extension _StringObject.CountAndFlags { isNFC: Bool, isNativelyStored: Bool, isTailAllocated: Bool + ) { + self.init( + count: count, + isASCII: isASCII, + isNFC: isNFC, + isNativelyStored: isNativelyStored, + isTailAllocated: isTailAllocated, + isNullTerminated: false + ) + } + + // General purpose bottom initializer + @_alwaysEmitIntoClient @inline(__always) + internal init( + count: Int, + isASCII: Bool, + isNFC: Bool, + isNativelyStored: Bool, + isTailAllocated: Bool, + isNullTerminated: Bool ) { var rawBits = UInt64(truncatingIfNeeded: count) _internalInvariant(rawBits <= _StringObject.CountAndFlags.countMask) @@ -816,6 +852,11 @@ extension _StringObject.CountAndFlags { if isTailAllocated { rawBits |= _StringObject.CountAndFlags.isTailAllocatedMask } + + // Tail allocated strings are always terminated, others may or may not be + if isNullTerminated || isNativelyStored { + rawBits |= _StringObject.CountAndFlags.isNullTerminatedMask + } self.init(raw: rawBits) _internalInvariant(count == self.count) @@ -823,6 +864,9 @@ extension _StringObject.CountAndFlags { _internalInvariant(isNFC == self.isNFC) _internalInvariant(isNativelyStored == self.isNativelyStored) _internalInvariant(isTailAllocated == self.isTailAllocated) + _internalInvariant( + isNullTerminated == self.isNullTerminated || + isNativelyStored == self.isNullTerminated) } @inlinable @inline(__always) @@ -846,7 +890,18 @@ extension _StringObject.CountAndFlags { isASCII: isASCII, isNFC: isASCII, isNativelyStored: false, - isTailAllocated: true) + isTailAllocated: true, + isNullTerminated: false) + } + 
@_alwaysEmitIntoClient @inline(__always) + internal init(nullTerminatedImmortalCount immortalCount: Int, isASCII: Bool) { + self.init( + count: immortalCount, + isASCII: isASCII, + isNFC: isASCII, + isNativelyStored: false, + isTailAllocated: true, + isNullTerminated: true) } @inline(__always) internal init(mortalCount: Int, isASCII: Bool) { @@ -855,7 +910,18 @@ extension _StringObject.CountAndFlags { isASCII: isASCII, isNFC: isASCII, isNativelyStored: true, - isTailAllocated: true) + isTailAllocated: true, + isNullTerminated: true) + } + @inline(__always) + internal init(sharedCount: Int, isASCII: Bool, isNullTerminated: Bool) { + self.init( + count: sharedCount, + isASCII: isASCII, + isNFC: isASCII, + isNativelyStored: false, + isTailAllocated: false, + isNullTerminated: isNullTerminated) } @inline(__always) internal init(sharedCount: Int, isASCII: Bool) { @@ -864,7 +930,8 @@ extension _StringObject.CountAndFlags { isASCII: isASCII, isNFC: isASCII, isNativelyStored: false, - isTailAllocated: false) + isTailAllocated: false, + isNullTerminated: false) } // @@ -909,6 +976,12 @@ extension _StringObject.CountAndFlags { internal var isForeignUTF8: Bool { (_storage & Self.isForeignUTF8Mask) != 0 } + + @_alwaysEmitIntoClient + @inline(__always) // Swift 6.2 + internal var isNullTerminated: Bool { + return 0 != _storage & _StringObject.CountAndFlags.isNullTerminatedMask + } #if !INTERNAL_CHECKS_ENABLED @inlinable @inline(__always) internal func _invariantCheck() {} @@ -1219,16 +1292,40 @@ extension _StringObject { // Small strings nul-terminate when spilling for contiguous access if isSmall { return true } - // TODO(String performance): Use performance flag, which could be more - // inclusive. For now, we only know native strings and small strings (when - // accessed) are. We could also know about some shared strings. 
- - return largeFastIsTailAllocated + return largeFastIsTailAllocated || _countAndFlags.isNullTerminated } } // Object creation extension _StringObject { + @_alwaysEmitIntoClient @inline(__always) + internal init( + nullTerminatedImmortal bufPtr: UnsafeBufferPointer, + isASCII: Bool + ) { + let countAndFlags = CountAndFlags( + nullTerminatedImmortalCount: bufPtr.count, isASCII: isASCII) +#if _pointerBitWidth(_64) + // We bias to align code paths for mortal and immortal strings + let biasedAddress = unsafe UInt( + bitPattern: bufPtr.baseAddress._unsafelyUnwrappedUnchecked + ) &- _StringObject.nativeBias + + self.init( + pointerBits: UInt64(truncatingIfNeeded: biasedAddress), + discriminator: Nibbles.largeImmortal(), + countAndFlags: countAndFlags) +#elseif _pointerBitWidth(_32) || _pointerBitWidth(_16) + self.init( + variant: .immortal(start: bufPtr.baseAddress._unsafelyUnwrappedUnchecked), + discriminator: Nibbles.largeImmortal(), + countAndFlags: countAndFlags) +#else +#error("Unknown platform") +#endif + } + + @inlinable @inline(__always) internal init(immortal bufPtr: UnsafeBufferPointer, isASCII: Bool) { let countAndFlags = CountAndFlags( @@ -1300,7 +1397,11 @@ extension _StringObject { internal init( cocoa: AnyObject, providesFastUTF8: Bool, isASCII: Bool, length: Int ) { - let countAndFlags = CountAndFlags(sharedCount: length, isASCII: isASCII) + let countAndFlags = CountAndFlags( + sharedCount: length, + isASCII: isASCII, + isNullTerminated: providesFastUTF8 + ) let discriminator = Nibbles.largeCocoa(providesFastUTF8: providesFastUTF8) #if $Embedded fatalError("unreachable in embedded Swift") @@ -1327,7 +1428,11 @@ extension _StringObject { isASCII: Bool, length: Int ) { - let countAndFlags = CountAndFlags(sharedCount: length, isASCII: isASCII) + let countAndFlags = CountAndFlags( + sharedCount: length, + isASCII: isASCII, + isNullTerminated: providesFastUTF8 + ) let discriminator = Nibbles.largeFastImmortalCocoa() #if $Embedded fatalError("unreachable in 
embedded Swift") diff --git a/stdlib/public/core/StringStorageBridge.swift b/stdlib/public/core/StringStorageBridge.swift index 8d9823f0ce8d3..599eccc57e738 100644 --- a/stdlib/public/core/StringStorageBridge.swift +++ b/stdlib/public/core/StringStorageBridge.swift @@ -82,9 +82,9 @@ extension _AbstractStringStorage { @inline(__always) @_effects(readonly) internal func _cString(encoding: UInt) -> UnsafePointer? { - switch (encoding, isASCII) { - case (_cocoaASCIIEncoding, true), - (_cocoaUTF8Encoding, _): + switch (encoding, isASCII, asString._guts._object.isFastZeroTerminated) { + case (_cocoaASCIIEncoding, true, true), + (_cocoaUTF8Encoding, _, true): return unsafe start default: return unsafe _cocoaCStringUsingEncodingTrampoline(self, encoding) @@ -303,8 +303,10 @@ extension __SharedStringStorage { final internal func _fastCStringContents( _ requiresNulTermination: Int8 ) -> UnsafePointer? { - if isASCII { - return unsafe start._asCChar + if 0 == requiresNulTermination || asString._guts._object.isFastZeroTerminated { + if isASCII { + return unsafe start._asCChar + } } return nil } @@ -312,7 +314,10 @@ extension __SharedStringStorage { @objc(UTF8String) @_effects(readonly) final internal func _utf8String() -> UnsafePointer? 
{ - return start + if asString._guts._object.isFastZeroTerminated { + return start + } + return unsafe _cocoaUTF8StringTrampoline(self) } @objc(cStringUsingEncoding:) diff --git a/stdlib/public/stubs/FoundationHelpers.mm b/stdlib/public/stubs/FoundationHelpers.mm index 66382c677e004..159b19fe284e3 100644 --- a/stdlib/public/stubs/FoundationHelpers.mm +++ b/stdlib/public/stubs/FoundationHelpers.mm @@ -89,6 +89,14 @@ static inline void initializeBridgingFunctions() { return imp(obj, @selector(cStringUsingEncoding:), encoding); } +const __swift_uint8_t * +_swift_stdlib_NSStringUTF8StringTrampoline(id _Nonnull obj) { + typedef __swift_uint8_t * _Nullable (*utf8StrImplPtr)(id, SEL); + utf8StrImplPtr imp = (utf8StrImplPtr)class_getMethodImplementation([obj superclass], + @selector(UTF8String)); + return imp(obj, @selector(UTF8String)); +} + __swift_uint8_t _swift_stdlib_NSStringGetCStringTrampoline(id _Nonnull obj, _swift_shims_UInt8 *buffer, diff --git a/test/abi/macOS/arm64/stdlib-asserts.swift b/test/abi/macOS/arm64/stdlib-asserts.swift index a298714a7c08d..0af7ea01849f3 100644 --- a/test/abi/macOS/arm64/stdlib-asserts.swift +++ b/test/abi/macOS/arm64/stdlib-asserts.swift @@ -45,4 +45,7 @@ Added: _$ss24_RuntimeFunctionCountersV07runtimeB11NameToIndexSDySSSiGvpZMV Added: _$ss24_RuntimeFunctionCountersV07runtimeB5NamesSaySSGvpZMV Added: _$ss24_RuntimeFunctionCountersV07runtimebC7OffsetsSPys6UInt16VGvpZMV +Added: _$ss13_StringObjectV13CountAndFlagsV16isNullTerminatedSbvpMV +Added: _$ss13_StringObjectV13CountAndFlagsV20isNullTerminatedMasks6UInt64VvpZMV + // Runtime Symbols diff --git a/test/abi/macOS/arm64/stdlib.swift b/test/abi/macOS/arm64/stdlib.swift index 9ca81612dee6d..9310a295772f2 100644 --- a/test/abi/macOS/arm64/stdlib.swift +++ b/test/abi/macOS/arm64/stdlib.swift @@ -942,3 +942,11 @@ Added: _$ss18EnumeratedSequenceVsSlRzrlE7isEmptySbvpMV Added: _$ss18EnumeratedSequenceVsSlRzrlE8endIndexABsSlRzrlE0D0Vyx_GvpMV Added: 
_$ss18EnumeratedSequenceVsSlRzrlEySi6offset_7ElementQz7elementtABsSlRzrlE5IndexVyx_GcipMV Added: _$ss18EnumeratedSequenceVyxGSKsSkRzrlMc + +// CoroutineAccessors +Added: _swift_coro_alloc +Added: _swift_coro_dealloc + +// Shared string creation for Foundation, again +Added: _$ss53_SwiftCreateNonTerminatedImmortalString_ForFoundation6buffer7isASCIISSSgSRys5UInt8VG_SbtF +Added: __swift_stdlib_NSStringUTF8StringTrampoline diff --git a/test/abi/macOS/x86_64/stdlib-asserts.swift b/test/abi/macOS/x86_64/stdlib-asserts.swift index 235e155217768..b80a6747e8eb1 100644 --- a/test/abi/macOS/x86_64/stdlib-asserts.swift +++ b/test/abi/macOS/x86_64/stdlib-asserts.swift @@ -64,5 +64,8 @@ Added: _$ss24_RuntimeFunctionCountersV07runtimeB11NameToIndexSDySSSiGvpZMV Added: _$ss24_RuntimeFunctionCountersV07runtimeB5NamesSaySSGvpZMV Added: _$ss24_RuntimeFunctionCountersV07runtimebC7OffsetsSPys6UInt16VGvpZMV +Added: _$ss13_StringObjectV13CountAndFlagsV16isNullTerminatedSbvpMV +Added: _$ss13_StringObjectV13CountAndFlagsV20isNullTerminatedMasks6UInt64VvpZMV + // Runtime Symbols Added: _swift_clearSensitive diff --git a/test/abi/macOS/x86_64/stdlib.swift b/test/abi/macOS/x86_64/stdlib.swift index 51506f74c0200..b04680f7a5e90 100644 --- a/test/abi/macOS/x86_64/stdlib.swift +++ b/test/abi/macOS/x86_64/stdlib.swift @@ -942,3 +942,11 @@ Added: _$ss18EnumeratedSequenceVsSlRzrlE7isEmptySbvpMV Added: _$ss18EnumeratedSequenceVsSlRzrlE8endIndexABsSlRzrlE0D0Vyx_GvpMV Added: _$ss18EnumeratedSequenceVsSlRzrlEySi6offset_7ElementQz7elementtABsSlRzrlE5IndexVyx_GcipMV Added: _$ss18EnumeratedSequenceVyxGSKsSkRzrlMc + +// CoroutineAccessors +Added: _swift_coro_alloc +Added: _swift_coro_dealloc + +// Shared string creation for Foundation, again +Added: _$ss53_SwiftCreateNonTerminatedImmortalString_ForFoundation6buffer7isASCIISSSgSRys5UInt8VG_SbtF +Added: __swift_stdlib_NSStringUTF8StringTrampoline diff --git a/test/stdlib/StringBridge.swift b/test/stdlib/StringBridge.swift index b4f32c8bb416e..37db76da6554c 
100644
--- a/test/stdlib/StringBridge.swift
+++ b/test/stdlib/StringBridge.swift
@@ -56,8 +56,10 @@ StringBridgeTests.test("Tagged NSString") {
 #endif // not 32bit
 }
 
-StringBridgeTests.test("Constant NSString New SPI") {
-  if #available(SwiftStdlib 6.1, *) {
+StringBridgeTests.test("Constant NSString New SPI")
+  .require(.stdlib_6_1)
+  .code {
+  guard #available(SwiftStdlib 6.1, *) else { return }
     //21 characters long so avoids _SmallString
     let constantString:NSString = CFRunLoopMode.commonModes.rawValue as NSString
     let regularBridged = constantString as String
@@ -74,7 +76,49 @@ StringBridgeTests.test("Constant NSString New SPI") {
       ObjectIdentifier(reverseBridged)
     )
     expectEqual(bridged, regularBridged)
-  }
+}
+
+StringBridgeTests.test("Shared String SPI")
+  .require(.stdlib_6_2)
+  .code {
+  guard #available(SwiftStdlib 6.2, *) else { return }
+  // Builds immortal strings over `literal`'s UTF-8 bytes via both Foundation
+  // SPIs and checks what their bridged NSStrings return from `utf8String`.
+  func test(literal: String, isASCII: Bool) {
+    let baseCount = literal.utf8.count
+    literal.withCString { intptr in
+      intptr.withMemoryRebound(to: UInt8.self, capacity: baseCount) { ptr in
+        let fullBuffer = UnsafeBufferPointer(start: ptr, count: baseCount)
+        let fullString = _SwiftCreateImmortalString_ForFoundation(
+          buffer: fullBuffer,
+          isASCII: isASCII
+        )
+        expectNotNil(fullString)
+        let bridgedFullString = (fullString! as NSString)
+        let fullCString = bridgedFullString.utf8String!
+        expectEqual(baseCount, strlen(fullCString))
+        expectEqual(strcmp(ptr, fullCString), 0)
+        let fullCString2 = bridgedFullString.utf8String!
+        expectEqual(fullCString, fullCString2) //if we're already terminated, we can return the contents pointer as-is
+
+        let partialBuffer = UnsafeBufferPointer(start: ptr, count: 16) //make sure it's not a smol string
+        let partialString = _SwiftCreateNonTerminatedImmortalString_ForFoundation(
+          buffer: partialBuffer,
+          isASCII: isASCII
+        )
+        expectNotNil(partialString)
+        let partialCString = (partialString! as NSString).utf8String!
+        expectEqual(16, strlen(partialCString))
+        // Compare the partial string's own C string against the source bytes;
+        // `fullCString` was already checked above and proves nothing here.
+        expectEqual(strncmp(ptr, partialCString, 16), 0)
+        withExtendedLifetime(partialString) {}
+        withExtendedLifetime(fullString) {}
+      }
+    }
+  }
+  test(literal: "abcdefghijklmnopqrstuvwxyz", isASCII: true)
+  test(literal: "abcdëfghijklmnopqrstuvwxyz", isASCII: false)
 }
 
 StringBridgeTests.test("Bridging") {