Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
21 commits
Select commit Hold shift + click to select a range
76d48b9
WIP: add a flag to track null termination of string literals
Catfish-Man Mar 18, 2025
8715633
Remove unnecessary ABI
Catfish-Man Mar 18, 2025
79e8358
Add new ABI
Catfish-Man Mar 18, 2025
da98c52
Merge branch 'main' into come-with-me-if-you-want-to-live-forever
Catfish-Man Mar 18, 2025
0c8fb4c
Rename as requested
Catfish-Man Mar 18, 2025
b1f4e1f
Merge remote-tracking branch 'refs/remotes/origin/come-with-me-if-you…
Catfish-Man Mar 18, 2025
f8522f4
Sidestep the question of whether isNullTerminated should say yes or n…
Catfish-Man Mar 18, 2025
6c53a52
Make non-terminated shared strings bridging-safe
Catfish-Man Mar 20, 2025
1fc7b0f
Leave old symbol in place
Catfish-Man Mar 20, 2025
bc212c5
Apparently @_aEIC vars still have symbols in some cases
Catfish-Man Mar 20, 2025
a9d5370
Merge remote-tracking branch 'origin/main' into come-with-me-if-you-w…
Catfish-Man Mar 20, 2025
69385a2
Add a test for bridging the new SPI
Catfish-Man Mar 20, 2025
37b1770
Add symbols on x86 too
Catfish-Man Mar 20, 2025
344f41a
Fix availability and fix up test
Catfish-Man Mar 20, 2025
eaced32
Plumb flags through bridging properly, and check them in shared, not …
Catfish-Man Mar 21, 2025
1f8e86a
Set null terminated to true for non-shared strings and for bridged st…
Catfish-Man Mar 21, 2025
093135c
Merge branch 'main' into come-with-me-if-you-want-to-live-forever
Catfish-Man Mar 28, 2025
a34dfc8
Fix withCString in the presence of old binaries, and optimize it for …
Catfish-Man Mar 28, 2025
0eed4cc
Remove now-redundant thing
Catfish-Man Mar 28, 2025
f33b8ad
Fix a warning
Catfish-Man Mar 28, 2025
8025cc0
Merge branch 'main' into come-with-me-if-you-want-to-live-forever
Catfish-Man Mar 28, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions stdlib/public/SwiftShims/swift/shims/CoreFoundationShims.h
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,10 @@ _swift_stdlib_NSStringGetCStringTrampoline(id _Nonnull obj,
_swift_shims_CFIndex maxLength,
unsigned long encoding);

// Trampoline exported from the runtime for the Swift standard library.
// Takes a non-null Objective-C object and returns a nullable pointer to
// unsigned bytes.
// NOTE(review): judging by the name, this presumably forwards to
// -[NSString UTF8String]; confirm against the implementation.
SWIFT_RUNTIME_STDLIB_API
const __swift_uint8_t * _Nullable
_swift_stdlib_NSStringUTF8StringTrampoline(id _Nonnull obj);
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

My kingdom for a way to force swiftc to generate an objc_msgSend_super call so I don't have to do this ridiculous hoop jumping


// Runtime entry point taking a non-null address and returning a byte-sized
// result.
// NOTE(review): by its name this presumably reports whether `addr` points at
// an Objective-C constant string known to dyld (nonzero meaning yes);
// confirm against the implementation.
SWIFT_RUNTIME_STDLIB_API
__swift_uint8_t
_swift_stdlib_dyld_is_objc_constant_string(const void * _Nonnull addr);
Expand Down
7 changes: 5 additions & 2 deletions stdlib/public/core/String.swift
Original file line number Diff line number Diff line change
Expand Up @@ -723,7 +723,8 @@ extension String {
encodedAs targetEncoding: TargetEncoding.Type,
_ body: (UnsafePointer<TargetEncoding.CodeUnit>) throws -> Result
) rethrows -> Result {
if targetEncoding == UTF8.self {
if targetEncoding == UTF8.self ||
(targetEncoding == Unicode.ASCII.self && _guts.isASCII) {
return try unsafe self.withCString {
(cPtr: UnsafePointer<CChar>) -> Result in
_internalInvariant(UInt8.self == TargetEncoding.CodeUnit.self)
Expand Down Expand Up @@ -801,7 +802,9 @@ extension String: _ExpressibleByBuiltinStringLiteral {
self = String(_StringGuts(smol))
return
}
unsafe self.init(_StringGuts(bufPtr, isASCII: Bool(isASCII)))
unsafe self.init(
_StringGuts(nullTerminatedImmortal: bufPtr, isASCII: Bool(isASCII))
)
}
}

Expand Down
29 changes: 27 additions & 2 deletions stdlib/public/core/StringBridge.swift
Original file line number Diff line number Diff line change
Expand Up @@ -280,6 +280,12 @@ internal func _cocoaCStringUsingEncodingTrampoline(
return unsafe _swift_stdlib_NSStringCStringUsingEncodingTrampoline(string, encoding)
}

/// Hands `string` to `_swift_stdlib_NSStringUTF8StringTrampoline` and returns
/// whatever pointer that call produces.
///
/// - Parameter string: The Cocoa string object passed to the trampoline.
/// - Returns: The trampoline's result, which may be `nil`.
@_effects(readonly)
internal func _cocoaUTF8StringTrampoline(
  _ string: _CocoaString
) -> UnsafePointer<UInt8>? {
  return unsafe _swift_stdlib_NSStringUTF8StringTrampoline(string)
}

@_effects(releasenone)
internal func _cocoaGetCStringTrampoline(
_ string: _CocoaString,
Expand Down Expand Up @@ -626,10 +632,14 @@ extension String {
// TODO: We'd rather emit a valid ObjC object statically than create a
// shared string class instance.
let gutsCountAndFlags = _guts._object._countAndFlags
let newCountAndFlags = _StringObject.CountAndFlags(
sharedCount: _guts.count,
isASCII: gutsCountAndFlags.isASCII,
isNullTerminated: gutsCountAndFlags.isNullTerminated
)
return unsafe __SharedStringStorage(
immortal: _guts._object.fastUTF8.baseAddress!,
countAndFlags: _StringObject.CountAndFlags(
sharedCount: _guts.count, isASCII: gutsCountAndFlags.isASCII))
countAndFlags: newCountAndFlags)
}

_internalInvariant(_guts._object.hasObjCBridgeableObject,
Expand Down Expand Up @@ -666,6 +676,21 @@ internal func _SwiftCreateBridgedString_DoNotCall(
@_spi(Foundation) public func _SwiftCreateImmortalString_ForFoundation(
  buffer: UnsafeBufferPointer<UInt8>,
  isASCII: Bool
) -> String? {
  // Validates `buffer` as UTF-8 and, on success, wraps it (without copying
  // here) in a String built through the `nullTerminatedImmortal` guts
  // initializer. Any other validation outcome yields nil.
  //
  // The `isASCII` argument is not consulted: the ASCII flag comes from the
  // validation result instead.
  //
  // NOTE(review): the initializer used here marks the contents as
  // null-terminated, so callers presumably must guarantee a 0 byte follows
  // `buffer` -- confirm.
  guard case .success(let extraInfo) = unsafe validateUTF8(buffer) else {
    return nil
  }
  let guts = unsafe _StringGuts(
    nullTerminatedImmortal: buffer,
    isASCII: extraInfo.isASCII
  )
  return String(guts)
}

@available(SwiftStdlib 6.2, *)
@_spi(Foundation) public func _SwiftCreateNonTerminatedImmortalString_ForFoundation(
buffer: UnsafeBufferPointer<UInt8>,
isASCII: Bool
) -> String? {
switch unsafe validateUTF8(buffer) {
case .success(let extraInfo):
Expand Down
3 changes: 2 additions & 1 deletion stdlib/public/core/StringCreate.swift
Original file line number Diff line number Diff line change
Expand Up @@ -390,7 +390,8 @@ extension String {
isASCII: isASCII,
isNFC: isASCII,
isNativelyStored: false,
isTailAllocated: false
isTailAllocated: false,
isNullTerminated: false
)
)
}
Expand Down
17 changes: 16 additions & 1 deletion stdlib/public/core/StringGuts.swift
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,16 @@ extension _StringGuts {
internal init(_ smol: _SmallString) {
self.init(_StringObject(smol))
}

/// Creates guts wrapping a `_StringObject` built with its
/// `nullTerminatedImmortal` initializer.
///
/// - Parameters:
///   - bufPtr: The buffer of UTF-8 code units backing the string.
///   - isASCII: Whether the contents are ASCII.
@_alwaysEmitIntoClient @inline(__always)
internal init(
  nullTerminatedImmortal bufPtr: UnsafeBufferPointer<UInt8>,
  isASCII: Bool
) {
  let object = unsafe _StringObject(
    nullTerminatedImmortal: bufPtr,
    isASCII: isASCII
  )
  self.init(object)
}

@inlinable @inline(__always)
internal init(_ bufPtr: UnsafeBufferPointer<UInt8>, isASCII: Bool) {
Expand Down Expand Up @@ -235,7 +245,12 @@ extension _StringGuts {
internal func _slowWithCString<Result>(
_ body: (UnsafePointer<Int8>) throws -> Result
) rethrows -> Result {
_internalInvariant(!_object.isFastZeroTerminated)
if _object.isFastZeroTerminated {
// This branch looks unreachable, but can be reached via `withCString`
// in binaries that haven't been recompiled since the termination flag
// was added to _StringObject. Retry the fast path if so.
return try unsafe withCString(body)
}
return try String(self).utf8CString.withUnsafeBufferPointer {
let ptr = unsafe $0.baseAddress._unsafelyUnwrappedUnchecked
return try unsafe body(ptr)
Expand Down
133 changes: 119 additions & 14 deletions stdlib/public/core/StringObject.swift
Original file line number Diff line number Diff line change
Expand Up @@ -719,6 +719,9 @@ extension _StringObject {
bit-position of isSmall on the BridgeObject. This allows us to check for
native storage without an extra branch guarding against smallness. See
`_StringObject.hasNativeStorage` for this usage.
- `isNativelyStored` implies `isNullTerminated` in processes rebuilt since
`isNullTerminated` was introduced


b60: isTailAllocated. contiguous UTF-8 code units starts at address + `nativeBias`
- `isNativelyStored` always implies `isTailAllocated`, but not vice versa
Expand All @@ -736,8 +739,16 @@ extension _StringObject {
at any time, but we cannot reuse it for something else -- we need to
preserve its current meaning to keep inlined index validation code
working.

b48-58: Reserved for future usage.

b58: isNullTerminated. Set if the contents of the String are known to have
a terminating 0 byte. If not set, no assumptions should be made about the
String's compatibility with e.g. `strlen`. Exception: `isNativelyStored`
implies this flag, but predates it, so it may not be set by older binaries
on ABI-stable platforms. To avoid being tripped up by this subtlety,
always use the `isFastZeroTerminated` accessor rather than checking the
flag directly.

b48-57: Reserved for future usage.
- Because Swift is ABI stable (on some platforms at least), these bits can
only be assigned semantics that don't affect interoperability with code
built with previous releases of the Standard Library, from 5.0 onward.
Expand Down Expand Up @@ -786,15 +797,40 @@ extension _StringObject.CountAndFlags {
internal static var isForeignUTF8Mask: UInt64 {
return 0x0800_0000_0000_0000
}

// General purpose bottom initializer

/// Mask selecting bit 58 of the raw count-and-flags bits, the
/// `isNullTerminated` flag.
@_alwaysEmitIntoClient // Swift 6.2
@inline(__always)
internal static var isNullTerminatedMask: UInt64 {
  0x0400_0000_0000_0000
}

/// Five-argument initializer that takes no null-termination argument.
///
/// Delegates to the six-argument initializer, always passing
/// `isNullTerminated: false`.
///
/// NOTE(review): presumably retained so callers of the older signature keep
/// working -- confirm.
@inlinable @inline(__always)
internal init(
  count: Int,
  isASCII: Bool,
  isNFC: Bool,
  isNativelyStored: Bool,
  isTailAllocated: Bool
) {
  self.init(
    count: count,
    isASCII: isASCII,
    isNFC: isNFC,
    isNativelyStored: isNativelyStored,
    isTailAllocated: isTailAllocated,
    isNullTerminated: false
  )
}

// General purpose bottom initializer
@_alwaysEmitIntoClient @inline(__always)
internal init(
count: Int,
isASCII: Bool,
isNFC: Bool,
isNativelyStored: Bool,
isTailAllocated: Bool,
isNullTerminated: Bool
) {
var rawBits = UInt64(truncatingIfNeeded: count)
_internalInvariant(rawBits <= _StringObject.CountAndFlags.countMask)
Expand All @@ -816,13 +852,21 @@ extension _StringObject.CountAndFlags {
if isTailAllocated {
rawBits |= _StringObject.CountAndFlags.isTailAllocatedMask
}

// Natively stored strings are always null-terminated, so the flag is forced
// on for them; any other string sets it only when the caller asks for it.
if isNullTerminated || isNativelyStored {
rawBits |= _StringObject.CountAndFlags.isNullTerminatedMask
}

self.init(raw: rawBits)
_internalInvariant(count == self.count)
_internalInvariant(isASCII == self.isASCII)
_internalInvariant(isNFC == self.isNFC)
_internalInvariant(isNativelyStored == self.isNativelyStored)
_internalInvariant(isTailAllocated == self.isTailAllocated)
_internalInvariant(
isNullTerminated == self.isNullTerminated ||
isNativelyStored == self.isNullTerminated)
}

@inlinable @inline(__always)
Expand All @@ -846,7 +890,18 @@ extension _StringObject.CountAndFlags {
isASCII: isASCII,
isNFC: isASCII,
isNativelyStored: false,
isTailAllocated: true)
isTailAllocated: true,
isNullTerminated: false)
}
/// Flags for a string of `immortalCount` code units that is tail-allocated,
/// not natively stored, and marked as null-terminated. `isNFC` is taken from
/// `isASCII`.
@_alwaysEmitIntoClient @inline(__always)
internal init(
  nullTerminatedImmortalCount immortalCount: Int,
  isASCII: Bool
) {
  self.init(
    count: immortalCount,
    isASCII: isASCII,
    isNFC: isASCII,
    isNativelyStored: false,
    isTailAllocated: true,
    isNullTerminated: true
  )
}
@inline(__always)
internal init(mortalCount: Int, isASCII: Bool) {
Expand All @@ -855,7 +910,18 @@ extension _StringObject.CountAndFlags {
isASCII: isASCII,
isNFC: isASCII,
isNativelyStored: true,
isTailAllocated: true)
isTailAllocated: true,
isNullTerminated: true)
}
/// Flags for a string of `sharedCount` code units that is neither natively
/// stored nor tail-allocated. `isNFC` is taken from `isASCII`, and the
/// null-termination flag is whatever the caller passes.
@inline(__always)
internal init(
  sharedCount: Int,
  isASCII: Bool,
  isNullTerminated: Bool
) {
  self.init(
    count: sharedCount,
    isASCII: isASCII,
    isNFC: isASCII,
    isNativelyStored: false,
    isTailAllocated: false,
    isNullTerminated: isNullTerminated
  )
}
@inline(__always)
internal init(sharedCount: Int, isASCII: Bool) {
Expand All @@ -864,7 +930,8 @@ extension _StringObject.CountAndFlags {
isASCII: isASCII,
isNFC: isASCII,
isNativelyStored: false,
isTailAllocated: false)
isTailAllocated: false,
isNullTerminated: false)
}

//
Expand Down Expand Up @@ -909,6 +976,12 @@ extension _StringObject.CountAndFlags {
internal var isForeignUTF8: Bool {
(_storage & Self.isForeignUTF8Mask) != 0
}

/// Whether the null-termination bit is set in the raw storage.
///
/// The flag's layout notes advise going through `isFastZeroTerminated`
/// rather than reading this bit directly, because older binaries may not set
/// it for natively stored strings.
@_alwaysEmitIntoClient // Swift 6.2
@inline(__always)
internal var isNullTerminated: Bool {
  (_storage & Self.isNullTerminatedMask) != 0
}

#if !INTERNAL_CHECKS_ENABLED
@inlinable @inline(__always) internal func _invariantCheck() {}
Expand Down Expand Up @@ -1219,16 +1292,40 @@ extension _StringObject {
// Small strings nul-terminate when spilling for contiguous access
if isSmall { return true }

// TODO(String performance): Use performance flag, which could be more
// inclusive. For now, we only know native strings and small strings (when
// accessed) are. We could also know about some shared strings.

return largeFastIsTailAllocated
return largeFastIsTailAllocated || _countAndFlags.isNullTerminated
}
}

// Object creation
extension _StringObject {
/// Creates a string object with the large-immortal discriminator over
/// `bufPtr`, using flags built by
/// `CountAndFlags(nullTerminatedImmortalCount:isASCII:)`.
///
/// - Parameters:
///   - bufPtr: The buffer of UTF-8 code units; its `count` becomes the
///     string's count.
///   - isASCII: Whether the contents are ASCII.
///
/// NOTE(review): the base address is unwrapped without a check, so `bufPtr`
/// presumably must have a non-nil base address -- confirm.
@_alwaysEmitIntoClient @inline(__always)
internal init(
nullTerminatedImmortal bufPtr: UnsafeBufferPointer<UInt8>,
isASCII: Bool
) {
let countAndFlags = CountAndFlags(
nullTerminatedImmortalCount: bufPtr.count, isASCII: isASCII)
#if _pointerBitWidth(_64)
// 64-bit layout: store the buffer's base address minus `nativeBias`.
// We bias to align code paths for mortal and immortal strings
let biasedAddress = unsafe UInt(
bitPattern: bufPtr.baseAddress._unsafelyUnwrappedUnchecked
) &- _StringObject.nativeBias

self.init(
pointerBits: UInt64(truncatingIfNeeded: biasedAddress),
discriminator: Nibbles.largeImmortal(),
countAndFlags: countAndFlags)
#elseif _pointerBitWidth(_32) || _pointerBitWidth(_16)
// 32-/16-bit layout: the unbiased start pointer goes in the `.immortal`
// variant.
self.init(
variant: .immortal(start: bufPtr.baseAddress._unsafelyUnwrappedUnchecked),
discriminator: Nibbles.largeImmortal(),
countAndFlags: countAndFlags)
#else
#error("Unknown platform")
#endif
}


@inlinable @inline(__always)
internal init(immortal bufPtr: UnsafeBufferPointer<UInt8>, isASCII: Bool) {
let countAndFlags = CountAndFlags(
Expand Down Expand Up @@ -1300,7 +1397,11 @@ extension _StringObject {
internal init(
cocoa: AnyObject, providesFastUTF8: Bool, isASCII: Bool, length: Int
) {
let countAndFlags = CountAndFlags(sharedCount: length, isASCII: isASCII)
let countAndFlags = CountAndFlags(
sharedCount: length,
isASCII: isASCII,
isNullTerminated: providesFastUTF8
)
let discriminator = Nibbles.largeCocoa(providesFastUTF8: providesFastUTF8)
#if $Embedded
fatalError("unreachable in embedded Swift")
Expand All @@ -1327,7 +1428,11 @@ extension _StringObject {
isASCII: Bool,
length: Int
) {
let countAndFlags = CountAndFlags(sharedCount: length, isASCII: isASCII)
let countAndFlags = CountAndFlags(
sharedCount: length,
isASCII: isASCII,
isNullTerminated: providesFastUTF8
)
let discriminator = Nibbles.largeFastImmortalCocoa()
#if $Embedded
fatalError("unreachable in embedded Swift")
Expand Down
17 changes: 11 additions & 6 deletions stdlib/public/core/StringStorageBridge.swift
Original file line number Diff line number Diff line change
Expand Up @@ -82,9 +82,9 @@ extension _AbstractStringStorage {
@inline(__always)
@_effects(readonly)
internal func _cString(encoding: UInt) -> UnsafePointer<UInt8>? {
switch (encoding, isASCII) {
case (_cocoaASCIIEncoding, true),
(_cocoaUTF8Encoding, _):
switch (encoding, isASCII, asString._guts._object.isFastZeroTerminated) {
case (_cocoaASCIIEncoding, true, true),
(_cocoaUTF8Encoding, _, true):
return unsafe start
default:
return unsafe _cocoaCStringUsingEncodingTrampoline(self, encoding)
Expand Down Expand Up @@ -303,16 +303,21 @@ extension __SharedStringStorage {
final internal func _fastCStringContents(
_ requiresNulTermination: Int8
) -> UnsafePointer<CChar>? {
if isASCII {
return unsafe start._asCChar
if 0 == requiresNulTermination || asString._guts._object.isFastZeroTerminated {
if isASCII {
return unsafe start._asCChar
}
}
return nil
}

@objc(UTF8String)
@_effects(readonly)
final internal func _utf8String() -> UnsafePointer<UInt8>? {
return start
if asString._guts._object.isFastZeroTerminated {
return start
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not entirely clear on why this doesn't need an unsafe, but it was warning about it

}
return unsafe _cocoaUTF8StringTrampoline(self)
}

@objc(cStringUsingEncoding:)
Expand Down
Loading