From d2ee1cec93ab59c53dd4377b5c2f601ede1ba298 Mon Sep 17 00:00:00 2001 From: David Smith Date: Sat, 25 Jan 2025 01:54:49 -0800 Subject: [PATCH] Attempt to transcode into smol strings --- stdlib/public/core/String.swift | 4 ++++ stdlib/public/core/StringCreate.swift | 34 +++++++++++++++++++++++++-- 2 files changed, 36 insertions(+), 2 deletions(-) diff --git a/stdlib/public/core/String.swift b/stdlib/public/core/String.swift index aca5fde7dbe86..b94bcd59d4f11 100644 --- a/stdlib/public/core/String.swift +++ b/stdlib/public/core/String.swift @@ -445,6 +445,10 @@ extension String { public init( decoding codeUnits: C, as sourceEncoding: Encoding.Type ) where C.Iterator.Element == Encoding.CodeUnit { + guard codeUnits.count > 0 else { + self = "" + return + } guard _fastPath(sourceEncoding == UTF8.self) else { self = String._fromCodeUnits( codeUnits, encoding: sourceEncoding, repair: true)!.0 diff --git a/stdlib/public/core/StringCreate.swift b/stdlib/public/core/StringCreate.swift index 2aaf097629ebc..8c3a2f670dce0 100644 --- a/stdlib/public/core/StringCreate.swift +++ b/stdlib/public/core/StringCreate.swift @@ -203,7 +203,6 @@ extension String { return contents.withUnsafeBufferPointer { String._uncheckedFromUTF8($0) } } - @inline(never) // slow path private static func _slowFromCodeUnits< Input: Collection, Encoding: Unicode.Encoding @@ -213,7 +212,34 @@ extension String { repair: Bool ) -> (String, repairsMade: Bool)? where Input.Element == Encoding.CodeUnit { - // TODO(String Performance): Attempt to form smol strings + if input.count < _SmallString.capacity { + var repaired = false + var overflow = false + let result = _SmallString(initializingUTF8With: { buffer in + var bytesUsed = 0 + repaired = transcode( + input.makeIterator(), + from: encoding, + to: UTF8.self, + stoppingOnError: false, + into: { + if bytesUsed < buffer.count { + buffer[bytesUsed] = $0 + } + bytesUsed &+= 1 + } + ) + guard bytesUsed <= buffer.count else { + overflow = true + return 0 + } + return bytesUsed + }) + if !overflow { + return repair || !repaired + ? (String(_StringGuts(result)), repairsMade: repaired) : nil + } + } // TODO(String performance): Skip intermediary array, transcode directly // into a StringStorage space. @@ -236,6 +262,10 @@ extension String { where Input == UnsafeBufferPointer, Encoding == Unicode.ASCII) @_specialize( where Input == Array, Encoding == Unicode.ASCII) + @_specialize( + where Input == UnsafeBufferPointer, Encoding == UTF16) + @_specialize( + where Input == Array, Encoding == UTF16) internal static func _fromCodeUnits< Input: Collection, Encoding: Unicode.Encoding