swiftlang · glessard · Jan 22, 2024 · Sep 8, 2023 · Sep 9, 2023 · Sep 10, 2023
@@ -493,6 +493,116 @@ extension String {
     self = String._fromNonContiguousUnsafeBitcastUTF8Repairing(codeUnits).0
   }
 
+  /// Creates a new string by copying and validating the sequence of
+  /// code units passed in, according to the specified encoding.
+  ///
+  /// This initializer does not try to repair ill-formed code unit sequences.
+  /// If any are found, the result of the initializer is `nil`.
+  ///
+  /// The following example calls this initializer with the contents of two
+  /// different arrays---first with a well-formed UTF-8 code unit sequence and
+  /// then with an ill-formed UTF-16 code unit sequence.
+  ///
+  ///     let validUTF8: [UInt8] = [67, 97, 0, 102, 195, 169]
+  ///     let valid = String(validating: validUTF8, as: UTF8.self)
+  ///     print(valid ?? "nil")
+  ///     // Prints "Café"
+  ///
+  ///     let invalidUTF16: [UInt16] = [0x41, 0x42, 0xd801]
+  ///     let invalid = String(validating: invalidUTF16, as: UTF16.self)
+  ///     print(invalid ?? "nil")
+  ///     // Prints "nil"
+  ///
+  /// - Parameters:
+  ///   - codeUnits: A sequence of code units that encode a `String`
+  ///   - encoding: A conformer to `Unicode.Encoding` to be used
+  ///               to decode `codeUnits`.
+  @inlinable
+  @available(SwiftStdlib 5.11, *)
+  public init?<Encoding: Unicode.Encoding>(
+    validating codeUnits: some Sequence<Encoding.CodeUnit>,
+    as encoding: Encoding.Type
+  ) {
+    let contiguousResult = codeUnits.withContiguousStorageIfAvailable {
+      String._validate($0, as: Encoding.self)
+    }
+    if let validationResult = contiguousResult {
+      guard let validatedString = validationResult else {
+        return nil
+      }
+      self = validatedString
+      return
+    }
+
+    // slow-path
+    var transcoded: [UTF8.CodeUnit] = []
+    transcoded.reserveCapacity(codeUnits.underestimatedCount)
+    var isASCII = true
+    let error = transcode(
+      codeUnits.makeIterator(),
+      from: Encoding.self,
+      to: UTF8.self,
+      stoppingOnError: true,
+      into: {
+        uint8 in
+        transcoded.append(uint8)
+        if isASCII && (uint8 & 0x80) == 0x80 { isASCII = false }
+      }
+    )
+    if error { return nil }
+    self = transcoded.withUnsafeBufferPointer{
+      String._uncheckedFromUTF8($0, asciiPreScanResult: isASCII)
+    }
+  }
+
+  /// Creates a new string by copying and validating the sequence of
+  /// code units passed in, according to the specified encoding.
+  ///
+  /// This initializer does not try to repair ill-formed code unit sequences.
+  /// If any are found, the result of the initializer is `nil`.
+  ///
+  /// The following example calls this initializer with the contents of two
+  /// different arrays---first with a well-formed UTF-8 code unit sequence and
+  /// then with an ill-formed ASCII code unit sequence.
+  ///
+  ///     let validUTF8: [Int8] = [67, 97, 0, 102, -61, -87]
+  ///     let valid = String(validating: validUTF8, as: UTF8.self)
+  ///     print(valid ?? "nil")
+  ///     // Prints "Café"
+  ///
+  ///     let invalidASCII: [Int8] = [67, 97, -5]
+  ///     let invalid = String(validating: invalidASCII, as: Unicode.ASCII.self)
+  ///     print(invalid ?? "nil")
+  ///     // Prints "nil"
+  ///
+  /// - Parameters:
+  ///   - codeUnits: A sequence of code units that encode a `String`
+  ///   - encoding: A conformer to `Unicode.Encoding` that can decode
+  ///               `codeUnits` as `UInt8`
+  @inlinable
+  @available(SwiftStdlib 5.11, *)
+  public init?<Encoding>(
+    validating codeUnits: some Sequence<Int8>,
+    as encoding: Encoding.Type
+  ) where Encoding: Unicode.Encoding, Encoding.CodeUnit == UInt8 {
+    let contiguousResult = codeUnits.withContiguousStorageIfAvailable {
+      $0.withMemoryRebound(to: UInt8.self) {
+        String._validate($0, as: Encoding.self)
+      }
+    }
+    if let validationResult = contiguousResult {
+      guard let validatedString = validationResult else {
+        return nil
+      }
+      self = validatedString
+      return
+    }
+
+    // slow-path
+    let uint8s = codeUnits.lazy.map(UInt8.init(bitPattern:))
+    self.init(validating: uint8s, as: Encoding.self)
+  }
+
   /// Creates a new string with the specified capacity in UTF-8 code units, and
   /// then calls the given closure with a buffer covering the string's
   /// uninitialized memory.

@@ -2,7 +2,7 @@
 //
 // This source file is part of the Swift.org open source project
 //
-// Copyright (c) 2014 - 2018 Apple Inc. and the Swift project authors
+// Copyright (c) 2014 - 2023 Apple Inc. and the Swift project authors
 // Licensed under Apache License v2.0 with Runtime Library Exception
 //
 // See https://swift.org/LICENSE.txt for license information
@@ -298,4 +298,80 @@ extension String {
       String._uncheckedFromUTF8($0)
     }
   }
+
+  @usableFromInline
+  @available(SwiftStdlib 5.11, *)
+  internal static func _validate<Encoding: Unicode.Encoding>(
+    _ input: UnsafeBufferPointer<Encoding.CodeUnit>,
+    as encoding: Encoding.Type
+  ) -> String? {
+    if encoding.CodeUnit.self == UInt8.self {
+      let bytes = _identityCast(input, to: UnsafeBufferPointer<UInt8>.self)
+      if encoding.self == UTF8.self {
+        guard case .success(let info) = validateUTF8(bytes) else { return nil }
+        return String._uncheckedFromUTF8(bytes, asciiPreScanResult: info.isASCII)
+      } else if encoding.self == Unicode.ASCII.self {
+        guard _allASCII(bytes) else { return nil }
+        return String._uncheckedFromASCII(bytes)
+      }
+    }
+
+    // slow-path
+    var isASCII = true
+    var buffer: UnsafeMutableBufferPointer<UInt8>
+    buffer = UnsafeMutableBufferPointer.allocate(capacity: input.count*3)
+    var written = buffer.startIndex
+
+    var parser = Encoding.ForwardParser()
+    var input = input.makeIterator()
+
+    transcodingLoop:
+    while true {
+      switch parser.parseScalar(from: &input) {
+      case .valid(let s):
+        let scalar = Encoding.decode(s)
+        guard let utf8 = Unicode.UTF8.encode(scalar) else {
+          // transcoding error: clean up and return nil
+          fallthrough
+        }
+        if buffer.count < written + utf8.count {
+          let newCapacity = buffer.count + (buffer.count >> 1)
+          let copy: UnsafeMutableBufferPointer<UInt8>
+          copy = UnsafeMutableBufferPointer.allocate(capacity: newCapacity)
+          let copied = copy.moveInitialize(
+            fromContentsOf: buffer.prefix(upTo: written)
+          )
+          buffer.deallocate()
+          buffer = copy
+          written = copied
+        }
+        if isASCII && utf8.count > 1 {
+          isASCII = false
+        }
+        written = buffer.suffix(from: written).initialize(fromContentsOf: utf8)
+        break
+      case .error:
+        // validation error: clean up and return nil
+        buffer.prefix(upTo: written).deinitialize()
+        buffer.deallocate()
+        return nil
+      case .emptyInput:
+        break transcodingLoop
+      }
+    }
+
+    let storage = buffer.baseAddress.map {
+      __SharedStringStorage(
+        _mortal: $0,
+        countAndFlags: _StringObject.CountAndFlags(
+          count: buffer.startIndex.distance(to: written),
+          isASCII: isASCII,
+          isNFC: isASCII,
+          isNativelyStored: false,
+          isTailAllocated: false
+        )
+      )
+    }
+    return storage?.asString
+  }
 }
@@ -2,7 +2,7 @@
 //
 // This source file is part of the Swift.org open source project
 //
-// Copyright (c) 2014 - 2020 Apple Inc. and the Swift project authors
+// Copyright (c) 2014 - 2023 Apple Inc. and the Swift project authors
 // Licensed under Apache License v2.0 with Runtime Library Exception
 //
 // See https://swift.org/LICENSE.txt for license information
@@ -681,6 +681,8 @@ final internal class __SharedStringStorage
 
   internal var _breadcrumbs: _StringBreadcrumbs? = nil
 
+  internal var immortal = false
+
   internal var count: Int { _countAndFlags.count }
 
   internal init(
@@ -689,6 +691,7 @@ final internal class __SharedStringStorage
   ) {
     self._owner = nil
     self.start = ptr
+    self.immortal = true
 #if _pointerBitWidth(_64)
     self._countAndFlags = countAndFlags
 #elseif _pointerBitWidth(_32)
@@ -709,6 +712,32 @@ final internal class __SharedStringStorage
       return String(_StringGuts(self))
     }
   }
+
+  internal init(
+    _mortal ptr: UnsafePointer<UInt8>,
+    countAndFlags: _StringObject.CountAndFlags
+  ) {
+    // ptr *must* be the start of an allocation
+    self._owner = nil
+    self.start = ptr
+    self.immortal = false
+#if _pointerBitWidth(_64)
+    self._countAndFlags = countAndFlags
+#elseif _pointerBitWidth(_32)
+    self._count = countAndFlags.count
+    self._countFlags = countAndFlags.flags
+#else
+#error("Unknown platform")
+#endif
+    super.init()
+    self._invariantCheck()
+  }
+
+  deinit {
+    if (_owner == nil) && !immortal {
+      start.deallocate()
+    }
+  }
 }
 
 extension __SharedStringStorage {

@@ -45,6 +45,15 @@ Added: _$ss19_getWeakRetainCountySuyXlF
 // Swift._getUnownedRetainCount(Swift.AnyObject) -> Swift.UInt
 Added: _$ss22_getUnownedRetainCountySuyXlF
 
+// Swift.String.init<A, B where A: Swift._UnicodeEncoding, B: Swift.Sequence, A.CodeUnit == B.Element>(validating: B, as: A.Type) -> Swift.String?
+Added: _$sSS10validating2asSSSgq__xmtcs16_UnicodeEncodingRzSTR_7ElementQy_8CodeUnitRtzr0_lufC
+
+// Swift.String.init<A, B where A: Swift._UnicodeEncoding, B: Swift.Sequence, A.CodeUnit == Swift.UInt8, B.Element == Swift.Int8>(validating: B, as: A.Type) -> Swift.String?
+Added: _$sSS10validating2asSSSgq__xmtcs16_UnicodeEncodingRzSTR_s5UInt8V8CodeUnitRtzs4Int8V7ElementRt_r0_lufC
+
+// static Swift.String._validate<A where A: Swift._UnicodeEncoding>(_: Swift.UnsafeBufferPointer<A.CodeUnit>, as: A.Type) -> Swift.String?
+Added: _$sSS9_validate_2asSSSgSRy8CodeUnitQzG_xmts16_UnicodeEncodingRzlFZ
+
 // class __StaticArrayStorage
 Added: _$ss20__StaticArrayStorageC12_doNotCallMeAByt_tcfC
 Added: _$ss20__StaticArrayStorageC12_doNotCallMeAByt_tcfCTj

@@ -45,6 +45,15 @@ Added: _$ss19_getWeakRetainCountySuyXlF
 // Swift._getUnownedRetainCount(Swift.AnyObject) -> Swift.UInt
 Added: _$ss22_getUnownedRetainCountySuyXlF
 
+// Swift.String.init<A, B where A: Swift._UnicodeEncoding, B: Swift.Sequence, A.CodeUnit == B.Element>(validating: B, as: A.Type) -> Swift.String?
+Added: _$sSS10validating2asSSSgq__xmtcs16_UnicodeEncodingRzSTR_7ElementQy_8CodeUnitRtzr0_lufC
+
+// Swift.String.init<A, B where A: Swift._UnicodeEncoding, B: Swift.Sequence, A.CodeUnit == Swift.UInt8, B.Element == Swift.Int8>(validating: B, as: A.Type) -> Swift.String?
+Added: _$sSS10validating2asSSSgq__xmtcs16_UnicodeEncodingRzSTR_s5UInt8V8CodeUnitRtzs4Int8V7ElementRt_r0_lufC
+
+// static Swift.String._validate<A where A: Swift._UnicodeEncoding>(_: Swift.UnsafeBufferPointer<A.CodeUnit>, as: A.Type) -> Swift.String?
+Added: _$sSS9_validate_2asSSSgSRy8CodeUnitQzG_xmts16_UnicodeEncodingRzlFZ
+
 // class __StaticArrayStorage
 Added: _$ss20__StaticArrayStorageC12_doNotCallMeAByt_tcfC
 Added: _$ss20__StaticArrayStorageC12_doNotCallMeAByt_tcfCTj