Skip to content

Commit 3dc8e31

Browse files
author
Dave Abrahams
committed
[stdlib] Speed up Character translation
Do only a single pass over the UTF-8 code units, rather than two.
1 parent b1ef69d commit 3dc8e31

File tree

1 file changed

+13
-14
lines changed

1 file changed

+13
-14
lines changed

stdlib/public/core/Character.swift

Lines changed: 13 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -295,22 +295,21 @@ public struct Character :
295295
typealias Indices = CountableRange<Int>
296296

297297
init(_ u8: UInt64) {
298-
let count = UTF16.transcodedLength(
299-
of: _SmallUTF8(u8).makeIterator(),
300-
decodedAs: UTF8.self,
301-
repairingIllFormedSequences: true)!.0
302-
_sanityCheck(count <= 4, "Character with more than 4 UTF-16 code units")
303-
self.count = UInt16(count)
298+
var utf16Count = 0
304299
var u16: UInt64 = 0
305-
let output: (UTF16.CodeUnit) -> Void = {
306-
u16 = u16 &<< 16
307-
u16 = u16 | UInt64(extendingOrTruncating: $0)
300+
301+
UTF8.parseForward(_SmallUTF8(u8)) {
302+
let c = $0.utf16.count
303+
utf16Count += c
304+
u16 = u16 &<< 16 | UInt64(
305+
extendingOrTruncating: $0.utf16.first.unsafelyUnwrapped)
306+
307+
if _slowPath(c > 1) {
308+
u16 = u16 &<< 16 | UInt64(
309+
extendingOrTruncating: $0.utf16.last.unsafelyUnwrapped)
310+
}
308311
}
309-
_ = transcode(
310-
_SmallUTF8(u8).makeIterator(),
311-
from: UTF8.self, to: UTF16.self,
312-
stoppingOnError: false,
313-
into: output)
312+
self.count = UInt16(utf16Count)
314313
self.data = u16
315314
}
316315

0 commit comments

Comments
 (0)