From 2f1ed631e2fbe325a72d40fa60e6280ee1d173e4 Mon Sep 17 00:00:00 2001
From: Karoy Lorentey <klorentey@apple.com>
Date: Fri, 6 Jan 2023 13:52:27 -0800
Subject: [PATCH] [stdlib] _CharacterRecognizer: Add Sendable, Equatable,
 CustomStringConvertible conformances

Equatability allows faster implementations for updating cached grapheme boundary state after a text mutation, because it enables quick detection of before/after state equality, without having to feed the recognizers until they produce a synchronized grapheme break.

The CustomStringConvertible conformance makes it orders of magnitude more pleasant to debug code that uses `_CharacterRecognizer`.

Sendable is a baseline requirement for value types these days.
---
 .../public/core/StringGraphemeBreaking.swift  | 31 +++++++++++++++--
 test/stdlib/CharacterRecognizer.swift         | 33 +++++++++++++++++++
 2 files changed, 62 insertions(+), 2 deletions(-)

diff --git a/stdlib/public/core/StringGraphemeBreaking.swift b/stdlib/public/core/StringGraphemeBreaking.swift
index b602485b72da4..ff842039d7b47 100644
--- a/stdlib/public/core/StringGraphemeBreaking.swift
+++ b/stdlib/public/core/StringGraphemeBreaking.swift
@@ -407,7 +407,7 @@ extension Unicode.Scalar {
   }
 }
 
-internal struct _GraphemeBreakingState {
+internal struct _GraphemeBreakingState: Sendable, Equatable {
   // When we're looking through an indic sequence, one of the requirements is
   // that there is at LEAST 1 Virama present between two linking consonants.
   // This value helps ensure that when we ultimately need to decide whether or
@@ -436,6 +436,18 @@ internal struct _GraphemeBreakingState {
   var shouldBreakRI = false
 }
 
+extension _GraphemeBreakingState: CustomStringConvertible {
+  // Debug form: "[" + one letter per flag that is set (V, E, I, R) + "]".
+  var description: String {
+    var flags = ""
+    if hasSeenVirama { flags += "V" }
+    if isInEmojiSequence { flags += "E" }
+    if isInIndicSequence { flags += "I" }
+    if shouldBreakRI { flags += "R" }
+    return "[\(flags)]"
+  }
+}
+
 extension Unicode {
   /// A state machine for recognizing character (i.e., extended grapheme
   /// cluster) boundaries in an arbitrary series of Unicode scalars.
@@ -448,7 +460,7 @@ extension Unicode {
   /// `String` splits its contents into `Character` values.
   @available(SwiftStdlib 5.8, *)
   public // SPI(Foundation) FIXME: We need API for this
-  struct _CharacterRecognizer {
+  struct _CharacterRecognizer: Sendable {
     internal var _previous: Unicode.Scalar
     internal var _state: _GraphemeBreakingState
 
@@ -547,6 +559,21 @@ extension Unicode {
   }
 }
 
+@available(SwiftStdlib 5.8, *)
+extension Unicode._CharacterRecognizer: Equatable {
+  public static func ==(left: Self, right: Self) -> Bool {
+    (left._previous, left._state) == (right._previous, right._state)
+  }
+}
+
+@available(SwiftStdlib 5.8, *)
+extension Unicode._CharacterRecognizer: CustomStringConvertible {
+  public var description: String { // state flags, then "U+" and hex scalar
+    "\(_state)U+" + String(_previous.value, radix: 16, uppercase: true)
+  }
+}
+
+
 extension _StringGuts {
   // Returns the stride of the grapheme cluster starting at offset `index`,
   // assuming it is on a grapheme cluster boundary.
diff --git a/test/stdlib/CharacterRecognizer.swift b/test/stdlib/CharacterRecognizer.swift
index 6bacbaaacac54..12816c6a164f7 100644
--- a/test/stdlib/CharacterRecognizer.swift
+++ b/test/stdlib/CharacterRecognizer.swift
@@ -114,3 +114,36 @@ if #available(SwiftStdlib 5.8, *) {
       """)
   }
 }
+
+if #available(SwiftStdlib 5.8, *) {
+  suite.test("Equatable") { // interleaves input to two recognizers
+    var r1 = Unicode._CharacterRecognizer()
+    var r2 = Unicode._CharacterRecognizer()
+    expectEqual(r1, r2) // two freshly initialized recognizers
+    expectTrue(r1.hasBreak(before: "a"))
+    expectNotEqual(r1, r2) // only r1 has been fed "a"
+    expectTrue(r2.hasBreak(before: "a"))
+    expectEqual(r1, r2) // both have now been fed the same scalar
+    expectTrue(r2.hasBreak(before: "\u{1f44f}")) // CLAPPING HANDS SIGN
+    expectNotEqual(r1, r2)
+    expectTrue(r1.hasBreak(before: "b"))
+    expectNotEqual(r1, r2) // r1 was last fed "b", r2 was last fed U+1F44F
+    expectFalse(r2.hasBreak(before: "\u{1f3fc}")) // EMOJI MODIFIER FITZPATRICK TYPE-3
+    expectNotEqual(r1, r2)
+    expectTrue(r2.hasBreak(before: "b"))
+    expectEqual(r1, r2) // breaks should reset state
+  }
+}
+
+if #available(SwiftStdlib 5.8, *) {
+  suite.test("CustomStringConvertible") { // pins the exact debug strings
+    var r = Unicode._CharacterRecognizer()
+    expectEqual("\(r)", "[]U+0") // expected rendering of a fresh recognizer
+    expectTrue(r.hasBreak(before: "\u{1F1FA}")) // REGIONAL INDICATOR SYMBOL LETTER U
+    expectEqual("\(r)", "[]U+1F1FA")
+    expectFalse(r.hasBreak(before: "\u{1F1F8}")) // REGIONAL INDICATOR SYMBOL LETTER S
+    expectEqual("\(r)", "[R]U+1F1F8") // "R" is printed when shouldBreakRI is set
+    expectTrue(r.hasBreak(before: "$"))
+    expectEqual("\(r)", "[]U+24") // "$" is U+0024; hex is not zero-padded
+  }
+}