/
unicodeBidi.ts
216 lines (198 loc) · 5.23 KB
/
unicodeBidi.ts
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
// Copyright 2024 Signal Messenger, LLC
// SPDX-License-Identifier: AGPL-3.0-only
/**
 * Left-to-Right Isolate (LRI, U+2066)
 * Sets direction to LTR and isolates the embedded content from the surrounding text.
 * Closed by `POP_DIRECTIONAL_ISOLATE`.
 * @example
 * ```html
 * <div dir="ltr">...</div>
 * ```
 */
export const LTR_ISOLATE = '\u2066';
/**
 * Right-to-Left Isolate (RLI, U+2067)
 * Sets direction to RTL and isolates the embedded content from the surrounding text.
 * Closed by `POP_DIRECTIONAL_ISOLATE`.
 * @example
 * ```html
 * <div dir="rtl">...</div>
 * ```
 */
export const RTL_ISOLATE = '\u2067';
/**
 * First Strong Isolate (FSI, U+2068)
 * Sets direction to the first strong directional character in the embedded
 * content and isolates it from the surrounding text.
 * Closed by `POP_DIRECTIONAL_ISOLATE`.
 * @example
 * ```html
 * <div dir="auto">...</div>
 * ```
 */
export const FIRST_STRONG_ISOLATE = '\u2068';
/**
 * Pop Directional Isolate (PDI, U+2069)
 * Terminates the scope of the last LRI, RLI, or FSI, and returns to the
 * embedding level of the surrounding text.
 * @example
 * ```html
 * </div>
 * ```
 */
export const POP_DIRECTIONAL_ISOLATE = '\u2069';
/**
 * Left-to-Right Embedding (LRE, U+202A)
 * Sets direction to LTR but allows embedded text to interact with
 * surrounding text, so risk of spillover effects.
 * Closed by `POP_DIRECTIONAL_FORMATTING`.
 * @example Roughly equivalent markup:
 * ```html
 * <span style="unicode-bidi: embed; direction: ltr">...</span>
 * ```
 */
export const LTR_EMBEDDING = '\u202A';
/**
 * Right-to-Left Embedding (RLE, U+202B)
 * Sets direction to RTL but allows embedded text to interact with surrounding
 * text, so risk of spillover effects.
 * Closed by `POP_DIRECTIONAL_FORMATTING`.
 * @example Roughly equivalent markup:
 * ```html
 * <span style="unicode-bidi: embed; direction: rtl">...</span>
 * ```
 */
export const RTL_EMBEDDING = '\u202B';
/**
 * Pop Directional Formatting (PDF, U+202C)
 * Terminates the scope of the last LRE, RLE, LRO, or RLO, and returns to the
 * embedding level of the surrounding text. It does not close isolates
 * (LRI, RLI, FSI); those are closed by `POP_DIRECTIONAL_ISOLATE`.
 * @example
 * ```html
 * </bdo>
 * ```
 */
export const POP_DIRECTIONAL_FORMATTING = '\u202C';
/**
 * Left-to-Right Override (LRO, U+202D)
 * Forces direction to LTR, even if the surrounding text is RTL.
 * Closed by `POP_DIRECTIONAL_FORMATTING`.
 * @example
 * ```html
 * <bdo dir="ltr">...</bdo>
 * ```
 */
export const LTR_OVERRIDE = '\u202D';
/**
 * Right-to-Left Override (RLO, U+202E)
 * Forces direction to RTL, even if the surrounding text is LTR.
 * Closed by `POP_DIRECTIONAL_FORMATTING`.
 * @example
 * ```html
 * <bdo dir="rtl">...</bdo>
 * ```
 */
export const RTL_OVERRIDE = '\u202E';
/**
 * Matches a single occurrence of any Unicode directional control character
 * declared in this module (isolates, embeddings, overrides and both "pop"
 * terminators). The pattern is a plain alternation with no flags.
 */
export const ANY_UNICODE_DIR_CONTROL_CHAR_REGEX = new RegExp(
  `${LTR_ISOLATE}|${RTL_ISOLATE}|${FIRST_STRONG_ISOLATE}|` +
    `${POP_DIRECTIONAL_ISOLATE}|${LTR_EMBEDDING}|${RTL_EMBEDDING}|` +
    `${POP_DIRECTIONAL_FORMATTING}|${LTR_OVERRIDE}|${RTL_OVERRIDE}`
);
/**
 * Reports whether `input` contains at least one Unicode directional control
 * character, as matched by `ANY_UNICODE_DIR_CONTROL_CHAR_REGEX`.
 *
 * @param input - The text to inspect.
 * @returns `true` when a directional control character is present.
 */
export function hasAnyUnicodeDirControlChars(input: string): boolean {
  // The regex carries no `g`/`y` flag, so `test` is stateless here.
  return ANY_UNICODE_DIR_CONTROL_CHAR_REGEX.test(input);
}
/**
 * You probably want `bidiIsolate` instead of this function.
 *
 * Ensures that the input string has balanced Unicode directional control
 * characters:
 * - a closing character (PDF / PDI) with no unclosed opener before it is
 *   dropped;
 * - every opener that is still unclosed at the end of the input gets a
 *   matching closer appended (all PDFs first, then all PDIs).
 *
 * Embedding/override openers and isolate openers are counted independently;
 * the relative nesting order of the two kinds is not tracked.
 *
 * @param input - The text to balance.
 * @returns `input` itself when it contains no directional control characters,
 *   otherwise a copy with stray closers removed and missing closers appended.
 */
function balanceUnicodeDirControlChars(input: string): string {
  // This gets called by i18n code on many strings, so we want to avoid
  // as much work as possible
  if (!hasAnyUnicodeDirControlChars(input)) {
    return input;
  }

  let result = '';
  let formattingDepth = 0;
  let isolateDepth = 0;

  // We need to scan the entire input string and drop some characters as we
  // go in case they are closing something that was never opened.
  for (const char of input) {
    switch (char) {
      case LTR_EMBEDDING:
      case RTL_EMBEDDING:
      case LTR_OVERRIDE:
      case RTL_OVERRIDE:
        formattingDepth += 1;
        result += char;
        break;
      case POP_DIRECTIONAL_FORMATTING:
        // Only consume a level when there is one to close. A stray closer is
        // dropped WITHOUT touching the counter: letting the depth go negative
        // would make a later, legitimate opener/closer pair look unbalanced.
        if (formattingDepth > 0) {
          formattingDepth -= 1;
          result += char;
        }
        break;
      case LTR_ISOLATE:
      case RTL_ISOLATE:
      case FIRST_STRONG_ISOLATE:
        isolateDepth += 1;
        result += char;
        break;
      case POP_DIRECTIONAL_ISOLATE:
        // Same rule as above, for isolates.
        if (isolateDepth > 0) {
          isolateDepth -= 1;
          result += char;
        }
        break;
      default:
        result += char;
        break;
    }
  }

  // Ensure everything is closed. Both depths are >= 0 here, and `repeat(0)`
  // yields an empty string, so no extra guards are needed.
  return (
    result +
    POP_DIRECTIONAL_FORMATTING.repeat(formattingDepth) +
    POP_DIRECTIONAL_ISOLATE.repeat(isolateDepth)
  );
}
/**
 * @private
 * Exported for testing
 *
 * Balances the directional control characters in `text` and encloses the
 * result in a First Strong Isolate / Pop Directional Isolate pair.
 */
export function _bidiIsolate(text: string): string {
  // Balance first so control characters inside `text` cannot terminate (or
  // swallow the terminator of) the isolate added around it.
  const balanced = balanceUnicodeDirControlChars(text);
  return `${FIRST_STRONG_ISOLATE}${balanced}${POP_DIRECTIONAL_ISOLATE}`;
}
/**
 * BEFORE YOU USE THIS, YOU PROBABLY WANT TO USE HTML ELEMENTS WITH `dir` ATTRIBUTES
 *
 * Surrounds the input string with Unicode directional control characters so
 * that it renders correctly when placed inside bidirectional text.
 *
 * When the `IS_TESTS` environment variable is set (to any value), the input
 * is returned untouched.
 *
 * @example
 * ```ts
 * bidiIsolate('Hello') === '\u2068Hello\u2069'
 * ```
 */
export function bidiIsolate(text: string): string {
  // Isolation is switched off under test so expected strings stay easy to
  // compare.
  const runningTests = process.env.IS_TESTS != null;
  return runningTests ? text : _bidiIsolate(text);
}