diff --git a/packages/roosterjs-content-model-dom/lib/modelApi/common/addTextSegment.ts b/packages/roosterjs-content-model-dom/lib/modelApi/common/addTextSegment.ts
index dc8536468de5..4a93d275148c 100644
--- a/packages/roosterjs-content-model-dom/lib/modelApi/common/addTextSegment.ts
+++ b/packages/roosterjs-content-model-dom/lib/modelApi/common/addTextSegment.ts
@@ -4,6 +4,7 @@ import { createText } from '../creators/createText';
import { ensureParagraph } from './ensureParagraph';
import { hasSpacesOnly } from './hasSpacesOnly';
import { isWhiteSpacePreserved } from '../../domUtils/isWhiteSpacePreserved';
+import { stripInvisibleUnicode } from './stripInvisibleUnicode';
import type {
ContentModelBlockGroup,
ContentModelText,
@@ -32,7 +33,13 @@ export function addTextSegment(
(paragraph?.segments.length ?? 0) > 0 ||
isWhiteSpacePreserved(paragraph?.format.whiteSpace)
) {
- textModel = createText(text, context.segmentFormat);
+ const filteredText =
+ context.experimentalFeatures &&
+ context.experimentalFeatures.indexOf('FilterInvisibleUnicode') > -1
+ ? stripInvisibleUnicode(text)
+ : text;
+
+ textModel = createText(filteredText, context.segmentFormat);
if (context.isInSelection) {
textModel.isSelected = true;
diff --git a/packages/roosterjs-content-model-dom/lib/modelApi/common/stripInvisibleUnicode.ts b/packages/roosterjs-content-model-dom/lib/modelApi/common/stripInvisibleUnicode.ts
new file mode 100644
index 000000000000..07e9858d369a
--- /dev/null
+++ b/packages/roosterjs-content-model-dom/lib/modelApi/common/stripInvisibleUnicode.ts
@@ -0,0 +1,14 @@
+// According to https://embracethered.com/blog/posts/2024/hiding-and-finding-text-with-unicode-tags/
+// code points in the range of U+E0000 to U+EFFFF are invisible when rendered and can be used to hide text inside otherwise normal-looking HTML.
+// addTextSegment strips them from text (when the 'FilterInvisibleUnicode' experimental feature is on) so the hidden text never becomes part of a text segment. NOTE(review): this range is wider than the Unicode "Tags" block (U+E0000-U+E007F) described in the article and presumably also covers the supplementary variation selectors - confirm that is intended.
+const INVISIBLE_UNICODE_REGEX = /[\u{E0000}-\u{EFFFF}]/gu;
+
+/**
+ * @internal
+ * Remove every code point in the range U+E0000 to U+EFFFF from the given string; all other characters are left untouched
+ * @param value The string to be processed
+ * @returns A new string equal to value with all matches of INVISIBLE_UNICODE_REGEX removed
+ */
+export function stripInvisibleUnicode(value: string): string {
+ return value.replace(INVISIBLE_UNICODE_REGEX, '');
+}
diff --git a/packages/roosterjs-content-model-dom/test/endToEndTest.ts b/packages/roosterjs-content-model-dom/test/endToEndTest.ts
index 91a11f330c2b..87807d48cbdc 100644
--- a/packages/roosterjs-content-model-dom/test/endToEndTest.ts
+++ b/packages/roosterjs-content-model-dom/test/endToEndTest.ts
@@ -3271,6 +3271,77 @@ describe('End to end test for DOM => Model => DOM/TEXT', () => {
);
});
+ it('Text with invisible unicode tag characters is stripped when FilterInvisibleUnicode feature is enabled', () => {
+ // Source HTML contains U+E0041 / U+E0042 (unicode tag range — must be stripped)
+ // mixed with U+200B (ZWSP), U+202E (RLO), U+202C (PDF)
+ // which must be preserved.
+ // The text is wrapped in <p> so that the resulting paragraph carries the 'p' decorator and 1em margins asserted below.
+ const div1 = document.createElement('div');
+ div1.innerHTML = '<p>a\u{E0041}b\u{200B}c\u{E0042}d\u{202E}evil\u{202C}e</p>';
+
+ const model = domToContentModel(
+ div1,
+ createDomToModelContext({ experimentalFeatures: ['FilterInvisibleUnicode'] })
+ );
+
+ expect(model).toEqual({
+ blockGroupType: 'Document',
+ blocks: [
+ {
+ blockType: 'Paragraph',
+ segments: [
+ {
+ segmentType: 'Text',
+ text: 'ab\u{200B}cd\u{202E}evil\u{202C}e',
+ format: {},
+ },
+ ],
+ format: {
+ marginTop: '1em',
+ marginBottom: '1em',
+ },
+ decorator: {
+ tagName: 'p',
+ format: {},
+ },
+ },
+ ],
+ });
+
+ // The plain-text export must show the same filtered string
+ const text = contentModelToText(model);
+ expect(text).toBe('ab\u{200B}cd\u{202E}evil\u{202C}e');
+ });
+
+ it('Text with invisible unicode tag characters is NOT stripped when feature is disabled', () => {
+ // Same kind of input as the enabled case, but the default context has no experimental features,
+ // so U+E0041 / U+E0042 are expected to stay in the text segment.
+ const div1 = document.createElement('div');
+ div1.innerHTML = '<p>a\u{E0041}b\u{E0042}c</p>';
+
+ const model = domToContentModel(div1, createDomToModelContext());
+
+ expect(model).toEqual({
+ blockGroupType: 'Document',
+ blocks: [
+ {
+ blockType: 'Paragraph',
+ segments: [
+ {
+ segmentType: 'Text',
+ text: 'a\u{E0041}b\u{E0042}c',
+ format: {},
+ },
+ ],
+ format: {
+ marginTop: '1em',
+ marginBottom: '1em',
+ },
+ decorator: {
+ tagName: 'p',
+ format: {},
+ },
+ },
+ ],
+ });
+ });
+
it('LI without UL followed by other blocks', () => {
runTest(
'testother
',
diff --git a/packages/roosterjs-content-model-dom/test/modelApi/common/addTextSegmentTest.ts b/packages/roosterjs-content-model-dom/test/modelApi/common/addTextSegmentTest.ts
index c3ccabd1dfa1..9d83c04ca885 100644
--- a/packages/roosterjs-content-model-dom/test/modelApi/common/addTextSegmentTest.ts
+++ b/packages/roosterjs-content-model-dom/test/modelApi/common/addTextSegmentTest.ts
@@ -206,4 +206,56 @@ describe('addTextSegment', () => {
],
});
});
+
+ it('Add text with invisible unicode, feature enabled', () => {
+ // Context that opts in to the invisible unicode filter
+ const filteringContext = createDomToModelContext({
+ experimentalFeatures: ['FilterInvisibleUnicode'],
+ });
+ const doc = createContentModelDocument();
+ const rawText = 'a\u{E0041}b\u{E0042}c';
+
+ addTextSegment(doc, rawText, filteringContext);
+
+ // Only the visible letters are expected in the single text segment
+ expect(doc).toEqual({
+ blockGroupType: 'Document',
+ blocks: [
+ {
+ blockType: 'Paragraph',
+ isImplicit: true,
+ segments: [
+ {
+ segmentType: 'Text',
+ format: {},
+ text: 'abc',
+ },
+ ],
+ format: {},
+ },
+ ],
+ });
+ });
+
+ it('Add text with invisible unicode, feature disabled', () => {
+ // Default context: no experimental feature is requested
+ const defaultContext = createDomToModelContext();
+ const doc = createContentModelDocument();
+ const rawText = 'a\u{E0041}b\u{E0042}c';
+
+ addTextSegment(doc, rawText, defaultContext);
+
+ // The text segment is expected to hold the input unchanged
+ expect(doc).toEqual({
+ blockGroupType: 'Document',
+ blocks: [
+ {
+ blockType: 'Paragraph',
+ isImplicit: true,
+ segments: [
+ {
+ segmentType: 'Text',
+ format: {},
+ text: 'a\u{E0041}b\u{E0042}c',
+ },
+ ],
+ format: {},
+ },
+ ],
+ });
+ });
});
diff --git a/packages/roosterjs-content-model-dom/test/modelApi/common/stripInvisibleUnicodeTest.ts b/packages/roosterjs-content-model-dom/test/modelApi/common/stripInvisibleUnicodeTest.ts
new file mode 100644
index 000000000000..bae02f32005d
--- /dev/null
+++ b/packages/roosterjs-content-model-dom/test/modelApi/common/stripInvisibleUnicodeTest.ts
@@ -0,0 +1,46 @@
+import { stripInvisibleUnicode } from '../../../lib/modelApi/common/stripInvisibleUnicode';
+
+describe('stripInvisibleUnicode', () => {
+ it('should strip invisible unicode characters in the tag range', () => {
+ const stripped = stripInvisibleUnicode('a\u{E0041}b\u{E0042}c');
+
+ expect(stripped).toBe('abc');
+ });
+
+ it('should strip all characters when input contains only invisible unicode', () => {
+ const onlyInvisible = '\u{E0000}\u{E007F}\u{EFFFF}';
+
+ expect(stripInvisibleUnicode(onlyInvisible)).toBe('');
+ });
+
+ it('should strip characters at range boundaries (U+E0000 and U+EFFFF)', () => {
+ // U+DFFFF and U+F0000 are the neighbours just outside the range and must be kept
+ const input = '\u{DFFFF}start\u{E0000}mid\u{EFFFF}end\u{F0000}';
+ const expected = '\u{DFFFF}startmidend\u{F0000}';
+
+ expect(stripInvisibleUnicode(input)).toBe(expected);
+ });
+
+ it('should preserve meaningful invisible characters outside the tag range', () => {
+ // U+200B = Zero-Width Space, U+200D = Zero-Width Joiner,
+ // U+202E = Right-to-Left Override, U+202C = Pop Directional Formatting
+ const untouched = 'a\u{200B}b\u{200D}c\u{202E}d\u{202C}e';
+
+ expect(stripInvisibleUnicode(untouched)).toBe(untouched);
+ });
+
+ it('should strip tag-range chars while keeping meaningful invisible chars', () => {
+ const mixed = 'a\u{200B}\u{E0041}b\u{202E}\u{E0042}c';
+ const expected = 'a\u{200B}b\u{202E}c';
+
+ expect(stripInvisibleUnicode(mixed)).toBe(expected);
+ });
+
+ it('should not modify visible characters', () => {
+ const visible = 'hello world 你好';
+
+ expect(stripInvisibleUnicode(visible)).toBe(visible);
+ });
+
+ it('should return empty string for empty input', () => {
+ const empty = '';
+
+ expect(stripInvisibleUnicode(empty)).toBe('');
+ });
+
+ it('should handle a long sequence of tag characters', () => {
+ // Build 100 consecutive code points starting at U+E0000
+ let tags = '';
+ for (let offset = 0; offset < 100; offset++) {
+ tags += String.fromCodePoint(0xe0000 + offset);
+ }
+
+ expect(stripInvisibleUnicode(`before${tags}after`)).toBe('beforeafter');
+ });
+});
diff --git a/packages/roosterjs-content-model-dom/test/modelApi/creators/creatorsTest.ts b/packages/roosterjs-content-model-dom/test/modelApi/creators/creatorsTest.ts
index 2429877479b3..21dae0216a4e 100644
--- a/packages/roosterjs-content-model-dom/test/modelApi/creators/creatorsTest.ts
+++ b/packages/roosterjs-content-model-dom/test/modelApi/creators/creatorsTest.ts
@@ -233,6 +233,63 @@ describe('Creators', () => {
});
});
+ it('createText with invisible unicode characters does not strip by default', () => {
+ // The created segment is expected to carry the input string unchanged
+ const input = 'a\u{E0041}b\u{E0042}c';
+
+ expect(createText(input)).toEqual({
+ segmentType: 'Text',
+ text: input,
+ format: {},
+ });
+ });
+
+ it('createText with only invisible unicode characters does not strip by default', () => {
+ // Input made solely of code points from the U+E0000 to U+EFFFF range
+ const input = '\u{E0000}\u{E007F}\u{EFFFF}';
+
+ expect(createText(input)).toEqual({
+ segmentType: 'Text',
+ text: input,
+ format: {},
+ });
+ });
+
+ it('createText with invisible unicode at boundary range does not strip by default', () => {
+ // Mixes the first and last code points of the range with their outside neighbours
+ const input = '\u{DFFFF}start\u{E0000}mid\u{EFFFF}end\u{F0000}';
+
+ expect(createText(input)).toEqual({
+ segmentType: 'Text',
+ text: input,
+ format: {},
+ });
+ });
+
+ it('createText preserves meaningful invisible characters outside the tag range', () => {
+ // U+200B = Zero-Width Space, U+200D = Zero-Width Joiner,
+ // U+202E = Right-to-Left Override, U+202C = Pop Directional Formatting
+ // Written as escapes so the invisible characters cannot be lost or overlooked in the source.
+ const text = 'a\u{200B}b\u{200D}c\u{202E}d\u{202C}e';
+ const result = createText(text);
+
+ expect(result).toEqual({
+ segmentType: 'Text',
+ format: {},
+ text: 'a\u{200B}b\u{200D}c\u{202E}d\u{202C}e',
+ });
+ });
+
+ it('createText does not strip visible characters', () => {
+ // Plain visible text is expected to come back unchanged in the segment
+ const input = 'hello world 你好 ';
+
+ expect(createText(input)).toEqual({
+ segmentType: 'Text',
+ text: input,
+ format: {},
+ });
+ });
+
it('createTableRow', () => {
const row = createTableRow();
diff --git a/packages/roosterjs-content-model-types/lib/editor/ExperimentalFeature.ts b/packages/roosterjs-content-model-types/lib/editor/ExperimentalFeature.ts
index 4b4e7f3be7a2..1d50197a8df3 100644
--- a/packages/roosterjs-content-model-types/lib/editor/ExperimentalFeature.ts
+++ b/packages/roosterjs-content-model-types/lib/editor/ExperimentalFeature.ts
@@ -64,4 +64,11 @@ export type ExperimentalFeature =
/**
* Transform the table border colors when switching from light to dark mode
*/
- | 'TransformTableBorderColors';
+ | 'TransformTableBorderColors'
+
+ /**
+ * Strip invisible unicode characters (U+E0000 to U+EFFFF) from text segments during DOM to Model conversion.
+ * These characters can be used to hide text in HTML and may cause unexpected behavior.
+ * @see https://embracethered.com/blog/posts/2024/hiding-and-finding-text-with-unicode-tags/
+ */
+ | 'FilterInvisibleUnicode';