From 44b479e060ef713557b3de9e5edf091d44478862 Mon Sep 17 00:00:00 2001 From: Dmytro Kirpa Date: Wed, 18 Mar 2026 17:30:15 +0100 Subject: [PATCH 1/2] fix(react-avatar): support initials calculation for GB18030-2022 extension characters --- ...-b3b582c1-9dae-4459-9787-66b6167b0458.json | 7 ++++ .../library/src/utils/getInitials.test.ts | 20 +++++++++++ .../library/src/utils/getInitials.ts | 33 ++++++++++++------- 3 files changed, 48 insertions(+), 12 deletions(-) create mode 100644 change/@fluentui-react-avatar-b3b582c1-9dae-4459-9787-66b6167b0458.json diff --git a/change/@fluentui-react-avatar-b3b582c1-9dae-4459-9787-66b6167b0458.json b/change/@fluentui-react-avatar-b3b582c1-9dae-4459-9787-66b6167b0458.json new file mode 100644 index 0000000000000..7a57a541c91dc --- /dev/null +++ b/change/@fluentui-react-avatar-b3b582c1-9dae-4459-9787-66b6167b0458.json @@ -0,0 +1,7 @@ +{ + "type": "patch", + "comment": "fix: support initials calculation for GB18030-2022 extension characters", + "packageName": "@fluentui/react-avatar", + "email": "dmytrokirpa@microsoft.com", + "dependentChangeType": "patch" +} diff --git a/packages/react-components/react-avatar/library/src/utils/getInitials.test.ts b/packages/react-components/react-avatar/library/src/utils/getInitials.test.ts index f0c3726ca8725..473568a882c9e 100644 --- a/packages/react-components/react-avatar/library/src/utils/getInitials.test.ts +++ b/packages/react-components/react-avatar/library/src/utils/getInitials.test.ts @@ -106,6 +106,26 @@ describe('getInitials', () => { expect(result).toEqual(''); }); + it('calculates initials for GB18030-2022 extension characters (CJK Ext B-I)', () => { + // These characters are encoded as surrogate pairs; the character itself should be returned as the initial + expect(getInitials('𬸚', false)).toEqual('𬸚'); // GFZB-196 + expect(getInitials('𢃾', false)).toEqual('𢃾'); // CJK Ext B + expect(getInitials('𪜀', false)).toEqual('𪜀'); // CJK Ext C + expect(getInitials('𫜴', false)).toEqual('𫜴'); 
// CJK Ext C + expect(getInitials('𫟰', false)).toEqual('𫟰'); // CJK Ext D + expect(getInitials('𬺠', false)).toEqual('𬺠'); // CJK Ext E + expect(getInitials('𮓇', false)).toEqual('𮓇'); // CJK Ext F + expect(getInitials('𪛝', false)).toEqual('𪛝'); // BX + expect(getInitials('𰉖', false)).toEqual('𰉖'); // GX + expect(getInitials('𱘍', false)).toEqual('𱘍'); // HX + expect(getInitials('𮯰', false)).toEqual('𮯰'); // IX + }); + + it('calculates initials for mixed strings starting with GB18030-2022 extension characters', () => { + // First code point of a mixed string should be used as the initial + expect(getInitials('𫚭齅䶱5𮯠灋𬘭r𫟼蝌龯𪛒𪛛㊣𫜹⾢Z𱔟𫍲𮴋䶺𰆬a', false)).toEqual('𫚭'); + }); + it('calculates an expected initials for Japanese names', () => { let result = getInitials('松田', false); expect(result).toEqual(''); diff --git a/packages/react-components/react-avatar/library/src/utils/getInitials.ts b/packages/react-components/react-avatar/library/src/utils/getInitials.ts index a6c48dc168d64..833e5b1f3862c 100644 --- a/packages/react-components/react-avatar/library/src/utils/getInitials.ts +++ b/packages/react-components/react-avatar/library/src/utils/getInitials.ts @@ -10,9 +10,11 @@ const UNWANTED_ENCLOSURES_REGEX: RegExp = /[\(\[\{][^\)\]\}]*[\)\]\}]/g; /** * Regular expression matching special ASCII characters except space, plus some unicode special characters. - * Applies after unwanted enclosures have been removed + * Applies after unwanted enclosures have been removed. + * Note: the range starts at \uE000 (not \uD800) to avoid matching surrogate code units, which would break + * supplementary Unicode characters (encoded as surrogate pairs in UTF-16) such as GB18030-2022 extension characters. */ -const UNWANTED_CHARS_REGEX: RegExp = /[\0-\u001F\!-/:-@\[-`\{-\u00BF\u0250-\u036F\uD800-\uFFFF]/g; +const UNWANTED_CHARS_REGEX: RegExp = /[\0-\u001F\!-/:-@\[-`\{-\u00BF\u0250-\u036F\uE000-\uFFFF]/g; /** * Regular expression matching phone numbers. 
Applied after chars matching UNWANTED_CHARS_REGEX have been removed @@ -28,30 +30,34 @@ const MULTIPLE_WHITESPACES_REGEX: RegExp = /\s+/g; * Arabic: Arabic, Arabic Supplement, Arabic Extended-A. * Korean: Hangul Jamo, Hangul Compatibility Jamo, Hangul Jamo Extended-A, Hangul Syllables, Hangul Jamo Extended-B. * Japanese: Hiragana, Katakana. - * CJK: CJK Unified Ideographs Extension A, CJK Unified Ideographs, CJK Compatibility Ideographs, - * CJK Unified Ideographs Extension B + * CJK: CJK Unified Ideographs Extension A, CJK Unified Ideographs, CJK Compatibility Ideographs. + * Note: Supplementary CJK characters (GB18030-2022 extension characters in Ext B-I) are intentionally not listed + * here so they can be rendered as initials. */ const UNSUPPORTED_TEXT_REGEX: RegExp = - /[\u0600-\u06FF\u0750-\u077F\u08A0-\u08FF\u1100-\u11FF\u3130-\u318F\uA960-\uA97F\uAC00-\uD7AF\uD7B0-\uD7FF\u3040-\u309F\u30A0-\u30FF\u3400-\u4DBF\u4E00-\u9FFF\uF900-\uFAFF]|[\uD840-\uD869][\uDC00-\uDED6]/; + /[\u0600-\u06FF\u0750-\u077F\u08A0-\u08FF\u1100-\u11FF\u3130-\u318F\uA960-\uA97F\uAC00-\uD7AF\uD7B0-\uD7FF\u3040-\u309F\u30A0-\u30FF\u3400-\u4DBF\u4E00-\u9FFF\uF900-\uFAFF]/; function getInitialsLatin(displayName: string, isRtl: boolean, firstInitialOnly?: boolean): string { let initials = ''; const splits: string[] = displayName.split(' '); if (splits.length !== 0) { - initials += splits[0].charAt(0).toUpperCase(); + // Use code point iteration to correctly handle supplementary characters (e.g. GB18030-2022 extension chars) + // that are encoded as surrogate pairs; charAt(0) would only return half of such a character. + initials += ([...splits[0]][0] ?? '').toUpperCase(); } if (!firstInitialOnly) { if (splits.length === 2) { - initials += splits[1].charAt(0).toUpperCase(); + initials += ([...splits[1]][0] ?? '').toUpperCase(); } else if (splits.length === 3) { - initials += splits[2].charAt(0).toUpperCase(); + initials += ([...splits[2]][0] ?? 
'').toUpperCase(); } } - if (isRtl && initials.length > 1) { - return initials.charAt(1) + initials.charAt(0); + if (isRtl && [...initials].length > 1) { + const chars = [...initials]; + return chars[1] + chars[0]; } return initials; @@ -95,9 +101,12 @@ export function getInitials( displayName = cleanupDisplayName(displayName); - // For names containing CJK characters, and phone numbers, we don't display initials + // Check only the first code point against UNSUPPORTED_TEXT_REGEX so that names starting with a supported + // character (e.g. GB18030-2022 extension characters) produce an initial even when the rest of the string + // contains BMP CJK characters that would otherwise trigger the regex. + const firstCodePoint = [...displayName][0] ?? ''; if ( - UNSUPPORTED_TEXT_REGEX.test(displayName) || + UNSUPPORTED_TEXT_REGEX.test(firstCodePoint) || (!options?.allowPhoneInitials && PHONENUMBER_REGEX.test(displayName)) ) { return ''; From 28c09703c894e6a36a3fd57f228718805cc3c2c5 Mon Sep 17 00:00:00 2001 From: Dmytro Kirpa Date: Wed, 18 Mar 2026 17:48:08 +0100 Subject: [PATCH 2/2] refactor(react-avatar): extract getFirstCodePoint helper for initials calculation Co-authored-by: Copilot Autofix powered by AI <175728472+Copilot@users.noreply.github.com> --- .../library/src/utils/getInitials.ts | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/packages/react-components/react-avatar/library/src/utils/getInitials.ts b/packages/react-components/react-avatar/library/src/utils/getInitials.ts index 833e5b1f3862c..54e1c955e3a1f 100644 --- a/packages/react-components/react-avatar/library/src/utils/getInitials.ts +++ b/packages/react-components/react-avatar/library/src/utils/getInitials.ts @@ -37,21 +37,30 @@ const MULTIPLE_WHITESPACES_REGEX: RegExp = /\s+/g; const UNSUPPORTED_TEXT_REGEX: RegExp = /[\u0600-\u06FF\u0750-\u077F\u08A0-\u08FF\u1100-\u11FF\u3130-\u318F\uA960-\uA97F\uAC00-\uD7AF\uD7B0-\uD7FF\u3040-\u309F\u30A0-\u30FF\u3400-\u4DBF\u4E00-\u9FFF\uF900-\uFAFF]/; +function 
getFirstCodePoint(value: string): string { + if (!value) { + return ''; + } + + const codePoint = value.codePointAt(0); + return codePoint === undefined ? '' : String.fromCodePoint(codePoint); +} + function getInitialsLatin(displayName: string, isRtl: boolean, firstInitialOnly?: boolean): string { let initials = ''; const splits: string[] = displayName.split(' '); if (splits.length !== 0) { - // Use code point iteration to correctly handle supplementary characters (e.g. GB18030-2022 extension chars) + // Use code point-aware helper to correctly handle supplementary characters (e.g. GB18030-2022 extension chars) // that are encoded as surrogate pairs; charAt(0) would only return half of such a character. - initials += ([...splits[0]][0] ?? '').toUpperCase(); + initials += getFirstCodePoint(splits[0]).toUpperCase(); } if (!firstInitialOnly) { if (splits.length === 2) { - initials += ([...splits[1]][0] ?? '').toUpperCase(); + initials += getFirstCodePoint(splits[1]).toUpperCase(); } else if (splits.length === 3) { - initials += ([...splits[2]][0] ?? '').toUpperCase(); + initials += getFirstCodePoint(splits[2]).toUpperCase(); } } @@ -104,7 +113,7 @@ export function getInitials( // Check only the first code point against UNSUPPORTED_TEXT_REGEX so that names starting with a supported // character (e.g. GB18030-2022 extension characters) produce an initial even when the rest of the string // contains BMP CJK characters that would otherwise trigger the regex. - const firstCodePoint = [...displayName][0] ?? ''; + const firstCodePoint = getFirstCodePoint(displayName); if ( UNSUPPORTED_TEXT_REGEX.test(firstCodePoint) || (!options?.allowPhoneInitials && PHONENUMBER_REGEX.test(displayName))