simstudioai · waleedlatif1 · May 15, 2026 · May 15, 2026 · May 15, 2026
diff --git a/apps/sim/tools/gmail/utils.test.ts b/apps/sim/tools/gmail/utils.test.ts
@@ -81,9 +81,9 @@ describe('plainTextToHtml', () => {
 })
 
 describe('htmlToPlainText', () => {
-  it('strips tags, decodes entities, and collapses whitespace', () => {
+  it('strips tags and decodes entities', () => {
     const result = htmlToPlainText('<p>Hi &amp; bye</p><p>Line<br>break</p>')
-    expect(result).toBe('Hi & bye\nLine\nbreak')
+    expect(result).toBe('Hi & bye\n\nLine\nbreak')
   })
 
   it('drops <style> and <script> contents', () => {
@@ -97,10 +97,21 @@ describe('htmlToPlainText', () => {
   })
 
   it('decodes decimal and hexadecimal numeric entities', () => {
-    expect(htmlToPlainText('<p>&#8220;hi&#8221; &#160;and&#x2019;s</p>')).toBe(
-      '\u201chi\u201d \u00a0and\u2019s'
+    expect(htmlToPlainText('<p>&#8220;hi&#8221; and&#x2019;s</p>')).toBe(
+      '\u201chi\u201d and\u2019s'
     )
   })
+
+  it('preserves &#160; (non-breaking space) as U+00A0 for fidelity in plain-text output', () => {
+    expect(htmlToPlainText('<p>a&#160;b</p>')).toBe('a\u00a0b')
+  })
+
+  it('elides anchor URLs that exactly match link text, and drops bare # anchors', () => {
+    expect(
+      htmlToPlainText('<p>Visit <a href="https://example.com">https://example.com</a></p>')
+    ).toBe('Visit https://example.com')
+    expect(htmlToPlainText('<p><a href="#section">Anchor</a></p>')).toBe('Anchor')
+  })
 })
 
 describe('buildSimpleEmailMessage', () => {

diff --git a/apps/sim/tools/gmail/utils.ts b/apps/sim/tools/gmail/utils.ts
@@ -1,3 +1,4 @@
+import { convert } from 'html-to-text'
 import type {
   GmailAttachment,
   GmailMessage,
@@ -344,26 +345,21 @@ export function plainTextToHtml(body: string): string {
 }
 
 /**
- * Best-effort conversion of an HTML body to a plain-text fallback. Strips tags
- * and decodes the common entities. Used so we always include a plain-text part
- * alongside HTML for clients that don't render HTML.
+ * Best-effort conversion of an HTML body to a plain-text fallback, so a plain-text part always
+ * accompanies HTML for clients that don't render it. Delegates tag stripping and entity decoding
+ * to `html-to-text` (also used elsewhere in the repo): no word-wrapping; images, scripts, and
+ * styles are dropped; link URLs are omitted when equal to the link text or a bare `#` anchor.
  */
 export function htmlToPlainText(html: string): string {
-  return html
-    .replace(/<style[\s\S]*?<\/style>/gi, '')
-    .replace(/<script[\s\S]*?<\/script>/gi, '')
-    .replace(/<br\s*\/?>/gi, '\n')
-    .replace(/<\/(p|div|h[1-6]|li|tr)>/gi, '\n')
-    .replace(/<[^>]+>/g, '')
-    .replace(/&nbsp;/g, ' ')
-    .replace(/&lt;/g, '<')
-    .replace(/&gt;/g, '>')
-    .replace(/&quot;/g, '"')
-    .replace(/&#x([0-9a-f]+);/gi, (_, hex) => String.fromCodePoint(Number.parseInt(hex, 16)))
-    .replace(/&#(\d+);/g, (_, dec) => String.fromCodePoint(Number.parseInt(dec, 10)))
-    .replace(/&amp;/g, '&')
-    .replace(/\n{3,}/g, '\n\n')
-    .trim()
+  return convert(html, {
+    wordwrap: false,
+    selectors: [
+      { selector: 'a', options: { hideLinkHrefIfSameAsText: true, noAnchorUrl: true } },
+      { selector: 'img', format: 'skip' },
+      { selector: 'script', format: 'skip' },
+      { selector: 'style', format: 'skip' },
+    ],
+  })
 }
 
 /**