Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 15 additions & 4 deletions apps/sim/tools/gmail/utils.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -81,9 +81,9 @@ describe('plainTextToHtml', () => {
})

describe('htmlToPlainText', () => {
it('strips tags, decodes entities, and collapses whitespace', () => {
it('strips tags and decodes entities', () => {
const result = htmlToPlainText('<p>Hi &amp; bye</p><p>Line<br>break</p>')
expect(result).toBe('Hi & bye\nLine\nbreak')
expect(result).toBe('Hi & bye\n\nLine\nbreak')
})

it('drops <style> and <script> contents', () => {
Expand All @@ -97,10 +97,21 @@ describe('htmlToPlainText', () => {
})

it('decodes decimal and hexadecimal numeric entities', () => {
expect(htmlToPlainText('<p>&#8220;hi&#8221; &#160;and&#x2019;s</p>')).toBe(
'\u201chi\u201d \u00a0and\u2019s'
expect(htmlToPlainText('<p>&#8220;hi&#8221; and&#x2019;s</p>')).toBe(
'\u201chi\u201d and\u2019s'
)
})
Comment thread
waleedlatif1 marked this conversation as resolved.

it('preserves &#160; (non-breaking space) as U+00A0 for fidelity in plain-text output', () => {
expect(htmlToPlainText('<p>a&#160;b</p>')).toBe('a\u00a0b')
})

it('elides anchor URLs that exactly match link text, and drops bare # anchors', () => {
expect(
htmlToPlainText('<p>Visit <a href="https://example.com">https://example.com</a></p>')
).toBe('Visit https://example.com')
expect(htmlToPlainText('<p><a href="#section">Anchor</a></p>')).toBe('Anchor')
})
})

describe('buildSimpleEmailMessage', () => {
Expand Down
32 changes: 14 additions & 18 deletions apps/sim/tools/gmail/utils.ts
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import { convert } from 'html-to-text'
import type {
GmailAttachment,
GmailMessage,
Expand Down Expand Up @@ -344,26 +345,21 @@ export function plainTextToHtml(body: string): string {
}

/**
 * Best-effort conversion of an HTML body to a plain-text fallback, so that a
 * plain-text part can always be sent alongside HTML for clients that do not
 * render HTML.
 *
 * Tag stripping and entity decoding are delegated to `convert` from the
 * `html-to-text` library rather than a hand-rolled regex chain.
 *
 * @param html - HTML markup to convert.
 * @returns The plain-text rendering produced by `html-to-text`.
 */
export function htmlToPlainText(html: string): string {
  return convert(html, {
    // Do not hard-wrap long lines; leave wrapping to the receiving client.
    wordwrap: false,
    selectors: [
      {
        selector: 'a',
        options: {
          // Avoid "https://x [https://x]" when the link text already is the URL.
          hideLinkHrefIfSameAsText: true,
          // In-page "#fragment" hrefs are meaningless in plain text.
          noAnchorUrl: true,
        },
      },
      // Images, scripts and stylesheets contribute nothing to the text part.
      { selector: 'img', format: 'skip' },
      { selector: 'script', format: 'skip' },
      { selector: 'style', format: 'skip' },
    ],
  })
}

/**
Expand Down
Loading