Skip to content

Commit

Permalink
fix(@sanity): issue where hidden unicode characters were bloating doc…
Browse files Browse the repository at this point in the history
…ument in PTE (#6440)

* fix(portable-text-editor): issue shown in tests re stega. use duplicate code

* test(playwright-ct): add test

* chore(sanity): remove prettier linting

* test(sanity): fix missing snapshot

* test(sanity): update test after realising the test would pass always if comparing object number

* chore: test unicode removal

* chore: test unicode removal

* chore(@sanity): remove old solution

* fix(@sanity/block-tools): unicode issue. remove vercel/stega and move to block-tools

* test(@sanity/block-tools): for unicode

* fix(@sanity/block-tools): utf8 characters weren't being filtered. using the vercel/stega

* chore: update lock file

* (chore): update pnpm lock
  • Loading branch information
RitaDias committed Apr 29, 2024
1 parent 0dae0da commit ffa68ec
Show file tree
Hide file tree
Showing 9 changed files with 353 additions and 184 deletions.
1 change: 1 addition & 0 deletions packages/@sanity/block-tools/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,7 @@
"@types/jsdom": "^20.0.0",
"@types/lodash": "^4.14.149",
"@types/react": "^18.3.1",
"@vercel/stega": "0.1.0",
"jsdom": "^23.0.1"
},
"publishConfig": {
Expand Down
29 changes: 27 additions & 2 deletions packages/@sanity/block-tools/src/HtmlDeserializer/helpers.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ import {
isPortableTextTextBlock,
type PortableTextTextBlock,
} from '@sanity/types'
import {vercelStegaSplit} from '@vercel/stega'
import {isEqual} from 'lodash'

import {DEFAULT_BLOCK} from '../constants'
Expand Down Expand Up @@ -61,9 +62,10 @@ export function preprocess(
parseHtml: HtmlParser,
options: HtmlPreprocessorOptions,
): Document {
const doc = parseHtml(normalizeHtmlBeforePreprocess(html))
const cleanHTML = cleanStegaUnicode(html)
const doc = parseHtml(normalizeHtmlBeforePreprocess(cleanHTML))
preprocessors.forEach((processor) => {
processor(html, doc, options)
processor(cleanHTML, doc, options)
})
return doc
}
Expand Down Expand Up @@ -338,6 +340,29 @@ export function removeAllWhitespace(rootNode: Node) {
nodesToRemove.forEach((node) => node.parentElement?.removeChild(node))
}

/**
 * Removes stega-encoded data from a string.
 *
 * `preprocess` runs this on incoming HTML before parsing it, so that the hidden
 * unicode characters added by stega encoding are not carried into the parsed document.
 *
 * This covers the same ground as the stega cleaning utility in `@sanity/client/stega`.
 * Unfortunately, as it stands, the e2e process is pulling in the node version of
 * `@sanity/client`, so that utility is not available here.
 * @todo remove once this utility is available in `@vercel/stega`
 *
 * @param result - The string (typically HTML) to clean.
 * @returns The `cleaned` part reported by `vercelStegaSplit` for `result`, or
 * `result` unchanged if cleaning throws.
 * @alpha
 * @hidden
 */
export function cleanStegaUnicode(result: string): string {
  try {
    // The input is a single string, so it can be split directly. Serializing it to
    // JSON with a replacer and parsing it back only to visit that one value yields
    // the same string while allocating two extra copies of the whole input.
    return vercelStegaSplit(result).cleaned
  } catch {
    // Best effort: cleaning must never prevent the input from being processed.
    return result
  }
}

/**
 * Tells whether the element is a `p` or `br` whose text content is empty
 * or consists of whitespace only.
 */
function isWhitespaceBlock(elm: HTMLElement): boolean {
  const name = tagName(elm) || ''
  if (name !== 'p' && name !== 'br') {
    return false
  }
  const trimmedText = elm.textContent?.trim()
  return !trimmedText
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
import defaultSchema from '../../../fixtures/defaultSchema'
import {type BlockTestFn} from '../types'

// Type of the `body` field on the `blogPost` type of the default fixture schema
const blogPostType = defaultSchema.get('blogPost')
const blockContentType = blogPostType.fields.find(
  (candidate: any) => candidate.name === 'body',
).type

// Runs the HTML through `htmlToBlocks` using the fixture's block content type
const testFn: BlockTestFn = (html, blockTools, commonOptions) =>
  blockTools.htmlToBlocks(html, blockContentType, commonOptions)

export default testFn

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
[
{
"_key": "randomKey1",
"_type": "block",
"children": [
{
"_key": "randomKey10",
"_type": "span",
"marks": [],
"text": "This is a test of the Sanity Portable Text renderer. We will use a variety of content to test its capabilities. Here are some unicode characters: ☺️👍🏽🌍🌞🌚🌝🌛🌜🌙💫⭐️🌟✨⚡️☄️💥🔥🌪🌈☀️🌤⛅️🌥☁️🌦🌧⛈🌩🌨❄️☃️⛄️🌬💨💧💦☔️☂️🌊🌫. Now let's try some markdown formatting: "
},
{"_key": "randomKey11", "_type": "span", "marks": ["strong"], "text": "bold text"},
{"_key": "randomKey12", "_type": "span", "marks": [], "text": ", "},
{"_key": "randomKey13", "_type": "span", "marks": ["em"], "text": "italic text"},
{"_key": "randomKey14", "_type": "span", "marks": [], "text": ", ~~strikethrough~~, "},
{"_key": "randomKey15", "_type": "span", "marks": ["code"], "text": "code"},
{
"_key": "randomKey16",
"_type": "span",
"marks": [],
"text": ", > blockquote, - list item, 1. numbered list item, "
},
{"_key": "randomKey17", "_type": "span", "marks": ["randomKey0"], "text": "link"},
{"_key": "randomKey18", "_type": "span", "marks": [], "text": ", ."}
],
"markDefs": [{"_key": "randomKey0", "_type": "link", "href": "https://example.com/"}],
"style": "normal"
}
]
33 changes: 33 additions & 0 deletions packages/@sanity/block-tools/tsdoc.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
{
"$schema": "https://developer.microsoft.com/json-schemas/tsdoc/v0/tsdoc.schema.json",
"tagDefinitions": [
{
"tagName": "@hidden",
"syntaxKind": "block",
"allowMultiple": true
},
{
"tagName": "@todo",
"syntaxKind": "block",
"allowMultiple": true
}
],
"supportForTags": {
"@hidden": true,
"@beta": true,
"@internal": true,
"@public": true,
"@experimental": true,
"@see": true,
"@link": true,
"@example": true,
"@deprecated": true,
"@alpha": true,
"@param": true,
"@returns": true,
"@remarks": true,
"@throws": true,
"@defaultValue": true,
"@todo": true
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,13 @@ import {type Path, type SanityDocument} from '@sanity/types'

import {testHelpers} from '../../../../utils/testHelpers'
import CopyPasteStory from './CopyPasteStory'
import {GDOCS_INPUT, NORMALIZED_INPUT_SNAPSHOT, REMOVED_INPUT_SNAPSHOT} from './input'
import {
CLEANED_UNICODE_INPUT_SNAPSHOT,
GDOCS_INPUT,
NORMALIZED_INPUT_SNAPSHOT,
REMOVED_INPUT_SNAPSHOT,
UNICODE_TEXT,
} from './input'

export type UpdateFn = () => {focusPath: Path; document: SanityDocument}

Expand Down Expand Up @@ -68,4 +74,31 @@ test.describe('Portable Text Input', () => {
await expect(documentState?.bodyNormalized?.length || 0).toEqual(snapshotLength)
})
})

test.describe('Should be able to paste text that has hidden unicode characters without bloating the PTE', () => {
  test(`Removed unicode characters`, async ({mount, page}) => {
    const {getFocusedPortableTextEditor, insertPortableTextCopyPaste, waitForDocumentState} =
      testHelpers({page})

    await mount(<CopyPasteStory document={document} />)

    const $pte = await getFocusedPortableTextEditor('field-body')

    // Paste text that carries hidden unicode characters into the editor
    await insertPortableTextCopyPaste(UNICODE_TEXT, $pte)

    // Wait until the pasted content has reached the document's `body`
    const documentState = await waitForDocumentState((documentStateValue) => {
      return (documentStateValue?.body?.length || 0) > 0
    })

    // stringify is needed in these cases in order to get the correct length for the content within the children
    // (`JSON.stringify` is synchronous, so there is nothing to await here)
    // prettier-ignore
    const bodyLength = JSON.stringify(documentState?.body).length || 0
    // prettier-ignore
    const snapshotLength = JSON.stringify(CLEANED_UNICODE_INPUT_SNAPSHOT).length

    // Ideally we would compare the snapshot with the document, but the keys will be different each time
    // We therefore compare the length of the body to the snapshot length here instead.
    // `expect` on a plain number is a synchronous assertion, so it is not awaited.
    expect(bodyLength).toEqual(snapshotLength)
  })
})
})

Large diffs are not rendered by default.

0 comments on commit ffa68ec

Please sign in to comment.