fix(@sanity): issue where hidden unicode characters were bloating doc…

…ument in PTE (#6440) * fix(portable-text-editor): issue shown in tests re stega. use duplicate code * test(playwright-ct): add test * chore(sanity): remove prettier linting * test(sanity): fix missing snapshot * test(sanity): update test after realising the test would pass always if comparing object number * chore: test unicode removal * chore: test unicode removal * chore(@sanity): remove old solution * fix(@sanity/block-tools): unicode issue. remove vercel/stega and move to block-tools * test(@sanity/block-tools): for unicode * fix(@sanity/block-tools): utf8 characters weren't being filtered. using the vercel/stega * chore: update lock file * (chore): update pnpm lock
sanity-io · Apr 29, 2024 · ffa68ec · ffa68ec
1 parent 0dae0da
commit ffa68ec
Show file tree

Hide file tree

Showing 9 changed files with 353 additions and 184 deletions.
diff --git a/packages/@sanity/block-tools/package.json b/packages/@sanity/block-tools/package.json
@@ -60,6 +60,7 @@
     "@types/jsdom": "^20.0.0",
     "@types/lodash": "^4.14.149",
     "@types/react": "^18.3.1",
+    "@vercel/stega": "0.1.0",
     "jsdom": "^23.0.1"
   },
   "publishConfig": {

diff --git a/packages/@sanity/block-tools/src/HtmlDeserializer/helpers.ts b/packages/@sanity/block-tools/src/HtmlDeserializer/helpers.ts
@@ -3,6 +3,7 @@ import {
   isPortableTextTextBlock,
   type PortableTextTextBlock,
 } from '@sanity/types'
+import {vercelStegaSplit} from '@vercel/stega'
 import {isEqual} from 'lodash'
 
 import {DEFAULT_BLOCK} from '../constants'
@@ -61,9 +62,10 @@ export function preprocess(
   parseHtml: HtmlParser,
   options: HtmlPreprocessorOptions,
 ): Document {
-  const doc = parseHtml(normalizeHtmlBeforePreprocess(html))
+  const cleanHTML = cleanStegaUnicode(html)
+  const doc = parseHtml(normalizeHtmlBeforePreprocess(cleanHTML))
   preprocessors.forEach((processor) => {
-    processor(html, doc, options)
+    processor(cleanHTML, doc, options)
   })
   return doc
 }
@@ -338,6 +340,29 @@ export function removeAllWhitespace(rootNode: Node) {
   nodesToRemove.forEach((node) => node.parentElement?.removeChild(node))
 }
 
+/**
+ * Removes stega-encoded data (hidden unicode characters) from `result`, e.g. the HTML given to
+ * `preprocess`. Duplicates the utility in `@sanity/client/stega`: the e2e process pulls in the
+ * node version of `@sanity/client`, so that utility is not accessible here.
+ *
+ * @todo remove once this utility is available in `@vercel/stega`
+ * @returns The cleaned text, or `result` untouched if cleaning throws
+ * @alpha
+ * @hidden
+ */
+export function cleanStegaUnicode(result: string): string {
+  try {
+    // A string is cleaned directly: same value as the JSON round-trip, without serializing it.
+    if (typeof result === 'string') return vercelStegaSplit(result).cleaned
+    // Kept for untyped callers passing JSON-like data: cleans every nested string value.
+    const clean = (_key: string, value: unknown) =>
+      typeof value === 'string' ? vercelStegaSplit(value).cleaned : value
+    return JSON.parse(JSON.stringify(result, clean))
+  } catch {
+    return result
+  }
+}
+
 function isWhitespaceBlock(elm: HTMLElement): boolean {
   return ['p', 'br'].includes(tagName(elm) || '') && !elm.textContent?.trim()
 }
diff --git a/packages/@sanity/block-tools/test/tests/HtmlDeserializer/stegaUnicodeCleaner/index.ts b/packages/@sanity/block-tools/test/tests/HtmlDeserializer/stegaUnicodeCleaner/index.ts
@@ -0,0 +1,12 @@
+import defaultSchema from '../../../fixtures/defaultSchema'
+import {type BlockTestFn} from '../types'
+
+// Type of the `body` field on the `blogPost` fixture type; handed to `htmlToBlocks` below.
+const bodyField = defaultSchema.get('blogPost').fields.find((field: any) => field.name === 'body')
+const blockContentType = bodyField.type
+
+// Converts the given HTML to blocks using the `body` field type resolved above.
+const testFn: BlockTestFn = (html, blockTools, commonOptions) =>
+  blockTools.htmlToBlocks(html, blockContentType, commonOptions)
+
+export default testFn
diff --git a/packages/@sanity/block-tools/test/tests/HtmlDeserializer/stegaUnicodeCleaner/input.html b/packages/@sanity/block-tools/test/tests/HtmlDeserializer/stegaUnicodeCleaner/input.html
diff --git a/packages/@sanity/block-tools/test/tests/HtmlDeserializer/stegaUnicodeCleaner/output.json b/packages/@sanity/block-tools/test/tests/HtmlDeserializer/stegaUnicodeCleaner/output.json
@@ -0,0 +1,29 @@
+[
+  {
+    "_key": "randomKey1",
+    "_type": "block",
+    "children": [
+      {
+        "_key": "randomKey10",
+        "_type": "span",
+        "marks": [],
+        "text": "This is a test of the Sanity Portable Text renderer. We will use a variety of content to test its capabilities. Here are some unicode characters: ☺️👍🏽🌍🌞🌚🌝🌛🌜🌙💫⭐️🌟✨⚡️☄️💥🔥🌪🌈☀️🌤⛅️🌥☁️🌦🌧⛈🌩🌨❄️☃️⛄️🌬💨💧💦☔️☂️🌊🌫. Now let's try some markdown formatting: "
+      },
+      {"_key": "randomKey11", "_type": "span", "marks": ["strong"], "text": "bold text"},
+      {"_key": "randomKey12", "_type": "span", "marks": [], "text": ", "},
+      {"_key": "randomKey13", "_type": "span", "marks": ["em"], "text": "italic text"},
+      {"_key": "randomKey14", "_type": "span", "marks": [], "text": ", ~~strikethrough~~, "},
+      {"_key": "randomKey15", "_type": "span", "marks": ["code"], "text": "code"},
+      {
+        "_key": "randomKey16",
+        "_type": "span",
+        "marks": [],
+        "text": ", > blockquote, - list item, 1. numbered list item, "
+      },
+      {"_key": "randomKey17", "_type": "span", "marks": ["randomKey0"], "text": "link"},
+      {"_key": "randomKey18", "_type": "span", "marks": [], "text": ", ."}
+    ],
+    "markDefs": [{"_key": "randomKey0", "_type": "link", "href": "https://example.com/"}],
+    "style": "normal"
+  }
+]
diff --git a/packages/@sanity/block-tools/tsdoc.json b/packages/@sanity/block-tools/tsdoc.json
@@ -0,0 +1,33 @@
+{
+  "$schema": "https://developer.microsoft.com/json-schemas/tsdoc/v0/tsdoc.schema.json",
+  "tagDefinitions": [
+    {
+      "tagName": "@hidden",
+      "syntaxKind": "block",
+      "allowMultiple": true
+    },
+    {
+      "tagName": "@todo",
+      "syntaxKind": "block",
+      "allowMultiple": true
+    }
+  ],
+  "supportForTags": {
+    "@hidden": true,
+    "@beta": true,
+    "@internal": true,
+    "@public": true,
+    "@experimental": true,
+    "@see": true,
+    "@link": true,
+    "@example": true,
+    "@deprecated": true,
+    "@alpha": true,
+    "@param": true,
+    "@returns": true,
+    "@remarks": true,
+    "@throws": true,
+    "@defaultValue": true,
+    "@todo": true
+  }
+}
diff --git a/...s/sanity/playwright-ct/tests/formBuilder/inputs/PortableText/copyPaste/CopyPaste.spec.tsx b/...s/sanity/playwright-ct/tests/formBuilder/inputs/PortableText/copyPaste/CopyPaste.spec.tsx
@@ -4,7 +4,13 @@ import {type Path, type SanityDocument} from '@sanity/types'
 
 import {testHelpers} from '../../../../utils/testHelpers'
 import CopyPasteStory from './CopyPasteStory'
-import {GDOCS_INPUT, NORMALIZED_INPUT_SNAPSHOT, REMOVED_INPUT_SNAPSHOT} from './input'
+import {
+  CLEANED_UNICODE_INPUT_SNAPSHOT,
+  GDOCS_INPUT,
+  NORMALIZED_INPUT_SNAPSHOT,
+  REMOVED_INPUT_SNAPSHOT,
+  UNICODE_TEXT,
+} from './input'
 
 export type UpdateFn = () => {focusPath: Path; document: SanityDocument}
 
@@ -68,4 +74,31 @@ test.describe('Portable Text Input', () => {
       await expect(documentState?.bodyNormalized?.length || 0).toEqual(snapshotLength)
     })
   })
+
+  test.describe('Should be able to paste text that has hidden unicode characters without bloating the PTE', () => {
+    test(`Removed unicode characters`, async ({mount, page}) => {
+      const {getFocusedPortableTextEditor, insertPortableTextCopyPaste, waitForDocumentState} =
+        testHelpers({page})
+
+      await mount(<CopyPasteStory document={document} />)
+
+      const $pte = await getFocusedPortableTextEditor('field-body')
+
+      await insertPortableTextCopyPaste(UNICODE_TEXT, $pte)
+
+      // Wait until the document's `body` is non-empty.
+      const documentState = await waitForDocumentState((documentStateValue) => {
+        return (documentStateValue?.body?.length || 0) > 0
+      })
+
+      // Stringify is needed to measure the content within the children, not just the number of blocks.
+      // `JSON.stringify` is synchronous, so nothing is awaited; a missing body serializes as `[]`.
+      const bodyLength = JSON.stringify(documentState?.body ?? []).length
+      const snapshotLength = JSON.stringify(CLEANED_UNICODE_INPUT_SNAPSHOT).length
+
+      // Ideally we would compare the snapshot with the document, but the keys will be different each time
+      // We therefore compare the length of the body to the snapshot length here instead.
+      await expect(bodyLength).toEqual(snapshotLength)
+    })
+  })
 })
diff --git a/packages/sanity/playwright-ct/tests/formBuilder/inputs/PortableText/copyPaste/input.ts b/packages/sanity/playwright-ct/tests/formBuilder/inputs/PortableText/copyPaste/input.ts