diff --git a/packages/super-editor/src/core/InputRule.js b/packages/super-editor/src/core/InputRule.js index 124832b7d6..6103634ee2 100644 --- a/packages/super-editor/src/core/InputRule.js +++ b/packages/super-editor/src/core/InputRule.js @@ -7,6 +7,7 @@ import { getTextContentFromNodes } from './helpers/getTextContentFromNodes.js'; import { isRegExp } from './utilities/isRegExp.js'; import { handleDocxPaste } from './inputRules/docx-paste/docx-paste.js'; import { flattenListsInHtml } from './inputRules/html/html-helpers.js'; +import { handleGoogleDocsHtml } from './inputRules/google-docs-paste/google-docs-paste.js'; export class InputRule { match; @@ -231,15 +232,22 @@ export function isWordHtml(html) { ); } +function isGoogleDocsHtml(html) { + return /docs-internal-guid-/.test(html); +} + /** * Handle HTML paste events. * * @param {String} html The HTML string to be pasted. * @param {Editor} editor The editor instance. + * @param {String} source HTML content source * @returns {Boolean} Returns true if the paste was handled. */ -export function handleHtmlPaste(html, editor) { - const cleanedHtml = htmlHandler(html, editor); +export function handleHtmlPaste(html, editor, source) { + let cleanedHtml; + if (source === 'google-docs') cleanedHtml = handleGoogleDocsHtml(html, editor); + else cleanedHtml = htmlHandler(html, editor); const doc = PMDOMParser.fromSchema(editor.schema).parse(cleanedHtml); const { dispatch, state } = editor.view; @@ -378,6 +386,8 @@ export function handleClipboardPaste({ editor, view }, html) { source = 'plain-text'; } else if (isWordHtml(html)) { source = 'word-html'; + } else if (isGoogleDocsHtml(html)) { + source = 'google-docs'; } else { source = 'browser-html'; } @@ -391,6 +401,9 @@ export function handleClipboardPaste({ editor, view }, html) { if (editor.options.mode === 'docx') { return handleDocxPaste(html, editor, view); } + break; + case 'google-docs': + return handleGoogleDocsHtml(html, editor, view); // falls through to browser-html handling when not in DOCX mode case 'browser-html': return handleHtmlPaste(html, editor); diff --git a/packages/super-editor/src/core/helpers/orderedListUtils.js b/packages/super-editor/src/core/helpers/orderedListUtils.js index 14a3e3c828..922e4e5c0d 100644 --- a/packages/super-editor/src/core/helpers/orderedListUtils.js +++ b/packages/super-editor/src/core/helpers/orderedListUtils.js @@ -23,7 +23,7 @@ const listIndexMap = { const createNumbering = (values, lvlText) => { return values.reduce((acc, value, index) => { - return acc.replace(`%${index + 1}`, value); + return value > 9 ? acc.replace(/^0/, '').replace(`%${index + 1}`, value) : acc.replace(`%${index + 1}`, value); }, lvlText); }; diff --git a/packages/super-editor/src/core/helpers/pasteListHelpers.js b/packages/super-editor/src/core/helpers/pasteListHelpers.js index 2ba0bd4a88..311159cf30 100644 --- a/packages/super-editor/src/core/helpers/pasteListHelpers.js +++ b/packages/super-editor/src/core/helpers/pasteListHelpers.js @@ -1,5 +1,5 @@ -export const extractListLevelStyles = (cssText, listId, level) => { - const pattern = new RegExp(`@list\\s+l${listId}:level${level}\\s*\\{([^}]+)\\}`, 'i'); +export const extractListLevelStyles = (cssText, listId, level, numId) => { + const pattern = new RegExp(`@list\\s+l${listId}:level${level}(?:\\s+lfo${numId})?\\s*\\{([^}]+)\\}`, 'i'); const match = cssText.match(pattern); if (!match) return null; @@ -77,3 +77,30 @@ export const startHelperMap = new Map([ ['upperRoman', getStartNumberFromRoman], ['bullet', () => 1], ]); + +export const googleNumDefMap = new Map([ + ['decimal', 'decimal'], + ['decimal-leading-zero', 'decimal'], + ['lower-alpha', 'lowerLetter'], + ['upper-alpha', 'upperLetter'], + ['lower-roman', 'lowerRoman'], + ['upper-roman', 'upperRoman'], + ['bullet', 'bullet'], +]); + +export const getLvlTextForGoogleList = (fmt, level, editor) => { + const bulletListDef = editor.converter.numbering.abstracts[0]; + const bulletDefForLevel = bulletListDef.elements.find( + (el) => el.name === 'w:lvl' && el.attributes?.['w:ilvl'] === (level - 1).toString(), + ); + const bulletLvlText = bulletDefForLevel.elements.find((el) => el.name === 'w:lvlText')?.attributes?.['w:val']; + + switch (fmt) { + case 'decimal-leading-zero': + return `0%${level}.`; + case 'bullet': + return bulletLvlText; + default: + return `%${level}.`; + } +}; diff --git a/packages/super-editor/src/core/inputRules/docx-paste/docx-paste.js b/packages/super-editor/src/core/inputRules/docx-paste/docx-paste.js index 62ca78f335..5bd08e0ab6 100644 --- a/packages/super-editor/src/core/inputRules/docx-paste/docx-paste.js +++ b/packages/super-editor/src/core/inputRules/docx-paste/docx-paste.js @@ -44,7 +44,7 @@ export const handleDocxPaste = (html, editor, view) => { if (msoListMatch) { const [, abstractId, level, numId] = msoListMatch; - const styles = extractListLevelStyles(css, abstractId, level); + const styles = extractListLevelStyles(css, abstractId, level, numId) || {}; let start, numFmt, lvlText; if (type === 'listItem') { @@ -58,7 +58,7 @@ export const handleDocxPaste = (html, editor, view) => { // Get numbering format from Word styles const msoNumFormat = styles['mso-level-number-format'] || 'decimal'; numFmt = numDefMap.get(msoNumFormat); - const punc = item.children[0]?.innerText?.slice(-1) || '.'; + const punc = item.innerText?.match(/^\s*[a-zA-Z0-9]+([.()])/i)?.[1] || '.'; lvlText = numFmt === 'bullet' ? normalizeLvlTextChar(styles['mso-level-text']) : `%${level}${punc}`; const startGetter = startHelperMap.get(numFmt); @@ -83,6 +83,7 @@ export const handleDocxPaste = (html, editor, view) => { transformWordLists(tempDiv, editor); const doc = DOMParser.fromSchema(editor.schema).parse(tempDiv); + tempDiv.remove(); const { dispatch } = editor.view; diff --git a/packages/super-editor/src/core/inputRules/google-docs-paste/google-docs-paste.js b/packages/super-editor/src/core/inputRules/google-docs-paste/google-docs-paste.js new file mode 100644 index 0000000000..b1c282a014 --- /dev/null +++ b/packages/super-editor/src/core/inputRules/google-docs-paste/google-docs-paste.js @@ -0,0 +1,166 @@ +import { DOMParser } from 'prosemirror-model'; +import { convertEmToPt, sanitizeHtml } from '../../InputRule.js'; +import { ListHelpers } from '../../helpers/list-numbering-helpers.js'; +import { createSingleItemList } from '../html/html-helpers.js'; +import { getLvlTextForGoogleList, googleNumDefMap } from '../../helpers/pasteListHelpers.js'; + +/** + * Main handler for pasted Google Docs content. + * + * @param {string} html The string being pasted + * @param {Editor} editor The SuperEditor instance + * @param {Object} view The ProseMirror view + * @returns + */ +export const handleGoogleDocsHtml = (html, editor, view) => { + // convert lists + const htmlWithPtSizing = convertEmToPt(html); + const cleanedHtml = sanitizeHtml(htmlWithPtSizing).innerHTML; + + const tempDiv = document.createElement('div'); + tempDiv.innerHTML = cleanedHtml; + + const htmlWithMergedLists = mergeSeparateLists(tempDiv); + const flattenHtml = flattenListsInHtml(htmlWithMergedLists, editor); + + const doc = DOMParser.fromSchema(editor.schema).parse(flattenHtml); + tempDiv.remove(); + + const { dispatch } = editor.view; + if (!dispatch) return false; + + dispatch(view.state.tr.replaceSelectionWith(doc, true)); + return true; +}; + +/** + * Flattens lists to ensure each list contains exactly ONE list item. + */ +function flattenListsInHtml(container, editor) { + // Keep processing until all lists are flattened + let foundList; + while ((foundList = findListToFlatten(container))) { + flattenFoundList(foundList, editor); + } + + return container; +} + +/** + * Finds lists to be flattened + */ +function findListToFlatten(container) { + // First priority: unprocessed lists + let list = container.querySelector('ol:not([data-list-id]), ul:not([data-list-id])'); + if (list) return list; + + return null; +} + +/** + * Flattens a single list by: + * 1. Ensuring it has proper data-list-id + * 2. Splitting multi-item lists into single-item lists + * 3. Extracting nested lists and processing them recursively + */ +function flattenFoundList(listElem, editor) { + let NodeInterface; + if (editor.options.mockDocument) { + const win = editor.options.mockDocument.defaultView; + NodeInterface = win.Node; + } else { + NodeInterface = window.Node; + } + + const tag = listElem.tagName.toLowerCase(); + const rootListLevel = Number(listElem.children[0].getAttribute('aria-level')); + const rootListFmt = listElem.children[0].style['list-style-type'] || 'decimal'; + const start = listElem.getAttribute('start') || 1; + + // Google docs list doesn't have numId + const rootNumId = ListHelpers.getNewListId(editor); + + ListHelpers.generateNewListDefinition({ + numId: rootNumId, + listType: tag === 'ol' ? 'orderedList' : 'bulletList', + editor, + fmt: googleNumDefMap.get(rootListFmt), + level: (rootListLevel - 1).toString(), + start, + text: getLvlTextForGoogleList(rootListFmt, rootListLevel, editor), + }); + + // Create single-item lists for each item + const newLists = []; + + // Get all direct