diff --git a/packages/super-editor/src/core/InputRule.js b/packages/super-editor/src/core/InputRule.js index 124832b7d6..6103634ee2 100644 --- a/packages/super-editor/src/core/InputRule.js +++ b/packages/super-editor/src/core/InputRule.js @@ -7,6 +7,7 @@ import { getTextContentFromNodes } from './helpers/getTextContentFromNodes.js'; import { isRegExp } from './utilities/isRegExp.js'; import { handleDocxPaste } from './inputRules/docx-paste/docx-paste.js'; import { flattenListsInHtml } from './inputRules/html/html-helpers.js'; +import { handleGoogleDocsHtml } from './inputRules/google-docs-paste/google-docs-paste.js'; export class InputRule { match; @@ -231,15 +232,22 @@ export function isWordHtml(html) { ); } +function isGoogleDocsHtml(html) { + return /docs-internal-guid-/.test(html); +} + /** * Handle HTML paste events. * * @param {String} html The HTML string to be pasted. * @param {Editor} editor The editor instance. + * @param {String} source HTML content source * @returns {Boolean} Returns true if the paste was handled. 
*/ -export function handleHtmlPaste(html, editor) { - const cleanedHtml = htmlHandler(html, editor); +export function handleHtmlPaste(html, editor, source) { + let cleanedHtml; + if (source === 'google-docs') cleanedHtml = handleGoogleDocsHtml(html, editor); + else cleanedHtml = htmlHandler(html, editor); const doc = PMDOMParser.fromSchema(editor.schema).parse(cleanedHtml); const { dispatch, state } = editor.view; @@ -378,6 +386,8 @@ export function handleClipboardPaste({ editor, view }, html) { source = 'plain-text'; } else if (isWordHtml(html)) { source = 'word-html'; + } else if (isGoogleDocsHtml(html)) { + source = 'google-docs'; } else { source = 'browser-html'; } @@ -391,6 +401,9 @@ export function handleClipboardPaste({ editor, view }, html) { if (editor.options.mode === 'docx') { return handleDocxPaste(html, editor, view); } + break; + case 'google-docs': + return handleGoogleDocsHtml(html, editor, view); // falls through to browser-html handling when not in DOCX mode case 'browser-html': return handleHtmlPaste(html, editor); diff --git a/packages/super-editor/src/core/helpers/orderedListUtils.js b/packages/super-editor/src/core/helpers/orderedListUtils.js index 14a3e3c828..922e4e5c0d 100644 --- a/packages/super-editor/src/core/helpers/orderedListUtils.js +++ b/packages/super-editor/src/core/helpers/orderedListUtils.js @@ -23,7 +23,7 @@ const listIndexMap = { const createNumbering = (values, lvlText) => { return values.reduce((acc, value, index) => { - return acc.replace(`%${index + 1}`, value); + return value > 9 ? 
acc.replace(/^0/, '').replace(`%${index + 1}`, value) : acc.replace(`%${index + 1}`, value); }, lvlText); }; diff --git a/packages/super-editor/src/core/helpers/pasteListHelpers.js b/packages/super-editor/src/core/helpers/pasteListHelpers.js index 2ba0bd4a88..311159cf30 100644 --- a/packages/super-editor/src/core/helpers/pasteListHelpers.js +++ b/packages/super-editor/src/core/helpers/pasteListHelpers.js @@ -1,5 +1,5 @@ -export const extractListLevelStyles = (cssText, listId, level) => { - const pattern = new RegExp(`@list\\s+l${listId}:level${level}\\s*\\{([^}]+)\\}`, 'i'); +export const extractListLevelStyles = (cssText, listId, level, numId) => { + const pattern = new RegExp(`@list\\s+l${listId}:level${level}(?:\\s+lfo${numId})?\\s*\\{([^}]+)\\}`, 'i'); const match = cssText.match(pattern); if (!match) return null; @@ -77,3 +77,30 @@ export const startHelperMap = new Map([ ['upperRoman', getStartNumberFromRoman], ['bullet', () => 1], ]); + +export const googleNumDefMap = new Map([ + ['decimal', 'decimal'], + ['decimal-leading-zero', 'decimal'], + ['lower-alpha', 'lowerLetter'], + ['upper-alpha', 'upperLetter'], + ['lower-roman', 'lowerRoman'], + ['upper-roman', 'upperRoman'], + ['bullet', 'bullet'], +]); + +export const getLvlTextForGoogleList = (fmt, level, editor) => { + const bulletListDef = editor.converter.numbering.abstracts[0]; + const bulletDefForLevel = bulletListDef.elements.find( + (el) => el.name === 'w:lvl' && el.attributes?.['w:ilvl'] === (level - 1).toString(), + ); + const bulletLvlText = bulletDefForLevel.elements.find((el) => el.name === 'w:lvlText')?.attributes?.['w:val']; + + switch (fmt) { + case 'decimal-leading-zero': + return `0%${level}.`; + case 'bullet': + return bulletLvlText; + default: + return `%${level}.`; + } +}; diff --git a/packages/super-editor/src/core/inputRules/docx-paste/docx-paste.js b/packages/super-editor/src/core/inputRules/docx-paste/docx-paste.js index 62ca78f335..5bd08e0ab6 100644 --- 
a/packages/super-editor/src/core/inputRules/docx-paste/docx-paste.js +++ b/packages/super-editor/src/core/inputRules/docx-paste/docx-paste.js @@ -44,7 +44,7 @@ export const handleDocxPaste = (html, editor, view) => { if (msoListMatch) { const [, abstractId, level, numId] = msoListMatch; - const styles = extractListLevelStyles(css, abstractId, level); + const styles = extractListLevelStyles(css, abstractId, level, numId) || {}; let start, numFmt, lvlText; if (type === 'listItem') { @@ -58,7 +58,7 @@ export const handleDocxPaste = (html, editor, view) => { // Get numbering format from Word styles const msoNumFormat = styles['mso-level-number-format'] || 'decimal'; numFmt = numDefMap.get(msoNumFormat); - const punc = item.children[0]?.innerText?.slice(-1) || '.'; + const punc = item.innerText?.match(/^\s*[a-zA-Z0-9]+([.()])/i)?.[1] || '.'; lvlText = numFmt === 'bullet' ? normalizeLvlTextChar(styles['mso-level-text']) : `%${level}${punc}`; const startGetter = startHelperMap.get(numFmt); @@ -83,6 +83,7 @@ export const handleDocxPaste = (html, editor, view) => { transformWordLists(tempDiv, editor); const doc = DOMParser.fromSchema(editor.schema).parse(tempDiv); + tempDiv.remove(); const { dispatch } = editor.view; diff --git a/packages/super-editor/src/core/inputRules/google-docs-paste/google-docs-paste.js b/packages/super-editor/src/core/inputRules/google-docs-paste/google-docs-paste.js new file mode 100644 index 0000000000..b1c282a014 --- /dev/null +++ b/packages/super-editor/src/core/inputRules/google-docs-paste/google-docs-paste.js @@ -0,0 +1,166 @@ +import { DOMParser } from 'prosemirror-model'; +import { convertEmToPt, sanitizeHtml } from '../../InputRule.js'; +import { ListHelpers } from '../../helpers/list-numbering-helpers.js'; +import { createSingleItemList } from '../html/html-helpers.js'; +import { getLvlTextForGoogleList, googleNumDefMap } from '../../helpers/pasteListHelpers.js'; + +/** + * Main handler for pasted Google Docs content. 
+ * + * @param {string} html The string being pasted + * @param {Editor} editor The SuperEditor instance + * @param {Object} view The ProseMirror view + * @returns + */ +export const handleGoogleDocsHtml = (html, editor, view) => { + // convert lists + const htmlWithPtSizing = convertEmToPt(html); + const cleanedHtml = sanitizeHtml(htmlWithPtSizing).innerHTML; + + const tempDiv = document.createElement('div'); + tempDiv.innerHTML = cleanedHtml; + + const htmlWithMergedLists = mergeSeparateLists(tempDiv); + const flattenHtml = flattenListsInHtml(htmlWithMergedLists, editor); + + const doc = DOMParser.fromSchema(editor.schema).parse(flattenHtml); + tempDiv.remove(); + + const { dispatch } = editor.view; + if (!dispatch) return false; + + dispatch(view.state.tr.replaceSelectionWith(doc, true)); + return true; +}; + +/** + * Flattens lists to ensure each list contains exactly ONE list item. + */ +function flattenListsInHtml(container, editor) { + // Keep processing until all lists are flattened + let foundList; + while ((foundList = findListToFlatten(container))) { + flattenFoundList(foundList, editor); + } + + return container; +} + +/** + * Finds lists to be flattened + */ +function findListToFlatten(container) { + // First priority: unprocessed lists + let list = container.querySelector('ol:not([data-list-id]), ul:not([data-list-id])'); + if (list) return list; + + return null; +} + +/** + * Flattens a single list by: + * 1. Ensuring it has proper data-list-id + * 2. Splitting multi-item lists into single-item lists + * 3. 
Extracting nested lists and processing them recursively + */ +function flattenFoundList(listElem, editor) { + let NodeInterface; + if (editor.options.mockDocument) { + const win = editor.options.mockDocument.defaultView; + NodeInterface = win.Node; + } else { + NodeInterface = window.Node; + } + + const tag = listElem.tagName.toLowerCase(); + const rootListLevel = Number(listElem.children[0].getAttribute('aria-level')); + const rootListFmt = listElem.children[0].style['list-style-type'] || 'decimal'; + const start = listElem.getAttribute('start') || 1; + + // Google docs list doesn't have numId + const rootNumId = ListHelpers.getNewListId(editor); + + ListHelpers.generateNewListDefinition({ + numId: rootNumId, + listType: tag === 'ol' ? 'orderedList' : 'bulletList', + editor, + fmt: googleNumDefMap.get(rootListFmt), + level: (rootListLevel - 1).toString(), + start, + text: getLvlTextForGoogleList(rootListFmt, rootListLevel, editor), + }); + + // Create single-item lists for each item + const newLists = []; + + // Get all direct
<li> children + const items = Array.from(listElem.children).filter((c) => c.tagName.toLowerCase() === 'li'); + + items.forEach((li) => { + const level = Number(li.getAttribute('aria-level')) - 1; + const listLevel = [level + 1]; + const nestedLists = getNestedLists([li.nextSibling]); + + // Create a new single-item list for this li + const newList = createSingleItemList({ li, tag, rootNumId, level, listLevel, editor, NodeInterface }); + newLists.push(newList); + + nestedLists.forEach((list) => { + newLists.push(list.cloneNode(true)); + }); + if (nestedLists.length && ['OL', 'UL'].includes(li.nextSibling.tagName)) { + li.nextSibling?.remove(); + } + }); + + // Replace the original list with the new single-item lists + const parent = listElem.parentNode; + const nextSibling = listElem.nextSibling; + parent.removeChild(listElem); + + newLists.forEach((list) => { + parent.insertBefore(list, nextSibling); + }); +} + +/** + * Recursive helper to find all nested lists for the list item + */ +function getNestedLists(nodes) { + let result = []; + + const nodesArray = Array.from(nodes).filter((n) => n !== null); + + for (let item of nodesArray) { + if (item.tagName === 'OL' || item.tagName === 'UL') { + result.push(item); + result.push(...getNestedLists(item.children)); + } + } + + return result; +} + +/** + * Method that combines separate lists with sequential start attribute into one list + * Google Docs list items could be presented as separate lists with sequential start attribute + */ +function mergeSeparateLists(container) { + const tempCont = container.cloneNode(true); + + const rootLevelLists = Array.from(tempCont.querySelectorAll('ol:not(ol ol):not(ul ol)') || []); + const mainList = rootLevelLists.find((list) => !list.getAttribute('start')); + const hasStartAttr = rootLevelLists.some((list) => list.getAttribute('start') !== null); + + if (hasStartAttr) { + const listsWithStartAttr = rootLevelLists.filter((list) => list.getAttribute('start') !== null); + for (let
[index, item] of listsWithStartAttr.entries()) { + if (item.getAttribute('start') === (index + 2).toString()) { + mainList.append(...item.childNodes); + item.remove(); + } + } + } + + return tempCont; +} diff --git a/packages/super-editor/src/core/inputRules/html/html-helpers.js b/packages/super-editor/src/core/inputRules/html/html-helpers.js index b62a95f30e..3df5bfa045 100644 --- a/packages/super-editor/src/core/inputRules/html/html-helpers.js +++ b/packages/super-editor/src/core/inputRules/html/html-helpers.js @@ -105,7 +105,7 @@ function flattenFoundList(listElem, editor, NodeInterface) { nestedLists.forEach((nl) => nl.parentNode.removeChild(nl)); // Create a new single-item list for this li - const newList = createSingleItemList(li, tag, rootNumId, level, editor, NodeInterface); + const newList = createSingleItemList({ li, tag, rootNumId, level, editor, NodeInterface }); newLists.push(newList); // Add the nested lists (they'll be processed in the next iteration) @@ -127,7 +127,7 @@ function flattenFoundList(listElem, editor, NodeInterface) { /** * Creates a single-item list from an
<li> element */ -function createSingleItemList(li, tag, rootNumId, level, editor, NodeInterface) { +export function createSingleItemList({ li, tag, rootNumId, level, listLevel, editor, NodeInterface }) { const localDoc = li.ownerDocument; const ELEMENT_NODE = NodeInterface.ELEMENT_NODE; const TEXT_NODE = NodeInterface.TEXT_NODE; @@ -163,7 +163,7 @@ function createSingleItemList(li, tag, rootNumId, level, editor, NodeInterface) newLi.setAttribute('data-num-fmt', listNumberingType); newLi.setAttribute('data-lvl-text', lvlText || ''); - newLi.setAttribute('data-list-level', JSON.stringify([level + 1])); + newLi.setAttribute('data-list-level', JSON.stringify(listLevel || [level + 1])); // Copy content from original li Array.from(li.childNodes).forEach((node) => { diff --git a/packages/super-editor/src/core/super-converter/v2/importer/listImporter.js b/packages/super-editor/src/core/super-converter/v2/importer/listImporter.js index 15994e060e..cae449dcc7 100644 --- a/packages/super-editor/src/core/super-converter/v2/importer/listImporter.js +++ b/packages/super-editor/src/core/super-converter/v2/importer/listImporter.js @@ -389,7 +389,7 @@ export const generateListPath = (level, numId, styleId, levels, docx) => { if (iLvl > 0) { for (let i = iLvl; i >= 0; i--) { const { start: lvlStart } = getListLevelDefinitionTag(numId, i, styleId, docx); - if (!levels[i]) levels[i] = Number(lvlStart); + if (!levels[i]) levels[i] = Number(lvlStart) || 1; path.unshift(levels[i]); } }