diff --git a/news/changelog-1.5.md b/news/changelog-1.5.md index c2795d3c6c4..56ea764431b 100644 --- a/news/changelog-1.5.md +++ b/news/changelog-1.5.md @@ -20,6 +20,7 @@ All changes included in 1.5: - ([#8711](https://github.com/quarto-dev/quarto-cli/issues/8711)): Enforce rendering of tables as `tabular` environments when custom float environments are present. - ([#8841](https://github.com/quarto-dev/quarto-cli/issues/8841)): Do not parse LaTeX table when crossref label doesn't start with `tbl-`. - ([#9582](https://github.com/quarto-dev/quarto-cli/issues/9582)): Forward column classes and attributes correctly to floats inside divs with column classes. +- ([#9729](https://github.com/quarto-dev/quarto-cli/issues/9729)): Fix performance issue with Lua pattern matching and multiple capture groups. ## RevealJS Format diff --git a/src/resources/filters/common/tables.lua b/src/resources/filters/common/tables.lua index 8e23c76f20b..f241a82200b 100644 --- a/src/resources/filters/common/tables.lua +++ b/src/resources/filters/common/tables.lua @@ -131,7 +131,7 @@ end function hasRawLatexTable(raw) if _quarto.format.isRawLatex(raw) and _quarto.format.isLatexOutput() then for i,pattern in ipairs(_quarto.patterns.latexTablePatterns) do - if raw.text:match(pattern) then + if _quarto.modules.patterns.match_all_in_table(pattern)(raw.text) then return true end end diff --git a/src/resources/filters/crossref/#crossref.lua# b/src/resources/filters/crossref/#crossref.lua# deleted file mode 100644 index cdc681af1ca..00000000000 --- a/src/resources/filters/crossref/#crossref.lua# +++ /dev/null @@ -1,82 +0,0 @@ - -- crossref.lua --- Copyright (C) 2020-2023 Posit Software, PBC - --- this is the standalone version of our crossref filters, used in the IDEs for auto-completion - --- required version -PANDOC_VERSION:must_be_at_least '2.13' - --- [import] -function import(script) - local path = PANDOC_SCRIPT_FILE:match("(.*[/\\])") - dofile(path .. 
script) -end - -import("../mainstateinit.lua") - -import("../ast/customnodes.lua") -import("../ast/emulatedfilter.lua") -import("../ast/parse.lua") -import("../ast/render.lua") -import("../ast/runemulation.lua") -import("../ast/traceexecution.lua") -import("../ast/wrappedwriter.lua") - - -import("index.lua") -import("preprocess.lua") -import("sections.lua") -import("figures.lua") -import("tables.lua") -import("equations.lua") -import("listings.lua") -import("theorems.lua") -import("qmd.lua") -import("refs.lua") -import("meta.lua") -import("format.lua") -import("options.lua") -import("../normalize/flags.lua") -import("../normalize/pandoc3.lua") -import("../common/lunacolors.lua") -import("../common/log.lua") -import("../common/pandoc.lua") -import("../common/format.lua") -import("../common/base64.lua") -import("../common/options.lua") -import("../common/refs.lua") -import("../common/filemetadata.lua") -import("../common/figures.lua") -import("../common/tables.lua") -import("../common/theorems.lua") -import("../common/meta.lua") -import("../common/table.lua") -import("../common/string.lua") -import("../common/debug.lua") -import("../common/layout.lua") - --- [/import] - -initCrossrefIndex() - --- chain of filters -return { - init_crossref_options(), - compute_flags(), - parse_pandoc3_figures(), - crossref_preprocess(), - crossref_preprocess_theorems(), - combineFilters({ - file_metadata(), - qmd(), - sections(), - crossref_figures(), - crossref_tables(), - equations(), - listings(), - crossref_theorems(), - }), - resolveRefs(), - crossrefMetaInject(), - writeIndex() -} \ No newline at end of file diff --git a/src/resources/filters/customnodes/floatreftarget.lua b/src/resources/filters/customnodes/floatreftarget.lua index aed7a485ae8..979745eb3b9 100644 --- a/src/resources/filters/customnodes/floatreftarget.lua +++ b/src/resources/filters/customnodes/floatreftarget.lua @@ -444,12 +444,16 @@ end, function(float) local made_fix = false local function 
fix_raw(is_star_env) local function set_raw(el) - if _quarto.format.isRawLatex(el) and el.text:match(_quarto.patterns.latexLongtablePattern) then + if _quarto.format.isRawLatex(el) and _quarto.modules.patterns.match_all_in_table(_quarto.patterns.latexLongtablePattern)(el.text) then made_fix = true local raw = el -- special case for longtable floats in LaTeX - local extended_pattern = "(.-)" .. _quarto.patterns.latexLongtablePattern .. "(.*)" - local longtable_preamble, longtable_begin, longtable_content, longtable_end, longtable_postamble = raw.text:match(extended_pattern) + local extended_pattern = {".-"} + for _, pattern in ipairs(_quarto.patterns.latexLongtablePattern) do + table.insert(extended_pattern, pattern) + end + table.insert(extended_pattern, ".*") + local longtable_preamble, longtable_begin, longtable_content, longtable_end, longtable_postamble = _quarto.modules.patterns.match_all_in_table(extended_pattern)(raw.text) if longtable_preamble == nil or longtable_begin == nil or longtable_content == nil or longtable_end == nil or longtable_postamble == nil then warn("Could not parse longtable parameters. This could happen because the longtable parameters\n" .. "are not well-formed or because of a bug in quarto. Please consider filing a bug report at\n" .. diff --git a/src/resources/filters/modules/patterns.lua b/src/resources/filters/modules/patterns.lua index e82aef027ac..95dac51ebcd 100644 --- a/src/resources/filters/modules/patterns.lua +++ b/src/resources/filters/modules/patterns.lua @@ -42,6 +42,29 @@ local latex_tabular = "(\\begin{tabular}.*\\end{tabular})" local latex_table = "(\\begin{table})(.*)(\\end{table})" local latex_table_star = "(\\begin{table%*})(.*)(\\end{table%*})" +local function combine_patterns(pattern_table) + local combined_pattern = {} + for i, v in ipairs(pattern_table) do + table.insert(combined_pattern, "(" .. v .. 
")") + end + return table.concat(combined_pattern) +end + +-- see https://github.com/quarto-dev/quarto-cli/issues/9729#issuecomment-2122907870 +-- for why this is necessary. +local function match_all_in_table(pattern_table) + local function inner(text) + for i, v in ipairs(pattern_table) do + if text:match(v) == nil then + return nil + end + end + -- return the combined matches for the combined pattern + return text:match(combine_patterns(pattern_table)) + end + return inner +end + return { attr_identifier = attr_identifier, engine_escape = engine_escape, @@ -68,4 +91,7 @@ return { latex_tabular = latex_tabular, latex_table = latex_table, latex_table_star = latex_table_star, + + match_all_in_table = match_all_in_table, + combine_patterns = combine_patterns } \ No newline at end of file diff --git a/src/resources/filters/normalize/extractquartodom.lua b/src/resources/filters/normalize/extractquartodom.lua index a39feca4736..aa8246058a1 100644 --- a/src/resources/filters/normalize/extractquartodom.lua +++ b/src/resources/filters/normalize/extractquartodom.lua @@ -72,6 +72,12 @@ function extract_latex_quartomarkdown_commands() return nil end local text = el.text + -- provide an early exit if the text does not contain the pattern + -- because Lua's pattern matching apparently takes a long time + -- to fail: https://github.com/quarto-dev/quarto-cli/issues/9729 + if text:match("\\QuartoMarkdownBase64{") == nil then + return nil + end local pattern = "(.*)(\\QuartoMarkdownBase64{)([^}]*)(})(.*)" local pre, _, content, _, post = text:match(pattern) if pre == nil then @@ -103,12 +109,17 @@ function inject_vault_content_into_rawlatex() return nil -- luacov: enable end + local text = el.text + -- provide an early exit if the text does not contain the pattern + -- because Lua's pattern matching apparently takes a long time + -- to fail: https://github.com/quarto-dev/quarto-cli/issues/9729 + if el.text:match("3ab579b5%-63b4%-445d%-bc1d%-85bf6c4c04de") == nil then + return nil + 
end local pattern = "(.*)(3ab579b5%-63b4%-445d%-bc1d%-85bf6c4c04de%-[0-9]+)(.*)" - local text = el.text local pre, content_id, post = text:match(pattern) - while pre do local found = false vault.content = _quarto.ast.walk(vault.content, { diff --git a/src/resources/filters/normalize/flags.lua b/src/resources/filters/normalize/flags.lua index 5f3bddd2a3c..0c1512b5edb 100644 --- a/src/resources/filters/normalize/flags.lua +++ b/src/resources/filters/normalize/flags.lua @@ -56,8 +56,10 @@ function compute_flags() end if _quarto.format.isRawLatex(el) then - if (el.text:match(_quarto.patterns.latexLongtablePattern) and - not el.text:match(_quarto.patterns.latexCaptionPattern)) then + local long_table_match = _quarto.modules.patterns.match_all_in_table(_quarto.patterns.latexLongtablePattern) + local caption_match = _quarto.modules.patterns.match_all_in_table(_quarto.patterns.latexCaptionPattern) + if (long_table_match(el.text) and + not caption_match(el.text)) then flags.has_longtable_no_caption_fixup = true end end diff --git a/src/resources/filters/quarto-post/latex.lua b/src/resources/filters/quarto-post/latex.lua index a01d89f0c2f..d3c159a6fd0 100644 --- a/src/resources/filters/quarto-post/latex.lua +++ b/src/resources/filters/quarto-post/latex.lua @@ -436,10 +436,11 @@ function render_latex_fixups() return { RawBlock = function(raw) if _quarto.format.isRawLatex(raw) then - if (raw.text:match(_quarto.patterns.latexLongtablePattern) and - not raw.text:match(_quarto.patterns.latexCaptionPattern)) then + local long_table_match = _quarto.modules.patterns.match_all_in_table(_quarto.patterns.latexLongtablePattern) + local caption_match = _quarto.modules.patterns.match_all_in_table(_quarto.patterns.latexCaptionPattern) + if long_table_match(raw.text) and not caption_match(raw.text) then raw.text = raw.text:gsub( - _quarto.patterns.latexLongtablePattern, "\\begin{longtable*}%2\\end{longtable*}", 1) + 
_quarto.modules.patterns.combine_patterns(_quarto.patterns.latexLongtablePattern), "\\begin{longtable*}%2\\end{longtable*}", 1) return raw end end diff --git a/src/resources/filters/quarto-pre/table-captions.lua b/src/resources/filters/quarto-pre/table-captions.lua index d12460805b0..ef50a29a01e 100644 --- a/src/resources/filters/quarto-pre/table-captions.lua +++ b/src/resources/filters/quarto-pre/table-captions.lua @@ -26,11 +26,14 @@ function table_captions() el = _quarto.ast.walk(el, { RawBlock = function(raw) if _quarto.format.isRawLatex(raw) then - if raw.text:match(_quarto.patterns.latexTabularPattern) and not raw.text:match(_quarto.patterns.latexTablePattern) then - raw.text = raw.text:gsub(_quarto.patterns.latexTabularPattern, - "\\begin{table}\n\\centering\n%1%2%3\n\\end{table}\n", - 1) - return raw + local tabular_match = _quarto.modules.patterns.match_all_in_table(_quarto.patterns.latexTabularPattern) + local table_match = _quarto.modules.patterns.match_all_in_table(_quarto.patterns.latexTablePattern) + if tabular_match(raw.text) and not table_match(raw.text) then + raw.text = raw.text:gsub( + _quarto.modules.patterns.combine_patterns(_quarto.patterns.latexTabularPattern), + "\\begin{table}\n\\centering\n%1%2%3\n\\end{table}\n", + 1) + return raw end end end @@ -169,8 +172,10 @@ function applyTableCaptions(el, tblCaptions, tblLabels) idx = idx + 1 elseif hasRawLatexTable(raw) then for i,pattern in ipairs(_quarto.patterns.latexTablePatterns) do - if raw.text:match(pattern) then - raw.text = applyLatexTableCaption(raw.text, tblCaptions[idx], tblLabels[idx], pattern) + local match_fun = _quarto.modules.patterns.match_all_in_table(pattern) + if match_fun(raw.text) then + local combined_pattern = _quarto.modules.patterns.combine_patterns(pattern) + raw.text = applyLatexTableCaption(raw.text, tblCaptions[idx], tblLabels[idx], combined_pattern) break end end @@ -198,20 +203,22 @@ end function applyLatexTableCaption(latex, tblCaption, tblLabel, tablePattern) 
local latexCaptionPattern = _quarto.patterns.latexCaptionPattern + local latex_caption_match = _quarto.modules.patterns.match_all_in_table(latexCaptionPattern) -- insert caption if there is none - local beginCaption, caption = latex:match(latexCaptionPattern) + local beginCaption, caption = latex_caption_match(latex) if not beginCaption then latex = latex:gsub(tablePattern, "%1" .. "\n\\caption{ }\\tabularnewline\n" .. "%2%3", 1) end -- apply table caption and label - local beginCaption, captionText, endCaption = latex:match(latexCaptionPattern) + local beginCaption, captionText, endCaption = latex_caption_match(latex) if #tblCaption > 0 then captionText = stringEscape(tblCaption, "latex") end if #tblLabel > 0 then captionText = captionText .. " {#" .. tblLabel .. "}" end - latex = latex:gsub(latexCaptionPattern, "%1" .. captionText:gsub("%%", "%%%%") .. "%3", 1) + assert(captionText) + latex = latex:gsub(_quarto.modules.patterns.combine_patterns(latexCaptionPattern), "%1" .. captionText:gsub("%%", "%%%%") .. 
"%3", 1) return latex end diff --git a/src/resources/pandoc/datadir/init.lua b/src/resources/pandoc/datadir/init.lua index 8071dd882db..e9976b7c1d1 100644 --- a/src/resources/pandoc/datadir/init.lua +++ b/src/resources/pandoc/datadir/init.lua @@ -1705,32 +1705,32 @@ local function resolveServiceWorkers(serviceworkers) else return nil end - end +end -local latexTableWithOptionsPattern = "(\\begin{table}%[[^%]]+%])(.*)(\\end{table})" -local latexTablePattern = "(\\begin{table})(.*)(\\end{table})" -local latexLongtablePatternwWithPosAndAlign = "(\\begin{longtable}%[[^%]]+%]{[^\n]*})(.*)(\\end{longtable})" -local latexLongtablePatternWithPos = "(\\begin{longtable}%[[^%]]+%])(.*)(\\end{longtable})" -local latexLongtablePatternWithAlign = "(\\begin{longtable}{[^\n]*})(.*)(\\end{longtable})" -local latexLongtablePattern = "(\\begin{longtable})(.*)(\\end{longtable})" -local latexTabularPatternWithPosAndAlign = "(\\begin{tabular}%[[^%]]+%]{[^\n]*})(.*)(\\end{tabular})" -local latexTabularPatternWithPos = "(\\begin{tabular}%[[^%]]+%])(.*)(\\end{tabular})" -local latexTabularPatternWithAlign = "(\\begin{tabular}{[^\n]*})(.*)(\\end{tabular})" -local latexTabularPattern = "(\\begin{tabular})(.*)(\\end{tabular})" -local latexCaptionPattern = "(\\caption{)(.-)(}[^\n]*\n)" +local latexTableWithOptionsPattern_table = { "\\begin{table}%[[^%]]+%]", ".*", "\\end{table}" } +local latexTablePattern_table = { "\\begin{table}", ".*", "\\end{table}" } +local latexLongtablePatternWithPosAndAlign_table = { "\\begin{longtable}%[[^%]]+%]{[^\n]*}", ".*", "\\end{longtable}" } +local latexLongtablePatternWithPos_table = { "\\begin{longtable}%[[^%]]+%]", ".*", "\\end{longtable}" } +local latexLongtablePatternWithAlign_table = { "\\begin{longtable}{[^\n]*}", ".*", "\\end{longtable}" } +local latexLongtablePattern_table = { "\\begin{longtable}", ".*", "\\end{longtable}" } +local latexTabularPatternWithPosAndAlign_table = { "\\begin{tabular}%[[^%]]+%]{[^\n]*}", ".*", "\\end{tabular}" } +local 
latexTabularPatternWithPos_table = { "\\begin{tabular}%[[^%]]+%]", ".*", "\\end{tabular}" } +local latexTabularPatternWithAlign_table = { "\\begin{tabular}{[^\n]*}", ".*", "\\end{tabular}" } +local latexTabularPattern_table = { "\\begin{tabular}", ".*", "\\end{tabular}" } +local latexCaptionPattern_table = { "\\caption{", ".-", "}[^\n]*\n" } local latexTablePatterns = pandoc.List({ - latexTableWithOptionsPattern, - latexTablePattern, - latexLongtablePatternwWithPosAndAlign, - latexLongtablePatternWithPos, - latexLongtablePatternWithAlign, - latexLongtablePattern, - latexTabularPatternWithPosAndAlign, - latexTabularPatternWithPos, - latexTabularPatternWithAlign, - latexTabularPattern, + latexTableWithOptionsPattern_table, + latexTablePattern_table, + latexLongtablePatternWithPosAndAlign_table, + latexLongtablePatternWithPos_table, + latexLongtablePatternWithAlign_table, + latexLongtablePattern_table, + latexTabularPatternWithPosAndAlign_table, + latexTabularPatternWithPos_table, + latexTabularPatternWithAlign_table, + latexTabularPattern_table, }) -- global quarto params @@ -1876,11 +1876,11 @@ _quarto = { processDependencies = processDependencies, format = format, patterns = { - latexTabularPattern = latexTabularPattern, - latexTablePattern = latexTablePattern, - latexLongtablePattern = latexLongtablePattern, + latexTabularPattern = latexTabularPattern_table, + latexTablePattern = latexTablePattern_table, + latexLongtablePattern = latexLongtablePattern_table, latexTablePatterns = latexTablePatterns, - latexCaptionPattern = latexCaptionPattern + latexCaptionPattern = latexCaptionPattern_table }, utils = utils, withScriptFile = function(file, callback)