From ebc188e342088d7d24623b20433020eb8b04d10f Mon Sep 17 00:00:00 2001 From: Rich Chiodo Date: Wed, 20 Nov 2019 16:44:54 -0800 Subject: [PATCH 1/3] First try --- .../interactive-common/latexManipulation.ts | 86 ++++++++++++------- .../latexManipulation.unit.test.ts | 36 +++++++- 2 files changed, 90 insertions(+), 32 deletions(-) diff --git a/src/datascience-ui/interactive-common/latexManipulation.ts b/src/datascience-ui/interactive-common/latexManipulation.ts index e848ffaf1571..19094ce5b632 100644 --- a/src/datascience-ui/interactive-common/latexManipulation.ts +++ b/src/datascience-ui/interactive-common/latexManipulation.ts @@ -1,38 +1,66 @@ // Copyright (c) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. -// Adds '$$' to latex formulas that don't have a '$', allowing users to input the formula directly. -export function fixLatexEquations(input: string): string { - const block = '\n$$\n'; - const beginIndexes = getAllIndexesOfRegex(input, /\\begin\{[a-z]*\*?\}/g); - const endIndexes = getAllIndexesOfRegex(input, /\\end\{[a-z]*\*?\}/g); +// tslint:disable-next-line:no-require-imports no-var-requires +const _escapeRegExp = require('lodash/escapeRegExp') as typeof import('lodash/escapeRegExp'); - if (beginIndexes.length === endIndexes.length) { - for (let i = 0; i < beginIndexes.length; i += 1) { - const endOfEnd = input.indexOf('}', endIndexes[i] + 1 + 8 * i); +function appendMatch(input: string) - // Edge case, if the input starts with the latex formula we add the block at the beggining. - if (beginIndexes[i] === 0 && input[beginIndexes[i]] === '\\') { - input = block + input.slice(0, endOfEnd + 1) + block + input.slice(endOfEnd + 1, input.length); - // Normal case, if the latex formula starts with a '$' we don't do anything. - // Otherwise, we insert the block at the beginning and ending of the latex formula. - } else if (input[beginIndexes[i] - 1] !== '$') { - input = input.slice(0, beginIndexes[i] + block.length * 2 * i) + block + input.slice(beginIndexes[i] + block.length * 2 * i, endOfEnd + 1) + block + input.slice(endOfEnd + 1, input.length); +// Adds '$$' to latex formulas that don't have a '$', allowing users to input the formula directly. +export function fixLatexEquations(input: string): string { + const output: string[] = []; + + // Search for begin/end pairs, outputting as we go + let start = 0; + const appendMatch = (beginIndex: endRegex: RegExp, beginIndex: number) + while (start < input.length) { + // First $$ + const startDollars = /\$\$/.exec(input.substr(start)); + // Then $ + const startDollar = /\$/.exec(input.substr(start)); + // Then /begin{name*} + const begin = /\\begin\{([a-z,\*]+)\}/.exec(input.substr(start)); + if (startDollars && startDollars.index < begin.index) { + // Output till the next $$ + const offset = startDollars.index + 1 + start; + const endDollars = /\$\$/.exec(input.substr(offset)); + if (endDollars) { + const length = endDollars.index + 2 + offset; + output.push(input.substr(start, length)); + start = start + length; + } else { + // Invalid, just return + return input; + } + } else if (startDollar) { + // Output till the next $ + const offset = startDollar.index + 1 + start; + const endDollar = /\$/.exec(input.substr(offset)); + if (endDollar) { + const length = endDollar.index + 1 + offset; + output.push(input.substr(start, length)); + start = start + length; + } else { + // Invalid, just return + return input; + } + } else if (begin && begin.length > 1) { + const offset = begin.index + start; + const endRegex = new RegExp(`\\\\end\\{${_escapeRegExp(begin[1])}\\}`); + const end = endRegex.exec(input.substr(start)); + if (end) { + const prefix = input.substr(start, begin.index); + const wrapped = input.substr(offset, `\\end{${begin[1]}}`.length + end.index - begin.index); + output.push(`${prefix}\n$$\n${wrapped}\n$$\n`); + start = start + prefix.length + wrapped.length; + } else { + // Invalid, just return + return input; } + } else { + output.push(input.substr(start)); + start = input.length; } } - - return input; -} - -function getAllIndexesOfRegex(arr: string, value: RegExp): number[] { - const indexes = []; - let result; - - // tslint:disable-next-line: no-conditional-assignment - while ((result = value.exec(arr)) !== null) { - indexes.push(result.index); - } - - return indexes; + return output.join(''); } diff --git a/src/test/datascience/latexManipulation.unit.test.ts b/src/test/datascience/latexManipulation.unit.test.ts index f20e044b8ec3..4ff25f292b66 100644 --- a/src/test/datascience/latexManipulation.unit.test.ts +++ b/src/test/datascience/latexManipulation.unit.test.ts @@ -85,18 +85,48 @@ $$ $$ `; - test('Latex - Equations don\'t have $$', () => { + const markdown4 = ` +$$ +\begin{equation*} +\mathbf{V}_1 \times \mathbf{V}_2 = \begin{vmatrix} +\mathbf{i} & \mathbf{j} & \mathbf{k} \\ +\frac{\partial X}{\partial u} & \frac{\partial Y}{\partial u} & 0 \\ +\frac{\partial X}{\partial v} & \frac{\partial Y}{\partial v} & 0 +\end{vmatrix} +\end{equation*} +$$ +`; + + test('Latex - Equations don\'t have \$\$', () => { const result = fixLatexEquations(markdown1); expect(result).to.be.equal(output1, 'Result is incorrect'); }); - test('Latex - Equations have $', () => { + test('Latex - Equations have \$', () => { const result = fixLatexEquations(markdown2); expect(result).to.be.equal(markdown2, 'Result is incorrect'); }); - test('Latex - Multiple equations don\'t have $$', () => { + test('Latex - Multiple equations don\'t have \$\$', () => { const result = fixLatexEquations(markdown3); expect(result).to.be.equal(output3, 'Result is incorrect'); }); + + test('Latex - All on the same line', () => { + const line = '\\begin{matrix}1 & 0\\0 & 1\\end{matrix}'; + const after = '\n$$\n\\begin{matrix}1 & 0\\0 & 1\\end{matrix}\n$$\n'; + const result = fixLatexEquations(line); + expect(result).to.be.equal(after, 'Result is incorrect'); + }); + + test('Latex - Invalid', () => { + const invalid = '\n\\begin{eq*}do stuff\\end{eq}'; + const result = fixLatexEquations(invalid); + expect(result).to.be.equal(invalid, 'Result should not have changed'); + }); + + test('Latex - \$\$ already present', () => { + const result = fixLatexEquations(markdown4); + expect(result).to.be.equal(markdown4, 'Result should not have changed'); + }); }); From 3830e009fbc29fe650d3d5a1a1a40f9f2f7ce0f9 Mon Sep 17 00:00:00 2001 From: Rich Chiodo Date: Wed, 20 Nov 2019 17:17:19 -0800 Subject: [PATCH 2/3] Add news entry and fix multiples --- news/2 Fixes/8673.md | 1 + .../interactive-common/latexManipulation.ts | 93 ++++++++++--------- .../latexManipulation.unit.test.ts | 38 ++++++-- 3 files changed, 82 insertions(+), 50 deletions(-) create mode 100644 news/2 Fixes/8673.md diff --git a/news/2 Fixes/8673.md b/news/2 Fixes/8673.md new file mode 100644 index 000000000000..dd3996bb73d5 --- /dev/null +++ b/news/2 Fixes/8673.md @@ -0,0 +1 @@ +Some LaTeX equations do not print in notebooks or the interactive window. diff --git a/src/datascience-ui/interactive-common/latexManipulation.ts b/src/datascience-ui/interactive-common/latexManipulation.ts index 19094ce5b632..724aa9802812 100644 --- a/src/datascience-ui/interactive-common/latexManipulation.ts +++ b/src/datascience-ui/interactive-common/latexManipulation.ts @@ -4,60 +4,67 @@ // tslint:disable-next-line:no-require-imports no-var-requires const _escapeRegExp = require('lodash/escapeRegExp') as typeof import('lodash/escapeRegExp'); -function appendMatch(input: string) - // Adds '$$' to latex formulas that don't have a '$', allowing users to input the formula directly. export function fixLatexEquations(input: string): string { const output: string[] = []; // Search for begin/end pairs, outputting as we go let start = 0; - const appendMatch = (beginIndex: endRegex: RegExp, beginIndex: number) + + // Loop until we run out string while (start < input.length) { - // First $$ - const startDollars = /\$\$/.exec(input.substr(start)); - // Then $ - const startDollar = /\$/.exec(input.substr(start)); - // Then /begin{name*} + // Check $$, $ and begin + const dollars = /\$\$/.exec(input.substr(start)); + const dollar = /\$/.exec(input.substr(start)); const begin = /\\begin\{([a-z,\*]+)\}/.exec(input.substr(start)); - if (startDollars && startDollars.index < begin.index) { - // Output till the next $$ - const offset = startDollars.index + 1 + start; - const endDollars = /\$\$/.exec(input.substr(offset)); - if (endDollars) { - const length = endDollars.index + 2 + offset; - output.push(input.substr(start, length)); - start = start + length; - } else { - // Invalid, just return - return input; - } - } else if (startDollar) { - // Output till the next $ - const offset = startDollar.index + 1 + start; - const endDollar = /\$/.exec(input.substr(offset)); - if (endDollar) { - const length = endDollar.index + 1 + offset; - output.push(input.substr(start, length)); - start = start + length; - } else { - // Invalid, just return - return input; - } - } else if (begin && begin.length > 1) { - const offset = begin.index + start; - const endRegex = new RegExp(`\\\\end\\{${_escapeRegExp(begin[1])}\\}`); - const end = endRegex.exec(input.substr(start)); - if (end) { - const prefix = input.substr(start, begin.index); - const wrapped = input.substr(offset, `\\end{${begin[1]}}`.length + end.index - begin.index); - output.push(`${prefix}\n$$\n${wrapped}\n$$\n`); - start = start + prefix.length + wrapped.length; + let endRegex = /\$\$/; + let endRegexLength = 2; + + // Pick the first that matches + let match = dollars; + let isBeginMatch = false; + if (!match || (dollar && dollar.index < match.index)) { + match = dollar; + endRegex = /\$/; + endRegexLength = 1; + } + if (!match || (begin && begin.index < match.index)) { + match = begin; + endRegex = begin ? new RegExp(`\\\\end\\{${_escapeRegExp(begin[1])}\\}`) : /\$/; + endRegexLength = begin ? `\\end{${begin[1]}}`.length : 1; + isBeginMatch = true; + } + + // Output this match + if (match) { + if (isBeginMatch) { + // Begin match is a little more complicated. + const offset = match.index + start; + const end = endRegex.exec(input.substr(start)); + if (end) { + const prefix = input.substr(start, match.index); + const wrapped = input.substr(offset, endRegexLength + end.index - match.index); + output.push(`${prefix}\n$$\n${wrapped}\n$$\n`); + start = start + prefix.length + wrapped.length; + } else { + // Invalid, just return + return input; + } } else { - // Invalid, just return - return input; + // Output till the next $ or $$ + const offset = match.index + 1 + start; + const endDollar = endRegex.exec(input.substr(offset)); + if (endDollar) { + const length = endDollar.index + 1 + offset; + output.push(input.substr(start, length)); + start = start + length; + } else { + // Invalid, just return + return input; + } } } else { + // No more matches output.push(input.substr(start)); start = input.length; } diff --git a/src/test/datascience/latexManipulation.unit.test.ts b/src/test/datascience/latexManipulation.unit.test.ts index 4ff25f292b66..c582e968caaf 100644 --- a/src/test/datascience/latexManipulation.unit.test.ts +++ b/src/test/datascience/latexManipulation.unit.test.ts @@ -87,14 +87,33 @@ $$ const markdown4 = ` $$ -\begin{equation*} -\mathbf{V}_1 \times \mathbf{V}_2 = \begin{vmatrix} -\mathbf{i} & \mathbf{j} & \mathbf{k} \\ -\frac{\partial X}{\partial u} & \frac{\partial Y}{\partial u} & 0 \\ -\frac{\partial X}{\partial v} & \frac{\partial Y}{\partial v} & 0 -\end{vmatrix} -\end{equation*} +\\begin{equation*} +\\mathbf{V}_1 \\times \\mathbf{V}_2 = \\begin{vmatrix} +\\mathbf{i} & \\mathbf{j} & \\mathbf{k} \\ +\\frac{\partial X}{\\partial u} & \\frac{\\partial Y}{\\partial u} & 0 \\\\ +\\frac{\partial X}{\\partial v} & \\frac{\\partial Y}{\\partial v} & 0 +\\end{vmatrix} +\\end{equation*} $$ +`; + + const markdown5 = ` +\\begin{equation*} +P(E) = {n \\choose k} p^k (1-p)^{ n-k} +\\end{equation*} + +This expression $\\sqrt{3x-1}+(1+x)^2$ is an example of a TeX inline equation in a [Markdown-formatted](https://daringfireball.net/projects/markdown/) sentence. +`; + const output5 = ` + +$$ +\\begin{equation*} +P(E) = {n \\choose k} p^k (1-p)^{ n-k} +\\end{equation*} +$$ + + +This expression $\\sqrt{3x-1}+(1+x)^2$ is an example of a TeX inline equation in a [Markdown-formatted](https://daringfireball.net/projects/markdown/) sentence. `; test('Latex - Equations don\'t have \$\$', () => { @@ -129,4 +148,9 @@ $$ const result = fixLatexEquations(markdown4); expect(result).to.be.equal(markdown4, 'Result should not have changed'); }); + + test('Latex - Multiple types', () => { + const result = fixLatexEquations(markdown5); + expect(result).to.be.equal(output5, 'Result is incorrect'); + }); }); From cbe8af3a74569c426421c3846c52426053035b91 Mon Sep 17 00:00:00 2001 From: Rich Chiodo Date: Wed, 20 Nov 2019 17:28:46 -0800 Subject: [PATCH 3/3] Add a bunch of comments --- .../interactive-common/latexManipulation.ts | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/datascience-ui/interactive-common/latexManipulation.ts b/src/datascience-ui/interactive-common/latexManipulation.ts index 724aa9802812..cde5ef974c5a 100644 --- a/src/datascience-ui/interactive-common/latexManipulation.ts +++ b/src/datascience-ui/interactive-common/latexManipulation.ts @@ -5,6 +5,14 @@ const _escapeRegExp = require('lodash/escapeRegExp') as typeof import('lodash/escapeRegExp'); // Adds '$$' to latex formulas that don't have a '$', allowing users to input the formula directly. +// +// The general algorithm here is: +// Search for either $$ or $ or a \begin{name} item. +// If a $$ or $ is found, output up to the next dollar sign +// If a \begin{name} is found, find the matching \end{name}, wrap the section in $$ and output up to the \end. +// +// LaTeX seems to follow the pattern of \begin{name} or is escaped with $$ or $. See here for a bunch of examples: +// https://jupyter-notebook.readthedocs.io/en/stable/examples/Notebook/Typesetting%20Equations.html export function fixLatexEquations(input: string): string { const output: string[] = [];