Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions news/2 Fixes/8673.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Some LaTeX equations do not print in notebooks or the interactive window.
97 changes: 70 additions & 27 deletions src/datascience-ui/interactive-common/latexManipulation.ts
Original file line number Diff line number Diff line change
@@ -1,38 +1,81 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.

// tslint:disable-next-line:no-require-imports no-var-requires
const _escapeRegExp = require('lodash/escapeRegExp') as typeof import('lodash/escapeRegExp');

// Adds '$$' to latex formulas that don't have a '$', allowing users to input the formula directly.
//
// The general algorithm here is:
// Search for either $$ or $ or a \begin{name} item.
// If a $$ or $ is found, output up to the next dollar sign
// If a \begin{name} is found, find the matching \end{name}, wrap the section in $$ and output up to the \end.
//
// LaTeX seems to follow the pattern of \begin{name} or is escaped with $$ or $. See here for a bunch of examples:
// https://jupyter-notebook.readthedocs.io/en/stable/examples/Notebook/Typesetting%20Equations.html
export function fixLatexEquations(input: string): string {
const block = '\n$$\n';
const output: string[] = [];

const beginIndexes = getAllIndexesOfRegex(input, /\\begin\{[a-z]*\*?\}/g);
const endIndexes = getAllIndexesOfRegex(input, /\\end\{[a-z]*\*?\}/g);
// Search for begin/end pairs, outputting as we go
let start = 0;

if (beginIndexes.length === endIndexes.length) {
for (let i = 0; i < beginIndexes.length; i += 1) {
const endOfEnd = input.indexOf('}', endIndexes[i] + 1 + 8 * i);
// Loop until we run out string
while (start < input.length) {
// Check $$, $ and begin
Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hmm, maybe I'll put a more general comment about the algorithm being used.

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Even after reading the comments, it's quite complicated. I guess there's nothing we can do about that...

const dollars = /\$\$/.exec(input.substr(start));
const dollar = /\$/.exec(input.substr(start));
const begin = /\\begin\{([a-z,\*]+)\}/.exec(input.substr(start));
let endRegex = /\$\$/;
let endRegexLength = 2;

// Edge case, if the input starts with the latex formula we add the block at the beggining.
if (beginIndexes[i] === 0 && input[beginIndexes[i]] === '\\') {
input = block + input.slice(0, endOfEnd + 1) + block + input.slice(endOfEnd + 1, input.length);
// Normal case, if the latex formula starts with a '$' we don't do anything.
// Otherwise, we insert the block at the beginning and ending of the latex formula.
} else if (input[beginIndexes[i] - 1] !== '$') {
input = input.slice(0, beginIndexes[i] + block.length * 2 * i) + block + input.slice(beginIndexes[i] + block.length * 2 * i, endOfEnd + 1) + block + input.slice(endOfEnd + 1, input.length);
}
// Pick the first that matches
let match = dollars;
let isBeginMatch = false;
if (!match || (dollar && dollar.index < match.index)) {
match = dollar;
endRegex = /\$/;
endRegexLength = 1;
}
if (!match || (begin && begin.index < match.index)) {
match = begin;
endRegex = begin ? new RegExp(`\\\\end\\{${_escapeRegExp(begin[1])}\\}`) : /\$/;
endRegexLength = begin ? `\\end{${begin[1]}}`.length : 1;
isBeginMatch = true;
}
}

return input;
}

function getAllIndexesOfRegex(arr: string, value: RegExp): number[] {
const indexes = [];
let result;

// tslint:disable-next-line: no-conditional-assignment
while ((result = value.exec(arr)) !== null) {
indexes.push(result.index);
// Output this match
if (match) {
if (isBeginMatch) {
// Begin match is a little more complicated.
const offset = match.index + start;
const end = endRegex.exec(input.substr(start));
if (end) {
const prefix = input.substr(start, match.index);
const wrapped = input.substr(offset, endRegexLength + end.index - match.index);
output.push(`${prefix}\n$$\n${wrapped}\n$$\n`);
start = start + prefix.length + wrapped.length;
} else {
// Invalid, just return
return input;
}
} else {
// Output till the next $ or $$
const offset = match.index + 1 + start;
const endDollar = endRegex.exec(input.substr(offset));
if (endDollar) {
const length = endDollar.index + 1 + offset;
output.push(input.substr(start, length));
start = start + length;
} else {
// Invalid, just return
return input;
}
}
} else {
// No more matches
output.push(input.substr(start));
start = input.length;
}
}

return indexes;
return output.join('');
}
60 changes: 57 additions & 3 deletions src/test/datascience/latexManipulation.unit.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -85,18 +85,72 @@ $$
$$
`;

test('Latex - Equations don\'t have $$', () => {
const markdown4 = `
Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is the one that was failing in the bug.

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Tip: you can use dedent to indent code
(it's more reliable and avoids issues when code gets accidentally indented).
Check this example:

test('Execute a file and capture stdout (with unicode)', async () => {
const source = dedent`
import sys
sys.stdout.write("HELLO WORLD-₹-😄")
`;
const fileToExecute = await createPythonFile(source);
const output = await pythonDaemon.exec([fileToExecute], {});
assert.isUndefined(output.stderr);
assert.deepEqual(output.stdout, 'HELLO WORLD-₹-😄');
});

$$
\\begin{equation*}
\\mathbf{V}_1 \\times \\mathbf{V}_2 = \\begin{vmatrix}
\\mathbf{i} & \\mathbf{j} & \\mathbf{k} \\
\\frac{\partial X}{\\partial u} & \\frac{\\partial Y}{\\partial u} & 0 \\\\
\\frac{\partial X}{\\partial v} & \\frac{\\partial Y}{\\partial v} & 0
\\end{vmatrix}
\\end{equation*}
$$
`;

const markdown5 = `
\\begin{equation*}
P(E) = {n \\choose k} p^k (1-p)^{ n-k}
\\end{equation*}

This expression $\\sqrt{3x-1}+(1+x)^2$ is an example of a TeX inline equation in a [Markdown-formatted](https://daringfireball.net/projects/markdown/) sentence.
`;
const output5 = `

$$
\\begin{equation*}
P(E) = {n \\choose k} p^k (1-p)^{ n-k}
\\end{equation*}
$$


This expression $\\sqrt{3x-1}+(1+x)^2$ is an example of a TeX inline equation in a [Markdown-formatted](https://daringfireball.net/projects/markdown/) sentence.
`;

test('Latex - Equations don\'t have \$\$', () => {
const result = fixLatexEquations(markdown1);
expect(result).to.be.equal(output1, 'Result is incorrect');
});

test('Latex - Equations have $', () => {
test('Latex - Equations have \$', () => {
const result = fixLatexEquations(markdown2);
expect(result).to.be.equal(markdown2, 'Result is incorrect');
});

test('Latex - Multiple equations don\'t have $$', () => {
test('Latex - Multiple equations don\'t have \$\$', () => {
const result = fixLatexEquations(markdown3);
expect(result).to.be.equal(output3, 'Result is incorrect');
});

test('Latex - All on the same line', () => {
const line = '\\begin{matrix}1 & 0\\0 & 1\\end{matrix}';
const after = '\n$$\n\\begin{matrix}1 & 0\\0 & 1\\end{matrix}\n$$\n';
const result = fixLatexEquations(line);
expect(result).to.be.equal(after, 'Result is incorrect');
});

test('Latex - Invalid', () => {
const invalid = '\n\\begin{eq*}do stuff\\end{eq}';
const result = fixLatexEquations(invalid);
expect(result).to.be.equal(invalid, 'Result should not have changed');
});

test('Latex - \$\$ already present', () => {
const result = fixLatexEquations(markdown4);
expect(result).to.be.equal(markdown4, 'Result should not have changed');
});

test('Latex - Multiple types', () => {
const result = fixLatexEquations(markdown5);
expect(result).to.be.equal(output5, 'Result is incorrect');
});
});