Skip to content

Commit

Permalink
Add mathml support
Browse files Browse the repository at this point in the history
Resolves: #421
  • Loading branch information
ggodlewski committed Apr 26, 2024
1 parent 6390f34 commit c50a54a
Show file tree
Hide file tree
Showing 15 changed files with 154 additions and 22 deletions.
30 changes: 30 additions & 0 deletions package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,7 @@
"lunr-languages": "1.10.0",
"marked": "9.0.2",
"mathjs": "10.5.0",
"mathml-to-latex": "1.4.0",
"minimist": "1.2.6",
"mitt": "^3.0.0",
"open": "^7.4.2",
Expand Down
3 changes: 2 additions & 1 deletion src/containers/job/worker.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import { parentPort } from 'worker_threads';
import {executeOdtToMarkdown} from '../../odt/executeOdtToMarkdown';

import {executeOdtToMarkdown} from '../../odt/executeOdtToMarkdown.ts';

parentPort.on('message', async (msg) => {
try {
Expand Down
9 changes: 6 additions & 3 deletions src/containers/transform/TaskLocalFileTransform.ts
Original file line number Diff line number Diff line change
Expand Up @@ -136,6 +136,8 @@ export class TaskLocalFileTransform extends QueueTask {

const rewriteRules = this.userConfig.rewrite_rules || [];

const picturesDirAbsolute = destinationPath + '/' + this.realFileName.replace(/.md$/, '.assets/');

if (SINGLE_THREADED_TRANSFORM) {
const processor = new OdtProcessor(odtPath, true);
await processor.load();
Expand All @@ -156,9 +158,9 @@ export class TaskLocalFileTransform extends QueueTask {
const converter = new OdtToMarkdown(document, styles, fileNameMap);
converter.setRewriteRules(rewriteRules);
if (this.realFileName === '_index.md') {
converter.setPicturesDir('./' + this.realFileName.replace(/.md$/, '.assets/'));
converter.setPicturesDir('./' + this.realFileName.replace(/.md$/, '.assets/'), picturesDirAbsolute);
} else {
converter.setPicturesDir('../' + this.realFileName.replace(/.md$/, '.assets/'));
converter.setPicturesDir('../' + this.realFileName.replace(/.md$/, '.assets/'), picturesDirAbsolute);
}
markdown = await converter.convert();
links = Array.from(converter.links);
Expand All @@ -173,9 +175,10 @@ export class TaskLocalFileTransform extends QueueTask {
errors: Array<string>;
}

const workerResult: WorkerResult = await this.jobManagerContainer.scheduleWorker('OdtToMarkdown', {
const workerResult: WorkerResult = <WorkerResult>await this.jobManagerContainer.scheduleWorker('OdtToMarkdown', {
localFile,
realFileName: this.realFileName,
picturesDirAbsolute,
odtPath,
destinationPath,
rewriteRules,
Expand Down
1 change: 1 addition & 0 deletions src/odt/MarkdownNodes.ts
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ export type TAG = 'BODY' | 'HR/' | 'B' | 'I' | 'BI' | 'BLANK/' | // | '/B' | '/I
'TOC' | 'SVG/' | 'IMG/' | // | '/TOC'
'EMB_SVG' | 'EMB_SVG_G' | 'EMB_SVG_P/' | 'EMB_SVG_TEXT' | // | '/EMB_SVG' | '/EMB_SVG_G' | '/EMB_SVG_TEXT'
'EMB_SVG_TSPAN' | // | '/EMB_SVG_TSPAN'
'MATHML' |
'CHANGE_START' | 'CHANGE_END' | 'RAW_MODE/' | 'HTML_MODE/' | 'MD_MODE/' | 'MACRO_MODE/' | 'COMMENT';

export interface TagPayload {
Expand Down
19 changes: 19 additions & 0 deletions src/odt/OdtProcessor.ts
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,25 @@ export class OdtProcessor {
fs.writeFileSync(path.join(assetsDirectory, this.fileNameMap[fileName]), buffer);
}

for (const relativePath in this.files) {
if (!relativePath.endsWith('/content.xml')) {
continue;
}

const fileName = relativePath.replace('/content.xml', '.xml').replace(/\s/g, '_');
if (fileName.indexOf('/') === -1) {
const entry = this.files[relativePath];
const buffer = await entry.async('nodebuffer');

this.fileNameMap[fileName] = fileName;
written.push(this.fileNameMap[fileName]);
if (!fs.existsSync(assetsDirectory)) {
fs.mkdirSync(assetsDirectory, { recursive: true });
}
fs.writeFileSync(path.join(assetsDirectory, this.fileNameMap[fileName]), buffer);
}
}

if (fs.existsSync(assetsDirectory)) {
const files = fs.readdirSync(assetsDirectory);
for (const file of files) {
Expand Down
29 changes: 26 additions & 3 deletions src/odt/OdtToMarkdown.ts
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
import path from 'path';
import fs from 'fs';
import { MathMLToLaTeX } from 'mathml-to-latex';

import {
DocumentContent, DocumentStyles, DrawCustomShape, DrawEnhancedGeometry,
DrawFrame, DrawG,
Expand All @@ -17,7 +21,7 @@ import {
TextSpan
} from './LibreOffice.ts';
import {urlToFolderId} from '../utils/idParsers.ts';
import {MarkdownNodes, MarkdownTagNode} from './MarkdownNodes.ts';
import {MarkdownNodes, MarkdownTagNode, MarkdownTextNode} from './MarkdownNodes.ts';

Check notice

Code scanning / CodeQL

Unused variable, import, function or class Note

Unused import MarkdownTextNode.
import {inchesToPixels, inchesToSpaces, spaces} from './utils.ts';
import {extractPath} from './extractPath.ts';
import {mergeDeep} from './mergeDeep.ts';
Expand Down Expand Up @@ -64,6 +68,7 @@ export class OdtToMarkdown {
public readonly links: Set<string> = new Set<string>();
private readonly chunks: MarkdownNodes = new MarkdownNodes();
private picturesDir = '';
private picturesDirAbsolute = '';
private rewriteRules: RewriteRule[] = [];

constructor(private document: DocumentContent, private documentStyles: DocumentStyles, private fileNameMap: FileNameMap = {}) {
Expand Down Expand Up @@ -378,7 +383,24 @@ export class OdtToMarkdown {
}

async drawFrameToText(currentTagNode: MarkdownTagNode, drawFrame: DrawFrame) {
if (drawFrame.object) { // TODO: MathML
if (drawFrame.object) {
if (!this.picturesDir) {
return;
}
if (drawFrame.object.href) {
const filePath = path.join(this.picturesDirAbsolute, drawFrame.object.href.replace(/\s/g, '_') + '.xml');
try {
const mathMl = new TextDecoder().decode(fs.readFileSync(filePath));
if (mathMl.indexOf('<math ') > -1) {
const node = this.chunks.createNode('MATHML');
const latex = MathMLToLaTeX.convert(mathMl);
this.chunks.appendText(node, latex);
this.chunks.append(currentTagNode, node);
}
} catch (err) {
console.warn(err);
}
}
return;
}
if (drawFrame.image) {
Expand Down Expand Up @@ -666,8 +688,9 @@ export class OdtToMarkdown {
}
}

setPicturesDir(picturesDir: string) {
/**
 * Remembers the two directory values used by this converter for document assets.
 *
 * @param picturesDir - stored as `this.picturesDir`
 * @param picturesDirAbsolute - stored as `this.picturesDirAbsolute`; when it is omitted
 *   (or otherwise falsy, e.g. an empty string) `picturesDir` is stored there instead
 */
setPicturesDir(picturesDir: string, picturesDirAbsolute?: string) {
  // Falsy check (not just undefined): an empty string also falls back to picturesDir.
  const absoluteDir = picturesDirAbsolute ? picturesDirAbsolute : picturesDir;
  this.picturesDir = picturesDir;
  this.picturesDirAbsolute = absoluteDir;
}

setRewriteRules(rewriteRules: RewriteRule[]) {
Expand Down
15 changes: 8 additions & 7 deletions src/odt/executeOdtToMarkdown.ts
Original file line number Diff line number Diff line change
@@ -1,11 +1,12 @@
import {OdtToMarkdown} from './OdtToMarkdown';
import {UnMarshaller} from './UnMarshaller';
import {DocumentStyles, LIBREOFFICE_CLASSES} from './LibreOffice';
import {generateDocumentFrontMatter} from '../containers/transform/frontmatters/generateDocumentFrontMatter';
import {OdtProcessor} from './OdtProcessor';
import fs from 'fs';
import path from 'path';

import {OdtToMarkdown} from './OdtToMarkdown.ts';
import {UnMarshaller} from './UnMarshaller.ts';
import {DocumentStyles, LIBREOFFICE_CLASSES} from './LibreOffice.ts';
import {generateDocumentFrontMatter} from '../containers/transform/frontmatters/generateDocumentFrontMatter.ts';
import {OdtProcessor} from './OdtProcessor.ts';

export async function executeOdtToMarkdown(workerData) {
const processor = new OdtProcessor(workerData.odtPath, true);
await processor.load();
Expand All @@ -26,9 +27,9 @@ export async function executeOdtToMarkdown(workerData) {
const converter = new OdtToMarkdown(document, styles, fileNameMap);
converter.setRewriteRules(workerData.rewriteRules);
if (workerData.realFileName === '_index.md') {
converter.setPicturesDir('./' + workerData.realFileName.replace(/.md$/, '.assets/'));
converter.setPicturesDir('./' + workerData.realFileName.replace(/.md$/, '.assets/'), workerData.picturesDirAbsolute);
} else {
converter.setPicturesDir('../' + workerData.realFileName.replace(/.md$/, '.assets/'));
converter.setPicturesDir('../' + workerData.realFileName.replace(/.md$/, '.assets/'), workerData.picturesDirAbsolute);
}
const markdown = await converter.convert();
const links = Array.from(converter.links);
Expand Down
27 changes: 27 additions & 0 deletions src/odt/postprocess/convertMathMl.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
import {MarkdownNodes} from '../MarkdownNodes.js';
import {walkRecursiveSync} from '../markdownNodesUtils.js';

/**
 * Post-processing pass that rewrites every `MATHML` tag node reachable from the document body.
 *
 * - If the node has a text node as its immediate previous or next sibling, it is replaced
 *   by a single text node: the node's own text children joined with newlines and wrapped
 *   in `$$` … `$$`.
 * - Otherwise the node is converted in place into a `PRE` tag with `payload.lang = 'math'`
 *   and an `EMPTY_LINE/` node is inserted directly after it.
 *
 * @param markdownChunks - node tree to modify; it is mutated in place
 */
export function convertMathMl(markdownChunks: MarkdownNodes) {
  walkRecursiveSync(markdownChunks.body, (chunk, ctx: { nodeIdx: number }) => {
    if (!chunk.isTag || chunk.tag !== 'MATHML') {
      return;
    }

    const siblings = chunk.parent.children;
    const position = ctx.nodeIdx;
    const before = siblings[position - 1];
    const after = siblings[position + 1];
    const hasTextNeighbour = before?.isTag === false || after?.isTag === false;

    if (!hasTextNeighbour) {
      // No adjacent text: becomes a `math` PRE block followed by an empty line.
      chunk.tag = 'PRE';
      chunk.payload.lang = 'math';
      const emptyLine = markdownChunks.createNode('EMPTY_LINE/');
      siblings.splice(position + 1, 0, emptyLine);
      return;
    }

    // Adjacent text: collapse the formula into one `$$…$$` text node.
    const parts: string[] = [];
    for (const child of chunk.children) {
      if (child.isTag === false) {
        parts.push(child['text']);
      }
    }
    siblings.splice(position, 1, {
      isTag: false,
      text: '$$' + parts.join('\n') + '$$'
    });
  });
}
4 changes: 4 additions & 0 deletions src/odt/postprocess/mergeParagraphs.ts
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,10 @@ export function mergeParagraphs(markdownChunks: MarkdownNodes) {
}

if (chunk.isTag && ['P', 'PRE'].includes(chunk.tag)) {
if (chunk.tag === 'PRE' && chunk.payload?.lang === 'math') {
return;
}

const nextChunk = chunk.parent.children[ctx.nodeIdx + 1];
if (nextChunk?.isTag && nextChunk.tag === chunk.tag) {
const children = nextChunk.children.splice(0, nextChunk.children.length);
Expand Down
2 changes: 2 additions & 0 deletions src/odt/postprocess/postProcess.ts
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ import {removeEmptyTags} from './removeEmptyTags.ts';
import {removeExcessiveLines} from './removeExcessiveLines.ts';
import {applyRewriteRules} from './applyRewriteRules.ts';
import {RewriteRule} from '../applyRewriteRule.ts';
import {convertMathMl} from './convertMathMl.js';

export async function postProcess(chunks: MarkdownNodes, rewriteRules: RewriteRule[]) {
convertToc(chunks);
Expand All @@ -31,6 +32,7 @@ export async function postProcess(chunks: MarkdownNodes, rewriteRules: RewriteRu
fixSpacesInsideInlineFormatting(chunks);
await fixBoldItalic(chunks);
hideSuggestedChanges(chunks);
convertMathMl(chunks);

trimParagraphs(chunks);
addEmptyLinesAfterParas(chunks);
Expand Down
14 changes: 11 additions & 3 deletions test/odt_md/MarkDownTransform.test.ts
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import {assert} from 'chai';
import fs from 'fs';

import {compareTexts} from '../utils.ts';
import {compareTexts, createTmpDir} from '../utils.ts';
import {OdtToMarkdown} from '../../src/odt/OdtToMarkdown.ts';
import {DocumentContent, DocumentStyles, LIBREOFFICE_CLASSES} from '../../src/odt/LibreOffice.ts';
import {UnMarshaller} from '../../src/odt/UnMarshaller.ts';
Expand Down Expand Up @@ -137,13 +137,20 @@ async function transformOdt(id: string) {
const odtPath = folder.getRealPath() + '/' + id + '.odt';
const processor = new OdtProcessor(odtPath);
await processor.load();
const tmpDir: string = createTmpDir();
await processor.unzipAssets(tmpDir, id + '.md');
if (!processor.getContentXml()) {
throw Error('No odt processed');
}
return transform(processor.getContentXml(), processor.getStylesXml());
try {
const markdown = await transform(processor.getContentXml(), processor.getStylesXml(), tmpDir + `/${id}.assets`);
return markdown.replaceAll(tmpDir + `/${id}.assets`, '');
} finally {
fs.rmSync(tmpDir, { recursive: true });
}
}

async function transform(contentXml: string, stylesXml: string) {
async function transform(contentXml: string, stylesXml: string, assetsDir: string) {
const parser = new UnMarshaller(LIBREOFFICE_CLASSES, 'DocumentContent');
const document: DocumentContent = parser.unmarshal(contentXml);
if (!document) {
Expand All @@ -155,5 +162,6 @@ async function transform(contentXml: string, stylesXml: string) {
throw Error('No styles unmarshalled');
}
const converter = new OdtToMarkdown(document, styles);
converter.setPicturesDir(assetsDir);
return await converter.convert();
}
18 changes: 15 additions & 3 deletions test/odt_md/example-document.md
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,9 @@ After subtable</td>

## Image

![](1000000000000200000001804F9AAE46CD6D0DF2.gif)
![](1000000000000640000001CF60FB0243CA95EC14.jpg)

![](10000000000003F0000003F092F85671239C65F9.jpg)

## Preformatted Text

Expand All @@ -80,7 +82,7 @@ Code blocks are part of the Markdown spec, but syntax highlighting isn't. Howeve

### Typescript / Javascript

{{% markdown %}}
{{markdown}}
```javascript

class MyClass {
Expand All @@ -98,7 +100,7 @@ module MyModule {
declare magicNumber number;
myArray.forEach(() => { }); // fat arrow syntax
```
{{% /markdown %}}
{{/markdown}}

## Video

Expand Down Expand Up @@ -136,10 +138,20 @@ Some **bold** **_boldanditalic_*** italic* text

### Using the actual equation object

```math
E = m c^{2}
```

```math
e^{i \pi} - 1 = 0
```

### Text equivalent

*E=mc**2*

Inline $$E = m c^{2}$$ math

## Footnotes

1Footnotes should display as a footnote, and should always display at the very end of the document (page)**?** This is some sample text with a footnote.
Expand Down
Binary file modified test/odt_md/example-document.odt
Binary file not shown.
4 changes: 2 additions & 2 deletions test/utils.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,8 @@ import path from 'path';
import {createPatch} from 'diff';
import {ansi_colors} from '../src/utils/logger/colors.ts';

export function createTmpDir() {
return fs.mkdtempSync(path.join(os.tmpdir(), 'wg-'));
/**
 * Creates a new, uniquely named directory inside the operating system's temp directory.
 *
 * @param prefix - leading part of the new directory's name (defaults to `'wg-'`);
 *   `fs.mkdtempSync` appends random characters to make the name unique
 * @returns the path of the directory that was created
 */
export function createTmpDir(prefix = 'wg-') {
  const template = path.join(os.tmpdir(), prefix);
  return fs.mkdtempSync(template);
}

// eslint-disable-next-line @typescript-eslint/no-unused-vars
Expand Down

0 comments on commit c50a54a

Please sign in to comment.