From f21fede1d46b911e0d39653e7453546a1ba12a90 Mon Sep 17 00:00:00 2001
From: Yufeng He <40085740+he-yufeng@users.noreply.github.com>
Date: Sat, 30 May 2026 18:27:27 +0800
Subject: [PATCH] fix: preserve malformed docx math content
---
.../converter_utils/docx/math/omml.py | 4 ++-
.../converter_utils/docx/pre_process.py | 2 ++
packages/markitdown/tests/test_docx_math.py | 26 +++++++++++++++++++
3 files changed, 31 insertions(+), 1 deletion(-)
create mode 100644 packages/markitdown/tests/test_docx_math.py
diff --git a/packages/markitdown/src/markitdown/converter_utils/docx/math/omml.py b/packages/markitdown/src/markitdown/converter_utils/docx/math/omml.py
index dfa734cdc..73d93c5c8 100644
--- a/packages/markitdown/src/markitdown/converter_utils/docx/math/omml.py
+++ b/packages/markitdown/src/markitdown/converter_utils/docx/math/omml.py
@@ -272,7 +272,9 @@ def do_fname(self, elm):
if FUNC.get(t):
latex_chars.append(FUNC[t])
else:
- raise NotImplementedError("Not support func %s" % t)
+ latex_chars.append(
+ "\\operatorname{%s}(%s)" % (escape_latex(t), FUNC_PLACE)
+ )
else:
latex_chars.append(t)
t = BLANK.join(latex_chars)
diff --git a/packages/markitdown/src/markitdown/converter_utils/docx/pre_process.py b/packages/markitdown/src/markitdown/converter_utils/docx/pre_process.py
index d6fa8db69..05e00a610 100644
--- a/packages/markitdown/src/markitdown/converter_utils/docx/pre_process.py
+++ b/packages/markitdown/src/markitdown/converter_utils/docx/pre_process.py
@@ -44,6 +44,8 @@ def _convert_omath_to_latex(tag: Tag) -> str:
math_root = ET.fromstring(MATH_ROOT_TEMPLATE.format(str(tag)))
# Find the 'oMath' element within the XML document
math_element = math_root.find(OMML_NS + "oMath")
+ if math_element is None:
+ return tag.get_text("", strip=True)
# Convert the 'oMath' element to LaTeX using the oMath2Latex function
latex = oMath2Latex(math_element).latex
return latex
diff --git a/packages/markitdown/tests/test_docx_math.py b/packages/markitdown/tests/test_docx_math.py
new file mode 100644
index 000000000..37bcbf17f
--- /dev/null
+++ b/packages/markitdown/tests/test_docx_math.py
@@ -0,0 +1,26 @@
+from bs4 import BeautifulSoup
+from defusedxml import ElementTree as ET
+
+from markitdown.converter_utils.docx.math.omml import oMath2Latex
+from markitdown.converter_utils.docx.pre_process import _convert_omath_to_latex
+
+
+def test_convert_omath_without_namespaced_child_returns_text() -> None:
+ soup = BeautifulSoup(b"<oMath>x</oMath>", "xml")
+
+ assert _convert_omath_to_latex(soup.find("oMath")) == "x"
+
+
+def test_unknown_omml_function_uses_operatorname() -> None:
+ root = ET.fromstring(
+ """
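+        <m:oMath xmlns:m="http://schemas.openxmlformats.org/officeDocument/2006/math">
+          <m:func>
+            <m:fName><m:r><m:t>log</m:t></m:r></m:fName>
+            <m:e><m:r><m:t>x</m:t></m:r></m:e>
+          </m:func>
+        </m:oMath>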
+ """
+ )
+
+ assert oMath2Latex(root).latex == r"\operatorname{log}(x)"