diff --git a/packages/markitdown/src/markitdown/__main__.py b/packages/markitdown/src/markitdown/__main__.py index 6085ad6bb..39eaab767 100644 --- a/packages/markitdown/src/markitdown/__main__.py +++ b/packages/markitdown/src/markitdown/__main__.py @@ -206,12 +206,19 @@ def _handle_output(args, result: DocumentConverterResult): with open(args.output, "w", encoding="utf-8") as f: f.write(result.markdown) else: - # Handle stdout encoding errors more gracefully - print( - result.markdown.encode(sys.stdout.encoding, errors="replace").decode( - sys.stdout.encoding + # Write UTF-8 directly to the underlying binary buffer when available. + # This avoids UnicodeEncodeError on systems whose locale encoding + # (e.g. GBK on Chinese Windows) cannot represent all Unicode characters + # in the markdown output, and also handles the case where + # sys.stdout.encoding is None (e.g. when stdout is a raw pipe). + if hasattr(sys.stdout, "buffer"): + sys.stdout.buffer.write(result.markdown.encode("utf-8")) + sys.stdout.buffer.flush() + else: + encoding = sys.stdout.encoding or "utf-8" + print( + result.markdown.encode(encoding, errors="replace").decode(encoding) ) - ) def _exit_with_error(message: str):