strictdoc-project · stanislaw · Sep 28, 2025 · Sep 28, 2025
diff --git a/docs/strictdoc_11_developer_guide.sdoc b/docs/strictdoc_11_developer_guide.sdoc
@@ -227,13 +227,15 @@ STATEMENT: >>>
 
 .. code-block:: python
 
-    for a_, b_ in foo:
-        # use a_, b_ within the loop.
+    for foo_, bar_ in baz:
+        # use foo_, bar_ within the loop.
 
 - Function arguments with default values shall be avoided. This convention improves the visibility of function interfaces at the cost of increased verbosity, which is a price that StrictDoc development is willing to pay for maintaining the software long-term. Explicitly passing all function parameters is especially useful when large code refactorings are made.
 
 - StrictDoc has been making a gradual shift towards a stronger type system. Although type annotations haven't been added everywhere in the codebase, it is preferred to include them for all new code that is written.
 
+- For opening files for reading, use the helpers ``file_open_read_utf8`` (text mode) and ``file_open_read_bytes`` (binary mode). These helpers open the file as usual but also skip a leading UTF-8 BOM (byte order mark), which is added by some Windows tools.
+
 - If a contribution includes changes in StrictDoc's code, at least the
   integration-level tests should be added to the ``tests/integration``. If the
   contributed code needs a fine-grained control over the added behavior, adding

diff --git a/strictdoc/backend/sdoc/grammar_reader.py b/strictdoc/backend/sdoc/grammar_reader.py
@@ -14,6 +14,7 @@
 from strictdoc.backend.sdoc.pickle_cache import PickleCache
 from strictdoc.core.project_config import ProjectConfig
 from strictdoc.helpers.cast import assert_optional_cast
+from strictdoc.helpers.file_system import file_open_read_utf8
 from strictdoc.helpers.textx import (
     drop_textx_meta,
     preserve_source_location_data,
@@ -63,7 +64,7 @@ def read_from_file(
         if unpickled_content is not None:
             return unpickled_content
 
-        with open(file_path, encoding="utf-8-sig") as file:
+        with file_open_read_utf8(file_path) as file:
             grammar_content = file.read()
 
         try:

diff --git a/strictdoc/backend/sdoc/reader.py b/strictdoc/backend/sdoc/reader.py
@@ -20,6 +20,7 @@
 from strictdoc.core.project_config import ProjectConfig
 from strictdoc.helpers.cast import assert_cast
 from strictdoc.helpers.exception import StrictDocException
+from strictdoc.helpers.file_system import file_open_read_utf8
 from strictdoc.helpers.string import strip_bom
 from strictdoc.helpers.textx import drop_textx_meta
 
@@ -103,9 +104,7 @@ def read_from_file(
         if unpickled_content:
             return assert_cast(unpickled_content, SDocDocument)
 
-        # utf-8-sig is important here because it strips the UTF BOM markers
-        # from the beginning of source files created on Windows.
-        with open(file_path, encoding="utf-8-sig") as file:
+        with file_open_read_utf8(file_path) as file:
             sdoc_content = file.read()
 
         sdoc, parse_context = self.read_with_parse_context(

diff --git a/strictdoc/backend/sdoc_source_code/coverage_reports/gcov.py b/strictdoc/backend/sdoc_source_code/coverage_reports/gcov.py
@@ -20,6 +20,7 @@
 from strictdoc.backend.sdoc.models.section import SDocSection
 from strictdoc.core.file_tree import File
 from strictdoc.core.project_config import ProjectConfig
+from strictdoc.helpers.file_system import file_open_read_utf8
 
 
 @dataclass
@@ -41,7 +42,7 @@ def read_from_file(
         doc_file: File,
         project_config: ProjectConfig,
     ) -> SDocDocument:
-        with open(doc_file.full_path, encoding="UTF-8") as file:
+        with file_open_read_utf8(doc_file.full_path) as file:
             content = file.read()
         return cls.read_from_string(content, doc_file, project_config)
 

diff --git a/strictdoc/backend/sdoc_source_code/reader.py b/strictdoc/backend/sdoc_source_code/reader.py
@@ -26,6 +26,7 @@
     validate_marker_uids,
 )
 from strictdoc.helpers.file_stats import SourceFileStats
+from strictdoc.helpers.file_system import file_open_read_utf8
 from strictdoc.helpers.textx import drop_textx_meta
 
 
@@ -292,7 +293,7 @@ def read(
         return source_file_traceability_info
 
     def read_from_file(self, file_path: str) -> SourceFileTraceabilityInfo:
-        with open(file_path, encoding="utf-8-sig") as file:
+        with file_open_read_utf8(file_path) as file:
             sdoc_content = file.read()
             sdoc = self.read(sdoc_content, file_path=file_path)
             return sdoc
diff --git a/strictdoc/backend/sdoc_source_code/reader_c.py b/strictdoc/backend/sdoc_source_code/reader_c.py
@@ -35,6 +35,7 @@
 )
 from strictdoc.helpers.cast import assert_cast
 from strictdoc.helpers.file_stats import SourceFileStats
+from strictdoc.helpers.file_system import file_open_read_bytes
 
 
 class SourceFileTraceabilityReader_C:
@@ -374,7 +375,7 @@ def read(
         return traceability_info
 
     def read_from_file(self, file_path: str) -> SourceFileTraceabilityInfo:
-        with open(file_path, "rb") as file:
+        with file_open_read_bytes(file_path) as file:
             sdoc_content = file.read()
             sdoc = self.read(sdoc_content, file_path=file_path)
             return sdoc

diff --git a/strictdoc/backend/sdoc_source_code/reader_python.py b/strictdoc/backend/sdoc_source_code/reader_python.py
@@ -30,6 +30,7 @@
 )
 from strictdoc.backend.sdoc_source_code.tree_sitter_helpers import traverse_tree
 from strictdoc.helpers.file_stats import SourceFileStats
+from strictdoc.helpers.file_system import file_open_read_bytes
 
 
 class SourceFileTraceabilityReader_Python:
@@ -252,7 +253,7 @@ def read(
         return traceability_info
 
     def read_from_file(self, file_path: str) -> SourceFileTraceabilityInfo:
-        with open(file_path, "rb") as file:
+        with file_open_read_bytes(file_path) as file:
             sdoc_content = file.read()
             sdoc = self.read(sdoc_content, file_path=file_path)
             return sdoc

diff --git a/strictdoc/backend/sdoc_source_code/reader_robot.py b/strictdoc/backend/sdoc_source_code/reader_robot.py
@@ -37,6 +37,7 @@
     source_file_traceability_info_processor,
 )
 from strictdoc.helpers.file_stats import SourceFileStats
+from strictdoc.helpers.file_system import file_open_read_utf8
 
 
 class SdocRelationVisitor(ModelVisitor):  # type: ignore[misc]
@@ -184,7 +185,7 @@ def read(
         return traceability_info
 
     def read_from_file(self, file_path: str) -> SourceFileTraceabilityInfo:
-        with open(file_path) as file:
+        with file_open_read_utf8(file_path) as file:
             sdoc_content = file.read()
             sdoc = self.read(sdoc_content, file_path=file_path)
             return sdoc
diff --git a/strictdoc/backend/sdoc_source_code/test_reports/junit_xml_reader.py b/strictdoc/backend/sdoc_source_code/test_reports/junit_xml_reader.py
@@ -22,6 +22,7 @@
 from strictdoc.core.file_tree import File
 from strictdoc.core.project_config import ProjectConfig
 from strictdoc.helpers.cast import assert_cast, assert_optional_cast
+from strictdoc.helpers.file_system import file_open_read_utf8
 from strictdoc.helpers.paths import path_to_posix_path
 
 
@@ -51,7 +52,7 @@ def read_from_file(
         doc_file: File,
         project_config: ProjectConfig,
     ) -> SDocDocument:
-        with open(doc_file.full_path, encoding="UTF-8") as file:
+        with file_open_read_utf8(doc_file.full_path) as file:
             content = file.read()
         return cls.read_from_string(content, doc_file, project_config)
 

diff --git a/strictdoc/export/html/generators/source_file_view_generator.py b/strictdoc/export/html/generators/source_file_view_generator.py
@@ -44,6 +44,7 @@
 from strictdoc.export.html.renderers.link_renderer import LinkRenderer
 from strictdoc.export.html.renderers.markup_renderer import MarkupRenderer
 from strictdoc.helpers.cast import assert_cast
+from strictdoc.helpers.file_system import file_open_read_utf8
 from strictdoc.helpers.timing import measure_performance
 
 
@@ -89,7 +90,7 @@ def export(
         traceability_index: TraceabilityIndex,
         html_templates: HTMLTemplates,
     ) -> Markup:
-        with open(source_file.full_path, encoding="utf-8") as opened_file:
+        with file_open_read_utf8(source_file.full_path) as opened_file:
             source_file_lines = opened_file.readlines()
 
         pygmented_source_file_lines: List[SourceLineEntry] = []

diff --git a/strictdoc/export/html/generators/view_objects/document_screen_view_object.py b/strictdoc/export/html/generators/view_objects/document_screen_view_object.py
@@ -28,6 +28,7 @@
 from strictdoc.export.html.renderers.link_renderer import LinkRenderer
 from strictdoc.export.html.renderers.markup_renderer import MarkupRenderer
 from strictdoc.helpers.cast import assert_cast
+from strictdoc.helpers.file_system import file_open_read_utf8
 from strictdoc.helpers.git_client import GitClient
 from strictdoc.helpers.string import interpolate_at_pattern_lazy
 from strictdoc.server.helpers.turbo import render_turbo_stream
@@ -72,7 +73,7 @@ def __init__(
 
         self.custom_html2pdf_template: Optional[Template] = None
         if project_config.html2pdf_template is not None:
-            with open(project_config.html2pdf_template) as f_:
+            with file_open_read_utf8(project_config.html2pdf_template) as f_:
                 self.custom_html2pdf_template = Template(f_.read())
 
     def has_included_document(self) -> bool:

diff --git a/strictdoc/export/rst/rst_to_html_fragment_writer.py b/strictdoc/export/rst/rst_to_html_fragment_writer.py
@@ -24,6 +24,7 @@
 from strictdoc.export.rst.directives.wildcard_enhanced_image import (
     WildcardEnhancedImage,
 )
+from strictdoc.helpers.file_system import file_open_read_bytes
 
 
 class RstToHtmlFragmentWriter:
@@ -97,8 +98,8 @@ def write(self, rst_fragment: str, use_cache: bool = True) -> Markup:
         )
         if use_cache and os.path.isdir(path_to_rst_fragment_bucket_dir):
             if os.path.isfile(path_to_cached_fragment):
-                with open(
-                    path_to_cached_fragment, "rb"
+                with file_open_read_bytes(
+                    path_to_cached_fragment
                 ) as cached_fragment_file_:
                     return Markup(cached_fragment_file_.read().decode("UTF-8"))
         else:

diff --git a/strictdoc/export/spdx/spdx_generator.py b/strictdoc/export/spdx/spdx_generator.py
@@ -59,6 +59,7 @@
 from strictdoc.export.spdx.spdx_sdoc_container import SPDXSDocContainer
 from strictdoc.export.spdx.spdx_to_sdoc_converter import SPDXToSDocConverter
 from strictdoc.helpers.cast import assert_cast
+from strictdoc.helpers.file_system import file_open_read_bytes
 from strictdoc.helpers.sha256 import get_sha256
 
 RELATION_ID_HOW_TO = "SPDXRef-Relationship-How-to-form-ID?"
@@ -254,7 +255,9 @@ def export_tree(
 
         for document_ in traceability_index.document_tree.document_list:
             assert document_.meta is not None
-            with open(document_.meta.input_doc_full_path, "rb") as input_file_:
+            with file_open_read_bytes(
+                document_.meta.input_doc_full_path
+            ) as input_file_:
                 document_bytes = input_file_.read()
 
             #
@@ -340,7 +343,9 @@ def export_tree(
                         if node_link_path_ in lookup_file_name_to_spdx_file:
                             continue
 
-                        with open(node_link_path_, "rb") as node_link_file_:
+                        with file_open_read_bytes(
+                            node_link_path_
+                        ) as node_link_file_:
                             file_bytes = node_link_file_.read()
 
                         source_spdx_file = (

diff --git a/strictdoc/helpers/file_system.py b/strictdoc/helpers/file_system.py
@@ -1,9 +1,14 @@
+import codecs
 import os
 import platform
 import shutil
 import tempfile
+from contextlib import contextmanager
+from io import BufferedReader, TextIOWrapper
 from pathlib import Path
-from typing import Optional
+from typing import Iterator, Optional
+
+UTF8_BOM_BYTES = codecs.BOM_UTF8  # b'\xef\xbb\xbf'
 
 
 def sync_dir(src_dir: str, dst_dir: str, message: Optional[str]) -> None:
@@ -73,3 +78,21 @@ def get_portable_temp_dir() -> Path:
     return Path(
         "/tmp" if platform.system() == "Darwin" else tempfile.gettempdir()
     )
+
+
+@contextmanager
+def file_open_read_utf8(file_path: str) -> Iterator[TextIOWrapper]:
+    # Context manager that opens file_path in text mode for reading and
+    # yields the open file object. The file is closed when the caller's
+    # with-block exits.
+    #
+    # utf-8-sig is important here because it strips the UTF BOM markers
+    # from the beginning of source files created by some Windows tools.
+    # Files that do not start with a BOM are decoded as regular UTF-8.
+    with open(file_path, encoding="utf-8-sig") as file_:
+        yield file_
+
+
+@contextmanager
+def file_open_read_bytes(file_path: str) -> Iterator[BufferedReader]:
+    # Context manager that opens file_path in binary mode for reading and
+    # yields the file object positioned just past a leading UTF-8 BOM if the
+    # file starts with one, or at the very beginning otherwise. The file is
+    # closed when the caller's with-block exits.
+    with open(file_path, "rb") as raw_file:
+        # Read exactly as many bytes as the BOM is long to compare with it.
+        start = raw_file.read(len(UTF8_BOM_BYTES))
+        if start != UTF8_BOM_BYTES:
+            # No BOM (this includes files shorter than the BOM)
+            # -> rewind to beginning.
+            raw_file.seek(0)
+        # NOTE: Only the initial position skips the BOM. If a caller seeks
+        # back to offset 0, the BOM bytes will be read again.
+        yield raw_file
diff --git a/strictdoc/helpers/md5.py b/strictdoc/helpers/md5.py
@@ -1,13 +1,15 @@
 import hashlib
 
+from strictdoc.helpers.file_system import file_open_read_bytes
+
 
 def get_md5(obj: str) -> str:
     return hashlib.md5(obj.encode("utf-8")).hexdigest()
 
 
 def get_file_md5(path: str, buf_size: int = 65536) -> str:
     m = hashlib.md5()
-    with open(path, "rb") as f:
+    with file_open_read_bytes(path) as f:
         b = f.read(buf_size)
         while len(b) > 0:
             m.update(b)

diff --git a/tests/unit/strictdoc/helpers/test_file_open_bytes.py b/tests/unit/strictdoc/helpers/test_file_open_bytes.py
@@ -0,0 +1,33 @@
+import os
+import tempfile
+from io import BufferedReader
+
+import pytest
+
+from strictdoc.helpers.file_system import UTF8_BOM_BYTES, file_open_read_bytes
+
+
+@pytest.mark.parametrize(
+    "initial_bytes,expected_bytes",
+    [
+        # Empty file and files shorter than the 3-byte BOM.
+        (b"", b""),
+        (b"A", b"A"),
+        (b"AB", b"AB"),
+        # A leading BOM is skipped, the rest is returned unchanged.
+        (UTF8_BOM_BYTES + b"Hello", b"Hello"),
+        # Content without a BOM is returned as is.
+        (b"Hello", b"Hello"),
+    ],
+)
+def test_file_open_read_bytes(initial_bytes, expected_bytes):
+    # Checks that file_open_read_bytes yields a BufferedReader whose read()
+    # returns the file content without a leading UTF-8 BOM.
+    #
+    # delete=False keeps the file on disk after close(), so that it can be
+    # reopened by its path below. It is removed manually in the finally
+    # block.
+    tmp_file = tempfile.NamedTemporaryFile(mode="wb", delete=False)
+    tmp_path = tmp_file.name
+
+    try:
+        tmp_file.write(initial_bytes)
+        tmp_file.close()
+
+        with file_open_read_bytes(tmp_path) as f:
+            assert isinstance(f, BufferedReader)
+            content = f.read()
+            assert content == expected_bytes
+    finally:
+        os.remove(tmp_path)
diff --git a/tests/unit/strictdoc/helpers/test_file_open_utf8.py b/tests/unit/strictdoc/helpers/test_file_open_utf8.py
@@ -0,0 +1,36 @@
+import os
+import tempfile
+from io import TextIOWrapper
+
+import pytest
+
+from strictdoc.helpers.file_system import UTF8_BOM_BYTES, file_open_read_utf8
+
+UTF8_BOM_STR = UTF8_BOM_BYTES.decode("utf-8")
+
+
+@pytest.mark.parametrize(
+    "initial_text,expected_text",
+    [
+        # Empty file and a file with a single character.
+        ("", ""),
+        ("A", "A"),
+        # A leading BOM is stripped from the decoded text.
+        (UTF8_BOM_STR + "Hello", "Hello"),
+        # Text without a BOM is returned as is.
+        ("Hello", "Hello"),
+    ],
+)
+def test_file_open_read_utf8(initial_text, expected_text):
+    # Checks that file_open_read_utf8 yields a TextIOWrapper whose read()
+    # returns the file text without a leading UTF-8 BOM.
+    #
+    # The file is written with the plain "utf-8" codec, so a BOM is present
+    # on disk only when the test input contains it explicitly.
+    # delete=False keeps the file on disk after close(), so that it can be
+    # reopened by its path below. It is removed manually in the finally
+    # block.
+    tmp_file = tempfile.NamedTemporaryFile(
+        mode="w", encoding="utf-8", delete=False
+    )
+    tmp_path = tmp_file.name
+
+    try:
+        tmp_file.write(initial_text)
+        tmp_file.close()
+
+        with file_open_read_utf8(tmp_path) as f:
+            assert isinstance(f, TextIOWrapper)
+            content = f.read()
+            assert content == expected_text
+    finally:
+        os.remove(tmp_path)