Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 4 additions & 2 deletions docs/strictdoc_11_developer_guide.sdoc
Original file line number Diff line number Diff line change
Expand Up @@ -227,13 +227,15 @@ STATEMENT: >>>

.. code-block:: python

for a_, b_ in foo:
# use a_, b_ within the loop.
for foo_, bar_ in baz:
# use foo_, bar_ within the loop.

- Function arguments with default values shall be avoided. This convention improves the visibility of function interfaces at the cost of increased verbosity, which is a price that StrictDoc development is willing to pay in order to keep the software maintainable in the long term. Indicating all function parameters explicitly is especially useful when large code refactorings are made.

- StrictDoc has been making a gradual shift towards a stronger type system. Although type annotations haven't been added everywhere in the codebase, it is preferred to include them for all new code that is written.

- For opening files, use the helpers ``file_open_read_utf8`` and ``file_open_read_bytes``. These helpers perform normal file opening but also strip the UTF-8 BOM character, which is added by some Windows tools.

- If a contribution includes changes in StrictDoc's code, at least the
integration-level tests should be added to the ``tests/integration``. If the
contributed code needs a fine-grained control over the added behavior, adding
Expand Down
3 changes: 2 additions & 1 deletion strictdoc/backend/sdoc/grammar_reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
from strictdoc.backend.sdoc.pickle_cache import PickleCache
from strictdoc.core.project_config import ProjectConfig
from strictdoc.helpers.cast import assert_optional_cast
from strictdoc.helpers.file_system import file_open_read_utf8
from strictdoc.helpers.textx import (
drop_textx_meta,
preserve_source_location_data,
Expand Down Expand Up @@ -63,7 +64,7 @@ def read_from_file(
if unpickled_content is not None:
return unpickled_content

with open(file_path, encoding="utf-8-sig") as file:
with file_open_read_utf8(file_path) as file:
grammar_content = file.read()

try:
Expand Down
5 changes: 2 additions & 3 deletions strictdoc/backend/sdoc/reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
from strictdoc.core.project_config import ProjectConfig
from strictdoc.helpers.cast import assert_cast
from strictdoc.helpers.exception import StrictDocException
from strictdoc.helpers.file_system import file_open_read_utf8
from strictdoc.helpers.string import strip_bom
from strictdoc.helpers.textx import drop_textx_meta

Expand Down Expand Up @@ -103,9 +104,7 @@ def read_from_file(
if unpickled_content:
return assert_cast(unpickled_content, SDocDocument)

# utf-8-sig is important here because it strips the UTF BOM markers
# from the beginning of source files created on Windows.
with open(file_path, encoding="utf-8-sig") as file:
with file_open_read_utf8(file_path) as file:
sdoc_content = file.read()

sdoc, parse_context = self.read_with_parse_context(
Expand Down
3 changes: 2 additions & 1 deletion strictdoc/backend/sdoc_source_code/coverage_reports/gcov.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
from strictdoc.backend.sdoc.models.section import SDocSection
from strictdoc.core.file_tree import File
from strictdoc.core.project_config import ProjectConfig
from strictdoc.helpers.file_system import file_open_read_utf8


@dataclass
Expand All @@ -41,7 +42,7 @@ def read_from_file(
doc_file: File,
project_config: ProjectConfig,
) -> SDocDocument:
with open(doc_file.full_path, encoding="UTF-8") as file:
with file_open_read_utf8(doc_file.full_path) as file:
content = file.read()
return cls.read_from_string(content, doc_file, project_config)

Expand Down
3 changes: 2 additions & 1 deletion strictdoc/backend/sdoc_source_code/reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
validate_marker_uids,
)
from strictdoc.helpers.file_stats import SourceFileStats
from strictdoc.helpers.file_system import file_open_read_utf8
from strictdoc.helpers.textx import drop_textx_meta


Expand Down Expand Up @@ -292,7 +293,7 @@ def read(
return source_file_traceability_info

def read_from_file(self, file_path: str) -> SourceFileTraceabilityInfo:
with open(file_path, encoding="utf-8-sig") as file:
with file_open_read_utf8(file_path) as file:
sdoc_content = file.read()
sdoc = self.read(sdoc_content, file_path=file_path)
return sdoc
3 changes: 2 additions & 1 deletion strictdoc/backend/sdoc_source_code/reader_c.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@
)
from strictdoc.helpers.cast import assert_cast
from strictdoc.helpers.file_stats import SourceFileStats
from strictdoc.helpers.file_system import file_open_read_bytes


class SourceFileTraceabilityReader_C:
Expand Down Expand Up @@ -374,7 +375,7 @@ def read(
return traceability_info

def read_from_file(self, file_path: str) -> SourceFileTraceabilityInfo:
with open(file_path, "rb") as file:
with file_open_read_bytes(file_path) as file:
sdoc_content = file.read()
sdoc = self.read(sdoc_content, file_path=file_path)
return sdoc
Expand Down
3 changes: 2 additions & 1 deletion strictdoc/backend/sdoc_source_code/reader_python.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
)
from strictdoc.backend.sdoc_source_code.tree_sitter_helpers import traverse_tree
from strictdoc.helpers.file_stats import SourceFileStats
from strictdoc.helpers.file_system import file_open_read_bytes


class SourceFileTraceabilityReader_Python:
Expand Down Expand Up @@ -252,7 +253,7 @@ def read(
return traceability_info

def read_from_file(self, file_path: str) -> SourceFileTraceabilityInfo:
with open(file_path, "rb") as file:
with file_open_read_bytes(file_path) as file:
sdoc_content = file.read()
sdoc = self.read(sdoc_content, file_path=file_path)
return sdoc
Expand Down
3 changes: 2 additions & 1 deletion strictdoc/backend/sdoc_source_code/reader_robot.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@
source_file_traceability_info_processor,
)
from strictdoc.helpers.file_stats import SourceFileStats
from strictdoc.helpers.file_system import file_open_read_utf8


class SdocRelationVisitor(ModelVisitor): # type: ignore[misc]
Expand Down Expand Up @@ -184,7 +185,7 @@ def read(
return traceability_info

def read_from_file(self, file_path: str) -> SourceFileTraceabilityInfo:
with open(file_path) as file:
with file_open_read_utf8(file_path) as file:
sdoc_content = file.read()
sdoc = self.read(sdoc_content, file_path=file_path)
return sdoc
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
from strictdoc.core.file_tree import File
from strictdoc.core.project_config import ProjectConfig
from strictdoc.helpers.cast import assert_cast, assert_optional_cast
from strictdoc.helpers.file_system import file_open_read_utf8
from strictdoc.helpers.paths import path_to_posix_path


Expand Down Expand Up @@ -51,7 +52,7 @@ def read_from_file(
doc_file: File,
project_config: ProjectConfig,
) -> SDocDocument:
with open(doc_file.full_path, encoding="UTF-8") as file:
with file_open_read_utf8(doc_file.full_path) as file:
content = file.read()
return cls.read_from_string(content, doc_file, project_config)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@
from strictdoc.export.html.renderers.link_renderer import LinkRenderer
from strictdoc.export.html.renderers.markup_renderer import MarkupRenderer
from strictdoc.helpers.cast import assert_cast
from strictdoc.helpers.file_system import file_open_read_utf8
from strictdoc.helpers.timing import measure_performance


Expand Down Expand Up @@ -89,7 +90,7 @@ def export(
traceability_index: TraceabilityIndex,
html_templates: HTMLTemplates,
) -> Markup:
with open(source_file.full_path, encoding="utf-8") as opened_file:
with file_open_read_utf8(source_file.full_path) as opened_file:
source_file_lines = opened_file.readlines()

pygmented_source_file_lines: List[SourceLineEntry] = []
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
from strictdoc.export.html.renderers.link_renderer import LinkRenderer
from strictdoc.export.html.renderers.markup_renderer import MarkupRenderer
from strictdoc.helpers.cast import assert_cast
from strictdoc.helpers.file_system import file_open_read_utf8
from strictdoc.helpers.git_client import GitClient
from strictdoc.helpers.string import interpolate_at_pattern_lazy
from strictdoc.server.helpers.turbo import render_turbo_stream
Expand Down Expand Up @@ -72,7 +73,7 @@ def __init__(

self.custom_html2pdf_template: Optional[Template] = None
if project_config.html2pdf_template is not None:
with open(project_config.html2pdf_template) as f_:
with file_open_read_utf8(project_config.html2pdf_template) as f_:
self.custom_html2pdf_template = Template(f_.read())

def has_included_document(self) -> bool:
Expand Down
5 changes: 3 additions & 2 deletions strictdoc/export/rst/rst_to_html_fragment_writer.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
from strictdoc.export.rst.directives.wildcard_enhanced_image import (
WildcardEnhancedImage,
)
from strictdoc.helpers.file_system import file_open_read_bytes


class RstToHtmlFragmentWriter:
Expand Down Expand Up @@ -97,8 +98,8 @@ def write(self, rst_fragment: str, use_cache: bool = True) -> Markup:
)
if use_cache and os.path.isdir(path_to_rst_fragment_bucket_dir):
if os.path.isfile(path_to_cached_fragment):
with open(
path_to_cached_fragment, "rb"
with file_open_read_bytes(
path_to_cached_fragment
) as cached_fragment_file_:
return Markup(cached_fragment_file_.read().decode("UTF-8"))
else:
Expand Down
9 changes: 7 additions & 2 deletions strictdoc/export/spdx/spdx_generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@
from strictdoc.export.spdx.spdx_sdoc_container import SPDXSDocContainer
from strictdoc.export.spdx.spdx_to_sdoc_converter import SPDXToSDocConverter
from strictdoc.helpers.cast import assert_cast
from strictdoc.helpers.file_system import file_open_read_bytes
from strictdoc.helpers.sha256 import get_sha256

RELATION_ID_HOW_TO = "SPDXRef-Relationship-How-to-form-ID?"
Expand Down Expand Up @@ -254,7 +255,9 @@ def export_tree(

for document_ in traceability_index.document_tree.document_list:
assert document_.meta is not None
with open(document_.meta.input_doc_full_path, "rb") as input_file_:
with file_open_read_bytes(
document_.meta.input_doc_full_path
) as input_file_:
document_bytes = input_file_.read()

#
Expand Down Expand Up @@ -340,7 +343,9 @@ def export_tree(
if node_link_path_ in lookup_file_name_to_spdx_file:
continue

with open(node_link_path_, "rb") as node_link_file_:
with file_open_read_bytes(
node_link_path_
) as node_link_file_:
file_bytes = node_link_file_.read()

source_spdx_file = (
Expand Down
25 changes: 24 additions & 1 deletion strictdoc/helpers/file_system.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,14 @@
import codecs
import os
import platform
import shutil
import tempfile
from contextlib import contextmanager
from io import BufferedReader, TextIOWrapper
from pathlib import Path
from typing import Optional
from typing import Iterator, Optional

UTF8_BOM_BYTES = codecs.BOM_UTF8 # b'\xef\xbb\xbf'


def sync_dir(src_dir: str, dst_dir: str, message: Optional[str]) -> None:
Expand Down Expand Up @@ -73,3 +78,21 @@ def get_portable_temp_dir() -> Path:
return Path(
"/tmp" if platform.system() == "Darwin" else tempfile.gettempdir()
)


@contextmanager
def file_open_read_utf8(file_path: str) -> Iterator[TextIOWrapper]:
    """
    Open ``file_path`` for reading as text and yield the opened stream.

    The stream is closed when the ``with`` block that uses this helper exits,
    whether normally or because of an exception.
    """
    # The "utf-8-sig" codec is what makes this helper useful: it decodes
    # regular UTF-8 but silently drops a leading BOM marker, which some
    # Windows tools put at the beginning of the files they create.
    text_stream = open(file_path, encoding="utf-8-sig")
    try:
        yield text_stream
    finally:
        text_stream.close()


@contextmanager
def file_open_read_bytes(file_path: str) -> Iterator[BufferedReader]:
    """
    Open ``file_path`` in binary mode and yield the opened stream.

    If the file starts with the UTF-8 BOM, the yielded stream is positioned
    right after it, so a subsequent ``read()`` returns the content without
    the BOM. Otherwise the stream is positioned at the beginning of the file.
    """
    with open(file_path, "rb") as binary_stream:
        bom_length = len(UTF8_BOM_BYTES)
        leading_bytes = binary_stream.read(bom_length)
        starts_with_bom = leading_bytes == UTF8_BOM_BYTES
        if not starts_with_bom:
            # The bytes consumed by the probe are real content (or the file
            # is shorter than a BOM), so go back to the very beginning.
            binary_stream.seek(0)
        yield binary_stream
4 changes: 3 additions & 1 deletion strictdoc/helpers/md5.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,15 @@
import hashlib

from strictdoc.helpers.file_system import file_open_read_bytes


def get_md5(obj: str) -> str:
    """Return the hexadecimal MD5 digest of ``obj`` encoded as UTF-8."""
    hasher = hashlib.md5()
    hasher.update(obj.encode("utf-8"))
    return hasher.hexdigest()


def get_file_md5(path: str, buf_size: int = 65536) -> str:
m = hashlib.md5()
with open(path, "rb") as f:
with file_open_read_bytes(path) as f:
b = f.read(buf_size)
while len(b) > 0:
m.update(b)
Expand Down
33 changes: 33 additions & 0 deletions tests/unit/strictdoc/helpers/test_file_open_bytes.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
import os
import tempfile
from io import BufferedReader

import pytest

from strictdoc.helpers.file_system import UTF8_BOM_BYTES, file_open_read_bytes


@pytest.mark.parametrize(
    "initial_bytes,expected_bytes",
    [
        (b"", b""),
        (b"A", b"A"),
        (b"AB", b"AB"),
        (UTF8_BOM_BYTES + b"Hello", b"Hello"),
        (b"Hello", b"Hello"),
    ],
)
def test_file_open_read_bytes(initial_bytes, expected_bytes):
    """
    Write ``initial_bytes`` to a temporary file and check that reading it
    back through ``file_open_read_bytes`` produces ``expected_bytes``.
    """
    # delete=False: the file has to survive being closed so that the helper
    # under test can reopen it by path. It is removed manually at the end.
    scratch_file = tempfile.NamedTemporaryFile(mode="wb", delete=False)
    scratch_path = scratch_file.name

    try:
        # Leaving the context closes the file and flushes the written bytes.
        with scratch_file:
            scratch_file.write(initial_bytes)

        with file_open_read_bytes(scratch_path) as reader:
            assert isinstance(reader, BufferedReader)
            actual_bytes = reader.read()
        assert actual_bytes == expected_bytes
    finally:
        os.remove(scratch_path)
36 changes: 36 additions & 0 deletions tests/unit/strictdoc/helpers/test_file_open_utf8.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
import os
import tempfile
from io import TextIOWrapper

import pytest

from strictdoc.helpers.file_system import UTF8_BOM_BYTES, file_open_read_utf8

UTF8_BOM_STR = UTF8_BOM_BYTES.decode("utf-8")


@pytest.mark.parametrize(
    "initial_text,expected_text",
    [
        ("", ""),
        ("A", "A"),
        (UTF8_BOM_STR + "Hello", "Hello"),
        ("Hello", "Hello"),
    ],
)
def test_file_open_read_utf8(initial_text, expected_text):
    """
    Write ``initial_text`` to a temporary UTF-8 file and check that reading
    it back through ``file_open_read_utf8`` produces ``expected_text``.
    """
    # delete=False: the file has to survive being closed so that the helper
    # under test can reopen it by path. It is removed manually at the end.
    scratch_file = tempfile.NamedTemporaryFile(
        mode="w", encoding="utf-8", delete=False
    )
    scratch_path = scratch_file.name

    try:
        # Leaving the context closes the file and flushes the written text.
        with scratch_file:
            scratch_file.write(initial_text)

        with file_open_read_utf8(scratch_path) as reader:
            assert isinstance(reader, TextIOWrapper)
            actual_text = reader.read()
        assert actual_text == expected_text
    finally:
        os.remove(scratch_path)
Loading