Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 16 additions & 5 deletions mkdocstrings_handlers/vba/_handler.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@
MutableMapping,
Dict,
Mapping,
Set,
Tuple,
)

Expand Down Expand Up @@ -40,9 +39,17 @@ class VbaHandler(BaseHandler):
The directory in which to look for VBA files.
"""

def __init__(self, *, base_dir: Path, **kwargs: Any) -> None:
encoding: str
"""
The encoding to use when reading VBA files.
Excel exports .bas and .cls files as `latin1`.
See https://en.wikipedia.org/wiki/ISO/IEC_8859-1 .
"""

def __init__(self, *, base_dir: Path, encoding: str, **kwargs: Any) -> None:
super().__init__(**kwargs)
self.base_dir = base_dir
self.encoding = encoding

name: str = "vba"
"""
Expand Down Expand Up @@ -121,9 +128,7 @@ def collect(
if not p.exists():
raise CollectionError("File not found.")

with p.open("r") as f:
code = f.read()

code = p.read_text(encoding=self.encoding, errors="replace")
code = collapse_long_lines(code)

return VbaModuleInfo(
Expand Down Expand Up @@ -178,6 +183,7 @@ def get_handler(
theme: str = "material",
custom_templates: str | None = None,
config_file_path: str | None = None,
encoding: str = "latin1",
**kwargs: Any,
) -> VbaHandler:
"""
Expand All @@ -187,6 +193,10 @@ def get_handler(
theme: The theme to use when rendering contents.
custom_templates: Directory containing custom templates.
config_file_path: The MkDocs configuration file path.
encoding:
The encoding to use when reading VBA files.
Excel exports .bas and .cls files as `latin1`.
See https://en.wikipedia.org/wiki/ISO/IEC_8859-1 .
kwargs: Extra keyword arguments that we don't use.

Returns:
Expand All @@ -198,6 +208,7 @@ def get_handler(
if config_file_path
else Path(".").resolve()
),
encoding=encoding,
handler="vba",
theme=theme,
custom_templates=custom_templates,
Expand Down
Empty file added test/handler/__init__.py
Empty file.
45 changes: 45 additions & 0 deletions test/handler/test_collect.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
import unittest
from pathlib import Path
from tempfile import TemporaryDirectory

from mkdocstrings_handlers.vba import get_handler

# noinspection PyProtectedMember
from mkdocstrings_handlers.vba._types import VbaModuleInfo


def _test_collect(*, write_bytes: bytes, read_encoding: str) -> VbaModuleInfo:
with TemporaryDirectory() as tmp_dir_str:
tmp_dir = Path(tmp_dir_str)
handler = get_handler(encoding=read_encoding)
p = tmp_dir / "source.bas"
p.write_bytes(write_bytes)
return handler.collect(identifier=p.as_posix(), config={})


class TestCollect(unittest.TestCase):

def test_undefined_unicode(self) -> None:
# See https://symbl.cc/en/unicode-table/#undefined-0 for values that are undefined in Unicode.
# \xe2\xbf\xaf is utf-8 for the undefined Unicode point U+2FEF
module_info = _test_collect(
write_bytes=b"Foo \xe2\xbf\xaf Bar",
read_encoding="utf-8",
)
self.assertEqual(["Foo \u2fef Bar"], module_info.source)

def test_invalid_utf8(self) -> None:
# invalid start byte
module_info = _test_collect(
write_bytes=b"\x89\x89\x89\x89",
read_encoding="utf-8",
)
self.assertEqual(["����"], module_info.source)

def test_invalid_latin1(self) -> None:
module_info = _test_collect(
write_bytes="🎵".encode("utf-8"),
read_encoding="latin1",
)
# Since `latin1` is a single-byte encoding, it can't detect invalid sequences, and so we get mojibake.
self.assertEqual(["ð\x9f\x8eµ"], module_info.source)