Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion mypy-requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -4,4 +4,4 @@ typing_extensions>=4.6.0
mypy_extensions>=1.0.0
pathspec>=0.9.0
tomli>=1.1.0; python_version<'3.11'
librt>=0.3.0
librt>=0.4.0
18 changes: 14 additions & 4 deletions mypy/build.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,8 +28,10 @@
from typing import TYPE_CHECKING, Any, Callable, ClassVar, Final, NoReturn, TextIO, TypedDict
from typing_extensions import TypeAlias as _TypeAlias

from librt.internal import cache_version

import mypy.semanal_main
from mypy.cache import Buffer, CacheMeta
from mypy.cache import CACHE_VERSION, Buffer, CacheMeta
from mypy.checker import TypeChecker
from mypy.error_formatter import OUTPUT_CHOICES, ErrorFormatter
from mypy.errors import CompileError, ErrorInfo, Errors, report_internal_error
Expand Down Expand Up @@ -1334,12 +1336,18 @@ def find_cache_meta(id: str, path: str, manager: BuildManager) -> CacheMeta | No
return None
t1 = time.time()
if isinstance(meta, bytes):
data_io = Buffer(meta)
# If either low-level buffer format or high-level cache layout changed, we
# cannot use the cache files, even with --skip-version-check.
# TODO: switch to something like librt.internal.read_byte() if this is slow.
if meta[0] != cache_version() or meta[1] != CACHE_VERSION:
manager.log(f"Metadata abandoned for {id}: incompatible cache format")
return None
data_io = Buffer(meta[2:])
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The slice operation does an almost full copy of the meta buffer. It's probably not a big deal, but it would be nice if we could avoid it. What about adding a read_byte operation to the internal API (it would be just an alias for read_tag for now, I guess) that could be used to read the initial two bytes? (No need to do this in this PR.)

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could also maybe slice the memoryview?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

FWIW, I just measured this slice on a "micro-benchmark": it takes around 0.1 microsecond per file (interpreted). So even with 10K files in the build, this will be an extra millisecond. It is probably even less when compiled. I will add a TODO here and below.

m = CacheMeta.read(data_io, data_file)
else:
m = CacheMeta.deserialize(meta, data_file)
if m is None:
manager.log(f"Metadata abandoned for {id}: attributes are missing")
manager.log(f"Metadata abandoned for {id}: cannot deserialize data")
return None
t2 = time.time()
manager.add_stats(
Expand Down Expand Up @@ -1671,7 +1679,9 @@ def write_cache_meta(meta: CacheMeta, manager: BuildManager, meta_file: str) ->
if manager.options.fixed_format_cache:
data_io = Buffer()
meta.write(data_io)
meta_bytes = data_io.getvalue()
# Prefix with both low- and high-level cache format versions for future validation.
# TODO: switch to something like librt.internal.write_byte() if this is slow.
meta_bytes = bytes([cache_version(), CACHE_VERSION]) + data_io.getvalue()
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Similar to the above, the concatenation does a full copy of the cache data. We could add a write_byte operation to write the version bytes to a Buffer object in a future-proof way.

else:
meta_dict = meta.serialize()
meta_bytes = json_dumps(meta_dict, manager.options.debug_cache)
Expand Down
6 changes: 6 additions & 0 deletions mypy/cache.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,9 @@
serialization. The write method should write both class tag and end tag. The read method
conventionally *does not* read the start tag (to simplify logic for unions). Known exceptions
are MypyFile.read() and SymbolTableNode.read(), since those two never appear in a union.

If any of these details change, or if the structure of CacheMeta changes, please
bump CACHE_VERSION below.
"""

from __future__ import annotations
Expand All @@ -65,6 +68,9 @@
)
from mypy_extensions import u8

# High-level cache layout format
CACHE_VERSION: Final = 0


class CacheMeta:
"""Class representing cache metadata for a module."""
Expand Down
4 changes: 2 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ requires = [
"mypy_extensions>=1.0.0",
"pathspec>=0.9.0",
"tomli>=1.1.0; python_version<'3.11'",
"librt>=0.3.0",
"librt>=0.4.0",
# the following is from build-requirements.txt
"types-psutil",
"types-setuptools",
Expand Down Expand Up @@ -54,7 +54,7 @@ dependencies = [
"mypy_extensions>=1.0.0",
"pathspec>=0.9.0",
"tomli>=1.1.0; python_version<'3.11'",
"librt>=0.3.0",
"librt>=0.4.0",
]
dynamic = ["version"]

Expand Down
2 changes: 1 addition & 1 deletion test-requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ identify==2.6.15
# via pre-commit
iniconfig==2.1.0
# via pytest
librt==0.3.0
librt==0.4.0
# via -r mypy-requirements.txt
lxml==6.0.2 ; python_version < "3.15"
# via -r test-requirements.in
Expand Down