Skip to content

Commit

Permalink
PERF, validation: wrap inspect.getsourcelines with cache (#532)
Browse files Browse the repository at this point in the history
* wrap inspect.getsourcelines with cache
  • Loading branch information
dontgoto committed Mar 20, 2024
1 parent 1e60071 commit 19167fe
Showing 1 changed file with 15 additions and 9 deletions.
24 changes: 15 additions & 9 deletions numpydoc/validate.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
"""

from copy import deepcopy
from typing import Dict, List, Set, Optional
from typing import Dict, List, Set, Optional, Any
import ast
import collections
import functools
Expand Down Expand Up @@ -116,7 +116,7 @@
# We have to balance memory usage with performance here. It shouldn't be too
# bad to store these `dict`s (they should be rare), but to be safe let's keep
# the limit low-ish. This was set by looking at scipy, numpy, matplotlib,
# and pandas and they had between ~500 and ~1300 .py files as of 2023-08-16.
# and pandas, and they had between ~500 and ~1300 .py files as of 2023-08-16.
@functools.lru_cache(maxsize=2000)
def extract_ignore_validation_comments(
filepath: Optional[os.PathLike],
Expand Down Expand Up @@ -212,7 +212,7 @@ def error(code, **kwargs):
message : str
Error message with variables replaced.
"""
return (code, ERROR_MSGS[code].format(**kwargs))
return code, ERROR_MSGS[code].format(**kwargs)


class Validator:
Expand Down Expand Up @@ -290,24 +290,30 @@ def source_file_name(self):

except TypeError:
# In some cases the object is something complex like a cython
# object that can't be easily introspected. An it's better to
# object that can't be easily introspected. And it's better to
# return the source code file of the object as None, than crash
pass
else:
return fname

# When calling validate, files are parsed twice
@staticmethod
@functools.lru_cache(maxsize=4000)
def _getsourcelines(obj: Any):
    """
    Cached wrapper around ``inspect.getsourcelines``.

    Results are memoized in an LRU cache holding up to 4000 entries, so a
    repeated lookup for the same object reuses the earlier result instead
    of calling ``inspect.getsourcelines`` again.

    Parameters
    ----------
    obj : Any
        Object forwarded unchanged to ``inspect.getsourcelines``. It is
        also the cache key, so it has to be hashable.

    Returns
    -------
    tuple
        Whatever ``inspect.getsourcelines(obj)`` returns.
    """
    return inspect.getsourcelines(obj)

@property
def source_file_def_line(self):
"""
Number of line where the object is defined in its file.
"""
try:
if isinstance(self.code_obj, property):
sourcelines = inspect.getsourcelines(self.code_obj.fget)
sourcelines = self._getsourcelines(self.code_obj.fget)
elif isinstance(self.code_obj, functools.cached_property):
sourcelines = inspect.getsourcelines(self.code_obj.func)
sourcelines = self._getsourcelines(self.code_obj.func)
else:
sourcelines = inspect.getsourcelines(self.code_obj)
sourcelines = self._getsourcelines(self.code_obj)
# getsourcelines will return the line of the first decorator found for the
# current function. We have to find the def declaration after that.
def_line = next(
Expand All @@ -320,7 +326,7 @@ def source_file_def_line(self):
return sourcelines[-1] + def_line
except (OSError, TypeError):
# In some cases the object is something complex like a cython
# object that can't be easily introspected. An it's better to
# object that can't be easily introspected. And it's better to
# return the line number as None, than crash
pass

Expand Down Expand Up @@ -613,7 +619,7 @@ def validate(obj_name, validator_cls=None, **validator_kwargs):
else:
doc = validator_cls(obj_name=obj_name, **validator_kwargs)

# lineno is only 0 if we have a module docstring in the file and we are
# lineno is only 0 if we have a module docstring in the file, and we are
# validating that, so we change to 1 for readability of the output
ignore_validation_comments = extract_ignore_validation_comments(
doc.source_file_name
Expand Down

0 comments on commit 19167fe

Please sign in to comment.