diff --git a/src/docstub/_docstrings.py b/src/docstub/_docstrings.py index a662a5d..52bc59a 100644 --- a/src/docstub/_docstrings.py +++ b/src/docstub/_docstrings.py @@ -295,58 +295,29 @@ def doctype_to_annotation(self, doctype): self._collected_imports = None self._unknown_qualnames = None - def __default__(self, data, children, meta): - """Unpack children of rule nodes by default. - + def annotation_with_meta(self, tree): + """ Parameters ---------- - data : lark.Token - The rule-token of the current node. - children : list[lark.Token] - The children of the current node. - meta : lark.tree.Meta - Meta information for the current node. + tree : lark.Tree Returns ------- - out : lark.Token or list[lark.Token] - Either a token or list of tokens. + out : str """ - if isinstance(children, list) and len(children) == 1: - out = children[0] - out.type = data.upper() # Turn rule into "token" - else: - out = children - return out - - def annotation(self, tree): - out = " | ".join(tree.children) - return out - - def types_or(self, tree): out = " | ".join(tree.children) return out - def optional(self, tree): - logger.debug("dropping optional / default info") - return lark.Discard - - def extra_info(self, tree): - logger.debug("dropping extra info") - return lark.Discard - - def sphinx_ref(self, tree): - qualname = _find_one_token(tree, name="QUALNAME") - return qualname - - def container(self, tree): - _container, *_content = tree.children - _content = ", ".join(_content) - assert _content - out = f"{_container}[{_content}]" - return out - def qualname(self, tree): + """ + Parameters + ---------- + tree : lark.Tree + + Returns + ------- + out : lark.Token + """ children = tree.children _qualname = ".".join(children) @@ -368,28 +339,58 @@ def qualname(self, tree): _qualname = lark.Token(type="QUALNAME", value=_qualname) return _qualname - def array_name(self, tree): - qualname = self.qualname(tree) - qualname = lark.Token("ARRAY_NAME", str(qualname)) + def rst_role(self, 
tree): + """ + Parameters + ---------- + tree : lark.Tree + + Returns + ------- + out : lark.Token + """ + qualname = _find_one_token(tree, name="QUALNAME") return qualname - def shape(self, tree): - logger.debug("dropping shape information") - return lark.Discard + def or_expression(self, tree): + """ + Parameters + ---------- + tree : lark.Tree - def shape_n_dtype(self, tree): - name = _find_one_token(tree, name="ARRAY_NAME") - children = [child for child in tree.children if child != name] - if children: - name = f"{name}[{', '.join(children)}]" - return name + Returns + ------- + out : str + """ + out = " | ".join(tree.children) + return out - def contains(self, tree): - out = ", ".join(tree.children) - out = f"[{out}]" + def subscription_expression(self, tree): + """ + Parameters + ---------- + tree : lark.Tree + + Returns + ------- + out : str + """ + _container, *_content = tree.children + _content = ", ".join(_content) + assert _content + out = f"{_container}[{_content}]" return out - def literals(self, tree): + def literal_expression(self, tree): + """ + Parameters + ---------- + tree : lark.Tree + + Returns + ------- + out : str + """ out = ", ".join(tree.children) out = f"Literal[{out}]" if self.types_db is not None: @@ -398,6 +399,103 @@ def literals(self, tree): self._collected_imports.add(known_import) return out + def array_expression(self, tree): + """ + Parameters + ---------- + tree : lark.Tree + + Returns + ------- + out : str + """ + name = _find_one_token(tree, name="ARRAY_NAME") + children = [child for child in tree.children if child != name] + if children: + name = f"{name}[{', '.join(children)}]" + return str(name) + + def array_name(self, tree): + """ + Parameters + ---------- + tree : lark.Tree + + Returns + ------- + out : lark.Token + """ + # Treat `array_name` as `qualname`, but mark it as an array name, + # so we know which one to treat as the container in `array_expression` + # This currently relies on a hack that only allows specific 
names + # in `array_expression` (see `ARRAY_NAME` terminal in grammar) + qualname = self.qualname(tree) + qualname = lark.Token("ARRAY_NAME", str(qualname)) + return qualname + + def shape(self, tree): + """ + Parameters + ---------- + tree : lark.Tree + + Returns + ------- + out : lark.visitors._DiscardType + """ + logger.debug("dropping shape information") + return lark.Discard + + def optional(self, tree): + """ + Parameters + ---------- + tree : lark.Tree + + Returns + ------- + out : lark.visitors._DiscardType + """ + logger.debug("dropping optional / default info") + return lark.Discard + + def extra_info(self, tree): + """ + Parameters + ---------- + tree : lark.Tree + + Returns + ------- + out : lark.visitors._DiscardType + """ + logger.debug("dropping extra info") + return lark.Discard + + def __default__(self, data, children, meta): + """Unpack children of rule nodes by default. + + Parameters + ---------- + data : lark.Token + The rule-token of the current node. + children : list[lark.Token] + The children of the current node. + meta : lark.tree.Meta + Meta information for the current node. + + Returns + ------- + out : lark.Token or list[lark.Token] + Either a token or list of tokens. + """ + if isinstance(children, list) and len(children) == 1: + out = children[0] + out.type = data.upper() # Turn rule into "token" + else: + out = children + return out + def _match_import(self, qualname, *, meta): """Match `qualname` to known imports or alias to "Incomplete". diff --git a/src/docstub/doctype.lark b/src/docstub/doctype.lark index e7040fd..111173c 100644 --- a/src/docstub/doctype.lark +++ b/src/docstub/doctype.lark @@ -1,73 +1,121 @@ -?start : annotation +// Grammar defining the syntax for docstring type descriptions +// +// Reading and introduction order of rules starts at the top of the tree. -// ---------------------------------------------------------------------------- -annotation : types_or ("," optional)? ("," extra_info)? 
-?types_or : type (("or" | "|") type)* +?start : annotation_with_meta -?type : qualname - | sphinx_ref - | container - | shape_n_dtype - | literals -optional : "optional" - | "default" ("=" | ":")? literal +// The basic structure of a full docstring annotation as it comes after the +// `name : `. It includes additional meta information that is optional and +// currently ignored. +annotation_with_meta : type ("," optional)? ("," extra_info)? -extra_info : /[^\r\n]+/ -sphinx_ref : (":" (NAME ":")? NAME ":")? "`" qualname "`" +// Just the docstring type annotation without meta information. +?type : + | qualname + | rst_role + | literal_expression + | subscription_expression + | array_expression + | or_expression -container: qualname "[" types_or ("," types_or)* ("," PY_ELLIPSES)? "]" - | qualname "of" type // TODO allow plural somehow, e.g. "list of int(s)"? - | qualname "of" "(" types_or ("," types_or)* ("," PY_ELLIPSES)? ")" - | qualname "of" "{" types_or ":" types_or "}" // Name with leading dot separated path qualname : (/~/ ".")? (NAME ".")* NAME -// ---------------------------------------------------------------------------- -// Array-like form with dtype or shape information +// A qualname can be wrapped in a reStructuredText role, e.g., as used by Sphinx. +// https://docutils.sourceforge.io/docs/ref/rst/roles.html +rst_role : (":" (NAME ":")? NAME ":")? "`" qualname "`" + + +// A union of different types, joined either by "or" or "|" +or_expression : type (("or" | "|") type)+ + + +// An expression where an object is subscripted with "A[v, ...]". We extend this +// syntax with a natural language variant `A of (v, ...)` and `A of {k : v}`. +subscription_expression: + | qualname "[" type ("," type)* ("," ELLIPSES)? "]" + | qualname "of" type // TODO allow plural somehow, e.g. "list of int(s)"? + | qualname "of" "(" type ("," type)* ("," ELLIPSES)? 
")" + | qualname "of" "{" type ":" type "}" + + +// An expression combining multiple literals inside curly braces `{l1, l2, ...}` +literal_expression : "{" literal_item ("," literal_item)* "}" + + +// A single item in a literal expression (or `optional`). We must also allow +// for qualified names, since a "class" can be used as a literal too. +?literal_item : + | ELLIPSES + | STRING + | NUMBER + | qualname // TODO should rst_role too? make combined `type qualname | rst_role`? -shape_n_dtype : shape? array_name ("of" dtype)? - | shape? array_name "of" dtype - | shape dtype array_name - | dtype array_name - | array_name "of shape" shape ("and dtype" dtype)? - | array_name "of dtype" dtype ("and shape" shape)? -// Use rule and terminal to capture the array name and its context -// TODO figure out way not to leak implementation here + +// A natural language alternative to describe arrays with a dtype and shape +array_expression : + | array_name "of dtype" dtype ("and shape" shape)? + | array_name "of shape" shape ("and dtype" dtype)? + | shape array_name ("of" dtype)? + | shape? array_name "of" dtype + | shape dtype array_name + | dtype array_name + +// Currently a bit of a hack. Since the `array_expression` is currently so +// ambiguous, we want to make sure it only works for real arrays. For now, we +// are using a hack here, that only allows specific names in `array_name`. In +// the transformer we alias this to qualname. +// TODO figure out less hacky way & allow users to set other array names array_name : ARRAY_NAME ARRAY_NAME : "array" | "ndarray" | "array-like" | "array_like" -dtype : qualname -shape : "(" dim ",)" - | "(" leading_optional? dim (("," dim | insert_optional))* ")" - | NUMBER "-"? "D" -leading_optional : "[" dim ("," dim)* ",]" -insert_optional : "[," dim ("," dim)* "]" -?dim : NUMBER - | PY_ELLIPSES - | NAME ("=" NUMBER)? +// The dtype used in an array expression. +?dtype : qualname + +// The shape used in an array expression. 
Possibly too liberal right now in +// what it allows. Since there is currently no support to type the shape of +// NumPy arrays, this information is dropped during the transformation. +shape : + | "(" dim ",)" + | "(" leading_optional_dim? dim (("," dim | insert_optional_dim))* ")" + | NUMBER "-"? "D" + +// Optional dimensions in a `shape` expression placed at the start, +// e.g., `([3 ,] N)`. +?leading_optional_dim : "[" dim ("," dim)* ",]" + +// Optional dimensions in a `shape` expression placed anywhere but the start, +// e.g., `(A[, B], C[, D])`. +?insert_optional_dim : "[," dim ("," dim)* "]" +// Dimension can be a number, ellipses ('...') or a simple name. A simple name +// can be bound to a specific number, e.g. `N=3`. +?dim : NUMBER | ELLIPSES | NAME ("=" NUMBER)? -// ---------------------------------------------------------------------------- -// Python -literals : "{" literal ("," literal)* "}" -literal : PY_ELLIPSES - | STRING - | NUMBER - | qualname +// Optional information about whether a parameter has a default value, added after the +// docstring annotation. Currently dropped during transformation. +optional : "optional" | "default" ("=" | ":")? literal_item + + +// Extra meta information added after the docstring annotation. +// Currently dropped during transformation. +extra_info : /[^\r\n]+/ + -PY_ELLIPSES : "..." +// Allow Python's ellipses object +ELLIPSES : "..." +// A simple name. Must start with a letter or "_", not a digit. Can be delimited by "_" +// or "-" but not by ".". NAME: /[^\W\d][\w-]*/ -// ---------------------------------------------------------------------------- -// imports %import python (STRING) %import common (NEWLINE, NUMBER, LETTER, TEXT, WS)