diff --git a/src/docstub/_docstrings.py b/src/docstub/_docstrings.py index 9a92bfe..ebcf0ee 100644 --- a/src/docstub/_docstrings.py +++ b/src/docstub/_docstrings.py @@ -24,7 +24,7 @@ with grammar_path.open() as file: _grammar = file.read() -_lark = lark.Lark(_grammar, propagate_positions=True) +_lark = lark.Lark(_grammar, propagate_positions=True, strict=True) def _find_one_token(tree: lark.Tree, *, name: str) -> lark.Token: @@ -295,19 +295,6 @@ def doctype_to_annotation(self, doctype): self._collected_imports = None self._unknown_qualnames = None - def annotation_with_meta(self, tree): - """ - Parameters - ---------- - tree : lark.Tree - - Returns - ------- - out : str - """ - out = " | ".join(tree.children) - return out - def qualname(self, tree): """ Parameters @@ -352,7 +339,7 @@ def rst_role(self, tree): qualname = _find_one_token(tree, name="QUALNAME") return qualname - def or_expression(self, tree): + def union(self, tree): """ Parameters ---------- @@ -365,7 +352,7 @@ def or_expression(self, tree): out = " | ".join(tree.children) return out - def subscription_expression(self, tree): + def subscription(self, tree): """ Parameters ---------- @@ -381,7 +368,7 @@ def subscription_expression(self, tree): out = f"{_container}[{_content}]" return out - def literal_expression(self, tree): + def natlang_literal(self, tree): """ Parameters ---------- @@ -393,13 +380,34 @@ def literal_expression(self, tree): """ out = ", ".join(tree.children) out = f"Literal[{out}]" + + if len(tree.children): + logger.warning( + "natural language literal with one item `%s`, " + "consider using `%s` to improve readability", + tree.children[0], + out, + ) + if self.types_db is not None: _, known_import = self.types_db.query("Literal") if known_import: self._collected_imports.add(known_import) return out - def array_expression(self, tree): + def natlang_container(self, tree): + """ + Parameters + ---------- + tree : lark.Tree + + Returns + ------- + out : str + """ + return 
self.subscription(tree) + + def natlang_array(self, tree): """ Parameters ---------- @@ -491,7 +499,8 @@ def __default__(self, data, children, meta): """ if isinstance(children, list) and len(children) == 1: out = children[0] - out.type = data.upper() # Turn rule into "token" + if hasattr(out, "type"): + out.type = data.upper() # Turn rule into "token" else: out = children return out diff --git a/src/docstub/doctype.lark b/src/docstub/doctype.lark index 111173c..1243a73 100644 --- a/src/docstub/doctype.lark +++ b/src/docstub/doctype.lark @@ -1,115 +1,145 @@ // Grammar defining the syntax for docstring type descriptions // // Reading and introduction order of rules starts at the top of the tree. +// +// Reference for Lark grammars: +// https://lark-parser.readthedocs.io/en/latest/grammar.html -?start : annotation_with_meta +?start: annotation_with_meta // The basic structure of a full docstring annotation as it comes after the // `name : `. It includes additional meta information that is optional and // currently ignored. -annotation_with_meta : type ("," optional)? ("," extra_info)? +?annotation_with_meta: type ("," optional)? ("," extra_info)? + + +// A type annotation. Can range from a simple qualified name to a complex +// nested construct of types. +?type: qualname + | union + | subscription + | natlang_literal + | natlang_container + | natlang_array -// Just the docstring type annotation without meta information. -?type : - | qualname - | rst_role - | literal_expression - | subscription_expression - | array_expression - | or_expression +// A qualified name which can contain multiple parts separated by a ".". +// Optionally, "~." can be prefixed to abbreviate a leading part of the name. +// Optionally, a qualname can be wrapped in the style of a reStructuredText +// role [1], e.g, as used by Sphinx. +// [1] https://docutils.sourceforge.io/docs/ref/rst/roles.html +// +qualname: (/~/ ".")? (NAME ".")* NAME + | (":" (NAME ":")? NAME ":")? 
"`" qualname "`" -> rst_role -// Name with leading dot separated path -qualname : (/~/ ".")? (NAME ".")* NAME +// An union of different types, joined either by "or" or "|". +union: type (_OR type)+ -// A qualname can be wrapped in a reStructuredText role, e.g, as used by Sphinx. -// https://docutils.sourceforge.io/docs/ref/rst/roles.html -rst_role : (":" (NAME ":")? NAME ":")? "`" qualname "`" +// Operator used in unions. +_OR: "or" | "|" -// An union of different types, joined either by "or" or "|" -or_expression : type (("or" | "|") type)+ +// An expression where an object is subscribed with "A[v, ...]". +subscription: qualname "[" type ("," type)* ("," ELLIPSES)? "]" -// An expression where an object is subscribed with "A[v, ...]". We extend this -// syntax with a natural language variant `A of (v, ...)` and `A of {k : v}`. -subscription_expression: - | qualname "[" type ("," type)* ("," ELLIPSES)? "]" - | qualname "of" type // TODO allow plural somehow, e.g. "list of int(s)"? - | qualname "of" "(" type ("," type)* ("," ELLIPSES)? ")" - | qualname "of" "{" type ":" type "}" +// Allow Python's ellipses object +ELLIPSES: "..." -// An expression combining multiple literals inside curly braces `{l1, l2, ...}` -literal_expression : "{" literal_item ("," literal_item)* "}" +// A natural language expression that combines one or multiple literals inside +// curly braces `{l1, l2, ...}` +natlang_literal: "{" literal_item ("," literal_item)* "}" // An single item in a literal expression (or `optional`). We must also allow -// for qualified names, since a "class" can be used as a literal too. -?literal_item : - | ELLIPSES - | STRING - | NUMBER - | qualname // TODO should rst_role too? make combined `type qualname | rst_role`? +// for qualified names, since a "class" or enum can be used as a literal too. +?literal_item: ELLIPSES | STRING | NUMBER | qualname + + +// Natural language forms of the subscription expression for containers. 
+// These forms allow nesting in and with other expressions. But +// it's discouraged to do so extensively to maintain readability. +natlang_container: qualname "of" qualname _PLURAL_S? + | qualname "of" "(" union ")" + | _natlang_tuple + | _natlang_mapping + + +// Indicate the plural version of a qualname by appending "(s)". +// The negative lookbehind in this regex disallows whitespace directly in front +// of this. +_PLURAL_S: /(? str: return name.replace("-", "_").replace(".", "_")