Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ENH: Add support for reST-style docstrings #274

Open
wants to merge 7 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
202 changes: 192 additions & 10 deletions pdoc/html_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,11 @@
from typing import Callable, Match
from warnings import warn
import xml.etree.ElementTree as etree
import docutils.nodes
import docutils.core
from collections import OrderedDict
from typing import Union, List, Dict, Optional


import markdown
from markdown.inlinepatterns import InlineProcessor
Expand Down Expand Up @@ -265,6 +270,164 @@ def googledoc_sections(match):
r'((?:\n?(?: {2,}.*|$))+)', re.MULTILINE).sub(googledoc_sections, text)
return text

@staticmethod
def _reST_string_to_html(text: str) -> str:
"""Convert reST text to html using docutils

:param text: The text to convert
:returns: The generated html
"""
html = docutils.core.publish_parts(text, writer_name='html')['html_body']

# Remove the document tag and return
return html[23:-8]

@staticmethod
def _reST_node_to_html(node: docutils.nodes.Node,
doctree: docutils.nodes.document) -> str:
"""Not all nodes in the doctree provide their reST source or at least the
starting line in the reST source. This method simply copies the document
tree and removes all but the node to then publish it.

:node: The node to publish. Must be a child of `doctree` itself
:doctree: The document having `node` as a child node
:return: The generated html for this node
"""
# Remove all but the given node from the doctree
children_copy = doctree.children
doctree.children = [doctree.children[doctree.index(node)]]

# Generate the html for this node/doctree
html = docutils.core.publish_from_doctree(doctree, writer_name='html5').decode('utf-8')

# Restore the doctree
doctree.children = children_copy

# Return only the relevant part of the html
match = re.search(r'<div class="document">(.+)</div>', html, re.DOTALL)

if match is not None:
return match.group(1).strip()
else:
# The generated HTML from docutils.publish_from_doctree() should always contain a
# div with class "document" in which all generated content is located. However, in case
# it doesn't, it's probably empty so return an empty string
return ''

@staticmethod
def _reST_field_list_to_markdown(field_list: Union[docutils.nodes.field_list,
docutils.nodes.docinfo]) -> str:
"""Processes a docutils field list and converts it to markdown.
Args, Vars, Returns, and Raises sections are predefined, other sections
will be created corresponding to their field names.

:param field_list: A docutils field list to convert. Can also be a docinfo
in case, e.g., only :returns: is specified without any summary text
:returns: The generated Markdown. However, it is not pure Markdown, as
the field descriptions have been processed with `docutils.process_string` - they
therefore are already converted to HTML
"""
# Sort the field list so that types come last - in case someone defines first the type then
# the parameter
field_list.children.sort(key=lambda field: 'type' in field[0].rawsource.split()[0])

# Predefined sections for the generated markdown
tags_to_section_map = {
('param', 'parameter', 'arg', 'argument', 'key', 'keyword'): 'Args',
('var', 'ivar', 'cvar'): 'Vars',
('return', 'returns'): 'Returns',
('raise', 'raises'): 'Raises'
}
sections: OrderedDict[str, List[Dict[str, Optional[str]]]] = OrderedDict(
[('Args', []), ('Vars', []), ('Returns', []), ('Raises', [])])

# Process the fields
for field in field_list:
field_name = field.children[0]
field_body = field.children[1]

# Split the field name into its components
split = field_name.rawsource.split()
tag = split[0]
type_ = split[1] if len(split) == 3 else None
name = split[2] if len(split) == 3 else split[1] if len(split) == 2 else None

# Fill the sections
try:
section: Optional[str] = [section for tags, section in tags_to_section_map.items()
if tag in tags][0]
except IndexError: # Field is not corresponding to a predefined section like Args
section = None

if section is not None:
sections[section].append({'name': name,
'type': type_,
'body': _ToMarkdown._reST_string_to_html(
field_body.rawsource)})
elif tag == 'rtype':
# Set the return type. Assumes that at most one :return: has been specified
try:
sections['Returns'][0]['type'] = field_body.rawsource
except IndexError: # Only return type is specified
sections['Returns'].append({'name': None,
'type': field_body.rawsource.strip(),
'body': ''})
elif 'type' in tag:
section = 'Vars' if tag == 'vartype' else 'Args'
try:
param_or_var = [x for x in sections[section] if x['name'] == name][0]
param_or_var['type'] = field_body.rawsource.strip()
except IndexError: # Only parameter (or variable) type is specified
sections[section].append({'name': name,
'type': field_body.rawsource.strip(),
'body': ''})
elif tag == 'meta':
pass # Meta fields should be excluded from the final output
else:
# Generate sections for tags not covered yet
new_section = sections.get(tag, [])
new_section.append({'name': name,
'type': type_,
'body': _ToMarkdown._reST_string_to_html(field_body.rawsource)})
sections[tag] = new_section

# Generate the markdown for this field list
markdown = []
for section, fields in sections.items():
if len(fields) > 0: # Skip empty sections
markdown.append(f'{section}:\n-----=')

for field in fields:
field['body'] = field['body'].replace('\n', '\n ') # For proper indentation
if field['name'] or field['type']:
markdown.append(
_ToMarkdown._deflist(*_ToMarkdown._fix_indent(field['name'],
field['type'],
field['body'])))
else: # For fields with no name or type (e.g. Returns without type spec)
text = _ToMarkdown._fix_indent(
field['name'], field['type'], field['body'])[2]
markdown.append(f': {text}')

return '\n'.join(markdown)

@staticmethod
def reST(text: str) -> str:
"""
Convert `text` in reST-style docstring format to Markdown - with embedded html
for paragraphs and field descriptions - to be further converted later.
"""
doctree = docutils.core.publish_doctree(text)

generated_markdown = []
for section in doctree:
if section.tagname in ('field_list', 'docinfo'):
generated_markdown.append(_ToMarkdown._reST_field_list_to_markdown(section))
else:
generated_markdown.append(_ToMarkdown._reST_node_to_html(section, doctree))

return '\n'.join(generated_markdown)

@staticmethod
def _admonition(match, module=None, limit_types=None):
indent, type, value, text = match.groups()
Expand Down Expand Up @@ -406,8 +569,9 @@ def to_html(text: str, *,
latex_math: bool = False):
"""
Returns HTML of `text` interpreted as `docformat`. `__docformat__` is respected
if present, otherwise Numpydoc and Google-style docstrings are assumed,
as well as pure Markdown.
if present, otherwise it is inferred whether it's reST-style, or Numpydoc
and Google-style docstrings. Pure Markdown and reST directives are also assumed
and processed if docformat has not been specified.

`module` should be the documented module (so the references can be
resolved) and `link` is the hyperlinking function like the one in the
Expand All @@ -430,38 +594,56 @@ def to_markdown(text: str, *,
module: pdoc.Module = None, link: Callable[..., str] = None):
"""
Returns `text`, assumed to be a docstring in `docformat`, converted to markdown.
`__docformat__` is respected
if present, otherwise Numpydoc and Google-style docstrings are assumed,
as well as pure Markdown.
`__docformat__` is respected if present, otherwise it is inferred whether it's
reST-style, or Numpydoc and Google-style docstrings. Pure Markdown and reST directives
are also assumed and processed if docformat has not been specified.

`module` should be the documented module (so the references can be
resolved) and `link` is the hyperlinking function like the one in the
example template.
"""
if not docformat:
docformat = str(getattr(getattr(module, 'obj', None), '__docformat__', 'numpy,google '))
docformat = str(getattr(getattr(module, 'obj', None), '__docformat__', ''))

# Infer docformat if it hasn't been specified
if docformat == '':
reST_tags = ['param ', 'arg ', 'type ', 'raise ', 'except ', 'return', 'rtype']
reST_regex = fr'^:(?:{"|".join(reST_tags)}).*?:'
kernc marked this conversation as resolved.
Show resolved Hide resolved
found_reST_tags = re.findall(reST_regex, text, re.MULTILINE)

# Assume reST-style docstring if any of the above specified tags is present at the
# beginning of a line. Could make this more robust, e.g., by checking against the
# amount of found google or numpy tags
docformat = 'restructuredtext ' if len(found_reST_tags) > 0 else 'numpy,google '

docformat, *_ = docformat.lower().split()
if not (set(docformat.split(',')) & {'', 'numpy', 'google'}):

if not (set(docformat.split(',')) & {'', 'numpy', 'google', 'restructuredtext'}):
warn('__docformat__ value {!r} in module {!r} not supported. '
'Supported values are: numpy, google.'.format(docformat, module))
'Supported values are: numpy, google, restructuredtext.'.format(docformat, module))
docformat = 'numpy,google'

with _fenced_code_blocks_hidden(text) as result:
text = result[0]

text = _ToMarkdown.admonitions(text, module)
if 'restructuredtext' not in docformat: # Will be handled by docutils
text = _ToMarkdown.admonitions(text, module)

if 'google' in docformat:
text = _ToMarkdown.google(text)

text = _ToMarkdown.doctests(text)
text = _ToMarkdown.raw_urls(text)
if 'restructuredtext' not in docformat: # Will be handled by docutils
text = _ToMarkdown.raw_urls(text)

# If doing both, do numpy after google, otherwise google-style's
# headings are incorrectly interpreted as numpy params
if 'numpy' in docformat:
text = _ToMarkdown.numpy(text)

if 'restructuredtext' in docformat:
text = _ToMarkdown.reST(text)

if module and link:
# Hyperlink markdown code spans not within markdown hyperlinks.
# E.g. `code` yes, but not [`code`](...). RE adapted from:
Expand Down
118 changes: 117 additions & 1 deletion pdoc/test/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -234,7 +234,7 @@ def test_html_ref_links(self):

def test_docformat(self):
with self.assertWarns(UserWarning) as cm,\
run_html(EXAMPLE_MODULE, config='docformat="restructuredtext"'):
run_html(EXAMPLE_MODULE, config='docformat="epytext"'):
self._basic_html_assertions()
self.assertIn('numpy', cm.warning.args[0])

Expand Down Expand Up @@ -1435,6 +1435,121 @@ def test_doctests(self):
html = to_html(text, module=self._module, link=self._link)
self.assertEqual(html, expected)

def test_reST(self):
expected = '''<p>Summary line.</p>
<p>Some stuff to test like <a class="reference external" href="http://www.python.org">http://www.python.org</a> or <a class="reference external" href="http://www.python.org">link_text</a>.
Also <em>italic</em> and <strong>bold</strong>. And lists:</p>
<ul class="simple">
<li><p>1</p></li>
<li><p>2</p></li>
</ul>
<ol class="arabic simple">
<li><p>Item</p></li>
<li><p>Item</p></li>
</ol>
<h2 id="args">Args:</h2>
<dl>
<dt><strong><code>arg1</code></strong> :&ensp;<code>int</code></dt>
<dd><p>Text1</p></dd>
<dt><strong><code>arg2</code></strong> :&ensp;<code>Optional[List[Tuple[str]]]</code></dt>
<dd><p>Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed
diam nonumy eirmod tempor invidunt</p></dd>
<dt><strong><code>arg_arg_3</code></strong> :&ensp;<code>Dict[int, Dict[str, Any]]</code></dt>
<dd><p>Y:=H^T<em>X!&#64;#$%^&amp;&amp;</em>()_[]{}';'::{[( :param just_in_case:</p></dd>
<dt><strong><code>another_parameter</code></strong> :&ensp;<code>str</code></dt>
<dd>&nbsp;</dd>
</dl>
<h2 id="vars">Vars:</h2>
<dl>
<dt><strong><code>x</code></strong></dt>
<dd><p>Description of variable x</p></dd>
<dt><strong><code>y</code></strong> :&ensp;<code>List[bool]</code></dt>
<dd><p>Description of variable y</p></dd>
<dt><strong><code>z</code></strong> :&ensp;<code>str</code></dt>
<dd><p>Descriptions can also be placed in a new line.</p>
<p>And span multiple lines.</p></dd>
</dl>
<h2 id="returns">Returns:</h2>
<dl>
<dt><code>bool</code></dt>
<dd><p>True. Or False. Depends</p></dd>
</dl>
<p>A paragraph to split the field list into two.</p>
<h2 id="returns_1">Returns:</h2>
<dl>
<dd>
<dl>
<dt><p>Now with more &quot;s&quot;</p></dt>
<dt>Raises:</dt>
<dt>-----=</dt>
<dt><strong><code>Exception</code></strong></dt>
<dd><p>Raised occasionally</p></dd>
</dl>
</dd>
<dt><strong><code>ZeroDivisionError</code></strong></dt>
<dd><p>You know why and when</p></dd>
</dl>
<p>Some more tests below:</p>
<h2 id="args_1">Args:</h2>
<dl>
<dt><strong><code>z</code></strong> :&ensp;<code>str</code></dt>
<dd>&nbsp;</dd>
</dl>
<h2 id="vars_1">Vars:</h2>
<dl>
<dt><strong><code>x</code></strong> :&ensp;<code>int</code></dt>
<dd>&nbsp;</dd>
</dl>
<h2 id="returns_2">Returns:</h2>
<dl>
<dt><code>int</code></dt>
<dd>&nbsp;</dd>
</dl>
<p>And now for some other stuff</p>
<div class="admonition admonition-todo">
<p class="admonition-title">TODO</p>
<p>Create something.</p>
</div>
<div class="admonition admonition-example">
<p class="admonition-title">Example</p>
<p>Image shows something.</p>
<img alt="https://www.debian.org/logos/openlogo-nd-100.png" src="https://www.debian.org/logos/openlogo-nd-100.png" />
<div class="admonition note">
<p class="admonition-title">Note</p>
<p>Can only nest admonitions two levels.</p>
</div>
</div>
<p><img alt="https://www.debian.org/logos/openlogo-nd-100.png" src="https://www.debian.org/logos/openlogo-nd-100.png" /></p>
<p>Now you know.</p>
<div class="admonition warning">
<p class="admonition-title">Warning</p>
<p>Some warning
lines.</p>
</div>
<ul>
<li><p>Describe some func in a list
across multiple lines:</p>
<blockquote>
<div class="admonition admonition-deprecated-since-3-1">
<p class="admonition-title">Deprecated since 3.1</p>
<p>Use <cite>spam</cite> instead.</p>
</div>
<div class="admonition admonition-added-in-version-2-5">
<p class="admonition-title">Added in version 2.5</p>
<p>The <em>spam</em> parameter.</p>
</div>
</blockquote>
</li>
</ul>
<div class="admonition caution">
<p class="admonition-title">Caution!</p>
<p>Don't touch this!</p>
</div>''' # noqa: 501
text = inspect.getdoc(self._docmodule.reST)
html = to_html(text, module=self._module, link=self._link)

self.assertEqual(html, expected)

def test_reST_directives(self):
expected = '''<div class="admonition todo">
<p class="admonition-title">TODO</p>
Expand Down Expand Up @@ -1476,6 +1591,7 @@ def test_reST_directives(self):
</div>'''
text = inspect.getdoc(self._docmodule.reST_directives)
html = to_html(text, module=self._module, link=self._link)

self.assertEqual(html, expected)

def test_reST_include(self):
Expand Down