Merge pull request #37 from krassowski/plain-text

Add plain text and cPython docstring support
python-lsp · Feb 19, 2024 · 4e8f011 · 4e8f011
2 parents e43172e + 08286b6
commit 4e8f011
Show file tree

Hide file tree

Showing 8 changed files with 282 additions and 2 deletions.
diff --git a/docstring_to_markdown/__init__.py b/docstring_to_markdown/__init__.py
@@ -1,7 +1,9 @@
+from .cpython import cpython_to_markdown
 from .google import google_to_markdown, looks_like_google
+from .plain import looks_like_plain_text, plain_text_to_markdown
 from .rst import looks_like_rst, rst_to_markdown
 
-__version__ = "0.13"
+__version__ = "0.14"
 
 
 class UnknownFormatError(Exception):
@@ -15,4 +17,11 @@ def convert(docstring: str) -> str:
     if looks_like_google(docstring):
         return google_to_markdown(docstring)
 
+    if looks_like_plain_text(docstring):
+        return plain_text_to_markdown(docstring)
+
+    cpython = cpython_to_markdown(docstring)
+    if cpython:
+        return cpython
+
     raise UnknownFormatError()
diff --git a/docstring_to_markdown/_utils.py b/docstring_to_markdown/_utils.py
@@ -0,0 +1,5 @@
+from re import sub
+
+
+def escape_markdown(text: str) -> str:
+    """Backslash-escape Markdown control characters in `text`.
+
+    Every backslash, `#`, `*`, `_`, `[` and `]` gets a backslash put in
+    front of it; all other characters are returned unchanged.
+    """
+    special_characters = r'([\\#*_[\]])'
+    escaped_form = r'\\\1'
+    return sub(special_characters, escaped_form, text)
diff --git a/docstring_to_markdown/cpython.py b/docstring_to_markdown/cpython.py
@@ -0,0 +1,37 @@
+from typing import Union, List
+from re import fullmatch
+
+from ._utils import escape_markdown
+
+def _is_cpython_signature_line(line: str) -> bool:
+    """Tell whether `line` is formatted like a CPython signature line.
+
+    Such a line is a call-like signature, an arrow (`->` or `-->`) and a
+    description of the result, for example:
+
+    str(bytes_or_buffer[, encoding[, errors]]) -> str
+    """
+    signature_pattern = r'\w+\(\S*(, \S+)*(\[, \S+\])*\)\s--?>\s.+'
+    # A match object is always truthy, so `bool` yields True exactly
+    # when the whole line matched.
+    return bool(fullmatch(signature_pattern, line))
+
+
+def cpython_to_markdown(text: str) -> Union[str, None]:
+    """Convert a CPython-style docstring to Markdown.
+
+    The docstring has to open with one or more signature lines (as
+    recognised by `_is_cpython_signature_line`), each optionally followed
+    by continuation lines indented with four spaces. The signature block
+    is wrapped in a fenced code block; all remaining lines are
+    Markdown-escaped and placed after it.
+
+    Returns:
+        The Markdown text, or `None` when `text` does not start with a
+        CPython signature line (this includes empty input).
+    """
+    signature_lines: List[str] = []
+    other_lines: List[str] = []
+    for line in text.splitlines():
+        if other_lines:
+            # The signature block has already ended: everything after it
+            # is description, even when indented, so it must not be moved
+            # up into the code fence.
+            other_lines.append(line)
+        elif _is_cpython_signature_line(line):
+            signature_lines.append(line)
+        elif not signature_lines:
+            # The text does not open with a signature line.
+            return None
+        elif line.startswith('    '):
+            # Indented continuation of the preceding signature line.
+            signature_lines.append(line)
+        else:
+            other_lines.append(line)
+    if not signature_lines:
+        # Empty input: the loop body never ran, so there is nothing that
+        # identifies the text as a CPython docstring.
+        return None
+    return '\n'.join([
+        '```',
+        '\n'.join(signature_lines),
+        '```',
+        escape_markdown('\n'.join(other_lines))
+    ])
+
+def looks_like_cpython(text: str) -> bool:
+    """Report whether `cpython_to_markdown` is able to convert `text`."""
+    converted = cpython_to_markdown(text)
+    return converted is not None
+
+
+__all__ = ['looks_like_cpython', 'cpython_to_markdown']
diff --git a/docstring_to_markdown/plain.py b/docstring_to_markdown/plain.py
@@ -0,0 +1,27 @@
+from re import fullmatch
+from ._utils import escape_markdown
+
+
+def looks_like_plain_text(value: str) -> bool:
+    """Check whether `value` reads as plain English text that needs no escaping.
+
+    The whole string must be built from:
+    - word characters (the regex `\\w` class) and whitespace
+    - a full stop, bang, question mark, closing parenthesis or quote that is
+      preceded by a word character and followed by whitespace or the end of
+      the string
+    - a comma, colon or semicolon that is preceded by a word character and
+      followed by whitespace
+    - a dash or single quote between two word characters (like in `e-mail`)
+    - an opening quote or parenthesis that is preceded by a word character
+      and one whitespace character
+
+    Anything else is rejected, which covers:
+    - underscores (checked explicitly, as `\\w` would otherwise allow them)
+    - square brackets (used in markdown a lot)
+    """
+    has_underscore = '_' in value
+    plain_text_pattern = r"((\w[\.!\?\)'\"](\s|$))|(\w[,:;]\s)|(\w[-']\w)|(\w\s['\"\(])|\w|\s)+"
+    return not has_underscore and fullmatch(plain_text_pattern, value) is not None
+
+
+def plain_text_to_markdown(text: str) -> str:
+    """Return `text` with Markdown control characters escaped."""
+    escaped = escape_markdown(text)
+    return escaped
+
+__all__ = ['looks_like_plain_text', 'plain_text_to_markdown']
diff --git a/setup.cfg b/setup.cfg
@@ -37,7 +37,7 @@ docstring-to-markdown = py.typed
 addopts =
     --pyargs tests
     --cov docstring_to_markdown
-    --cov-fail-under=98
+    --cov-fail-under=99
     --cov-report term-missing:skip-covered
     -p no:warnings
     --flake8

diff --git a/tests/test_convert.py b/tests/test_convert.py
@@ -0,0 +1,57 @@
+from docstring_to_markdown import convert, UnknownFormatError
+import pytest
+
+CPYTHON = """\
+bool(x) -> bool
+
+Returns True when the argument x is true, False otherwise.\
+"""
+
+
+CPYTHON_MD = """\
+```
+bool(x) -> bool
+```
+
+Returns True when the argument x is true, False otherwise.\
+"""
+
+GOOGLE = """Do **something**.
+
+Args:
+    a: some arg
+    b: some arg
+"""
+
+GOOGLE_MD = """Do **something**.
+
+#### Args
+
+- `a`: some arg
+- `b`: some arg
+"""
+
+
+RST = "Please see `this link<https://example.com>`__."
+RST_MD = "Please see [this link](https://example.com)."
+
+
+def test_convert_cpython():
+    """`convert` turns the CPython sample into its expected Markdown."""
+    converted = convert(CPYTHON)
+    assert converted == CPYTHON_MD
+
+
+def test_convert_plain_text():
+    """`convert` returns a plain English sentence unchanged."""
+    sentence = 'This is a sentence.'
+    assert convert(sentence) == 'This is a sentence.'
+
+
+def test_convert_google():
+    """`convert` turns the Google-style sample into its expected Markdown."""
+    converted = convert(GOOGLE)
+    assert converted == GOOGLE_MD
+
+
+def test_convert_rst():
+    """`convert` turns the reStructuredText sample into its expected Markdown."""
+    converted = convert(RST)
+    assert converted == RST_MD
+
+
+def test_unknown_format():
+    """`convert` raises `UnknownFormatError` when no converter accepts the text."""
+    unsupported = 'ARGS [arg1, arg2] RETURNS: str OR None'
+    with pytest.raises(UnknownFormatError):
+        convert(unsupported)
diff --git a/tests/test_cpython.py b/tests/test_cpython.py
@@ -0,0 +1,103 @@
+import pytest
+from docstring_to_markdown.cpython import looks_like_cpython, cpython_to_markdown
+
+BOOL = """\
+bool(x) -> bool
+
+Returns True when the argument x is true, False otherwise.\
+"""
+
+BOOL_MD = """\
+```
+bool(x) -> bool
+```
+
+Returns True when the argument x is true, False otherwise.\
+"""
+
+BYTES = """\
+bytes(iterable_of_ints) -> bytes
+bytes(string, encoding[, errors]) -> bytes
+bytes(bytes_or_buffer) -> immutable copy of bytes_or_buffer
+bytes(int) -> bytes object of size given by the parameter initialized with null bytes
+bytes() -> empty bytes object
+
+Construct an immutable array of bytes from:
+  - an iterable yielding integers in range(256)
+  - a text string encoded using the specified encoding
+  - any object implementing the buffer API.
+  - an integer\
+"""
+
+COLLECTIONS_DEQUEUE = """\
+deque([iterable[, maxlen]]) --> deque object
+
+A list-like sequence optimized for data accesses near its endpoints.\
+"""
+
+DICT = """\
+dict() -> new empty dictionary
+dict(mapping) -> new dictionary initialized from a mapping object's
+    (key, value) pairs
+dict(iterable) -> new dictionary initialized as if via:
+    d = {}
+    for k, v in iterable:
+        d[k] = v
+dict(**kwargs) -> new dictionary initialized with the name=value pairs
+    in the keyword argument list.  For example:  dict(one=1, two=2)\
+"""
+
+STR = """\
+str(object='') -> str
+str(bytes_or_buffer[, encoding[, errors]]) -> str
+
+Create a new string object from the given object. If encoding or
+errors is specified, then the object must expose a data buffer
+that will be decoded using the given encoding and error handler.
+Otherwise, returns the result of object.__str__() (if defined)
+or repr(object).\
+"""
+
+STR_MD = """\
+```
+str(object='') -> str
+str(bytes_or_buffer[, encoding[, errors]]) -> str
+```
+
+Create a new string object from the given object. If encoding or
+errors is specified, then the object must expose a data buffer
+that will be decoded using the given encoding and error handler.
+Otherwise, returns the result of object.\\_\\_str\\_\\_() (if defined)
+or repr(object).\
+"""
+
+
+@pytest.mark.parametrize("text", [BYTES, STR, DICT, BOOL, COLLECTIONS_DEQUEUE])
+def test_accepts_cpython_docstrings(text):
+    """Each sample CPython docstring is recognised as CPython-style."""
+    recognised = looks_like_cpython(text)
+    assert recognised is True
+
+
+@pytest.mark.parametrize("text", [
+    "[link label](https://link)",
+    "![image label](https://source)",
+    "Some **bold** text",
+    "More __bold__ text",
+    "Some *italic* text",
+    "More _italic_ text",
+    "This is a sentence.",
+    "Exclamation!",
+    "Can I ask a question?",
+    "Let's send an e-mail",
+    "Parentheses (are) fine (really)",
+    "Double \"quotes\" and single 'quotes'"
+])
+def test_rejects_markdown_and_plain_text(text):
+    """Markdown snippets and plain sentences are not taken for CPython docstrings."""
+    recognised = looks_like_cpython(text)
+    assert recognised is False
+
+
+def test_conversion_bool():
+    """The `bool` docstring converts to its expected Markdown."""
+    converted = cpython_to_markdown(BOOL)
+    assert converted == BOOL_MD
+
+
+def test_conversion_str():
+    """The `str` docstring converts to its expected Markdown."""
+    converted = cpython_to_markdown(STR)
+    assert converted == STR_MD
diff --git a/tests/test_plain.py b/tests/test_plain.py
@@ -0,0 +1,42 @@
+import pytest
+from docstring_to_markdown.plain import looks_like_plain_text, plain_text_to_markdown
+
+
+@pytest.mark.parametrize("text", [
+    "This is a sentence.",
+    "Exclamation!",
+    "Can I ask a question?",
+    "Let's send an e-mail",
+    "Parentheses (are) fine (really)",
+    "Double \"quotes\" and single 'quotes'"
+])
+def test_accepts_english(text):
+    """Ordinary English sentences are classified as plain text."""
+    accepted = looks_like_plain_text(text)
+    assert accepted is True
+
+
+@pytest.mark.parametrize("text", [
+    "[link label](https://link)",
+    "![image label](https://source)",
+    "Some **bold** text",
+    "More __bold__ text",
+    "Some *italic* text",
+    "More _italic_ text"
+])
+def test_rejects_markdown(text):
+    """Text containing Markdown markup is not classified as plain text."""
+    accepted = looks_like_plain_text(text)
+    assert accepted is False
+
+
+@pytest.mark.parametrize("text", [
+    "def test():",
+    "print(123)",
+    "func(arg)",
+    "2 + 2",
+    "var['test']",
+    "x = 'test'"
+])
+def test_rejects_code(text):
+    """Code-like snippets are not classified as plain text."""
+    accepted = looks_like_plain_text(text)
+    assert accepted is False
+
+
+def test_conversion():
+    """A word without special characters is returned unchanged."""
+    converted = plain_text_to_markdown("test")
+    assert converted == "test"