Skip to content

Commit

Permalink
feat: Support multiple outputs, glob pattern for inputs, custom title…
Browse files Browse the repository at this point in the history
… and header

Breaking changes: configuration format in mkdocs.yml and public API changed
  • Loading branch information
pawamoy committed Mar 31, 2024
1 parent 0d7aecd commit b45a81e
Show file tree
Hide file tree
Showing 7 changed files with 139 additions and 72 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
/.pdm-build/
/htmlcov/
/site/
/share/

# cache
.cache/
Expand Down
18 changes: 11 additions & 7 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -34,9 +34,16 @@ pipx install mkdocs-manpage[preprocess]
plugins:
- manpage:
pages:
- index.md
- usage.md
- reference/api.md
- title: My Project # defaults to site name
output: share/man/man1/my-project.1
inputs:
- index.md
- usage.md
- title: my-project API
header: Python Library APIs # defaults to common header for section 3 (see `man man`)
output: share/man/man3/my_project.3
inputs:
- reference/my_project/*.md
```

To enable/disable the plugin with an environment variable:
Expand All @@ -54,9 +61,6 @@ Then set the environment variable and run MkDocs:
MANPAGE=true mkdocs build
```

The manpage will be written into the root of the site directory
and named `manpage.1`.

### Pre-processing HTML

This plugin works by concatenating the HTML from all selected pages
Expand Down Expand Up @@ -94,7 +98,7 @@ def to_remove(tag: Tag) -> bool:
return False


def preprocess(soup: BeautifulSoup) -> None:
def preprocess(soup: BeautifulSoup, output: str) -> None:
for element in soup.find_all(to_remove):
element.decompose()
```
Expand Down
23 changes: 14 additions & 9 deletions mkdocs.yml
Original file line number Diff line number Diff line change
Expand Up @@ -141,15 +141,20 @@ plugins:
- manpage:
preprocess: scripts/preprocess.py
pages:
- index.md
- changelog.md
- credits.md
- license.md
- contributing.md
- code_of_conduct.md
- insiders/index.md
- insiders/installation.md
- insiders/changelog.md
- title: MkDocs Manpage
header: MkDocs plugins
output: share/man/man1/mkdocs-manpage.1
inputs:
- index.md
- changelog.md
- contributing.md
- credits.md
- license.md
- title: mkdocs-manpage API
header: Python Library APIs
output: share/man/man3/mkdocs_manpage.3
inputs:
- reference/mkdocs_manpage/*.md
- minify:
minify_html: !ENV [DEPLOY, false]
- group:
Expand Down
3 changes: 2 additions & 1 deletion scripts/preprocess.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,11 +27,12 @@ def to_remove(tag: Tag) -> bool:
return False


def preprocess(soup: Soup) -> None:
def preprocess(soup: Soup, output: str) -> None: # noqa: ARG001
"""Pre-process the soup by removing elements.
Parameters:
soup: The soup to modify.
output: The manpage output path.
"""
for element in soup.find_all(to_remove):
element.decompose()
11 changes: 10 additions & 1 deletion src/mkdocs_manpage/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,18 @@
from mkdocs.config.base import Config as BaseConfig


class PageConfig(BaseConfig):
"""Sub-config for each manual page."""

# Title of the manual page, passed to pandoc as its `title` variable.
# The README documents it as defaulting to the site name.
title = mkconf.Type(str)
# Header of the manual page, passed to pandoc as its `header` variable.
# The plugin falls back to a per-section entry of `section_headers`.
header = mkconf.Type(str)
# Path of the generated manual page; the plugin joins it to the directory
# of the MkDocs configuration file and reads the section from its suffix.
output = mkconf.File(exists=False)
# Source pages to concatenate: page URIs, or patterns containing `*`
# that the plugin expands in its `on_files` hook.
inputs = mkconf.ListOfItems(mkconf.Type(str))


class PluginConfig(BaseConfig):
"""Configuration options for the plugin."""

enabled = mkconf.Type(bool, default=True)
pages = mkconf.ListOfItems(mkconf.Type(str))
preprocess = mkconf.File(exists=True)
pages = mkconf.ListOfItems(mkconf.SubConfig(PageConfig))
150 changes: 98 additions & 52 deletions src/mkdocs_manpage/plugin.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,18 @@

from __future__ import annotations

import fnmatch
import subprocess
import tempfile
from collections import defaultdict
from datetime import date
from importlib import metadata
from pathlib import Path
from shutil import which
from typing import TYPE_CHECKING

from mkdocs.config.defaults import MkDocsConfig
from mkdocs.exceptions import PluginError
from mkdocs.plugins import BasePlugin

from mkdocs_manpage.config import PluginConfig
Expand All @@ -20,6 +24,7 @@
from typing import Any

from mkdocs.config.defaults import MkDocsConfig
from mkdocs.structure.files import Files
from mkdocs.structure.pages import Page


Expand All @@ -32,6 +37,19 @@ def _log_pandoc_output(output: str) -> None:
logger.debug(f"pandoc: {line.strip()}")


# Default manual page headers, keyed by manual section number (as a string).
# `on_post_build` looks up the section taken from the output file suffix and
# falls back to the entry for section "1" when the suffix is not listed here.
section_headers = {
"1": "User Commands",
"2": "System Calls Manual",
"3": "Library Functions Manual",
"4": "Kernel Interfaces Manual",
"5": "File Formats Manual",
"6": "Games Manual",
"7": "Miscellaneous Information Manual",
"8": "System Administration",
"9": "Kernel Routines",
}


class MkdocsManpagePlugin(BasePlugin[PluginConfig]):
"""The MkDocs plugin to generate manpages.
Expand All @@ -47,7 +65,16 @@ class MkdocsManpagePlugin(BasePlugin[PluginConfig]):
mkdocs_config: MkDocsConfig

def __init__(self) -> None: # noqa: D107
self.pages: dict[str, str] = {}
self.html_pages: dict[str, dict[str, str]] = defaultdict(dict)

def _expand_inputs(self, inputs: list[str], page_uris: list[str]) -> list[str]:
expanded: list[str] = []
for input_file in inputs:
if "*" in input_file:
expanded.extend(fnmatch.filter(page_uris, input_file))
else:
expanded.append(input_file)
return expanded

def on_config(self, config: MkDocsConfig) -> MkDocsConfig | None:
"""Save the global MkDocs configuration.
Expand All @@ -65,6 +92,23 @@ def on_config(self, config: MkDocsConfig) -> MkDocsConfig | None:
self.mkdocs_config = config
return config

def on_files(self, files: Files, *, config: MkDocsConfig) -> Files | None:  # noqa: ARG002
    """Expand inputs for manual pages.

    Hook for the [`on_files` event](https://www.mkdocs.org/user-guide/plugins/#on_files).
    In this hook we expand the inputs of each manual page (glob patterns using `*`).

    Parameters:
        files: The collection of MkDocs files.
        config: The MkDocs configuration.

    Returns:
        The collection of files, returned as received.
    """
    # The list of source URIs is the same for every manual page: build it once.
    page_uris = list(files.src_uris)
    for manpage in self.config.pages:
        # NOTE(review): the configured patterns are overwritten in place by their
        # expansion, so a later call only sees concrete URIs. Confirm this is
        # acceptable when the hook runs more than once (e.g. rebuilds).
        manpage["inputs"] = self._expand_inputs(manpage["inputs"], page_uris=page_uris)
    return files

def on_page_content(self, html: str, *, page: Page, **kwargs: Any) -> str | None: # noqa: ARG002
"""Record pages contents.
Expand All @@ -77,9 +121,10 @@ def on_page_content(self, html: str, *, page: Page, **kwargs: Any) -> str | None
"""
if not self.config.enabled:
return None
if page.file.src_uri in self.config.pages or not self.config.pages:
logger.debug(f"Adding page {page.file.src_uri} to manpage")
self.pages[page.file.src_uri] = html
for manpage in self.config.pages:
if page.file.src_uri in manpage["inputs"]:
logger.debug(f"Adding page {page.file.src_uri} to manpage {manpage['output']}")
self.html_pages[manpage["output"]][page.file.src_uri] = html
return html

def on_post_build(self, config: MkDocsConfig, **kwargs: Any) -> None: # noqa: ARG002
Expand All @@ -97,51 +142,52 @@ def on_post_build(self, config: MkDocsConfig, **kwargs: Any) -> None: # noqa: A
if pandoc is None:
logger.debug("Could not find pandoc executable, trying to call 'pandoc' directly")
pandoc = "pandoc"
pages = []
if self.config.pages:
for page in self.config.pages:
try:
pages.append(self.pages[page])
except KeyError:
logger.error(f"No page with path {page}") # noqa: TRY400
else:
pages = list(self.pages.values())
html = "\n\n".join(pages)

if self.config.preprocess:
html = preprocess(html, self.config.preprocess)

output_file = Path(config.site_dir, "manpage.1")
with tempfile.NamedTemporaryFile("w", prefix="mkdocs_manpage_", suffix=".1.html") as temp_file:
temp_file.write(html)
pandoc_variables = [
f"title:{self.mkdocs_config.site_name}",
"section:1",
f"date:{date.today().strftime('%Y-%m-%d')}", # noqa: DTZ011
f"footer:mkdocs-manpage v{metadata.version('mkdocs-manpage')}",
"header:User Commands",
]
pandoc_options = [
"--verbose",
"--standalone",
"--wrap=none",
]
pandoc_command = [
pandoc,
*pandoc_options,
*[f"-V{var}" for var in pandoc_variables],
"--to",
"man",
temp_file.name,
"-o",
str(output_file),
]
pandoc_process = subprocess.run(
pandoc_command, # noqa: S603
stdout=subprocess.PIPE,
stderr=subprocess.STDOUT,
text=True,
check=False,
)
_log_pandoc_output(pandoc_process.stdout)
logger.info(f"Generated manpage at {output_file}")

for page in self.config.pages:
try:
html = "\n\n".join(self.html_pages[page["output"]][input_page] for input_page in page["inputs"])
except KeyError as error:
raise PluginError(str(error)) from error

if self.config.get("preprocess"):
html = preprocess(html, self.config["preprocess"], page["output"])

output_file = Path(config.config_file_path).parent.joinpath(page["output"])
output_file.parent.mkdir(parents=True, exist_ok=True)
section = output_file.suffix[1:]
section_header = page.get("header", section_headers.get(section, section_headers["1"]))
title = page.get("title", self.mkdocs_config.site_name)

with tempfile.NamedTemporaryFile("w", prefix="mkdocs_manpage_", suffix=".1.html") as temp_file:
temp_file.write(html)
pandoc_variables = [
f"title:{title}",
f"section:{section}",
f"date:{date.today().strftime('%Y-%m-%d')}", # noqa: DTZ011
f"footer:mkdocs-manpage v{metadata.version('mkdocs-manpage')}",
f"header:{section_header}",
]
pandoc_options = [
"--verbose",
"--standalone",
"--wrap=none",
]
pandoc_command = [
pandoc,
*pandoc_options,
*[f"-V{var}" for var in pandoc_variables],
"--to",
"man",
temp_file.name,
"-o",
str(output_file),
]
pandoc_process = subprocess.run(
pandoc_command, # noqa: S603
stdout=subprocess.PIPE,
stderr=subprocess.STDOUT,
text=True,
check=False,
)
_log_pandoc_output(pandoc_process.stdout)
logger.info(f"Generated manpage {output_file}")
5 changes: 3 additions & 2 deletions src/mkdocs_manpage/preprocess.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,14 +24,15 @@ def _load_module(module_path: str) -> ModuleType:
raise RuntimeError("Spec or loader is null")


def preprocess(html: str, module_path: str) -> str:
def preprocess(html: str, module_path: str, output: str) -> str:
"""Pre-process HTML with user-defined functions.
Parameters:
html: The HTML to process before conversion to a manpage.
module_path: The path of a Python module containing a `preprocess` function.
The function must accept two arguments: `soup` and `output` (the output path of the manual page).
The `soup` argument is an instance of [`bs4.BeautifulSoup`][].
output: The output path of the relevant manual page.
Returns:
The processed HTML.
Expand All @@ -49,7 +50,7 @@ def preprocess(html: str, module_path: str) -> str:
raise PluginError(f"Could not load module: {error}") from error
soup = BeautifulSoup(html, "lxml")
try:
module.preprocess(soup)
module.preprocess(soup, output)
except Exception as error: # noqa: BLE001
raise PluginError(f"Could not pre-process HTML: {error}") from error
return str(soup)

0 comments on commit b45a81e

Please sign in to comment.