from readthedocs.doc_builder.backends.mkdocs import ProxyPythonName, yaml_load_safely

content = """
int: 3
float: !!float 3
function: !!python/name:python_function
other_function: !!python/name:module.other.function
unknown: !!python/module:python_module
"""


def test_yaml_load_safely():
    """
    Check what ``yaml_load_safely`` builds from the ``content`` fixture.

    Standard tags keep their native types, ``!!python/name:`` tags become
    ``ProxyPythonName`` objects carrying the dotted name, and any other
    special tag is loaded as ``None``.
    """
    function_name = "python_function"
    other_function_name = "module.other.function"
    expected = dict(
        [
            ("int", 3),
            ("float", 3.0),
            ("function", ProxyPythonName(function_name)),
            ("other_function", ProxyPythonName(other_function_name)),
            ("unknown", None),
        ]
    )

    data = yaml_load_safely(content)

    assert data == expected

    # Exact type checks on purpose: subclasses or coerced values must not pass.
    exact_types = (
        ("int", int),
        ("float", float),
        ("function", ProxyPythonName),
        ("other_function", ProxyPythonName),
    )
    for key, wanted_type in exact_types:
        assert type(data[key]) is wanted_type

    # The proxies keep the tag suffix so it can be dumped again later.
    assert data["function"].value == function_name
    assert data["other_function"].value == other_function_name
class MkdocsHTML(BaseMkdocs):
    builder = "build"
    build_dir = "_readthedocs/html"


class ProxyPythonName(yaml.YAMLObject):

    """
    Inert stand-in for a ``!!python/name:<suffix>`` YAML value.

    Only the tag suffix (the dotted name) is kept in ``value``; nothing is
    imported or executed. Keeping the suffix allows the value to be dumped
    back with the same tag.
    """

    def __init__(self, value):
        self.value = value

    def __eq__(self, other):
        # Defer to the other operand for foreign types instead of raising
        # ``AttributeError`` on objects without ``.value`` (``None``, ``str``).
        if not isinstance(other, ProxyPythonName):
            return NotImplemented
        return self.value == other.value

    def __hash__(self):
        # Defining ``__eq__`` alone sets ``__hash__`` to ``None``; keep the
        # proxies hashable (and consistent with ``__eq__``) so they can be
        # used as dict keys or set members.
        return hash((ProxyPythonName, self.value))

    def __repr__(self):
        return f"{type(self).__name__}({self.value!r})"


class SafeLoader(yaml.SafeLoader):  # pylint: disable=too-many-ancestors

    """
    Safe YAML loader.

    This loader parses special ``!!python/name:`` tags without actually
    importing or executing code. Every other special tag is ignored.

    Borrowed from https://stackoverflow.com/a/57121993
    Issue https://github.com/readthedocs/readthedocs.org/issues/7461
    """

    def ignore_unknown(self, node):  # pylint: disable=unused-argument
        """Construct ``None`` for any tag that has no registered constructor."""
        return None

    def construct_python_name(self, suffix, node):  # pylint: disable=unused-argument
        """Wrap the ``!!python/name:`` tag suffix in a ``ProxyPythonName``."""
        return ProxyPythonName(suffix)


class SafeDumper(yaml.SafeDumper):

    """
    Safe YAML dumper.

    This dumper allows to avoid losing values of special tags that
    were parsed by our safe loader.
    """

    def represent_name(self, data):
        """Emit ``data`` as an empty scalar tagged ``!!python/name:<value>``."""
        return self.represent_scalar("tag:yaml.org,2002:python/name:" + data.value, "")


# ``!!python/name:<suffix>`` tags are turned into proxies; the ``None`` entry
# is the fallback used for every tag without a constructor of its own.
SafeLoader.add_multi_constructor(
    "tag:yaml.org,2002:python/name:", SafeLoader.construct_python_name
)
SafeLoader.add_constructor(None, SafeLoader.ignore_unknown)
SafeDumper.add_representer(ProxyPythonName, SafeDumper.represent_name)


def yaml_load_safely(content):
    """
    Uses ``SafeLoader`` loader to skip unknown tags.

    When a YAML contains ``!!python/name:int`` it will store the ``int``
    suffix temporarily to be able to re-dump it later. We need this to avoid
    executing random code, but still support these YAML files without
    information loss.

    :param content: YAML document to parse, as accepted by ``yaml.load``
        (a string or an open file object).
    :returns: the parsed document, where ``!!python/name:`` values are
        ``ProxyPythonName`` instances and values with any other unknown tag
        are ``None``.
    """
    # NOTE: ``yaml.load`` is only acceptable on untrusted input because the
    # loader derives from ``yaml.SafeLoader`` and never imports or calls
    # anything. Do not replace it with a non-safe loader.
    return yaml.load(content, Loader=SafeLoader)