From 8afb67e672ee11f2736154870aa51971a20bfc02 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Sat, 23 Jan 2021 17:12:30 +0200 Subject: [PATCH 1/4] bpo-42988: Fix security issue in the pydoc server Route `/getfile?key=` no longer allows to get content of arbitrary file. It serves now only source files of Python modules available to pydoc. --- Lib/pydoc.py | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/Lib/pydoc.py b/Lib/pydoc.py index 282a9179983407a..767efd1710ea697 100755 --- a/Lib/pydoc.py +++ b/Lib/pydoc.py @@ -2544,9 +2544,38 @@ def bltinlink(name): 'key = %s' % key, '#ffffff', '#ee77aa', '
'.join(results)) return 'Search Results', contents + def validate_source_path(path): + for importer, modname, ispkg in pkgutil.walk_packages(): + try: + spec = pkgutil._get_spec(importer, modname) + except SyntaxError: + # raised by tests for bad coding cookies or BOM + continue + loader = spec.loader + if hasattr(loader, 'get_source'): + try: + source = loader.get_source(modname) + except Exception: + continue + if hasattr(loader, 'get_filename'): + sourcepath = loader.get_filename(modname) + if path == sourcepath: + return + else: + try: + module = importlib._bootstrap._load(spec) + except ImportError: + continue + sourcepath = getattr(module, '__file__', None) + if path == sourcepath: + return + else: + raise ValueError('not found') + def html_getfile(path): """Get and display a source file listing safely.""" path = urllib.parse.unquote(path) + validate_source_path(path) with tokenize.open(path) as fp: lines = html.escape(fp.read()) body = '
%s
' % lines From e1c135376796a3143411789c5bdc69112dedf77c Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Tue, 26 Jan 2021 15:43:51 +0200 Subject: [PATCH 2/4] Fix test. --- Lib/pydoc.py | 2 +- Lib/test/test_pydoc.py | 9 ++++++++- .../Security/2021-01-26-15-00-41.bpo-42988.PGpcqg.rst | 3 +++ 3 files changed, 12 insertions(+), 2 deletions(-) create mode 100644 Misc/NEWS.d/next/Security/2021-01-26-15-00-41.bpo-42988.PGpcqg.rst diff --git a/Lib/pydoc.py b/Lib/pydoc.py index 767efd1710ea697..231ff154d8589c3 100755 --- a/Lib/pydoc.py +++ b/Lib/pydoc.py @@ -2570,7 +2570,7 @@ def validate_source_path(path): if path == sourcepath: return else: - raise ValueError('not found') + raise ValueError('not found {found}') def html_getfile(path): """Get and display a source file listing safely.""" diff --git a/Lib/test/test_pydoc.py b/Lib/test/test_pydoc.py index 2f502627f4d0a28..54a87a6b7692e9a 100644 --- a/Lib/test/test_pydoc.py +++ b/Lib/test/test_pydoc.py @@ -1381,11 +1381,18 @@ def test_url_requests(self): for url, title in requests: self.call_url_handler(url, title) - path = string.__file__ + # File in restricted walk_packages path. + path = __file__ title = "Pydoc: getfile " + path url = "getfile?key=" + path self.call_url_handler(url, title) + # File outside of restricted walk_packages path. + path = pydoc.__file__ + title = "Pydoc: Error - getfile?key=" + path + url = "getfile?key=" + path + self.call_url_handler(url, title) + class TestHelper(unittest.TestCase): def test_keywords(self): diff --git a/Misc/NEWS.d/next/Security/2021-01-26-15-00-41.bpo-42988.PGpcqg.rst b/Misc/NEWS.d/next/Security/2021-01-26-15-00-41.bpo-42988.PGpcqg.rst new file mode 100644 index 000000000000000..f38c1987f97b536 --- /dev/null +++ b/Misc/NEWS.d/next/Security/2021-01-26-15-00-41.bpo-42988.PGpcqg.rst @@ -0,0 +1,3 @@ +The ``/getfile?key=`` route of the :mod:`pydoc` Web server checks now that +the argument is the file path of the source of one of modules. It prevents +reading arbitrary files. From 54210669174e8bb017e0a465fb2ac10a4df8c5ff Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Tue, 26 Jan 2021 15:53:01 +0200 Subject: [PATCH 3/4] Add more tests. --- Lib/test/test_pydoc.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/Lib/test/test_pydoc.py b/Lib/test/test_pydoc.py index 54a87a6b7692e9a..7356ff9df121b70 100644 --- a/Lib/test/test_pydoc.py +++ b/Lib/test/test_pydoc.py @@ -1369,7 +1369,11 @@ def test_url_requests(self): ("topics", "Pydoc: Topics"), ("keywords", "Pydoc: Keywords"), ("pydoc", "Pydoc: module pydoc"), + ("test.test_pydoc", "Pydoc: module test.test_pydoc"), ("get?key=pydoc", "Pydoc: module pydoc"), + ("get?key=test.test_pydoc", "Pydoc: module test.test_pydoc"), + ("get?key=html", "Pydoc: package html"), + ("get?key=sys", "Pydoc: built-in module sys"), ("search?key=pydoc", "Pydoc: Search Results"), ("topic?key=def", "Pydoc: KEYWORD def"), ("topic?key=STRINGS", "Pydoc: TOPIC STRINGS"), From cbc2b02344dcf038c0b925a5e898dae30137c14c Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Thu, 28 Jan 2021 17:15:29 +0200 Subject: [PATCH 4/4] Ignore all errors and warnings during import. --- Lib/pydoc.py | 46 +++++++++++++++++++++++++--------------------- 1 file changed, 25 insertions(+), 21 deletions(-) diff --git a/Lib/pydoc.py b/Lib/pydoc.py index 231ff154d8589c3..f1090c095b07f8e 100755 --- a/Lib/pydoc.py +++ b/Lib/pydoc.py @@ -2545,32 +2545,36 @@ def bltinlink(name): return 'Search Results', contents def validate_source_path(path): - for importer, modname, ispkg in pkgutil.walk_packages(): - try: - spec = pkgutil._get_spec(importer, modname) - except SyntaxError: - # raised by tests for bad coding cookies or BOM - continue - loader = spec.loader - if hasattr(loader, 'get_source'): + with warnings.catch_warnings(): + warnings.filterwarnings('ignore') # ignore problems during import + def onerror(modname): + pass + for importer, modname, ispkg in pkgutil.walk_packages(onerror=onerror): try: - source = loader.get_source(modname) - except Exception: + spec = pkgutil._get_spec(importer, modname) + except SyntaxError: + # raised by tests for bad coding cookies or BOM continue - if hasattr(loader, 'get_filename'): - sourcepath = loader.get_filename(modname) + loader = spec.loader + if hasattr(loader, 'get_source'): + try: + source = loader.get_source(modname) + except Exception: + continue + if hasattr(loader, 'get_filename'): + sourcepath = loader.get_filename(modname) + if path == sourcepath: + return + else: + try: + module = importlib._bootstrap._load(spec) + except ImportError: + continue + sourcepath = getattr(module, '__file__', None) if path == sourcepath: return else: - try: - module = importlib._bootstrap._load(spec) - except ImportError: - continue - sourcepath = getattr(module, '__file__', None) - if path == sourcepath: - return - else: - raise ValueError('not found {found}') + raise ValueError('not found {found}') def html_getfile(path): """Get and display a source file listing safely."""