From 4ba9434e9d988e3535a353f9aa9b76f527ef7fae Mon Sep 17 00:00:00 2001 From: "Gregory P. Smith" Date: Mon, 5 Dec 2022 12:55:45 -0800 Subject: [PATCH 1/3] gh-100001: Omit control characters in http.server stderr logs. (GH-100002) Replace control characters in http.server.BaseHTTPRequestHandler.log_message with an escaped \xHH sequence to avoid causing problems for the terminal the output is printed to. (cherry picked from commit d8ab0a4dfa48f881b4ac9ab857d2e9de42f72828) Co-authored-by: Gregory P. Smith --- Doc/library/http.server.rst | 7 +++++++ Lib/http/server.py | 11 +++++++++- Lib/test/test_httpservers.py | 21 ++++++++++++++++++- ...-12-05-01-39-10.gh-issue-100001.uD05Fc.rst | 6 ++++++ 4 files changed, 43 insertions(+), 2 deletions(-) create mode 100644 Misc/NEWS.d/next/Security/2022-12-05-01-39-10.gh-issue-100001.uD05Fc.rst diff --git a/Doc/library/http.server.rst b/Doc/library/http.server.rst index 4aa10e26f61916..802bbe2a1d0a7e 100644 --- a/Doc/library/http.server.rst +++ b/Doc/library/http.server.rst @@ -499,3 +499,10 @@ Security Considerations :class:`SimpleHTTPRequestHandler` will follow symbolic links when handling requests, this makes it possible for files outside of the specified directory to be served. + +Earlier versions of Python did not scrub control characters from the +log messages emitted to stderr from ``python -m http.server`` or the +default :class:`BaseHTTPRequestHandler` ``.log_message`` +implementation. This could allow to remote clients connecting to your +server to send nefarious control codes to your terminal. + diff --git a/Lib/http/server.py b/Lib/http/server.py index 6bf9084341a659..44b730e751b179 100644 --- a/Lib/http/server.py +++ b/Lib/http/server.py @@ -93,6 +93,7 @@ import html import http.client import io +import itertools import mimetypes import os import posixpath @@ -563,6 +564,10 @@ def log_error(self, format, *args): self.log_message(format, *args) + # https://en.wikipedia.org/wiki/List_of_Unicode_characters#Control_codes + _control_char_table = str.maketrans( + {c: fr'\x{c:02x}' for c in itertools.chain(range(0x20), range(0x7f,0xa0))}) + def log_message(self, format, *args): """Log an arbitrary message. @@ -578,12 +583,16 @@ def log_message(self, format, *args): The client ip and current date/time are prefixed to every message. + Unicode control characters are replaced with escaped hex + before writing the output to stderr. + """ + message = format % args sys.stderr.write("%s - - [%s] %s\n" % (self.address_string(), self.log_date_time_string(), - format%args)) + message.translate(self._control_char_table))) def version_string(self): """Return the server software version string.""" diff --git a/Lib/test/test_httpservers.py b/Lib/test/test_httpservers.py index 4acf7a6fea4485..db9ee29e5fbe00 100644 --- a/Lib/test/test_httpservers.py +++ b/Lib/test/test_httpservers.py @@ -26,7 +26,7 @@ import datetime import threading from unittest import mock -from io import BytesIO +from io import BytesIO, StringIO import unittest from test import support @@ -982,6 +982,25 @@ def verify_http_server_response(self, response): match = self.HTTPResponseMatch.search(response) self.assertIsNotNone(match) + def test_unprintable_not_logged(self): + # We call the method from the class directly as our Socketless + # Handler subclass overrode it... nice for everything BUT this test. + self.handler.client_address = ('127.0.0.1', 1337) + log_message = BaseHTTPRequestHandler.log_message + with mock.patch.object(sys, 'stderr', StringIO()) as fake_stderr: + log_message(self.handler, '/foo') + log_message(self.handler, '/\033bar\000\033') + log_message(self.handler, '/spam %s.', 'a') + log_message(self.handler, '/spam %s.', '\033\x7f\x9f\xa0beans') + stderr = fake_stderr.getvalue() + self.assertNotIn('\033', stderr) # non-printable chars are caught. + self.assertNotIn('\000', stderr) # non-printable chars are caught. + lines = stderr.splitlines() + self.assertIn('/foo', lines[0]) + self.assertIn(r'/\x1bbar\x00\x1b', lines[1]) + self.assertIn('/spam a.', lines[2]) + self.assertIn('/spam \\x1b\\x7f\\x9f\xa0beans.', lines[3]) + def test_http_1_1(self): result = self.send_typical_request(b'GET / HTTP/1.1\r\n\r\n') self.verify_http_server_response(result[0]) diff --git a/Misc/NEWS.d/next/Security/2022-12-05-01-39-10.gh-issue-100001.uD05Fc.rst b/Misc/NEWS.d/next/Security/2022-12-05-01-39-10.gh-issue-100001.uD05Fc.rst new file mode 100644 index 00000000000000..a396e95cd83f82 --- /dev/null +++ b/Misc/NEWS.d/next/Security/2022-12-05-01-39-10.gh-issue-100001.uD05Fc.rst @@ -0,0 +1,6 @@ +``python -m http.server`` no longer allows terminal control characters sent +within a garbage request to be printed to the stderr server log. + +This is done by changing the :mod:`http.server` :class:`BaseHTTPRequestHandler` +``.log_message`` method to replace control characters with a ``\xHH`` hex escape +before printing. From 6df2dd24860c4a575d5500d0a609dfa5144e7977 Mon Sep 17 00:00:00 2001 From: "Gregory P. Smith [Google LLC]" Date: Mon, 5 Dec 2022 14:04:32 -0800 Subject: [PATCH 2/3] also escape \s (backport of PR #100038). --- Lib/http/server.py | 1 + 1 file changed, 1 insertion(+) diff --git a/Lib/http/server.py b/Lib/http/server.py index 44b730e751b179..cf8933c3dbd983 100644 --- a/Lib/http/server.py +++ b/Lib/http/server.py @@ -567,6 +567,7 @@ def log_error(self, format, *args): # https://en.wikipedia.org/wiki/List_of_Unicode_characters#Control_codes _control_char_table = str.maketrans( {c: fr'\x{c:02x}' for c in itertools.chain(range(0x20), range(0x7f,0xa0))}) + _control_char_table[ord('\\')] = r'\\' def log_message(self, format, *args): """Log an arbitrary message. From ff0519cabed1be45aed08a1540240d7c94c693ac Mon Sep 17 00:00:00 2001 From: "Gregory P. Smith [Google LLC]" Date: Mon, 5 Dec 2022 15:01:40 -0800 Subject: [PATCH 3/3] add versionadded and remove extra 'to' --- Doc/library/http.server.rst | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/Doc/library/http.server.rst b/Doc/library/http.server.rst index 802bbe2a1d0a7e..94647e99724b33 100644 --- a/Doc/library/http.server.rst +++ b/Doc/library/http.server.rst @@ -503,6 +503,8 @@ to be served. Earlier versions of Python did not scrub control characters from the log messages emitted to stderr from ``python -m http.server`` or the default :class:`BaseHTTPRequestHandler` ``.log_message`` -implementation. This could allow to remote clients connecting to your +implementation. This could allow remote clients connecting to your server to send nefarious control codes to your terminal. +.. versionadded:: 3.9.16 + scrubbing control characters from log messages