From 3fbcf94a34334cce4558809498965eeb2e69d17d Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Thu, 7 May 2026 22:06:57 +0300 Subject: [PATCH 1/5] gh-79638: Test other HTTP error codes besides 403 in test_robotparser --- Lib/test/test_robotparser.py | 83 +++++++++++++++++++++++++++--------- 1 file changed, 64 insertions(+), 19 deletions(-) diff --git a/Lib/test/test_robotparser.py b/Lib/test/test_robotparser.py index 3ea0ec66fbfbe9..4985d9cffb8947 100644 --- a/Lib/test/test_robotparser.py +++ b/Lib/test/test_robotparser.py @@ -646,26 +646,24 @@ def test_group_without_user_agent(self): ) class BaseLocalNetworkTestCase: - def setUp(self): + @classmethod + def setUpClass(cls): + cls.addClassCleanup(print, 'CLEANED UP') # clear _opener global variable - self.addCleanup(urllib.request.urlcleanup) + cls.addClassCleanup(urllib.request.urlcleanup) - self.server = HTTPServer((socket_helper.HOST, 0), self.RobotHandler) + cls.server = HTTPServer((socket_helper.HOST, 0), cls.RobotHandler) + cls.addClassCleanup(cls.server.server_close) - self.t = threading.Thread( + t = threading.Thread( name='HTTPServer serving', - target=self.server.serve_forever, + target=cls.server.serve_forever, # Short poll interval to make the test finish quickly. # Time between requests is short enough that we won't wake # up spuriously too many times. kwargs={'poll_interval':0.01}) - self.t.daemon = True # In case this function raises. - self.t.start() - - def tearDown(self): - self.server.shutdown() - self.t.join() - self.server.server_close() + cls.enterClassContext(threading_helper.start_threads([t])) + cls.addClassCleanup(cls.server.shutdown) SAMPLE_ROBOTS_TXT = b'''\ @@ -687,7 +685,6 @@ def do_GET(self): def log_message(self, format, *args): pass - @threading_helper.reap_threads def testRead(self): # Test that reading a weird robots.txt doesn't fail. addr = self.server.server_address @@ -702,31 +699,79 @@ def testRead(self): self.assertTrue(parser.can_fetch(agent, url + '/utf8/')) self.assertFalse(parser.can_fetch(agent, url + '/utf8/\U0001f40d')) self.assertFalse(parser.can_fetch(agent, url + '/utf8/%F0%9F%90%8D')) - self.assertFalse(parser.can_fetch(agent, url + '/utf8/\U0001f40d')) self.assertTrue(parser.can_fetch(agent, url + '/non-utf8/')) self.assertFalse(parser.can_fetch(agent, url + '/non-utf8/%F0')) self.assertFalse(parser.can_fetch(agent, url + '/non-utf8/\U0001f40d')) self.assertFalse(parser.can_fetch(agent, url + '/%2F[spam]/path')) -class PasswordProtectedSiteTestCase(BaseLocalNetworkTestCase, unittest.TestCase): +class HttpErrorsTestCase(BaseLocalNetworkTestCase, unittest.TestCase): class RobotHandler(BaseHTTPRequestHandler): def do_GET(self): - self.send_error(403, "Forbidden access") + self.send_error(self.server.return_code) def log_message(self, format, *args): pass - @threading_helper.reap_threads - def testPasswordProtectedSite(self): + def setUp(self): + # Make sure that a valid code is set in the test. + self.server.return_code = None + + def testUnauthorized(self): + self.server.return_code = 401 + addr = self.server.server_address + url = f'http://{socket_helper.HOST}:{addr[1]}' + robots_url = url + "/robots.txt" + parser = urllib.robotparser.RobotFileParser() + parser.set_url(url) + parser.read() + self.assertFalse(parser.can_fetch("*", robots_url)) + self.assertFalse(parser.can_fetch("*", url + '/some/file.html')) + + def testForbidden(self): + self.server.return_code = 403 + addr = self.server.server_address + url = f'http://{socket_helper.HOST}:{addr[1]}' + robots_url = url + "/robots.txt" + parser = urllib.robotparser.RobotFileParser() + parser.set_url(url) + parser.read() + self.assertFalse(parser.can_fetch("*", robots_url)) + self.assertFalse(parser.can_fetch("*", url + '/some/file.html')) + + def testNotFound(self): + self.server.return_code = 404 addr = self.server.server_address - url = 'http://' + socket_helper.HOST + ':' + str(addr[1]) + url = f'http://{socket_helper.HOST}:{addr[1]}' + robots_url = url + "/robots.txt" + parser = urllib.robotparser.RobotFileParser() + parser.set_url(url) + parser.read() + self.assertTrue(parser.can_fetch("*", robots_url)) + self.assertTrue(parser.can_fetch("*", url + '/path/file.html')) + + def testTeapot(self): + self.server.return_code = 418 + addr = self.server.server_address + url = f'http://{socket_helper.HOST}:{addr[1]}' + robots_url = url + "/robots.txt" + parser = urllib.robotparser.RobotFileParser() + parser.set_url(url) + parser.read() + self.assertTrue(parser.can_fetch("*", robots_url)) + self.assertTrue(parser.can_fetch("*", url + '/pot-1?milk-type=Cream')) + + def testServiceUnavailable(self): + self.server.return_code = 503 + addr = self.server.server_address + url = f'http://{socket_helper.HOST}:{addr[1]}' robots_url = url + "/robots.txt" parser = urllib.robotparser.RobotFileParser() parser.set_url(url) parser.read() self.assertFalse(parser.can_fetch("*", robots_url)) + self.assertFalse(parser.can_fetch("*", url + '/path/file.html')) @support.requires_working_socket() From 0aa7bac8e377397c14e2e4cb1f356db1037a32b1 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Fri, 8 May 2026 19:50:39 +0300 Subject: [PATCH 2/5] Use per-test threads. --- Lib/test/test_robotparser.py | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/Lib/test/test_robotparser.py b/Lib/test/test_robotparser.py index 4985d9cffb8947..5ae429d74d4793 100644 --- a/Lib/test/test_robotparser.py +++ b/Lib/test/test_robotparser.py @@ -646,24 +646,23 @@ def test_group_without_user_agent(self): ) class BaseLocalNetworkTestCase: - @classmethod - def setUpClass(cls): - cls.addClassCleanup(print, 'CLEANED UP') + def setUp(self): # clear _opener global variable - cls.addClassCleanup(urllib.request.urlcleanup) + self.addCleanup(urllib.request.urlcleanup) - cls.server = HTTPServer((socket_helper.HOST, 0), cls.RobotHandler) - cls.addClassCleanup(cls.server.server_close) + self.server = HTTPServer((socket_helper.HOST, 0), self.RobotHandler) + self.addCleanup(self.server.server_close) t = threading.Thread( name='HTTPServer serving', - target=cls.server.serve_forever, + target=self.server.serve_forever, # Short poll interval to make the test finish quickly. # Time between requests is short enough that we won't wake # up spuriously too many times. kwargs={'poll_interval':0.01}) - cls.enterClassContext(threading_helper.start_threads([t])) - cls.addClassCleanup(cls.server.shutdown) + t.daemon = True # In case this function raises. + self.enterContext(threading_helper.start_threads([t])) + self.addCleanup(self.server.shutdown) SAMPLE_ROBOTS_TXT = b'''\ @@ -699,6 +698,7 @@ def testRead(self): self.assertTrue(parser.can_fetch(agent, url + '/utf8/')) self.assertFalse(parser.can_fetch(agent, url + '/utf8/\U0001f40d')) self.assertFalse(parser.can_fetch(agent, url + '/utf8/%F0%9F%90%8D')) + self.assertFalse(parser.can_fetch(agent, url + '/utf8/\U0001f40d')) self.assertTrue(parser.can_fetch(agent, url + '/non-utf8/')) self.assertFalse(parser.can_fetch(agent, url + '/non-utf8/%F0')) self.assertFalse(parser.can_fetch(agent, url + '/non-utf8/\U0001f40d')) @@ -715,6 +715,7 @@ def log_message(self, format, *args): pass def setUp(self): + super().setUp() # Make sure that a valid code is set in the test. self.server.return_code = None From 79cb888234984ee5446372ee2cacfd5e14abc02a Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Fri, 8 May 2026 20:46:53 +0300 Subject: [PATCH 3/5] Call urlcleanup() in NetworkTestCase. --- Lib/test/test_robotparser.py | 1 + 1 file changed, 1 insertion(+) diff --git a/Lib/test/test_robotparser.py b/Lib/test/test_robotparser.py index 5ae429d74d4793..5c59a22cc0ed5e 100644 --- a/Lib/test/test_robotparser.py +++ b/Lib/test/test_robotparser.py @@ -787,6 +787,7 @@ def setUpClass(cls): with socket_helper.transient_internet(cls.base_url): cls.parser = urllib.robotparser.RobotFileParser(cls.robots_txt) cls.parser.read() + urllib.request.urlcleanup() def url(self, path): return '{}{}{}'.format( From 28bbd608451764720c98ca92581bb44355611341 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Fri, 8 May 2026 21:24:59 +0300 Subject: [PATCH 4/5] Calll urlcleanup() for test_read_404. --- Lib/test/test_robotparser.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Lib/test/test_robotparser.py b/Lib/test/test_robotparser.py index 5c59a22cc0ed5e..565e91885e43f2 100644 --- a/Lib/test/test_robotparser.py +++ b/Lib/test/test_robotparser.py @@ -784,10 +784,10 @@ class NetworkTestCase(unittest.TestCase): @classmethod def setUpClass(cls): support.requires('network') + cls.addClassCleanup(urllib.request.urlcleanup) with socket_helper.transient_internet(cls.base_url): cls.parser = urllib.robotparser.RobotFileParser(cls.robots_txt) cls.parser.read() - urllib.request.urlcleanup() def url(self, path): return '{}{}{}'.format( From ff4fc426fbfe207fd5a83cf01dee1cf9a110e964 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Fri, 8 May 2026 22:55:00 +0300 Subject: [PATCH 5/5] Revert "Use per-test threads." This reverts commit 0aa7bac8e377397c14e2e4cb1f356db1037a32b1. --- Lib/test/test_robotparser.py | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/Lib/test/test_robotparser.py b/Lib/test/test_robotparser.py index 565e91885e43f2..cd1477037e94b7 100644 --- a/Lib/test/test_robotparser.py +++ b/Lib/test/test_robotparser.py @@ -646,23 +646,23 @@ def test_group_without_user_agent(self): ) class BaseLocalNetworkTestCase: - def setUp(self): + @classmethod + def setUpClass(cls): # clear _opener global variable - self.addCleanup(urllib.request.urlcleanup) + cls.addClassCleanup(urllib.request.urlcleanup) - self.server = HTTPServer((socket_helper.HOST, 0), self.RobotHandler) - self.addCleanup(self.server.server_close) + cls.server = HTTPServer((socket_helper.HOST, 0), cls.RobotHandler) + cls.addClassCleanup(cls.server.server_close) t = threading.Thread( name='HTTPServer serving', - target=self.server.serve_forever, + target=cls.server.serve_forever, # Short poll interval to make the test finish quickly. # Time between requests is short enough that we won't wake # up spuriously too many times. kwargs={'poll_interval':0.01}) - t.daemon = True # In case this function raises. - self.enterContext(threading_helper.start_threads([t])) - self.addCleanup(self.server.shutdown) + cls.enterClassContext(threading_helper.start_threads([t])) + cls.addClassCleanup(cls.server.shutdown) SAMPLE_ROBOTS_TXT = b'''\ @@ -698,7 +698,6 @@ def testRead(self): self.assertTrue(parser.can_fetch(agent, url + '/utf8/')) self.assertFalse(parser.can_fetch(agent, url + '/utf8/\U0001f40d')) self.assertFalse(parser.can_fetch(agent, url + '/utf8/%F0%9F%90%8D')) - self.assertFalse(parser.can_fetch(agent, url + '/utf8/\U0001f40d')) self.assertTrue(parser.can_fetch(agent, url + '/non-utf8/')) self.assertFalse(parser.can_fetch(agent, url + '/non-utf8/%F0')) self.assertFalse(parser.can_fetch(agent, url + '/non-utf8/\U0001f40d')) @@ -715,7 +714,6 @@ def log_message(self, format, *args): pass def setUp(self): - super().setUp() # Make sure that a valid code is set in the test. self.server.return_code = None