diff --git a/Doc/library/urllib.request.rst b/Doc/library/urllib.request.rst
index 88e93ba6b002eb..15ba22b78f5c17 100644
--- a/Doc/library/urllib.request.rst
+++ b/Doc/library/urllib.request.rst
@@ -1354,6 +1354,23 @@ The following example uses no proxies at all, overriding environment settings::
    ...     f.read().decode('utf-8')
    ...
 
+.. _urllib-request-cli:
+
+:mod:`urllib.request` can also be invoked directly using the :option:`-m`
+switch of the interpreter with a ``URL`` argument::
+
+    python -m urllib.request https://python.org/
+
+By default, the downloaded data is printed to stdout. The option ``-o/--output``
+specifies an output file where the downloaded data is stored instead of being
+printed::
+
+    python -m urllib.request https://python.org/ --output python.html
+
+If the output file already exists, its content is overwritten.
+
+.. versionadded:: 3.11
+
 Legacy interface
 ----------------
 
diff --git a/Lib/test/test_urllib2_localnet.py b/Lib/test/test_urllib2_localnet.py
index 36fb05d3db0e2a..8d408d93e2bf5e 100644
--- a/Lib/test/test_urllib2_localnet.py
+++ b/Lib/test/test_urllib2_localnet.py
@@ -7,6 +7,9 @@
 import threading
 import unittest
 import hashlib
+import subprocess
+import sys
+import tempfile
 
 from test.support import hashlib_helper
 from test.support import threading_helper
@@ -660,6 +663,38 @@ def test_line_iteration(self):
                              (index, len(lines[index]), len(line)))
         self.assertEqual(index + 1, len(lines))
 
+    def test_download_to_stdout(self):
+        content = b"My hovercraft is full of eels."
+        handler = self.start_server([(200, [], content)])
+        proc = subprocess.run(
+            [sys.executable, "-m", "urllib.request",
+             f"http://localhost:{handler.port}"],
+            capture_output=True
+        )
+        self.assertEqual(proc.stdout, content)
+        self.assertEqual(proc.stderr, b"")
+
+    def test_download_to_file(self):
+        content = b"I will not buy this record; it is scratched."
+        handler = self.start_server([(200, [], content)]*2)
+
+        with tempfile.TemporaryDirectory() as directory:
+            for option in ["--output", "-o"]:
+                filename = os.path.join(
+                    directory, f"download-test{option}.txt"
+                )
+                proc = subprocess.run(
+                    [sys.executable, "-m", "urllib.request",
+                     f"http://localhost:{handler.port}",
+                     option, filename],
+                    capture_output=True
+                )
+                with open(filename, "rb") as f:
+                    file_content = f.read()
+                self.assertEqual(proc.stdout, b"")
+                self.assertEqual(proc.stderr, b"")
+                self.assertEqual(file_content, content)
+
 
 def setUpModule():
     thread_info = threading_helper.threading_setup()
diff --git a/Lib/urllib/request.py b/Lib/urllib/request.py
index fd6fc36aee04b3..9dd6b12c52104b 100644
--- a/Lib/urllib/request.py
+++ b/Lib/urllib/request.py
@@ -2781,3 +2781,47 @@ def proxy_bypass(host):
     # By default use environment variables
     getproxies = getproxies_environment
     proxy_bypass = proxy_bypass_environment
+
+
+def _download():
+    """Command-line interface behind ``python -m urllib.request``.
+
+    Download the URL given as positional argument and write the raw bytes
+    to stdout or, with -o/--output, to the named file.  If the download
+    fails, print a message to stderr and exit with status 1.
+    """
+    import argparse
+
+    parser = argparse.ArgumentParser(
+        description="Download the provided URL (FTP/HTTP/HTTPS supported) "
+        "and print it to stdout by default. If specified, write to OUTPUT "
+        "instead."
+    )
+    parser.add_argument("URL", help="(encoded) URL to download")
+    parser.add_argument(
+        "-o",
+        "--output",
+        type=argparse.FileType('wb'), default=sys.stdout.buffer,
+        help="write to OUTPUT instead of stdout"
+    )
+    args = parser.parse_args()
+
+    buffer = memoryview(bytearray(32768))
+    try:
+        with urlopen(args.URL) as response:
+            while n_bytes_read := response.readinto(buffer):
+                args.output.write(buffer[:n_bytes_read])
+    except URLError as exc:
+        # Report on stderr so the message cannot end up inside the
+        # downloaded data when stdout is the destination.
+        print(f"Error while downloading '{args.URL}': {exc.reason}",
+              file=sys.stderr)
+        sys.exit(1)
+    finally:
+        # Also runs when sys.exit() above raises SystemExit.
+        if args.output is not sys.stdout.buffer:
+            args.output.close()
+
+
+if __name__ == "__main__":
+    _download()
diff --git a/Misc/ACKS b/Misc/ACKS
index 23c92abb4d02a7..0698028900605c 100644
--- a/Misc/ACKS
+++ b/Misc/ACKS
@@ -76,6 +76,7 @@ Jason Asbahr
 David Ascher
 Ammar Askar
 Neil Aspinall
+Peter Åstrand
 Chris AtLee
 Aymeric Augustin
 Andres Ayala
@@ -491,6 +492,7 @@ Daniel Ellis
 Phil Elson
 David Ely
 Victor van den Elzen
+Vlad Emelianov
 Jeff Epler
 Tom Epperly
 Gökcen Eraslan
@@ -1384,6 +1386,7 @@ Jean-François Piéronne
 Oleg Plakhotnyuk
 Anatoliy Platonov
 Marcel Plch
+Thomas Pohl
 Remi Pointel
 Jon Poler
 Ariel Poliak
@@ -1998,8 +2001,6 @@ Tarek Ziadé
 Jelle Zijlstra
 Gennadiy Zlobin
 Doug Zongker
-Peter Åstrand
-Vlad Emelianov
 Andrey Doroschenko
 
 (Entries should be added in rough alphabetical order by last names)
diff --git a/Misc/NEWS.d/next/Library/2021-10-26-07-02-51.bpo-45466.DOzSv2.rst b/Misc/NEWS.d/next/Library/2021-10-26-07-02-51.bpo-45466.DOzSv2.rst
new file mode 100644
index 00000000000000..6f8f9cf2fda30b
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2021-10-26-07-02-51.bpo-45466.DOzSv2.rst
@@ -0,0 +1,3 @@
+The :mod:`urllib.request` module can now download files (e.g.
+``python -m urllib.request https://python.org/``). For more
+info, see ``python -m urllib.request -h``. Patch by Thomas Pohl.