In [1]:
# Sequential download: the slow baseline that the concurrent versions below are compared against
import time
from pathlib import Path
from typing import Callable
import httpx

# Twenty two-letter country codes whose flags are downloaded
# (presumably the 20 most populous countries, going by the name).
POP20_CC = 'CN IN US ID BR PK NG BD RU JP MX PH VN ET EG DE IR TR CD FR'.split()
# Root URL under which each flag lives at <cc>/<cc>.gif.
BASE_URL = 'https://www.fluentpython.com/data/flags'
# Local directory that receives the downloaded images.
DEST_DIR = Path('downloaded')

def save_flag(img: bytes, filename: str) -> None:
    """Write the raw image bytes to ``DEST_DIR / filename``."""
    target = DEST_DIR / filename
    target.write_bytes(img)

def get_flag(cc: str) -> bytes:
    """Fetch the GIF for country code ``cc`` and return its bytes.

    The whole URL is lower-cased before the request. Redirects are followed,
    and a non-success HTTP status is raised via ``raise_for_status()``.
    """
    flag_url = f'{BASE_URL}/{cc}/{cc}.gif'.lower()
    response = httpx.get(flag_url, timeout=6.1, follow_redirects=True)
    response.raise_for_status()
    return response.content

def download_many(cc_list: list[str]) -> int:
    """Download and save each flag one after another, in alphabetical order.

    Prints every country code as soon as its flag is saved and returns the
    number of codes in ``cc_list``.
    """
    for code in sorted(cc_list):
        save_flag(get_flag(code), f'{code}.gif')
        # flush so progress shows immediately despite the missing newline
        print(code, end=' ', flush=True)
    return len(cc_list)

def main(downloader: Callable[[list[str]], int]) -> None:
    """Run ``downloader`` over ``POP20_CC`` and report how long it took.

    Creates ``DEST_DIR`` if it is missing, then prints the count returned by
    ``downloader`` together with the elapsed wall-clock time.
    """
    DEST_DIR.mkdir(exist_ok=True)
    started = time.perf_counter()
    count = downloader(POP20_CC)
    elapsed = time.perf_counter() - started
    print(f'\n{count} downloads in {elapsed:.2f}s')

# Time the sequential downloader over all of POP20_CC.
if __name__ == '__main__':
    main(download_many)

BD BR CD CN DE EG ET FR ID IN IR JP MX NG PH PK RU TR US VN 
20 downloads in 9.52s


In [4]:
# threads are useful for network requests
from concurrent import futures

def download_one(cc: str):
    """Download and save the flag for ``cc``, print the code, and return it."""
    save_flag(get_flag(cc), f'{cc}.gif')
    # flush so each code appears as soon as its download finishes
    print(cc, end=' ', flush=True)
    return cc

def downloaded_many(cc_list: list[str]) -> int:
    """Download the flags concurrently on a thread pool; return how many finished.

    ``executor.map`` schedules one ``download_one`` call per country code
    (alphabetical order). The results are consumed only after the ``with``
    block has exited, i.e. after the pool has been shut down.
    """
    ordered_codes = sorted(cc_list)
    with futures.ThreadPoolExecutor() as pool:
        results = pool.map(download_one, ordered_codes)
    # Iterating re-raises any exception that a worker raised.
    return sum(1 for _ in results)

# NOTE(review): download_many_thread is defined in a later cell (In [3]); on a
# fresh kernel that cell must run first or this call raises NameError.
# `downloaded_many`, defined just above, is not the function invoked here.
if __name__ == '__main__':
    main(download_many_thread)

Scheduled for BR: <Future at 0x1192c7940 state=running>
Scheduled for CN: <Future at 0x107ab46a0 state=running>
Scheduled for ID: <Future at 0x107ab7cd0 state=running>
Scheduled for IN: <Future at 0x107ab53f0 state=pending>
Scheduled for US: <Future at 0x107aff400 state=pending>
ID <Future at 0x107ab7cd0 state=finished returned str> result: 'ID'
BR <Future at 0x1192c7940 state=finished returned str> result: 'BR'
CN <Future at 0x107ab46a0 state=finished returned str> result: 'CN'
IN <Future at 0x107ab53f0 state=finished returned str> result: 'IN'
US <Future at 0x107aff400 state=finished returned str> result: 'US'

5 downloads in 1.11s


In [3]:
def download_many_thread(cc_list: list[str]) -> int:
    """Download flags with explicit futures and report each as it completes.

    Only the first five entries of ``cc_list`` are used and the pool is
    limited to three workers, so the printed output shows some futures
    pending while others are already running.

    Args:
        cc_list: Country codes; only ``cc_list[:5]`` is downloaded.

    Returns:
        The number of futures that completed (0 for an empty ``cc_list``).

    Raises:
        Whatever ``download_one`` raised in a worker; it is re-raised by
        ``future.result()``.
    """
    cc_list = cc_list[:5]
    # Bind count up front: with nothing to download the enumerate loop below
    # never runs, and `return count` would otherwise raise UnboundLocalError.
    count = 0
    with futures.ThreadPoolExecutor(max_workers=3) as executor:
        to_do: list[futures.Future] = []
        for cc in sorted(cc_list):
            future = executor.submit(download_one, cc)
            to_do.append(future)
            print(f'Scheduled for {cc}: {future}')
        # as_completed yields futures in completion order, not submission order.
        for count, future in enumerate(futures.as_completed(to_do), 1):
            res: str = future.result()
            print(f'{future} result: {res!r}')
    return count

In [2]:
# The prime checker from Chapter 19, rewritten to use ProcessPoolExecutor
import sys
from concurrent import futures # No need to import multiprocessing, SimpleQueue; concurrent.futures hides all that.
from time import perf_counter
from typing import NamedTuple

# (number, flag) pairs; the flag is presumably the expected primality of the
# number. Only the numbers are used below, via NUMBERS.
PRIME_FIXTURE = [
    (2, True),
    (142702110479723, True),
    (299593572317531, True),
    (3333333333333301, True),
    (3333333333333333, False),
    (3333335652092209, False),
    (4444444444444423, True),
    (4444444444444444, False),
    (4444444488888889, False),
    (5555553133149889, False),
    (5555555555555503, True),
    (5555555555555555, False),
    (6666666666666666, False),
    (6666666666666719, True),
    (6666667141414921, False),
    (7777777536340681, False),
    (7777777777777753, True),
    (7777777777777777, False),
    (9999999999999917, True),
    (9999999999999999, False),
]

# Just the numbers, in fixture order; this is the workload handed to the pool.
NUMBERS = [n for n, _ in PRIME_FIXTURE]

class PrimeResult(NamedTuple):
    """Outcome of one primality check, as returned by ``check``."""

    # the number that was tested
    n: int
    # the primality verdict reported for n
    flag: bool
    # seconds spent on the test, measured with perf_counter
    elapsed: float

def check(n: int) -> PrimeResult:
    """Time ``is_prime(n)`` and wrap the verdict in a ``PrimeResult``.

    Any exception raised during the check is printed and reported as
    ``PrimeResult(n, False, 0.0)`` instead of being propagated, so a failure
    is indistinguishable from "not prime" in the returned value.
    """
    try:
        started = perf_counter()
        verdict = is_prime(n)
        outcome = PrimeResult(n, verdict, perf_counter() - started)
    except Exception as exc:
        print(f"Error checking {n}: {exc}")
        outcome = PrimeResult(n, False, 0.0)
    return outcome

def main() -> None:
    """Check every entry of ``NUMBERS`` on a process pool and print the results.

    Numbers are submitted in descending order. ``executor.map`` yields results
    in submission order, so lines are printed in that same order. The total
    wall-clock time is printed at the end.
    """
    # The worker count is left to ProcessPoolExecutor (None). Reading it from
    # sys.argv[1] is disabled in this notebook version.
    workers = None
    pool = futures.ProcessPoolExecutor(workers)
    # _max_workers is a private attribute, read only to report the pool size.
    pool_size = pool._max_workers
    print(f'Checking {len(NUMBERS)} numbers with {pool_size} processes:')
    started = perf_counter()
    descending = sorted(NUMBERS, reverse=True)
    with pool:
        for result in pool.map(check, descending):
            n, prime, elapsed = result
            label = 'P' if prime else ' '
            print(f'{n:16} {label} {elapsed:9.6f}s')
    total = perf_counter() - started
    print(f'Total time: {total:.2f}s')

def is_prime(n: int) -> bool:
    """Return True if ``n`` is prime, using trial division by odd numbers.

    Args:
        n: The integer to test.

    Returns:
        False for n < 2 and for even n other than 2; otherwise True only when
        no odd number from 3 up to the integer square root of n divides n.
    """
    # Imported here because no cell in this notebook imports math at module
    # level; without it math.isqrt raised NameError for every odd n > 2.
    import math

    if n < 2:
        return False
    if n == 2:
        return True
    if n % 2 == 0:
        return False

    # A composite n has a divisor no larger than its integer square root.
    root = math.isqrt(n)
    for i in range(3, root + 1, 2):
        if n % i == 0:
            return False
    return True

# NOTE(review): the recorded output of this cell shows spawned workers failing
# with "Can't get attribute 'check' on <module '__main__'>" followed by
# BrokenProcessPool. Presumably the worker functions must live in an importable
# .py module for a process pool started from a notebook -- confirm.
if __name__ == '__main__':
    main()

Checking 20 numbers with 10 processes:


Process SpawnProcess-11:
Traceback (most recent call last):
  File "/Users/mathias/miniforge3/envs/myenv/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap
    self.run()
  File "/Users/mathias/miniforge3/envs/myenv/lib/python3.10/multiprocessing/process.py", line 108, in run
    self._target(*self._args, **self._kwargs)
  File "/Users/mathias/miniforge3/envs/myenv/lib/python3.10/concurrent/futures/process.py", line 240, in _process_worker
    call_item = call_queue.get(block=True)
  File "/Users/mathias/miniforge3/envs/myenv/lib/python3.10/multiprocessing/queues.py", line 122, in get
    return _ForkingPickler.loads(res)
AttributeError: Can't get attribute 'check' on <module '__main__' (built-in)>
Process SpawnProcess-13:
Traceback (most recent call last):
  File "/Users/mathias/miniforge3/envs/myenv/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap
    self.run()
  File "/Users/mathias/miniforge3/envs/myenv/lib/python3.10/multiprocessing/process.py"

BrokenProcessPool: A process in the process pool was terminated abruptly while the future was running or pending.

In [5]:
from time import sleep, strftime
from concurrent import futures

def display(*args):
    """Print ``args`` on one line, prefixed with the current ``[HH:MM:SS]`` time."""
    timestamp = strftime('[%H:%M:%S]')
    print(timestamp, end=' ')
    print(*args)

def loiter(n):
    """Sleep for ``n`` seconds, announcing start and end, and return ``n * 10``.

    Messages are indented by ``n`` tab characters so that the output of
    different calls is easy to tell apart.
    """
    indent = '\t' * n
    display('{}loiter({}): doing nothing for {}s...'.format(indent, n, n))
    sleep(n)
    display('{}loiter({}): done.'.format(indent, n))
    return n * 10

def main():
    """Demonstrate that ``Executor.map`` returns immediately and yields in order.

    Five ``loiter`` calls are submitted to a three-worker thread pool. The
    generator returned by ``map`` is displayed right away, then its results
    are retrieved one by one; each retrieval waits until that call is done.
    """
    display('Script starting.')
    # The context manager shuts the pool down on exit, even if displaying a
    # result raises. Previously the executor was never shut down.
    with futures.ThreadPoolExecutor(max_workers=3) as executor:
        results = executor.map(loiter, range(5))
        display('results:', results)
        display('Waiting for individual results:')
        # Results arrive in submission order, so a slow early call delays the
        # display of later ones that may already have finished.
        for i, result in enumerate(results):
            display(f'result {i}: {result}')

# Run the loiter demo; the timestamps in the recorded output show the pacing.
if __name__ == '__main__':
    main()

[18:22:38] Script starting.
[18:22:38] loiter(0): doing nothing for 0s...
[18:22:38] loiter(0): done.
[18:22:38] 	loiter(1): doing nothing for 1s...
[18:22:38] 		loiter(2): doing nothing for 2s...
[18:22:38] 			loiter(3): doing nothing for 3s...
[18:22:38] results: <generator object Executor.map.<locals>.result_iterator at 0x10950b140>
[18:22:38] Waiting for individual results:
[18:22:38] result 0: 0
[18:22:39] 	loiter(1): done.
[18:22:39] 				loiter(4): doing nothing for 4s...
[18:22:39] result 1: 10
[18:22:40] 		loiter(2): done.
[18:22:40] result 2: 20
[18:22:41] 			loiter(3): done.
[18:22:41] result 3: 30
[18:22:43] 				loiter(4): done.
[18:22:43] result 4: 40


In [6]:
# A nice progress bar
import time
from tqdm import tqdm

# 1000 iterations that each sleep 0.01 s: a slow loop for tqdm to wrap, so
# the progress bar has something to show.
for i in tqdm(range(1000)):
    time.sleep(.01)

100%|██████████████████████████████████████████████████████████████| 1000/1000 [00:12<00:00, 83.03it/s]


In [8]:
from collections import Counter
from http import HTTPStatus
import httpx
import tqdm
from pathlib import Path
from enum import Enum

class DownloadStatus(Enum):
    """Outcome of a single flag download."""

    OK = 1
    NOT_FOUND = 2
    ERROR = 3


# Local directory that receives the downloaded images.
DEST_DIR = Path('downloaded')

def main(download_many, default_concur_req, max_concur_req):
    """Placeholder driver: accepts a downloader and two concurrency limits, does nothing.

    NOTE(review): the body is a bare ``...``, so calling this never invokes
    ``download_many``. Presumably it stands in for a fuller driver that is
    not part of this notebook -- confirm.
    """
    ...

def save_flag(img: bytes, filename: str) -> None:
    """Store the image bytes in ``DEST_DIR`` under the given file name."""
    destination = DEST_DIR / filename
    destination.write_bytes(img)

def get_flag(base_url: str, cc: str) -> bytes:
    """Fetch ``<base_url>/<cc>/<cc>.gif`` and return the response body.

    The assembled URL is lower-cased as a whole, ``base_url`` included.
    Redirects are followed, and a non-success HTTP status is raised via
    ``raise_for_status()``.
    """
    flag_url = f'{base_url}/{cc}/{cc}.gif'.lower()
    response = httpx.get(flag_url, timeout=3.1, follow_redirects=True)
    response.raise_for_status()
    return response.content

def download_one(cc: str, base_url: str, verbose: bool = False) -> DownloadStatus:
    """Download and save one flag, treating HTTP 404 as a normal outcome.

    Returns ``DownloadStatus.OK`` after saving ``<cc>.gif``, or
    ``DownloadStatus.NOT_FOUND`` when the server answers 404. Any other
    ``httpx.HTTPStatusError`` is re-raised for the caller to handle. With
    ``verbose`` set, a one-line summary is printed.
    """
    try:
        image = get_flag(base_url, cc)
    except httpx.HTTPStatusError as exc:
        response = exc.response
        if response.status_code != HTTPStatus.NOT_FOUND:
            # Only "not found" is handled here; everything else goes up.
            raise
        status = DownloadStatus.NOT_FOUND
        msg = f'not found: {response.url}'
    else:
        save_flag(image, f'{cc}.gif')
        status = DownloadStatus.OK
        msg = 'OK'

    if verbose:
        print(cc, msg)
    return status

In [9]:
# sequential
def download_many(cc_list: list[str], base_url: str, verbose: bool, _unused_concur_req: int) ->  Counter[DownloadStatus]:
    """Download the flags one at a time and tally the outcome of each.

    Country codes are processed in alphabetical order. HTTP status errors and
    request errors are counted as ``DownloadStatus.ERROR`` (and printed when
    ``verbose``). Without ``verbose`` a tqdm progress bar is shown instead.
    A ``KeyboardInterrupt`` stops the loop and returns the tally so far.
    ``_unused_concur_req`` is accepted but never read.
    """
    tally: Counter[DownloadStatus] = Counter()
    codes = sorted(cc_list)
    # The progress bar replaces the per-country lines printed in verbose mode.
    code_iter = codes if verbose else tqdm.tqdm(codes)
    for cc in code_iter:
        error_msg = ''
        try:
            status = download_one(cc, base_url, verbose)
        except httpx.HTTPStatusError as exc:
            error_msg = 'HTTP error {resp.status_code} - {resp.reason_phrase}'.format(
                resp=exc.response)
        except httpx.RequestError as exc:
            error_msg = f'{exc} {type(exc)}'.strip()
        except KeyboardInterrupt:
            break

        if error_msg:
            status = DownloadStatus.ERROR
        tally[status] += 1
        if error_msg and verbose:
            print(f'{cc} error: {error_msg}')
    return tally

In [12]:
# thread pools
from collections import Counter
from concurrent.futures import ThreadPoolExecutor, as_completed
import httpx
import tqdm

# Concurrency settings handed to main() at the end of this cell.
DEFAULT_CONCUR_REQ = 30  # passed as main()'s default_concur_req argument
MAX_CONCUR_REQ = 1000  # passed as main()'s max_concur_req argument

def download_many(cc_list: list[str], base_url: str, verbose: bool, concur_req: int) -> Counter[DownloadStatus]:
    """Download the flags on a thread pool and tally the outcome of each.

    One ``download_one`` call is submitted per country code (alphabetical
    order) to a pool of ``concur_req`` workers, and results are handled as
    they complete. HTTP status errors and request errors are counted as
    ``DownloadStatus.ERROR`` (and printed when ``verbose``). Without
    ``verbose`` a tqdm progress bar is shown instead. A ``KeyboardInterrupt``
    stops result handling and returns the tally so far.
    """
    tally: Counter[DownloadStatus] = Counter()
    with ThreadPoolExecutor(max_workers=concur_req) as executor:
        # Each future is keyed to its country code so that a failure can be
        # reported by name.
        cc_by_future = {}
        for cc in sorted(cc_list):
            cc_by_future[executor.submit(download_one, cc, base_url, verbose)] = cc

        completed = as_completed(cc_by_future)
        if not verbose:
            # as_completed has no length, so tqdm needs the total spelled out.
            completed = tqdm.tqdm(completed, total=len(cc_list))

        for future in completed:
            error_msg = ''
            try:
                status = future.result()
            except httpx.HTTPStatusError as exc:
                error_msg = 'HTTP error {resp.status_code} - {resp.reason_phrase}'.format(
                    resp=exc.response)
            except httpx.RequestError as exc:
                error_msg = f'{exc} {type(exc)}'.strip()
            except KeyboardInterrupt:
                break

            if error_msg:
                status = DownloadStatus.ERROR
            tally[status] += 1
            if error_msg and verbose:
                cc = cc_by_future[future]
                print(f'{cc} error: {error_msg}')
    return tally

# NOTE(review): the only main() in this notebook that takes these three
# arguments has a bare `...` body, so as written this call starts no downloads.
if __name__ == '__main__':
    main(download_many, DEFAULT_CONCUR_REQ, MAX_CONCUR_REQ)