# Chapter 41: Concurrent Futures

This notebook covers the `concurrent.futures` module, which provides a high-level interface for asynchronously executing callables using pools of threads or processes. This chapter focuses on thread pools: you will learn how to submit tasks, collect results, handle exceptions, and process futures as they complete.

## Key Concepts
- **`ThreadPoolExecutor`**: Manages a pool of worker threads for concurrent execution
- **`submit()`**: Schedules a callable and returns a `Future` object
- **`map()`**: Applies a function to an iterable, returning results in order
- **`Future`**: Represents a pending result with `result()`, `exception()`, and `done()`
- **`as_completed()`**: Yields futures in the order they finish, not the order submitted

## Section 1: ThreadPoolExecutor Basics

`ThreadPoolExecutor` manages a pool of threads and provides methods to submit work. It is typically used as a context manager: leaving the `with` block calls `shutdown(wait=True)`, which waits for pending tasks to finish and then cleans up the worker threads.

In [None]:
from concurrent.futures import ThreadPoolExecutor


def square(x: int) -> int:
    """Compute x raised to the second power.

    Args:
        x: The integer to square.

    Returns:
        The value of ``x ** 2``.
    """
    squared = pow(x, 2)
    return squared


# The with-statement shuts the pool down once the block exits
with ThreadPoolExecutor(max_workers=2) as pool:
    future = pool.submit(square, 5)
    # Block until the worker thread has produced the value
    result: int = future.result()

print(
    f"square(5) = {result}",
    f"Result type: {type(result).__name__}",
    sep="\n",
)

In [None]:
from concurrent.futures import ThreadPoolExecutor
import time


def slow_double(x: int) -> int:
    """Pause for 50 ms to mimic a slow computation, then return x doubled.

    Args:
        x: The integer to double.

    Returns:
        The value of ``x * 2``.
    """
    delay_seconds = 0.05
    time.sleep(delay_seconds)
    doubled = x * 2
    return doubled


# Time the same workload run one call at a time and through a thread pool
values: list[int] = list(range(1, 9))

# Sequential: each call starts only after the previous one has returned
start: float = time.perf_counter()
sequential_results: list[int] = list(map(slow_double, values))
sequential_time: float = time.perf_counter() - start

# Concurrent: up to 4 calls can be sleeping at the same time
start = time.perf_counter()
with ThreadPoolExecutor(max_workers=4) as pool:
    concurrent_results: list[int] = [*pool.map(slow_double, values)]
# Stop the clock after the with-block so pool shutdown is included
concurrent_time: float = time.perf_counter() - start

speedup: float = sequential_time / concurrent_time

print(f"Sequential results: {sequential_results}")
print(f"Concurrent results: {concurrent_results}")
print(f"Sequential time: {sequential_time:.3f}s")
print(f"Concurrent time: {concurrent_time:.3f}s")
print(f"Speedup: {speedup:.1f}x")

## Section 2: submit() and Future Objects

`submit()` schedules a callable to be executed and immediately returns a `Future` object. The `Future` represents the pending result and provides methods to check status and retrieve the result.

In [None]:
from concurrent.futures import Future, ThreadPoolExecutor


def compute(x: int) -> int:
    """Compute the square of x.

    Args:
        x: The integer to square.

    Returns:
        The value of ``x ** 2``.
    """
    squared = pow(x, 2)
    return squared


with ThreadPoolExecutor(max_workers=2) as pool:
    # submit() hands back a Future right away, without waiting for compute
    future: Future[int] = pool.submit(compute, 5)

    print(f"Future type: {type(future).__name__}")
    print(f"Is Future instance: {isinstance(future, Future)}")

    # result() blocks until the worker has finished, then returns its value
    value: int = future.result()
    print(f"Result: {value}")
    print("Expected: 25")
    print(f"Correct: {value == 25}")

In [None]:
from concurrent.futures import Future, ThreadPoolExecutor


def greet(name: str) -> str:
    """Build a greeting addressed to the given name.

    Args:
        name: Who to greet.

    Returns:
        The text ``Hello, <name>!``.
    """
    greeting = f"Hello, {name}!"
    return greeting


names: list[str] = ["Alice", "Bob", "Charlie"]

with ThreadPoolExecutor(max_workers=2) as pool:
    # Queue one greeting task per name
    futures: list[Future[str]] = [pool.submit(greet, name) for name in names]

    # Walking the list of futures keeps the results in submission order
    results: list[str] = [pending.result() for pending in futures]

# One greeting per line
print(*results, sep="\n")

## Section 3: map() -- Ordered Results

`map()` applies a function to each item in an iterable and returns results in the same order as the input. Unlike `submit()`, it does not return `Future` objects -- it returns an iterator that yields the results directly, which is why the examples wrap it in `list()`.

In [None]:
from concurrent.futures import ThreadPoolExecutor


def double(x: int) -> int:
    """Compute twice the value of x.

    Args:
        x: The integer to double.

    Returns:
        The value of ``x * 2``.
    """
    doubled = x * 2
    return doubled


with ThreadPoolExecutor(max_workers=2) as pool:
    # map() returns an iterator; list() collects its results, which come
    # back in the same order as the inputs
    results: list[int] = list(pool.map(double, [1, 2, 3, 4]))

print(f"Results: {results}")
print("Expected: [2, 4, 6, 8]")
print(f"Correct: {results == [2, 4, 6, 8]}")

In [None]:
from concurrent.futures import ThreadPoolExecutor


def add(a: int, b: int) -> int:
    """Add two integers.

    Args:
        a: The first addend.
        b: The second addend.

    Returns:
        The value of ``a + b``.
    """
    total = a + b
    return total


# map with multiple iterables (like built-in map): the i-th call receives
# the i-th element of each iterable as its positional arguments
with ThreadPoolExecutor(max_workers=2) as pool:
    results: list[int] = list(pool.map(add, [1, 2, 3], [10, 20, 30]))

print(f"Results: {results}")
print("Expected: [11, 22, 33]")
print(f"Correct: {results == [11, 22, 33]}")

## Section 4: as_completed() -- Process Results as They Finish

`as_completed()` yields `Future` objects in the order they finish, not the order they were submitted. This is useful when you want to process results as soon as they are available.

In [None]:
from concurrent.futures import Future, ThreadPoolExecutor, as_completed


def square(x: int) -> int:
    """Compute the square of x.

    Args:
        x: The integer to square.

    Returns:
        The value of ``x ** 2``.
    """
    squared = pow(x, 2)
    return squared


with ThreadPoolExecutor(max_workers=2) as pool:
    # Key each future by its input so the input can be looked up again
    # when the future comes back from as_completed()
    futures: dict[Future[int], int] = {
        pool.submit(square, i): i for i in range(5)
    }

    results: list[int] = []
    # as_completed() yields futures in the order they finish
    for future in as_completed(futures):
        input_val: int = futures[future]
        result: int = future.result()
        results.append(result)
        print(f"square({input_val}) = {result}")

# Completion order can vary between runs, so sort once before comparing
sorted_results: list[int] = sorted(results)
print(f"\nAll results (sorted): {sorted_results}")
print("Expected: [0, 1, 4, 9, 16]")
print(f"Correct: {sorted_results == [0, 1, 4, 9, 16]}")

In [None]:
from concurrent.futures import Future, ThreadPoolExecutor, as_completed
import time


def variable_work(task_id: int, duration: float) -> str:
    """Sleep for the requested time, then describe the finished task.

    Args:
        task_id: Identifier included in the returned label.
        duration: Seconds to sleep before returning.

    Returns:
        A label of the form ``task-<task_id> (<duration>s)``.
    """
    time.sleep(duration)
    label = f"task-{task_id} ({duration}s)"
    return label


# Each entry is (task id, seconds of simulated work)
tasks: list[tuple[int, float]] = [(1, 0.15), (2, 0.05), (3, 0.10)]

with ThreadPoolExecutor(max_workers=3) as pool:
    # Remember which task id each future belongs to
    futures: dict[Future[str], int] = {}
    for tid, dur in tasks:
        futures[pool.submit(variable_work, tid, dur)] = tid

    print("Results in completion order:")
    for future in as_completed(futures):
        message: str = future.result()
        print(f"  Completed: {message}")

## Section 5: Future Exception Handling

If a callable raises an exception, the `Future` captures it. You can retrieve the exception with `future.exception()` or let it re-raise when calling `future.result()`.

In [None]:
from concurrent.futures import Future, ThreadPoolExecutor


def failing() -> None:
    """Raise unconditionally, standing in for a task that fails.

    Raises:
        ValueError: Always, with the message ``task failed``.
    """
    error = ValueError("task failed")
    raise error


with ThreadPoolExecutor(max_workers=1) as pool:
    future: Future[None] = pool.submit(failing)

    # exception() hands back whatever the task raised instead of re-raising it
    exc: BaseException | None = future.exception()
    report: list[str] = [
        f"Exception is not None: {exc is not None}",
        f"Exception type: {type(exc).__name__}",
        f"Exception message: {exc}",
        f"Is ValueError: {isinstance(exc, ValueError)}",
    ]
    print("\n".join(report))

In [None]:
from concurrent.futures import Future, ThreadPoolExecutor


def maybe_fail(x: int) -> int:
    """Multiply x by ten, rejecting negative input.

    Args:
        x: The value to scale.

    Returns:
        The value of ``x * 10`` when x is not negative.

    Raises:
        ValueError: If x is less than zero.
    """
    negative = x < 0
    if not negative:
        return x * 10
    raise ValueError(f"Negative input: {x}")


with ThreadPoolExecutor(max_workers=2) as pool:
    inputs: list[int] = [3, -1, 7, -2, 5]
    futures: list[Future[int]] = [pool.submit(maybe_fail, x) for x in inputs]

    # futures was built from inputs in order, so the two lists line up
    for given, future in zip(inputs, futures):
        exc: BaseException | None = future.exception()
        if exc is None:
            print(f"Input {given:2d} -> Result: {future.result()}")
        else:
            print(f"Input {given:2d} -> ERROR: {exc}")

In [None]:
from concurrent.futures import Future, ThreadPoolExecutor


def safe_divide(a: float, b: float) -> float:
    """Compute the quotient of a and b.

    No guard is applied to the divisor, so dividing by zero raises.

    Args:
        a: The dividend.
        b: The divisor.

    Returns:
        The value of ``a / b``.

    Raises:
        ZeroDivisionError: If b is zero.
    """
    quotient = a / b
    return quotient


with ThreadPoolExecutor(max_workers=1) as pool:
    future: Future[float] = pool.submit(safe_divide, 10.0, 0.0)

    # The exception raised inside the task is re-raised by result()
    try:
        value: float = future.result()
    except ZeroDivisionError as err:
        print(f"Caught exception from future.result(): {err}")
        print(f"Exception type: {type(err).__name__}")
    else:
        print(f"Result: {value}")

## Section 6: Future Status -- done() and cancel()

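Futures provide methods to check their status. `done()` returns `True` when the future has completed (either successfully or with an exception) or was cancelled. `cancel()` attempts to cancel execution; it succeeds only if the task has not started running, and it returns `True` or `False` to indicate whether the cancellation worked.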

In [None]:
from concurrent.futures import Future, ThreadPoolExecutor


with ThreadPoolExecutor(max_workers=1) as pool:
    future: Future[int] = pool.submit(lambda: 42)

    # result() returns only after the task has run, so done() is True afterwards
    result: int = future.result()
    finished: bool = future.done()
    print(f"Result: {result}")
    print(f"Future done: {finished}")
    print(f"Correct: {result == 42 and finished}")

In [None]:
from concurrent.futures import Future, ThreadPoolExecutor
import time


def slow_task() -> str:
    """Sleep for 100 ms, then report completion.

    Returns:
        The string ``completed``.
    """
    outcome = "completed"
    time.sleep(0.1)
    return outcome


with ThreadPoolExecutor(max_workers=1) as pool:
    future: Future[str] = pool.submit(slow_task)

    # Sample the status straight after submitting, before waiting on the task
    done_before: bool = future.done()
    print(f"Done immediately after submit: {done_before}")

    # result() blocks until slow_task has returned
    result: str = future.result()
    done_after: bool = future.done()
    print(f"Done after result(): {done_after}")
    print(f"Result: {result}")

## Section 7: Practical Patterns

Common patterns for using `concurrent.futures` in real applications.

In [None]:
from concurrent.futures import Future, ThreadPoolExecutor, as_completed
import time


def fetch_data(url: str) -> dict[str, str | int]:
    """Simulate fetching data from a URL."""
    time.sleep(0.05)  # Simulate network latency
    return {"url": url, "status": 200}


urls: list[str] = [
    "https://api.example.com/users",
    "https://api.example.com/products",
    "https://api.example.com/orders",
    "https://api.example.com/reviews",
]

# One worker per URL, so every simulated fetch can be in flight at once
with ThreadPoolExecutor(max_workers=4) as pool:
    # Map each future back to the URL it was submitted for
    future_to_url: dict[Future[dict[str, str | int]], str] = {}
    for target in urls:
        future_to_url[pool.submit(fetch_data, target)] = target

    for future in as_completed(future_to_url):
        url: str = future_to_url[future]
        try:
            data: dict[str, str | int] = future.result()
            status = data["status"]
            print(f"Fetched {url} -> status {status}")
        except Exception as err:
            # Report the failing URL and carry on with the remaining futures
            print(f"Error fetching {url}: {err}")

In [None]:
from concurrent.futures import Future, ThreadPoolExecutor, as_completed


def process_item(item: int) -> dict[str, int | str]:
    """Process a single item, potentially failing."""
    if item % 3 == 0:
        raise ValueError(f"Cannot process {item}")
    return {"item": item, "result": item * 10}


items: list[int] = list(range(1, 8))
successes: list[dict[str, int | str]] = []
failures: list[str] = []

with ThreadPoolExecutor(max_workers=3) as pool:
    # Map each future back to the item it was submitted for
    futures: dict[Future[dict[str, int | str]], int] = {
        pool.submit(process_item, item): item for item in items
    }

    for future in as_completed(futures):
        item: int = futures[future]
        # exception() returns None on success, so it doubles as the
        # success/failure test without needing try/except
        exc: BaseException | None = future.exception()
        if exc is not None:
            failures.append(f"item {item}: {exc}")
        else:
            successes.append(future.result())

# Futures finish in no fixed order, so sort both lists before printing
print("Successes:")
for s in sorted(successes, key=lambda x: x["item"]):
    print(f"  {s}")

print("\nFailures:")
for f in sorted(failures):
    print(f"  {f}")

print(f"\nTotal: {len(successes)} succeeded, {len(failures)} failed")

## Summary

### Core API
- **`ThreadPoolExecutor(max_workers=N)`**: Create a pool of `N` worker threads; use as a context manager
- **`pool.submit(fn, *args)`**: Schedule `fn(*args)` and return a `Future` immediately
- **`pool.map(fn, *iterables)`**: Apply `fn` to the elements of one or more iterables (like built-in `map`); results are returned in input order

### Future Methods
- **`future.result(timeout=None)`**: Block until the result is ready; re-raises exceptions
- **`future.exception(timeout=None)`**: Return the exception or `None` if successful
- **`future.done()`**: Return `True` if the future has finished (success or failure)
- **`future.cancel()`**: Attempt to cancel the future before it starts executing

### Processing Patterns
- **`as_completed(futures)`**: Yield futures in completion order (fastest results first)
- **Submit + collect**: Submit multiple tasks, then iterate over futures for results
- **Map for ordered results**: Use `map()` when you need results in the same order as inputs

### Best Practices
- Always use the executor as a context manager (`with` statement) for proper cleanup
- Check `future.exception()` or use `try/except` around `future.result()` to handle errors
- Use `as_completed()` when you want to process results as soon as they are ready
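- Choose `max_workers` based on your workload: I/O-bound tasks benefit from more threads, while CPU-bound pure-Python code gains little from extra threads on standard (GIL-enabled) CPython builds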