---
## Controlling memory usage

A list comprehension generates the whole list at once, so every element is held in memory at the same time. This can be a problem if the list is very long.

In [1]:
from random import randint
# One million random integers, each drawn from 1..100 inclusive.
numbers = [randint(1, 100) for _ in range(10**6)]

List comprehension vs generator expression:

In [2]:
# NOTE: the second statement only times *creating* the generator object.
# No element is squared until the generator is iterated, so the two
# timings do not measure the same amount of work.
%timeit [x*x for x in numbers]
%timeit (x*x for x in numbers)

45.4 ms ± 1.47 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)
255 ns ± 6.59 ns per loop (mean ± std. dev. of 7 runs, 1,000,000 loops each)


In [3]:
squares = [x*x for x in numbers]
squares_gen = (x*x for x in numbers)

# NOTE: squares_gen can be consumed only once. The first sum() call made by
# %timeit exhausts it, so every later repetition sums an empty generator;
# the second timing does not reflect summing a million values.
%timeit sum(squares)
%timeit sum(squares_gen)

5.68 ms ± 188 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
42.8 ns ± 0.939 ns per loop (mean ± std. dev. of 7 runs, 10,000,000 loops each)


In [4]:
import tracemalloc
from typing import Generator

def calculate_squares(generator: bool) -> list[int]|Generator[int, None, None]:
    """Generates either a list of squares of random numbers, or generator expression."""
    max = int(1e6)

    if generator:
        numbers = (randint(1, 100) for _ in range(max))
        return (x*x for x in numbers)
    else:
        numbers = [randint(1, 100) for _ in range(max)]
        return [x*x for x in numbers]

def measure(generator: bool) -> None:
    """Print the memory traced during one ``calculate_squares`` call.

    Memory tracing is started, ``calculate_squares(generator)`` is called,
    and the current and peak traced sizes are printed in MB (10**6 bytes),
    followed by a separator line. Only memory is measured, not timing.

    Args:
        generator: Forwarded to ``calculate_squares``; selects the generator
            expression (true) or list comprehension (false) variant. Also
            echoed in the printed report.
    """
    tracemalloc.start()
    try:
        # The return value is discarded. In generator mode the returned
        # generator is therefore never iterated.
        calculate_squares(generator)
        current, peak = tracemalloc.get_traced_memory()
    finally:
        # Always stop tracing, even if the measured call raises.
        tracemalloc.stop()

    print(f"Generator: {generator}")
    print(f"Current memory usage: {current / 10**6:.4f} MB")
    print(f"Peak memory usage: {peak / 10**6:.4f} MB")
    print("-" * 40)
# Report memory usage for the generator variant, then the list variant.
measure(generator=True)
measure(generator=False)

Generator: True
Current memory usage: 0.0000 MB
Peak memory usage: 0.0011 MB
----------------------------------------
Generator: False
Current memory usage: 0.0011 MB
Peak memory usage: 43.7922 MB
----------------------------------------
