# Generators and Lazy Pipelines

- You can chain generator functions to form multi-stage data pipelines that process items one at a time.  
- No intermediate lists are built, so memory stays low even for very large streams.  
- Each generator only holds its own minimal state and passes items downstream on demand.  

## Memory Efficiency

- Lazy iterables maintain only minimal state (for example, a `range` stores just its start, stop, and step) regardless of total length.  
- Eager collections (lists, tuples) use memory in proportion to the number of items they hold.  
- Use `sys.getsizeof()` to inspect the in-memory size of objects themselves (not their contents).  

In [None]:
# 1. DONE Ingest the log lines
# 2. DONE Filter log lines based on either level or message substring
# 3. DONE Extract and return only the message attribute of the logs

import sys
import json


def read_logs(filepath):
    """Lazily reads a JSON Lines log file, yielding one parsed record per line.

    The file is iterated line by line, so only the current line is held in
    memory. Lines that are empty or contain only whitespace are skipped.

    Args:
        filepath (str): The path where the file is located.

    Yields:
        The value decoded by ``json.loads`` from each non-blank line
        (a dict when the line holds a JSON object).

    Raises:
        OSError: If the file cannot be opened (raised when iteration starts,
            because this is a generator).
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    # JSON text is UTF-8 by specification; pinning the encoding keeps the
    # result independent of the platform's default locale encoding.
    with open(filepath, 'r', encoding="utf-8") as file:
        for line in file:
            line = line.strip()
            if not line:
                continue
            yield json.loads(line)


def filter_logs(logs, level=None, message_substring=None):
    """Filters an iterable of log dictionaries by level, message substring, or both.

    Both comparisons are case-insensitive. A criterion left as None is not
    applied; when both are given, a log must satisfy both to be kept. A log
    that lacks the "level" or "message" key is treated as having an empty
    string for that key.

    Args:
        logs (iterable(dict)): Iterable containing the logs to be filtered.
        level (str): The log level to keep. Defaults to None.
        message_substring (str): The pattern to look for in messages. Defaults to None.

    Yields:
        dict: Each log that passes every active criterion, in input order.
    """
    # Lower-case the criteria once up front instead of once per log.
    wanted_level = None if level is None else level.lower()
    wanted_text = (
        None if message_substring is None else message_substring.lower()
    )

    for log in logs:
        if (
            wanted_level is not None
            and log.get("level", "").lower() != wanted_level
        ):
            continue

        if (
            wanted_text is not None
            and wanted_text not in log.get("message", "").lower()
        ):
            continue

        yield log


def extract_field(logs, field="message"):
    """Extracts a specific field from any iterable containing dictionaries.

    String values have surrounding whitespace stripped. Values of any other
    type (numbers, None, nested structures) are yielded unchanged, so the
    function can be used on non-text fields without raising. A log that does
    not contain the field yields an empty string.

    Args:
        logs (iterable(dict)): Iterable containing the logs to be evaluated.
        field (str): The field to return. Defaults to 'message'.

    Yields:
        The value of the extracted field for each log, in input order.
    """
    for log in logs:
        value = log.get(field, "")
        # Only strings can be stripped; calling .strip() on e.g. an int or
        # None would raise AttributeError.
        yield value.strip() if isinstance(value, str) else value


def get_first_n(logs, n=10):
    """Extracts the first n items from the provided iterable.

    Exactly n items (or fewer, if the iterable is shorter) are pulled from
    ``logs``. The generator stops immediately after yielding the n-th item,
    so an upstream iterator is never advanced past what was yielded.

    Args:
        logs (iterable(T)): Iterable from which items will be extracted.
        n (int): The number of items to extract. Values of zero or less
            yield nothing and leave the iterable untouched. Defaults to 10.

    Yields:
        T: The next item from the iterable, in order.
    """
    if n <= 0:
        return

    for count, log in enumerate(logs, start=1):
        yield log
        # Check the limit *before* the loop asks the source for another item;
        # otherwise one extra item would be consumed and silently discarded.
        if count >= n:
            return


# Wire the pipeline stages together. Calling a generator function only
# creates the generator object; no line of the file is read until a
# consumer starts pulling items.
logs_gen = read_logs("large_logs.txt")
filter_gen = filter_logs(logs=logs_gen, message_substring="user")
extract_gen = extract_field(logs=filter_gen, field="message")


# Pull just the first four matching messages through the whole pipeline.
for log in get_first_n(logs=extract_gen, n=4):
    print(log)


# sys.getsizeof reports the shallow size of each generator object itself.
print(
    "Generator object sizes (in bytes):",
    *map(sys.getsizeof, (logs_gen, filter_gen, extract_gen)),
)