# Iterators and Generators in Python

---

## Table of Contents
1. Iterables vs Iterators
2. The Iterator Protocol
3. Creating Custom Iterators
4. Introduction to Generators
5. Generator Functions
6. Generator Expressions
7. yield from Statement
8. Generator Use Cases
9. Memory Efficiency
10. Key Points
11. Practice Exercises

---

## 1. Iterables vs Iterators

**Theory:**
- **Iterable**: An object that can return an iterator (has an `__iter__` method)
- **Iterator**: An object that produces values one at a time (has a `__next__` method, plus an `__iter__` method that returns the iterator itself)
- All iterators are iterables, but not all iterables are iterators
- Examples of iterables: lists, tuples, strings, dictionaries, sets, files

In [2]:
# Iterables - objects you can loop over
my_list = [1, 2, 3, 4, 5]
my_string = "Hello"
my_dict = {'a': 1, 'b': 2}

# All of these work in for loops
print("List iteration:")
for item in my_list:
    print(item, end=' ')

print("\n\nString iteration:")
for char in my_string:
    print(char, end=' ')

List iteration:
1 2 3 4 5 

String iteration:
H e l l o 

In [3]:
# Getting an iterator from an iterable
my_list = [1, 2, 3]

# iter() returns an iterator
my_iterator = iter(my_list)

print(f"List type: {type(my_list)}")
print(f"Iterator type: {type(my_iterator)}")

List type: <class 'list'>
Iterator type: <class 'list_iterator'>


In [4]:
# Using next() to get values from iterator
my_list = [10, 20, 30]
my_iterator = iter(my_list)

print(f"First: {next(my_iterator)}")
print(f"Second: {next(my_iterator)}")
print(f"Third: {next(my_iterator)}")

# Next call would raise StopIteration
try:
    print(next(my_iterator))
except StopIteration:
    print("Iterator exhausted!")

First: 10
Second: 20
Third: 30
Iterator exhausted!


In [5]:
# Iterators are single-use
my_list = [1, 2, 3]
my_iterator = iter(my_list)

# First loop exhausts the iterator
print("First loop:")
for item in my_iterator:
    print(item, end=' ')

# Second loop produces nothing
print("\nSecond loop:")
for item in my_iterator:
    print(item, end=' ')
print("(empty - iterator exhausted)")

First loop:
1 2 3 
Second loop:
(empty - iterator exhausted)


In [6]:
# Checking if object is iterable
from collections.abc import Iterable, Iterator

my_list = [1, 2, 3]
my_iter = iter(my_list)

print(f"List is Iterable: {isinstance(my_list, Iterable)}")
print(f"List is Iterator: {isinstance(my_list, Iterator)}")
print(f"Iterator is Iterable: {isinstance(my_iter, Iterable)}")
print(f"Iterator is Iterator: {isinstance(my_iter, Iterator)}")

List is Iterable: True
List is Iterator: False
Iterator is Iterable: True
Iterator is Iterator: True


---

## 2. The Iterator Protocol

**The iterator protocol requires:**
- `__iter__()` - Returns the iterator object itself
- `__next__()` - Returns the next value, raises StopIteration when done

In [7]:
# How for loop works internally
my_list = ['a', 'b', 'c']

# This for loop:
print("Using for loop:")
for item in my_list:
    print(item, end=' ')

# Is equivalent to:
print("\n\nManual iteration:")
iterator = iter(my_list)  # Calls my_list.__iter__()
while True:
    try:
        item = next(iterator)  # Calls iterator.__next__()
        print(item, end=' ')
    except StopIteration:
        break

Using for loop:
a b c 

Manual iteration:
a b c 

In [8]:
# Examining iterator methods
my_list = [1, 2, 3]
my_iter = iter(my_list)

# Check for protocol methods
print(f"Has __iter__: {hasattr(my_iter, '__iter__')}")
print(f"Has __next__: {hasattr(my_iter, '__next__')}")

# Using methods directly
print(f"\n__iter__() returns self: {my_iter.__iter__() is my_iter}")
print(f"__next__() returns: {my_iter.__next__()}")

Has __iter__: True
Has __next__: True

__iter__() returns self: True
__next__() returns: 1


---

## 3. Creating Custom Iterators

In [9]:
# Simple custom iterator - Count up to N
class CountUp:
    """Single-pass iterator producing 1, 2, ..., max_value."""

    def __init__(self, max_value):
        self.max_value = max_value
        # Last value handed out; 0 means nothing has been produced yet.
        self.current = 0

    def __iter__(self):
        # An iterator acts as its own iterable.
        return self

    def __next__(self):
        candidate = self.current + 1
        self.current = candidate
        if candidate <= self.max_value:
            return candidate
        raise StopIteration

# Use the iterator
counter = CountUp(5)
for num in counter:
    print(num, end=' ')

1 2 3 4 5 

In [10]:
# Custom iterator - Fibonacci sequence
class Fibonacci:
    """Iterator over the first max_count Fibonacci numbers, starting at 0."""

    def __init__(self, max_count):
        self.max_count = max_count
        self.count = 0          # how many numbers have been produced so far
        self.a, self.b = 0, 1   # next value to emit and the one after it

    def __iter__(self):
        return self

    def __next__(self):
        if self.count >= self.max_count:
            raise StopIteration

        value, following = self.a, self.b
        self.a = following
        self.b = value + following
        self.count += 1
        return value

# First 10 Fibonacci numbers
print("Fibonacci sequence:")
for num in Fibonacci(10):
    print(num, end=' ')

Fibonacci sequence:
0 1 1 2 3 5 8 13 21 34 

In [11]:
# Separate iterable and iterator classes
class Range:
    """Re-iterable sequence of values from start up to (not including) end."""

    def __init__(self, start, end):
        self.start, self.end = start, end

    def __iter__(self):
        # A brand-new iterator per call is what allows repeated loops.
        return RangeIterator(self.start, self.end)


class RangeIterator:
    """One-shot cursor that walks from start towards end in steps of 1."""

    def __init__(self, start, end):
        self.current, self.end = start, end

    def __iter__(self):
        return self

    def __next__(self):
        if self.current >= self.end:
            raise StopIteration
        value = self.current
        self.current = value + 1
        return value

# Can iterate multiple times (creates new iterator each time)
my_range = Range(1, 5)
print("First iteration:", list(my_range))
print("Second iteration:", list(my_range))

First iteration: [1, 2, 3, 4]
Second iteration: [1, 2, 3, 4]


In [12]:
# Iterator with reverse capability
class ReversibleList:
    """Container that can be walked front-to-back or back-to-front."""

    def __init__(self, data):
        # Take a private copy so later changes to the caller's object
        # are not reflected here.
        self.data = [*data]

    def __iter__(self):
        # Forward traversal is delegated to the underlying list.
        forward = iter(self.data)
        return forward

    def __reversed__(self):
        # Hook used by the built-in reversed().
        backward = reversed(self.data)
        return backward

items = ReversibleList([1, 2, 3, 4, 5])

print("Forward:", list(items))
print("Reversed:", list(reversed(items)))

Forward: [1, 2, 3, 4, 5]
Reversed: [5, 4, 3, 2, 1]


---

## 4. Introduction to Generators

**Theory:**
- Generators are a simple way to create iterators
- Use `yield` keyword instead of `return`
- Automatically implement the iterator protocol
- Memory efficient - values generated on-the-fly
- State is preserved between calls

In [13]:
# Simple generator function
def count_up_to(n):
    """Lazily produce 1, 2, 3, ... for as long as the value does not exceed n."""
    value = 1
    while value <= n:
        yield value
        value = value + 1

# Create generator object
gen = count_up_to(5)
print(f"Generator type: {type(gen)}")

# Iterate over generator
print("Values:", end=' ')
for num in gen:
    print(num, end=' ')

Generator type: <class 'generator'>
Values: 1 2 3 4 5 

In [14]:
# yield vs return
def with_return():
    """Return 1; the statements after the first return are deliberately dead code.

    Demonstrates that ``return`` ends the call immediately.
    """
    return 1
    return 2  # Never reached
    return 3  # Never reached either

def with_yield():
    """Yield 1, then 2, then 3, pausing after each value.

    Unlike ``return``, each ``yield`` suspends the function; the next request
    for a value resumes execution on the following line.
    """
    yield 1
    yield 2  # Continues from here on next call
    yield 3

print(f"Return function: {with_return()}")
print(f"Yield function: {list(with_yield())}")

Return function: 1
Yield function: [1, 2, 3]


In [15]:
# Generator state preservation
def stateful_generator():
    """Yield 1, 2, 3, printing a message each time execution starts or resumes.

    Nothing is printed when the generator object is created; the body only
    runs when a value is requested.
    """
    print("Starting...")
    yield 1
    print("Resuming after first yield...")
    yield 2
    print("Resuming after second yield...")
    yield 3
    # Only runs if a further value is requested after the third one; the
    # generator then ends.
    print("Finishing...")

gen = stateful_generator()
print(f"Got: {next(gen)}")
print(f"Got: {next(gen)}")
print(f"Got: {next(gen)}")

Starting...
Got: 1
Resuming after first yield...
Got: 2
Resuming after second yield...
Got: 3


---

## 5. Generator Functions

In [16]:
# Fibonacci generator
def fibonacci(n):
    """Lazily produce the first n Fibonacci numbers (0, 1, 1, 2, ...)."""
    previous, current = 0, 1
    emitted = 0
    while emitted < n:
        yield previous
        emitted += 1
        previous, current = current, previous + current

print("Fibonacci:", list(fibonacci(10)))

Fibonacci: [0, 1, 1, 2, 3, 5, 8, 13, 21, 34]


In [17]:
# Infinite generator
def infinite_counter(start=0):
    """Yield start, start + 1, start + 2, ... without ever finishing.

    The caller is responsible for stopping consumption.
    """
    value = start
    while True:
        yield value
        value += 1

# Use with caution - must break manually
counter = infinite_counter(100)
for i, num in enumerate(counter):
    print(num, end=' ')
    if i >= 9:  # Stop after 10 values
        break

100 101 102 103 104 105 106 107 108 109 

In [18]:
# Generator with parameters
def powers_of(base, max_exp):
    """Yield base**0, base**1, ..., base**max_exp (max_exp is inclusive)."""
    exponents = range(max_exp + 1)
    yield from (base ** exponent for exponent in exponents)

print("Powers of 2:", list(powers_of(2, 10)))
print("Powers of 3:", list(powers_of(3, 5)))

Powers of 2: [1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 1024]
Powers of 3: [1, 3, 9, 27, 81, 243]


In [19]:
# Generator that filters
def even_numbers(iterable):
    """Pass through the items of iterable that are divisible by 2."""
    for candidate in iterable:
        if candidate % 2 != 0:
            continue  # odd: skip it
        yield candidate

numbers = range(1, 21)
print("Even numbers:", list(even_numbers(numbers)))

Even numbers: [2, 4, 6, 8, 10, 12, 14, 16, 18, 20]


In [20]:
# Generator that transforms
def squared(iterable):
    """Lazily map each item of iterable to its square."""
    for value in iterable:
        square = value ** 2
        yield square

numbers = [1, 2, 3, 4, 5]
print("Squared:", list(squared(numbers)))

Squared: [1, 4, 9, 16, 25]


In [21]:
# Chaining generators (pipeline)
def numbers_gen(n):
    """Source stage of the pipeline: yield the integers 1 through n."""
    yield from range(1, n + 1)

def double(iterable):
    """Pipeline stage: yield each incoming item multiplied by 2."""
    doubled = (value * 2 for value in iterable)
    yield from doubled

def add_one(iterable):
    """Pipeline stage: yield each incoming item incremented by 1."""
    for value in iterable:
        incremented = value + 1
        yield incremented

# Pipeline: numbers -> double -> add_one
pipeline = add_one(double(numbers_gen(5)))
print("Pipeline result:", list(pipeline))

Pipeline result: [3, 5, 7, 9, 11]


In [22]:
# Generator with send() method
def accumulator():
    """Coroutine-style generator keeping a running sum of the values sent in.

    Each resume yields the current total; a plain next() (which delivers
    None) leaves the total unchanged.
    """
    running_total = 0
    while True:
        received = yield running_total
        if received is None:
            continue
        running_total += received

acc = accumulator()
next(acc)  # Initialize generator

print(f"Send 10: {acc.send(10)}")
print(f"Send 20: {acc.send(20)}")
print(f"Send 5: {acc.send(5)}")

Send 10: 10
Send 20: 30
Send 5: 35


---

## 6. Generator Expressions

**Syntax:** `(expression for item in iterable if condition)`

Similar to list comprehensions but with parentheses and lazy evaluation.

In [23]:
# Generator expression vs list comprehension
# List comprehension - creates list in memory
list_comp = [x**2 for x in range(10)]

# Generator expression - creates generator object
gen_exp = (x**2 for x in range(10))

print(f"List comprehension type: {type(list_comp)}")
print(f"Generator expression type: {type(gen_exp)}")

print(f"\nList: {list_comp}")
print(f"Generator: {list(gen_exp)}")

List comprehension type: <class 'list'>
Generator expression type: <class 'generator'>

List: [0, 1, 4, 9, 16, 25, 36, 49, 64, 81]
Generator: [0, 1, 4, 9, 16, 25, 36, 49, 64, 81]


In [24]:
# Generator expression with condition
evens = (x for x in range(20) if x % 2 == 0)
print("Even numbers:", list(evens))

# Multiple conditions
divisible_by_3_and_5 = (x for x in range(100) if x % 3 == 0 if x % 5 == 0)
print("Divisible by 3 and 5:", list(divisible_by_3_and_5))

Even numbers: [0, 2, 4, 6, 8, 10, 12, 14, 16, 18]
Divisible by 3 and 5: [0, 15, 30, 45, 60, 75, 90]


In [25]:
# Generator expressions in function calls
# No need for extra parentheses
numbers = [1, 2, 3, 4, 5]

# Sum of squares
total = sum(x**2 for x in numbers)
print(f"Sum of squares: {total}")

# Max of absolute values
values = [-5, 3, -8, 2, -1]
max_abs = max(abs(x) for x in values)
print(f"Max absolute value: {max_abs}")

Sum of squares: 55
Max absolute value: 8


In [26]:
# Nested generator expressions
matrix = [[1, 2, 3], [4, 5, 6], [7, 8, 9]]

# Flatten matrix
flattened = (num for row in matrix for num in row)
print("Flattened:", list(flattened))

# All combinations
pairs = ((x, y) for x in range(3) for y in range(3))
print("Pairs:", list(pairs))

Flattened: [1, 2, 3, 4, 5, 6, 7, 8, 9]
Pairs: [(0, 0), (0, 1), (0, 2), (1, 0), (1, 1), (1, 2), (2, 0), (2, 1), (2, 2)]


In [27]:
# Practical example: Processing data
data = ["  hello  ", "  WORLD  ", "  Python  "]

# Clean and transform
cleaned = (s.strip().lower() for s in data)
print("Cleaned:", list(cleaned))

Cleaned: ['hello', 'world', 'python']


---

## 7. yield from Statement

**Theory:**
- `yield from` delegates iteration to another iterable
- Simplifies generator code that yields from sub-iterables
- Introduced in Python 3.3

In [28]:
# Without yield from
def flatten_without(nested):
    """Flatten one level of nesting using an explicit inner loop."""
    for inner in nested:
        for element in inner:
            yield element

# With yield from
def flatten_with(nested):
    """Flatten one level of nesting by delegating to each inner iterable."""
    for inner in nested:
        yield from inner

nested = [[1, 2], [3, 4], [5, 6]]
print("Without yield from:", list(flatten_without(nested)))
print("With yield from:", list(flatten_with(nested)))

Without yield from: [1, 2, 3, 4, 5, 6]
With yield from: [1, 2, 3, 4, 5, 6]


In [29]:
# yield from with generators
def gen1():
    """First sub-generator: yields 1 then 2."""
    for value in (1, 2):
        yield value


def gen2():
    """Second sub-generator: yields 3 then 4."""
    for value in (3, 4):
        yield value


def combined():
    """Yield everything from gen1(), then gen2(), then a final 5."""
    for source in (gen1, gen2):
        yield from source()
    yield 5

print("Combined:", list(combined()))

Combined: [1, 2, 3, 4, 5]


In [30]:
# Recursive generator with yield from
def deep_flatten(nested):
    """Yield the non-list leaves of nested, descending into lists at any depth.

    Only ``list`` instances are descended into; other containers (tuples,
    strings, ...) are yielded as-is.
    """
    for element in nested:
        if not isinstance(element, list):
            yield element
            continue
        # A sub-list: recurse and relay whatever it produces.
        yield from deep_flatten(element)

deeply_nested = [1, [2, [3, [4, 5]], 6], [7, 8], 9]
print("Deeply nested:", deeply_nested)
print("Flattened:", list(deep_flatten(deeply_nested)))

Deeply nested: [1, [2, [3, [4, 5]], 6], [7, 8], 9]
Flattened: [1, 2, 3, 4, 5, 6, 7, 8, 9]


In [31]:
# yield from with any iterable
def chain_iterables(*iterables):
    """Yield every item of each argument in turn, in the order given."""
    for source in iterables:
        yield from source

result = chain_iterables([1, 2], (3, 4), "ab", {5, 6})
print("Chained:", list(result))

Chained: [1, 2, 3, 4, 'a', 'b', 5, 6]


---

## 8. Generator Use Cases

In [32]:
# Reading large files line by line
def read_large_file(filename, encoding=None):
    """Lazily yield the lines of a text file, one at a time.

    Args:
        filename: Path of the file to read.
        encoding: Text encoding passed to ``open()``. The default ``None``
            keeps the previous behaviour (the platform's default encoding);
            pass e.g. ``"utf-8"`` for portable results.

    Yields:
        Each line with leading and trailing whitespace (including the
        newline) removed.

    The file is opened on the first request for a value and stays open until
    the generator is exhausted or closed.
    """
    with open(filename, 'r', encoding=encoding) as f:
        for line in f:
            yield line.strip()

# Create a sample file
with open('sample.txt', 'w') as f:
    for i in range(5):
        f.write(f"Line {i+1}\n")

# Process file
for line in read_large_file('sample.txt'):
    print(line)

# Cleanup
import os
os.remove('sample.txt')

Line 1
Line 2
Line 3
Line 4
Line 5


In [33]:
# Infinite sequence generation
def primes():
    """Yield the prime numbers 2, 3, 5, 7, ... indefinitely.

    Each odd candidate is trial-divided by the primes found so far, stopping
    once a prime's square exceeds the candidate.
    """
    yield 2
    known = [2]
    candidate = 3

    while True:
        for p in known:
            if p * p > candidate:
                # No divisor up to the square root: candidate is prime.
                candidate_is_prime = True
                break
            if candidate % p == 0:
                candidate_is_prime = False
                break
        else:
            # Ran out of known primes without finding a divisor.
            candidate_is_prime = True

        if candidate_is_prime:
            known.append(candidate)
            yield candidate
        candidate += 2  # even numbers above 2 are never tested

# Get first 20 primes
prime_gen = primes()
first_20 = [next(prime_gen) for _ in range(20)]
print("First 20 primes:", first_20)

First 20 primes: [2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47, 53, 59, 61, 67, 71]


In [34]:
# Data pipeline processing
def read_data():
    """Stand-in data source: yield a fixed set of raw string records."""
    yield from ("100", "200", "invalid", "300", "400")

def parse_numbers(data):
    """Convert each item to ``int`` and yield it, skipping unparsable items.

    Args:
        data: Iterable of values accepted by ``int()`` (typically strings).

    Yields:
        The integer value of every item that ``int()`` can convert.

    Only a ``ValueError`` raised by the conversion itself is suppressed.
    The ``yield`` is kept outside the ``try`` so that a ``ValueError``
    thrown into the generator by its consumer is not swallowed.
    """
    for item in data:
        try:
            number = int(item)
        except ValueError:
            continue  # Skip invalid
        yield number

def apply_discount(numbers, discount=0.1):
    """Yield each number reduced by the fractional discount (0.1 means 10% off)."""
    yield from (amount * (1 - discount) for amount in numbers)

# Build pipeline
pipeline = apply_discount(parse_numbers(read_data()))
print("Discounted values:", list(pipeline))

Discounted values: [90.0, 180.0, 270.0, 360.0]


In [35]:
# Batching data
def batch(iterable, size):
    """Yield consecutive lists of up to ``size`` items taken from ``iterable``.

    Args:
        iterable: Any iterable; it is consumed lazily.
        size: Maximum number of items per batch; must be at least 1.

    Yields:
        Lists of exactly ``size`` items, except possibly the last one, which
        holds whatever items remain.

    Raises:
        ValueError: If ``size`` is less than 1. Because this is a generator,
            the error surfaces when iteration starts.
    """
    if size < 1:
        # A non-positive size can never be reached by len(), which would
        # silently lump every item into a single trailing batch.
        raise ValueError("size must be at least 1")

    # Named `current` rather than `batch` so the function name is not shadowed.
    current = []
    for item in iterable:
        current.append(item)
        if len(current) == size:
            yield current
            current = []
    if current:  # Yield remaining items
        yield current

data = range(1, 12)
for b in batch(data, 3):
    print(f"Batch: {b}")

Batch: [1, 2, 3]
Batch: [4, 5, 6]
Batch: [7, 8, 9]
Batch: [10, 11]


In [36]:
# Sliding window
def sliding_window(iterable, size):
    """Yield overlapping windows of ``size`` consecutive items as tuples.

    Args:
        iterable: Any iterable; it is consumed lazily.
        size: Window length as an integer; must be at least 1.

    Yields:
        A tuple for every run of ``size`` consecutive items, advancing by one
        item each time. Nothing is yielded if the input has fewer than
        ``size`` items.

    Raises:
        ValueError: If ``size`` is less than 1. Because this is a generator,
            the error surfaces when iteration starts.
    """
    # Imported locally so the block stays self-contained.
    from collections import deque

    if size < 1:
        raise ValueError("size must be at least 1")

    # A bounded deque drops its oldest item automatically on append, which
    # replaces the manual list.pop(0).
    window = deque(maxlen=size)
    for item in iterable:
        window.append(item)
        if len(window) == size:
            yield tuple(window)

data = [1, 2, 3, 4, 5, 6, 7]
print("Windows of size 3:")
for window in sliding_window(data, 3):
    print(f"  {window}")

Windows of size 3:
  (1, 2, 3)
  (2, 3, 4)
  (3, 4, 5)
  (4, 5, 6)
  (5, 6, 7)


---

## 9. Memory Efficiency

In [37]:
import sys

# Compare memory usage
# List - stores all values in memory
list_data = [x**2 for x in range(1000)]

# Generator - stores only its paused execution state, not the values themselves
gen_data = (x**2 for x in range(1000))

print(f"List size: {sys.getsizeof(list_data)} bytes")
print(f"Generator size: {sys.getsizeof(gen_data)} bytes")

List size: 8856 bytes
Generator size: 200 bytes


In [38]:
# Memory efficient sum of large sequence
# Bad: Creates entire list in memory
# total = sum([x for x in range(10000000)])

# Good: Uses generator, constant memory
total = sum(x for x in range(10000000))
print(f"Sum: {total}")

Sum: 49999995000000


In [39]:
# Processing without loading all data
def process_large_dataset(n):
    """Yield 0, 2, 4, ... (twice each index below n), one value at a time."""
    # The doubling stands in for real per-item processing.
    yield from (index * 2 for index in range(n))

# Only one item in memory at a time
total = 0
count = 0
for value in process_large_dataset(1000000):
    total += value
    count += 1

print(f"Processed {count} items")
print(f"Total: {total}")

Processed 1000000 items
Total: 999999000000


In [40]:
# When to use generators vs lists
print("Use GENERATORS when:")
print("- Processing large datasets")
print("- Only need to iterate once")
print("- Want lazy evaluation")
print("- Building data pipelines")

print("\nUse LISTS when:")
print("- Need to access items multiple times")
print("- Need indexing or slicing")
print("- Need to know length")
print("- Data is small enough to fit in memory")

Use GENERATORS when:
- Processing large datasets
- Only need to iterate once
- Want lazy evaluation
- Building data pipelines

Use LISTS when:
- Need to access items multiple times
- Need indexing or slicing
- Need to know length
- Data is small enough to fit in memory


---

## 10. Key Points

1. **Iterable**: Object with `__iter__()` method
2. **Iterator**: Object with `__iter__()` and `__next__()` methods
3. **Generator function**: Uses `yield` to produce values lazily
4. **Generator expression**: `(expr for x in iterable if condition)`
5. Generators are **memory efficient** - one value at a time
6. Generators are **single-use** - exhausted after iteration
7. `yield from` delegates to sub-generators/iterables
8. Use generators for **large datasets** and **data pipelines**
9. `next()` retrieves values, `StopIteration` signals end
10. `send()` allows two-way communication with generators

---

## 11. Practice Exercises

In [41]:
# Exercise 1: Create a generator that yields all even numbers
# between two given numbers (inclusive)

def even_range(start, end):
    """Exercise stub: should yield the even numbers from start to end, inclusive."""
    # Your code here:
    pass

# Test: list(even_range(1, 10)) -> [2, 4, 6, 8, 10]

In [42]:
# Exercise 2: Create a custom iterator class that cycles through
# a sequence indefinitely (like itertools.cycle)

class Cycle:
    """Exercise stub: iterator that should repeat a sequence indefinitely."""
    # Your code here:
    pass

# Test: Take first 10 items from Cycle([1, 2, 3])

In [43]:
# Exercise 3: Create a generator that yields running averages
# of numbers passed to it

def running_average():
    """Exercise stub: should yield the running average of the values sent in."""
    # Your code here (use send())
    pass

# Test: avg = running_average(); next(avg)
# avg.send(10) -> 10.0, avg.send(20) -> 15.0

In [44]:
# Exercise 4: Create a generator that yields unique elements
# from an iterable (preserving order)

def unique(iterable):
    """Exercise stub: should yield each distinct element once, in first-seen order."""
    # Your code here:
    pass

# Test: list(unique([1, 2, 2, 3, 1, 4, 3])) -> [1, 2, 3, 4]

In [45]:
# Exercise 5: Create a generator that yields items from nested
# dictionaries in a flattened format (key path: value)

def flatten_dict(d, parent_key=''):
    """Exercise stub: should yield (dotted key path, value) pairs for a nested dict."""
    # Your code here (use yield from for recursion)
    pass

# Test: dict(flatten_dict({'a': 1, 'b': {'c': 2, 'd': {'e': 3}}}))
# -> {'a': 1, 'b.c': 2, 'b.d.e': 3}

---

## Solutions

In [46]:
# Solution 1:
def even_range(start, end):
    """Yield the even numbers between start and end, both ends inclusive."""
    value = start
    if start % 2 != 0:
        value += 1  # odd start: the first even number is the next one up
    while value <= end:
        yield value
        value += 2

print("even_range(1, 10):", list(even_range(1, 10)))
print("even_range(4, 12):", list(even_range(4, 12)))

even_range(1, 10): [2, 4, 6, 8, 10]
even_range(4, 12): [4, 6, 8, 10, 12]


In [47]:
# Solution 2:
class Cycle:
    """Iterator that repeats the items of an iterable indefinitely.

    The items are copied into a list up front. If there are no items, the
    iterator is exhausted immediately.
    """

    def __init__(self, iterable):
        self.data = list(iterable)
        self.index = 0  # position of the next item to hand out

    def __iter__(self):
        return self

    def __next__(self):
        items, position = self.data, self.index
        if not items:
            raise StopIteration
        item = items[position]
        # Wrap back to the start after the last item.
        self.index = (position + 1) % len(items)
        return item

# Test
cycler = Cycle([1, 2, 3])
result = [next(cycler) for _ in range(10)]
print("Cycle([1, 2, 3]) first 10:", result)

Cycle([1, 2, 3]) first 10: [1, 2, 3, 1, 2, 3, 1, 2, 3, 1]


In [48]:
# Solution 3:
def running_average():
    """Generator that reports the mean of all values sent to it so far.

    Prime it with next() (which yields None), then call send(value); each
    send returns the updated average. Sending None leaves the average
    unchanged.
    """
    total, count = 0, 0
    average = None

    while True:
        received = yield average
        if received is None:
            continue
        total += received
        count += 1
        average = total / count

# Test
avg = running_average()
next(avg)  # Initialize
print(f"Send 10: {avg.send(10)}")
print(f"Send 20: {avg.send(20)}")
print(f"Send 30: {avg.send(30)}")
print(f"Send 40: {avg.send(40)}")

Send 10: 10.0
Send 20: 15.0
Send 30: 20.0
Send 40: 25.0


In [49]:
# Solution 4:
def unique(iterable):
    """Yield each distinct item once, in order of first appearance.

    Items are tracked in a set, so they must be hashable.
    """
    already_seen = set()
    for element in iterable:
        if element in already_seen:
            continue
        already_seen.add(element)
        yield element

# Test
print("unique([1, 2, 2, 3, 1, 4, 3]):", list(unique([1, 2, 2, 3, 1, 4, 3])))
print("unique('abracadabra'):", list(unique('abracadabra')))

unique([1, 2, 2, 3, 1, 4, 3]): [1, 2, 3, 4]
unique('abracadabra'): ['a', 'b', 'r', 'c', 'd']


In [50]:
# Solution 5:
def flatten_dict(d, parent_key='', sep='.'):
    """Yield ``(key_path, value)`` pairs for every non-dict leaf of a nested dict.

    Args:
        d: The (possibly nested) dictionary to flatten.
        parent_key: Prefix prepended to every key path. A falsy value (the
            default ``''``) means no prefix.
        sep: Separator placed between path components (default ``'.'``).

    Yields:
        Tuples of ``(key_path, value)``. With no prefix, a top-level key is
        yielded unchanged; nested keys are joined into a string with ``sep``.
        Empty nested dicts produce no pairs.

    Whether a prefix exists is tracked with an explicit flag rather than by
    the truthiness of the prefix, so a falsy nested key such as ``0`` or
    ``''`` still contributes to the paths of its children.
    """
    def walk(mapping, prefix, has_prefix):
        for key, value in mapping.items():
            path = f"{prefix}{sep}{key}" if has_prefix else key
            if isinstance(value, dict):
                yield from walk(value, path, True)
            else:
                yield (path, value)

    yield from walk(d, parent_key, bool(parent_key))

# Test
nested = {
    'a': 1,
    'b': {
        'c': 2,
        'd': {
            'e': 3
        }
    },
    'f': 4
}

print("Flattened dict:")
for key, value in flatten_dict(nested):
    print(f"  {key}: {value}")

Flattened dict:
  a: 1
  b.c: 2
  b.d.e: 3
  f: 4
