# Generators

A way to iterate over a large dataset/list without having an effect on space complexity

### Regular way to iterate over a list:

In [3]:
import memory_profiler as mem_profile
import random
import time

In [13]:
print('Memory (Before):' + str(mem_profile.memory_usage()) + ' MB')


def square_numbers(nums_list):
    result = []

    for num in nums_list:
        result.append(num*num)

    return result

print(square_numbers([1,2,3,4,5,6]))


print('Memory (After):' + str(mem_profile.memory_usage()) + ' MB')

Memory (Before):[28.72265625] MB
[1, 4, 9, 16, 25, 36]
Memory (After):[28.7265625] MB


All 100,000 of those squared numbers will be held in memory, causing space complexity to be O(n)

In [15]:
print('Memory (Before):' + str(mem_profile.memory_usage()) + ' MB')

square_numbers(range(0, 100000))
print("Storing the squared number of numbers between 0 and 100,000...")

print('Memory (After):' + str(mem_profile.memory_usage()) + ' MB')

Memory (Before):[29.56640625] MB
Storing the squared number of numbers between 0 and 100,000...
Memory (After):[33.4453125] MB


### Now, using a generator:

In [16]:
print('Memory (Before):' + str(mem_profile.memory_usage()) + ' MB')


# def square_numbers_generator(nums_list):
#     for num in nums_list:
#         yield (num*num)

#### OR ####


### For list comprehensions, instead of surrounding with square brackets [], surround with parentheses () for generator

## Regular list comprehension:
#square_numbers_generator = [num * num for num in [1,2,3,4,5,6]]


## Generator list comprehension:
square_numbers_generator = (num * num for num in [1,2,3,4,5,6])


print(next(square_numbers_generator))
print(next(square_numbers_generator))
print(next(square_numbers_generator))
print(next(square_numbers_generator))
print(next(square_numbers_generator))
print(next(square_numbers_generator))


## Invoking next() outside of list range will result in an StopIteration exception


print('Memory (After):' + str(mem_profile.memory_usage()) + ' MB')

Memory (Before):[29.41796875] MB
1
4
9
16
25
36
Memory (After):[29.42578125] MB


Not all 100,000 numbers are being stored in memory all at once with generator, only when invoked

In [17]:
print('Memory (Before):' + str(mem_profile.memory_usage()) + ' MB')

square_numbers_generator2 = (num * num for num in range(0, 100000))

print(next(square_numbers_generator2))
print(next(square_numbers_generator2))
print(next(square_numbers_generator2))
print(next(square_numbers_generator2))
print(next(square_numbers_generator2))
print(next(square_numbers_generator2))
print(next(square_numbers_generator2))
print(next(square_numbers_generator2))
print(next(square_numbers_generator2))
print(next(square_numbers_generator2))
print(next(square_numbers_generator2))
print(next(square_numbers_generator2))
### ...

print('Memory (After):' + str(mem_profile.memory_usage()) + ' MB')

Memory (Before):[26.703125] MB
0
1
4
9
16
25
36
49
64
81
100
121
Memory (After):[26.75390625] MB


### Complex example:

Regular list iteration:

In [19]:
names = ['John', 'Corey', 'Adam', 'Steve', 'Rick', 'Thomas']
majors = ['Math', 'Engineering', 'CompSci', 'Arts', 'Business']


print('Memory (Before):' + str(mem_profile.memory_usage()) + ' MB')


def people_list(num_people):
    result = []

    for i in range(num_people):
        person = {
                    'id': i,
                    'name': random.choice(names),
                    'major': random.choice(majors)
                }
        result.append(person)
        
    return result

t1 = time.time()
people = people_list(1000000)
t2 = time.time()


print('Memory (After):' + str(mem_profile.memory_usage()) + ' MB')
print('Took {} Seconds'.format(t2-t1))

Memory (Before):[35.17578125] MB
Memory (After):[351.953125] MB
Took 6.632803678512573 Seconds


Generator version:

In [22]:
names = ['John', 'Corey', 'Adam', 'Steve', 'Rick', 'Thomas']
majors = ['Math', 'Engineering', 'CompSci', 'Arts', 'Business']


print('Memory (Before):' + str(mem_profile.memory_usage()) + ' MB')


def people_generator(num_people):
    for i in range(num_people):
        person = {
                    'id': i,
                    'name': random.choice(names),
                    'major': random.choice(majors)
                }
        yield person


t1 = time.time()
people = people_generator(1000000)
t2 = time.time()

print('Memory (After):' + str(mem_profile.memory_usage()) + ' MB')
print('Took {} Seconds'.format(t2-t1))

Memory (Before):[87.41015625] MB
Memory (After):[87.41015625] MB
Took 0.0 Seconds
