In [1]:
# generators are used to create custom iterators in python

In [2]:
def square_numbers(nums):
    results = []
    for i in nums:
        results.append(i**2)
    return results
my_nums = square_numbers([1,2,3,4,5,6])
print(my_nums)

[1, 4, 9, 16, 25, 36]


In [3]:
# use generators of the above example
def square_numbers(nums):
    for i in nums:
        yield(i**2) # the yield is what makes it generator
        
my_nums = square_numbers([1,2,3,4,5,6])
print(my_nums)
# we get a generator object than getting a result
# It doesn't hold entire result in memory
# it yields answers one at a time
# it is waiting for us to ask the results

<generator object square_numbers at 0x7f40085714d0>


In [4]:
# get the first result
print(next(my_nums))

1


In [5]:
# get other next results
print(next(my_nums))
print(next(my_nums))
print(next(my_nums))
print(next(my_nums))
print(next(my_nums))

4
9
16
25
36


In [6]:
# what if i run next after i exhaust all the inputs
print(next(my_nums))
# we get an error
# stop iteration means it is out of values

StopIteration: 

In [None]:
# Best method
# Use for loop to get values
my_nums = square_numbers([1,2,3,4,5])
for num in my_nums: # where my_nums is the generator object
    print(num)

In [7]:
# Normal list comprehension
my_nums = [x*x for x in [1,2,3,4,5]]
for num in my_nums:
    print(num)

1
4
9
16
25


In [8]:
# generator comprehension
my_nums = (x*x for x in [1,2,3,4,5]) # instead of square brackets in list comprehension, we use curve brackets
for num in my_nums:
    print(num)

1
4
9
16
25


In [9]:
# what if we want to print out all the values from the generator
my_nums = (x*x for x in [1,2,3,4,5])
print(list(my_nums))
# when we convert it to a list we will lose the advantage of generator

[1, 4, 9, 16, 25]


In [10]:
# We will look at how generators will boost the perfomance when we deal with large dataset

In [11]:
!pip install memory-profiler



In [12]:
import memory_profiler as mem_profile
import random
import time
import resource

In [13]:
# fake data for netflix signin
username = ['chella', 'chellz', 'nivedhithan', 'vihaan', 'akshay', 'tarun', 'manoj', 'mech_prashanth', 'nithyan', 'ranjith']
password = ['12345', '234', 'qwerty', 'asdfg', '09876', 'balls', 'lando', 'mclaren', 'ferrari', 'petronasAMG', 'redbull']

In [14]:
# Normal list stuff

print('Memory usage before: ', mem_profile.memory_usage(), 'MB')

def account_list(num_accounts):
    results = []
    for i in range(num_accounts):
        fake_account = {
            'username': random.choice(username),
            'password': random.choice(password),
            'credit_card_number': random.randint(10000000, 99999999)
        }
        results.append(fake_account)
    return results

start_time = time.process_time()
list_accounts = account_list(1000000)
end_time = time.process_time()

print('Memory usage after: ', mem_profile.memory_usage(), 'MB')
print('Time taken to execute: ', end_time-start_time, 'seconds')

Memory usage before:  [50.625] MB
Memory usage after:  [338.0] MB
Time taken to execute:  2.8799571370000003 seconds


In [15]:
# using generator

print('Memory usage before: ', resource.getrusage(resource.RUSAGE_SELF).ru_maxrss/1024, 'MB')

def account_list(num_accounts):
    for i in range(num_accounts):
        fake_account = {
            'username': random.choice(username),
            'password': random.choice(password),
            'credit_card_number': random.randint(10000000, 99999999)
        }
        yield fake_account

start_time = time.process_time()
generator_accounts = account_list(1000000)
end_time = time.process_time()

print('Memory usage after: ', resource.getrusage(resource.RUSAGE_SELF).ru_maxrss/1024, 'MB')
print('Time taken to execute: ', end_time-start_time, 'seconds')

Memory usage before:  338.0 MB
Memory usage after:  338.0 MB
Time taken to execute:  5.07909999996059e-05 seconds


In [16]:
# using generator and convert back to list

print('Memory usage before: ', resource.getrusage(resource.RUSAGE_SELF).ru_maxrss/1024, 'MB')

def account_list(num_accounts):
    for i in range(num_accounts):
        fake_account = {
            'username': random.choice(username),
            'password': random.choice(password),
            'credit_card_number': random.randint(10000000, 99999999)
        }
        yield fake_account

start_time = time.process_time()
list_generator_accounts = list(account_list(1000000))
end_time = time.process_time()

print('Memory usage after: ', resource.getrusage(resource.RUSAGE_SELF).ru_maxrss/1024, 'MB')
print('Time taken to execute: ', end_time-start_time, 'seconds')

Memory usage before:  338.0 MB
Memory usage after:  624.5625 MB
Time taken to execute:  2.97374356 seconds


In [17]:
# Experiments on my own

In [18]:
# convert generator to list just to calulate the length

print('Memory usage before: ', mem_profile.memory_usage(), 'MB')

def account_list(num_accounts):
    for i in range(num_accounts):
        fake_account = {
            'username': random.choice(username),
            'password': random.choice(password),
            'credit_card_number': random.randint(10000000, 99999999)
        }
        yield fake_account

start_time = time.process_time()
generator_accounts_2 = account_list(1000000)
print('length = ', len(list(generator_accounts_2)))
end_time = time.process_time()

print('Memory usage after: ', mem_profile.memory_usage(), 'MB')
print('Time taken to execute: ', end_time-start_time, 'seconds')

Memory usage before:  [624.82421875] MB
length =  1000000
Memory usage after:  [622.47265625] MB
Time taken to execute:  2.987595003999999 seconds
