In [1]:

########## Generator #########

# concept of generators in Python
# A generator is a special type of iterator that allows you to iterate over a sequence of values without storing them all in memory at once.
# Generators are defined using functions with the `yield` keyword or by using generator expressions.
# Generators are more memory efficient than lists, especially when dealing with large datasets.
# Generators are iterators, but not all iterators are generators.

# A `yield` statement anywhere in a function body turns that function into a generator function.
# Calling a generator function does not run its body immediately. Instead, the call returns a generator object that is iterated lazily: the body runs up to the next `yield` each time a value is requested (one item at a time).
# Outside a function, `yield` makes no sense — there is no generator to suspend and resume, so Python rejects it with a SyntaxError.


# Basic Generator Function

In [10]:
def gen_numbers(n):
    """Lazily yield the doubles of 0, 1, ..., n - 1 (that is 0, 2, 4, ...)."""
    yield from (index * 2 for index in range(n))

In [11]:
# Calling the generator function only builds the generator object; no values are produced until it is iterated.
chunk = gen_numbers(10)

In [12]:
# Each pass of the loop pulls the next value from the generator; once the loop ends the generator is exhausted.
for g in chunk:
    print(g)

0
2
4
6
8
10
12
14
16
18


In [13]:
# Generator Expression
# The `if i<5` filter is applied to each element first; 5 is added only to the elements that pass (1, 4, 4).
Generator = (i+5 for i in [1, 6, 4, 8, 4] if i<5)


In [14]:
# Iterating consumes the generator expression; a second loop over `Generator` would print nothing.
for g in Generator:
    print(g)


6
9
9


In [None]:

# List vs Generator in Performance and Memory

import sys
import time  # imported here because no earlier cell brings `time` into scope

# Number of items to compare. The original 1_000_000_000 would need tens of
# gigabytes just to build the list; raise this only if the kernel has the RAM.
N_ITEMS = 1_000_000

# List: every element is materialised up front, then summed.
a = [i for i in range(N_ITEMS)]
print(sys.getsizeof(a))  # bytes used by the list object itself (its element slots)

t1 = time.perf_counter()  # perf_counter is the clock intended for measuring short durations
list_total = sum(a)
t2 = time.perf_counter()
print(t2-t1)

# Generator: elements are produced one at a time while summing.
b = (g for g in range(N_ITEMS))
print(sys.getsizeof(b))  # bytes used by the generator object; does not grow with N_ITEMS

t1 = time.perf_counter()
gen_total = sum(b)
t2 = time.perf_counter()
print(t2-t1)


In [1]:
# Use generators when you're working with large datasets or streams of data (e.g., log files, sensor data)
def simple_gen():
    """Yield the two fixed values 1 and 2, in that order."""
    for value in (1, 2):
        yield value

In [2]:
# Calling simple_gen() returns a generator object without running its body yet.
generator = simple_gen()
print(type(generator))  # <class 'generator'>

<class 'generator'>


In [3]:
# The for loop calls next() on the generator until it is exhausted.
for g in generator:
    print(g)

1
2


In [13]:

######## Reading Large Files Line by Line #######

def read_large_file(filepath):
    """Yield the lines of a text file one by one, stripped of surrounding whitespace.

    Lines are read lazily as the caller iterates, and the file stays open
    until the generator is exhausted or closed.
    """
    with open(filepath, "r") as handle:
        yield from map(str.strip, handle)

filepath = r"/content/251052166P001000018.txt"
INVOICE_DATE_FIELD = 9  # zero-based position of the "INVOICE DATE" column in the pipe-delimited header
for g in read_large_file(filepath):
    print(g)
    print("\n")
    fields = g.split("|")
    # Blank or truncated lines have fewer fields; indexing them directly would raise IndexError and abort the loop.
    if len(fields) > INVOICE_DATE_FIELD:
        print(fields[INVOICE_DATE_FIELD])

DCN|DOCUMENT TYPE|DOCUMENT CATEGORY|PO NUMBER|VENDOR GSTIN|VENDOR NAME|Vendor Group|SHIP TO PLANT NAME|INVOICE NUMBER|INVOICE DATE|INVOICE AMOUNT|BUSINESS DIVISION|Buyer GSTIN|Barcode


INVOICE DATE
251052166P001000018|NON PO|INVOICE|||J K CEMENT LIMITED|||533000104576|04/03/2025|708.36||29AABCJ0355R1Z3|PUN2510500018


04/03/2025


In [14]:

# Chunking Large Data (like text or list)

# The integers 1 through 20, used as sample data for chunking.
numbers = list(range(1, 21))
def chunking(numbers, size):
    """Yield consecutive slices of ``numbers`` that each hold at most ``size`` items.

    Args:
        numbers: A sequence supporting ``len()`` and slicing (list, tuple, str, ...).
        size: Maximum length of each chunk; must be a positive integer.

    Yields:
        Slices of ``numbers`` in order; the last one may be shorter than ``size``.

    Raises:
        ValueError: If ``size`` is less than 1. Because this is a generator
            function, the error surfaces when the first chunk is requested.
    """
    # A zero step makes range() fail with an unrelated message and a negative
    # step silently yields nothing, so reject both explicitly.
    if size < 1:
        raise ValueError(f"size must be a positive integer, got {size!r}")
    for start in range(0, len(numbers), size):
        yield numbers[start:start + size]

# Each yielded chunk is a slice of `numbers` holding up to 4 items.
for gen in chunking(numbers, 4):
    print(gen)

[1, 2, 3, 4]
[5, 6, 7, 8]
[9, 10, 11, 12]
[13, 14, 15, 16]
[17, 18, 19, 20]


In [15]:
# Composing Generators (Generator Pipelines)
def numbers_gen(n):
    """Source stage: lazily yield the integers 0, 1, ..., n - 1."""
    yield from range(n)

def squared_gen(seq):
    """Transform stage: lazily yield the square of every item in ``seq``."""
    yield from (item * item for item in seq)

def even_gen(seq):
    """Filter stage: lazily yield only the items of ``seq`` that are divisible by 2."""
    yield from (item for item in seq if item % 2 == 0)

# Compose the generators
# Each stage wraps the previous one; no value is computed until list() pulls items through the whole chain.
pipeline = even_gen(squared_gen(numbers_gen(20)))
print(list(pipeline))

[0, 4, 16, 36, 64, 100, 144, 196, 256, 324]


In [17]:
# Generator Expressions (Short form)
# Stepping the range by 2 visits only the even numbers, so no separate filter clause is needed.
squares = (n * n for n in range(0, 1_000_000, 2))

0


In [21]:
# next() advances the shared `squares` generator, so this prints 0 only on the first run after `squares` is created;
# every re-run of this cell prints the following value instead (4, 16, 36, 64, ...).
print(next(squares))

64


In [25]:
#Generator with Pandas (Streaming DataFrames)
import pandas as pd

def read_csv_in_chunks(filepath, chunksize):
    """Yield successive DataFrames of at most ``chunksize`` rows read from a CSV file.

    Args:
        filepath: Location of the CSV file, passed straight to ``pd.read_csv``.
        chunksize: Maximum number of rows per yielded DataFrame.

    Yields:
        pandas.DataFrame: The next block of rows; the final block may be shorter.
    """
    # With `chunksize` set, read_csv returns a reader object rather than a DataFrame.
    # Using it as a context manager closes the underlying file even when the
    # caller stops iterating early (requires pandas >= 1.2).
    with pd.read_csv(filepath, chunksize=chunksize) as reader:
        yield from reader

In [26]:
# NOTE(review): hard-coded absolute path — presumably a Colab upload location; adjust for other environments.
filepath = "/content/jkcl_31fields_group_31fields.csv"
# Only one chunk of up to 200 rows is handled per iteration; .shape is (rows, columns) of that chunk.
for g in read_csv_in_chunks(filepath, chunksize=200):
  print(g.shape)

(200, 4)
(200, 4)
(200, 4)
(200, 4)
(200, 4)
(200, 4)
(200, 4)
(200, 4)
(200, 4)
(200, 4)
(200, 4)
(200, 4)
(200, 4)
(200, 4)
(200, 4)
(200, 4)
(200, 4)
(200, 4)
(200, 4)
(200, 4)
(200, 4)
(200, 4)
(200, 4)
(200, 4)
(13, 4)


In [34]:
# Basic Custom Iterator Example (a class that implements both __iter__ and __next__)

In [32]:
class CreateNumberRange:
    """Iterator over start, start + step, ... up to and including ``end``.

    The object is its own iterator (``__iter__`` returns ``self``) and advances
    ``self.start`` as values are consumed, so it can be traversed only once.

    Args:
        start: First value to produce.
        end: Inclusive upper bound.
        step: Positive increment between consecutive values; defaults to 2.

    Raises:
        ValueError: If ``step`` is not positive, since the values would then
            never move past ``end`` and iteration would not terminate.
    """

    def __init__(self, start, end, step=2):
        if step <= 0:
            raise ValueError(f"step must be positive, got {step!r}")
        self.start = start
        self.end = end
        self.step = step

    def __iter__(self):
        """Return the object itself, as required of an iterator."""
        return self

    def __next__(self):
        """Return the current value and advance, or raise StopIteration past ``end``."""
        value = self.start
        if value <= self.end:
            # The cursor lives in `start`, so the original starting point is not kept.
            self.start += self.step
            return value
        raise StopIteration



<__main__.CreateNumberRange at 0x7fbc984379d0>

In [33]:
# The for loop calls __next__ repeatedly until StopIteration is raised; 20 is printed because the upper bound is inclusive.
for i in CreateNumberRange(10, 20):
  print(i)

10
12
14
16
18
20
