## Simple Generator

In [10]:
def generator():
    """Yield the positive integers 1, 2, 3, ... without ever stopping."""
    value = 1
    while True:
        yield value
        value += 1

In [11]:
# The generator never terminates on its own, so stop after the fifth value.
for item in generator():
    print(item)
    if item >= 5:
        break

1
2
3
4
5


## DataFrame of random values

In [82]:
import pandas as pd
import numpy as np

# Build a 5x4 frame of random integers in [0, 100) with columns A-D.
random_values = np.random.randint(0, 100, size=(5, 4))
df = pd.DataFrame(random_values, columns=['A', 'B', 'C', 'D'])
df

Unnamed: 0,A,B,C,D
0,76,71,37,53
1,49,68,69,80
2,28,82,48,89
3,86,60,84,18
4,91,88,58,78


## Generator for a DataFrame

In [102]:
def DataFrameGenerator(df, batch_size, min_index=0, max_index=None, shuffle=False):
    """Endlessly yield ``(samples, targets)`` batches taken from ``df``.

    The last column of ``df`` is treated as the target (class label) and all
    preceding columns as the sample features. Only rows whose position lies
    in the half-open range ``[min_index, max_index)`` are used.

    Args:
        df: DataFrame whose values can all be converted to float.
        batch_size: Number of rows per batch; must be at least 1.
        min_index: Position of the first row that may be used.
        max_index: Position one past the last row that may be used.
            Defaults to the number of rows in ``df``.
        shuffle: If True, every batch consists of ``batch_size`` row
            positions drawn uniformly at random *with replacement*, so a row
            may appear more than once in a batch. If False, rows are served
            in order; the cursor rewinds to ``min_index`` as soon as a full
            batch no longer fits, so trailing rows that do not fill a whole
            batch are skipped. If ``batch_size`` exceeds the size of the
            range, every batch is the whole range.

    Yields:
        A tuple ``(samples, targets)`` of float arrays with shapes
        ``(n, df.shape[1] - 1)`` and ``(n, 1)``, where ``n`` is the number of
        rows in the batch.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1 or the index range is
            empty. Because this is a generator, the error surfaces when the
            first batch is requested.
    """
    if max_index is None:
        max_index = df.shape[0]

    # Without these checks the sequential mode would yield empty batches
    # forever instead of failing.
    if batch_size < 1:
        raise ValueError(f"batch_size must be at least 1, got {batch_size}")
    if min_index >= max_index:
        raise ValueError(
            f"empty index range: min_index={min_index}, max_index={max_index}"
        )

    cursor = min_index

    while True:
        if shuffle:
            rows = np.random.randint(min_index, max_index, batch_size)
        else:
            # Rewind when the next full batch would run past the range.
            if cursor + batch_size > max_index:
                cursor = min_index
            rows = np.arange(cursor, min(cursor + batch_size, max_index))
            cursor += len(rows)

        # Select the whole batch at once instead of one row at a time.
        batch = df.iloc[rows]
        samples = np.array(batch.iloc[:, :-1], dtype=float)  # all but the label column
        targets = np.array(batch.iloc[:, -1:], dtype=float)  # label column, kept 2-D

        yield samples, targets

In [104]:
# Show the first seven sequential batches; with batch_size equal to the
# number of rows, every batch is the whole frame.
count = 1
batches = DataFrameGenerator(df=df, batch_size=5)
while count <= 7:
    items = next(batches)
    print('count:', count)
    print('samples: ', items[0])
    print('targets: ', items[1])
    count += 1

count: 1
samples:  [[76. 71. 37.]
 [49. 68. 69.]
 [28. 82. 48.]
 [86. 60. 84.]
 [91. 88. 58.]]
targets:  [[53.]
 [80.]
 [89.]
 [18.]
 [78.]]
count: 2
samples:  [[76. 71. 37.]
 [49. 68. 69.]
 [28. 82. 48.]
 [86. 60. 84.]
 [91. 88. 58.]]
targets:  [[53.]
 [80.]
 [89.]
 [18.]
 [78.]]
count: 3
samples:  [[76. 71. 37.]
 [49. 68. 69.]
 [28. 82. 48.]
 [86. 60. 84.]
 [91. 88. 58.]]
targets:  [[53.]
 [80.]
 [89.]
 [18.]
 [78.]]
count: 4
samples:  [[76. 71. 37.]
 [49. 68. 69.]
 [28. 82. 48.]
 [86. 60. 84.]
 [91. 88. 58.]]
targets:  [[53.]
 [80.]
 [89.]
 [18.]
 [78.]]
count: 5
samples:  [[76. 71. 37.]
 [49. 68. 69.]
 [28. 82. 48.]
 [86. 60. 84.]
 [91. 88. 58.]]
targets:  [[53.]
 [80.]
 [89.]
 [18.]
 [78.]]
count: 6
samples:  [[76. 71. 37.]
 [49. 68. 69.]
 [28. 82. 48.]
 [86. 60. 84.]
 [91. 88. 58.]]
targets:  [[53.]
 [80.]
 [89.]
 [18.]
 [78.]]
count: 7
samples:  [[76. 71. 37.]
 [49. 68. 69.]
 [28. 82. 48.]
 [86. 60. 84.]
 [91. 88. 58.]]
targets:  [[53.]
 [80.]
 [89.]
 [18.]
 [78.]]


In [106]:
# Show the first seven shuffled batches; rows are drawn at random, so a row
# may repeat within a batch.
count = 1
batches = DataFrameGenerator(df=df, batch_size=5, shuffle=True)
while count <= 7:
    items = next(batches)
    print('count:', count)
    print('samples: ', items[0])
    print('targets: ', items[1])
    count += 1

count: 1
samples:  [[91. 88. 58.]
 [86. 60. 84.]
 [49. 68. 69.]
 [76. 71. 37.]
 [91. 88. 58.]]
targets:  [[78.]
 [18.]
 [80.]
 [53.]
 [78.]]
count: 2
samples:  [[49. 68. 69.]
 [49. 68. 69.]
 [91. 88. 58.]
 [91. 88. 58.]
 [86. 60. 84.]]
targets:  [[80.]
 [80.]
 [78.]
 [78.]
 [18.]]
count: 3
samples:  [[86. 60. 84.]
 [86. 60. 84.]
 [86. 60. 84.]
 [28. 82. 48.]
 [28. 82. 48.]]
targets:  [[18.]
 [18.]
 [18.]
 [89.]
 [89.]]
count: 4
samples:  [[91. 88. 58.]
 [28. 82. 48.]
 [28. 82. 48.]
 [49. 68. 69.]
 [76. 71. 37.]]
targets:  [[78.]
 [89.]
 [89.]
 [80.]
 [53.]]
count: 5
samples:  [[91. 88. 58.]
 [91. 88. 58.]
 [28. 82. 48.]
 [28. 82. 48.]
 [28. 82. 48.]]
targets:  [[78.]
 [78.]
 [89.]
 [89.]
 [89.]]
count: 6
samples:  [[91. 88. 58.]
 [76. 71. 37.]
 [91. 88. 58.]
 [91. 88. 58.]
 [76. 71. 37.]]
targets:  [[78.]
 [53.]
 [78.]
 [78.]
 [53.]]
count: 7
samples:  [[49. 68. 69.]
 [76. 71. 37.]
 [76. 71. 37.]
 [86. 60. 84.]
 [76. 71. 37.]]
targets:  [[80.]
 [53.]
 [53.]
 [18.]
 [53.]]
