# A small demo of the background generator

[Should work in both Python 2 and Python 3]


In [1]:
from __future__ import print_function
from prefetch_generator import BackgroundGenerator, background,__doc__

# `__doc__` was imported from prefetch_generator above, so this prints the
# package's docstring rather than this module's own.
print(__doc__)


Based on http://stackoverflow.com/questions/7323664/python-generator-pre-fetch

This is a single-function package that makes it possible to transform any generator into a `BackgroundGenerator` which computes any number of elements from the generator ahead, in a background thread.

It is quite lightweight, but not entirely weightless.

The `BackgroundGenerator` is most useful when you have a GIL releasing task which might take a long time to complete (e.g. Disk I/O, Web Requests, pure C functions, GPU processing, ...), and another task which takes a similar amount of time, but is dependent on the results of the first task (e.g. Computationally intensive processing of data loaded from disk).

Normally these two tasks will constantly wait for one another to finish. If you make one of these tasks a `BackgroundGenerator` (see examples below), they will work in parallel, potentially saving up to 50% of execution time (definitely less in practice).

We personally use the `BackgroundGenerator

In [2]:
###your super-mega data iterator
import numpy as np
import time

def iterate_minibatches(n_batches, batch_size=10):
    """Generate `n_batches` random (X, y) minibatches, one every 0.1 seconds.

    Each batch is a pair of NumPy arrays: X with shape (batch_size, 20) drawn
    from `np.random.normal`, and y with shape (batch_size,) holding integers
    drawn from `np.random.randint(0, 2)`.

    The 0.1 s sleep before each batch simulates slow data loading.
    """
    n_features = 20
    for _ in range(n_batches):
        # Stand-in for a slow producer: reading a file, an SQL query, heavy math.
        time.sleep(0.1)
        features = np.random.normal(size=(batch_size, n_features))
        labels = np.random.randint(0, 2, size=batch_size)
        yield features, labels

### Regular mode

In [3]:
%%time

# Crude text progress bar: a header line, then one '!' per finished batch.
print('/' + ' Progress Bar '.center(98, '-') + '\\')

for b_x, b_y in iterate_minibatches(50):
    # Stand-in for the training step (e.g. work that could run on a GPU).
    time.sleep(0.1)
    print('!', end=" ")

# Finish the progress line with a newline.
print()

/------------------------------------------ Progress Bar ------------------------------------------\
! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! 
CPU times: user 100 ms, sys: 20 ms, total: 120 ms
Wall time: 10.1 s


### With prefetch

In [4]:
%%time

# Same progress-bar header as the regular run, for a side-by-side comparison.
print('/' + ' Progress Bar '.center(98, '-') + '\\')

# The generator is wrapped in BackgroundGenerator before being consumed.
for b_x, b_y in BackgroundGenerator(iterate_minibatches(50)):
    # Stand-in for the training step (e.g. work that could run on a GPU).
    time.sleep(0.1)
    print('!', end=" ")
# Finish the progress line with a newline.
print()

/------------------------------------------ Progress Bar ------------------------------------------\
! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! 
CPU times: user 68 ms, sys: 16 ms, total: 84 ms
Wall time: 5.14 s


### Same with decorator

In [5]:
###your super-mega data iterator again, now with background decorator
import numpy as np
import time

@background(max_prefetch=3)
def bg_iterate_minibatches(n_batches, batch_size=10):
    """Generate `n_batches` random (X, y) minibatches, one every 0.1 seconds.

    Each batch is a pair of NumPy arrays: X with shape (batch_size, 20) drawn
    from `np.random.normal`, and y with shape (batch_size,) holding integers
    drawn from `np.random.randint(0, 2)`.

    The generator is decorated with `background(max_prefetch=3)`.
    NOTE(review): max_prefetch presumably limits how many batches are
    buffered ahead of the consumer -- confirm against prefetch_generator.
    """
    n_features = 20
    for _ in range(n_batches):
        # Stand-in for a slow producer: reading a file, an SQL query, heavy math.
        time.sleep(0.1)
        features = np.random.normal(size=(batch_size, n_features))
        labels = np.random.randint(0, 2, size=batch_size)
        yield features, labels


In [6]:
%%time

# Same progress-bar header as the earlier runs, for a side-by-side comparison.
print('/' + ' Progress Bar '.center(98, '-') + '\\')

# The decorated generator is consumed directly, with no explicit wrapper here.
for b_x, b_y in bg_iterate_minibatches(50):
    # Stand-in for the training step.
    time.sleep(0.1)
    print('!', end=" ")
# Finish the progress line with a newline.
print()

/------------------------------------------ Progress Bar ------------------------------------------\
! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! 
CPU times: user 56 ms, sys: 20 ms, total: 76 ms
Wall time: 5.14 s
