In [48]:
from concurrent.futures import ThreadPoolExecutor as Pool
from itertools import tee
from time import sleep
from operator import add
from functools import reduce

class ParallelCollection:
    """A small collection whose ``map`` and ``filter`` run on a thread pool.

    Attributes:
        data: the iterable of elements held by the collection.
        np: worker count passed to the thread pool as ``max_workers``.
    """

    def __init__(self, data, np):
        """Store the elements and the number of worker threads.

        Args:
            data: iterable of elements; kept as given (not copied).
            np: maximum number of worker threads used by ``map``/``filter``.
        """
        self.data = data
        self.np = np

    def map(self, func):
        """Apply ``func`` to every element using the thread pool.

        Args:
            func: callable taking one element.

        Returns:
            A new ParallelCollection holding the results as a list, in input
            order, with the same worker count.
        """
        with Pool(self.np) as pool:
            # Executor.map returns a one-shot iterator. Materialize it so the
            # resulting collection can be traversed more than once (e.g.
            # summed twice, or filtered after being mapped).
            results = list(pool.map(func, self.data))
        return ParallelCollection(results, self.np)

    def filter(self, func):
        """Keep the elements for which ``func`` returns a truthy value.

        The predicate is evaluated on the thread pool; the surviving elements
        keep their original relative order.

        Args:
            func: callable taking one element and returning a truth value.

        Returns:
            A new ParallelCollection holding the kept elements as a list, with
            the same worker count.
        """
        # Snapshot first: the elements are needed twice (to evaluate the
        # predicate and to pair each verdict with its element), which would
        # silently yield nothing if self.data were a one-shot iterator.
        items = list(self.data)
        with Pool(self.np) as pool:
            verdicts = list(pool.map(func, items))
        kept = [item for item, keep in zip(items, verdicts) if keep]
        return ParallelCollection(kept, self.np)

    def sum(self):
        """Return the built-in ``sum`` of the elements (0 when empty)."""
        return sum(self.data)
    
        
        
        
        

In [50]:
%%time

# Parallel run of the sequential baseline workload on 4 worker threads.
# NOTE: `data`, `slow_add` and `slow_odd` are not defined in this cell; they
# must already exist in the kernel (run the cell that defines them first).
rdd = ParallelCollection(data, np=4)

print(rdd.map(func=slow_add).sum())
print(rdd.filter(func=slow_odd).sum())

36
12
CPU times: user 4.06 ms, sys: 3.44 ms, total: 7.5 ms
Wall time: 4.01 s


In [51]:
%%time

def slow_add(x):
    """Return ``x + 1`` after sleeping one second to mimic expensive work."""
    sleep(1)
    incremented = x + 1
    return incremented

def slow_odd(x):
    """Slow predicate: sleep one second, then report whether ``x`` is even.

    NOTE: despite the name, this returns True for values divisible by 2.
    """
    sleep(1)
    remainder = x % 2
    return remainder == 0

# Sequential baseline: every call sleeps one second, one after another.
data = [*range(8)]

print(sum(slow_add(item) for item in data))
print(sum(item for item in data if slow_odd(item)))

36
12
CPU times: user 2.58 ms, sys: 2.16 ms, total: 4.74 ms
Wall time: 16 s
