In [1]:
from pypekit import Task, Repository, CachedExecutor, Pipeline

class DataLoader(Task):
    """Demo task declared as consuming "source" and producing "raw".

    The task does no real work: ``run`` ignores its argument, prints a
    trace line, and returns a placeholder string.
    """

    input_types = ["source"]
    output_types = ["raw"]

    def run(self, _):
        """Print a trace line and return the placeholder ``"output"``."""
        print("Running DataLoader")
        return "output"

class Processor(Task):
    """Demo task declared as consuming "raw" or "processed" and producing "processed".

    Because "processed" appears in both lists, the declared output type is
    also one of the declared input types.
    """

    input_types = ["raw", "processed"]
    output_types = ["processed"]

    def run(self, _):
        """Print a trace line and return the placeholder ``"output"``."""
        print("Running Processor")
        return "output"

class Classifier(Task):
    """Demo task declared as consuming "raw" or "processed" and producing "sink" or "processed".

    ``run`` ignores its argument, prints a trace line, and returns a
    placeholder string.
    """

    input_types = ["raw", "processed"]
    output_types = ["sink", "processed"]

    def run(self, _):
        """Print a trace line and return the placeholder ``"output"``."""
        print("Running Classifier")
        return "output"
    
# One loader, two processors and two classifiers, each registered under a unique name.
repository = Repository(
    [
        ("data_loader", DataLoader()),
        ("processor_1", Processor()),
        ("processor_2", Processor()),
        ("classifier_1", Classifier()),
        ("classifier_2", Classifier()),
    ]
)

# build_pipelines() returns a mapping whose values are the generated pipelines;
# print each one to see which task orderings were produced.
pipeline_dict = repository.build_pipelines()
for pipeline in pipeline_dict.values():
    print(pipeline)

# Execute every generated pipeline with progress messages enabled.
executor = CachedExecutor(pipeline_dict, verbose=True)
executor.run()

# Bare last expression so the notebook renders the results mapping.
executor.results

Pipeline(id=83c8d827b09e4a66bb40230dbec9adc3, tasks=['data_loader', 'processor_1', 'processor_2', 'classifier_1'])
Pipeline(id=940ae1afb4554604baef20e87b8dd313, tasks=['data_loader', 'processor_1', 'processor_2', 'classifier_1', 'classifier_2'])
Pipeline(id=22d90e89da5749c7aa37fc0498276763, tasks=['data_loader', 'processor_1', 'processor_2', 'classifier_2'])
Pipeline(id=ea8bb4a085d843c9be2d8c4cbafc18e8, tasks=['data_loader', 'processor_1', 'processor_2', 'classifier_2', 'classifier_1'])
Pipeline(id=4293d8de771e4a9eb02db9e87a596dda, tasks=['data_loader', 'processor_1', 'classifier_1'])
Pipeline(id=19c1f7fcc69342f0a4d6562abdba0bb5, tasks=['data_loader', 'processor_1', 'classifier_1', 'processor_2', 'classifier_2'])
Pipeline(id=14df060277284ea1b3777aaa4945b350, tasks=['data_loader', 'processor_1', 'classifier_1', 'classifier_2'])
Pipeline(id=33607a1c965d44088ee6b1cbbf4def10, tasks=['data_loader', 'processor_1', 'classifier_2'])
Pipeline(id=21ad37bba6374edebd6cb0ce13ee5beb, tasks=['data_lo

{'83c8d827b09e4a66bb40230dbec9adc3': {'pipeline_id': '83c8d827b09e4a66bb40230dbec9adc3',
  'output': 'output',
  'tasks': ['data_loader', 'processor_1', 'processor_2', 'classifier_1']},
 '940ae1afb4554604baef20e87b8dd313': {'pipeline_id': '940ae1afb4554604baef20e87b8dd313',
  'output': 'output',
  'tasks': ['data_loader',
   'processor_1',
   'processor_2',
   'classifier_1',
   'classifier_2']},
 '22d90e89da5749c7aa37fc0498276763': {'pipeline_id': '22d90e89da5749c7aa37fc0498276763',
  'output': 'output',
  'tasks': ['data_loader', 'processor_1', 'processor_2', 'classifier_2']},
 'ea8bb4a085d843c9be2d8c4cbafc18e8': {'pipeline_id': 'ea8bb4a085d843c9be2d8c4cbafc18e8',
  'output': 'output',
  'tasks': ['data_loader',
   'processor_1',
   'processor_2',
   'classifier_2',
   'classifier_1']},
 '4293d8de771e4a9eb02db9e87a596dda': {'pipeline_id': '4293d8de771e4a9eb02db9e87a596dda',
  'output': 'output',
  'tasks': ['data_loader', 'processor_1', 'classifier_1']},
 '19c1f7fcc69342f0a4d6562abdb

In [2]:
# Number of instances registered for each of the three task classes.
n = 4

# n loaders, n processors and n classifiers, named "<kind>_<index>".
repository = Repository(
    [(f"data_loader_{i}", DataLoader()) for i in range(n)] +
    [(f"processor_{i}", Processor()) for i in range(n)] +
    [(f"classifier_{i}", Classifier()) for i in range(n)]
)

# Time the build with perf_counter(): it is the clock intended for measuring
# short durations, whereas time() follows the wall clock and can jump if the
# system clock is adjusted mid-measurement. `time` stays imported in case a
# later cell still uses it.
from time import perf_counter, time
start = perf_counter()
pipelines = repository.build_pipelines()
print(f"Built {len(pipelines)} pipelines in {perf_counter() - start:.2f} seconds")

Built 219200 pipelines in 3.83 seconds


In [3]:
# Hand-assemble a single pipeline with a fixed task order, without going
# through a Repository.
pipeline = Pipeline(
    [
        ("data_loader", DataLoader()),
        ("processor_1", Processor()),
        ("processor_2", Processor()),
        ("classifier_1", Classifier()),
        ("classifier_2", Classifier()),
    ]
)
# Bare last expression so the notebook displays the pipeline's repr.
pipeline

Pipeline(id=a2c5f159608e462db36406eef4d63aa3, tasks=['data_loader', 'processor_1', 'processor_2', 'classifier_1', 'classifier_2'])

In [4]:
# At this point `repository` is the 12-task one built in In [2], while
# `executor` is still the one created in In [1].
print(repository, executor, sep="\n")

Repository(tasks=['data_loader_0', 'data_loader_1', 'data_loader_2', 'data_loader_3', 'processor_0', 'processor_1', 'processor_2', 'processor_3', 'classifier_0', 'classifier_1', 'classifier_2', 'classifier_3'], pipelines=219200)
CachedExecutor(pipelines=32)


In [5]:
# Second executor over the same pipelines, handed the first executor's cache.
executor2 = CachedExecutor(
    pipeline_dict,
    cache=executor.cache,
    verbose=True,
)
# The trailing semicolon keeps the notebook from displaying run()'s return value.
executor2.run();

Pipeline 83c8d827b09e4a66bb40230dbec9adc3 completed.
Pipeline 940ae1afb4554604baef20e87b8dd313 completed.
Pipeline 22d90e89da5749c7aa37fc0498276763 completed.
Pipeline ea8bb4a085d843c9be2d8c4cbafc18e8 completed.
Pipeline 4293d8de771e4a9eb02db9e87a596dda completed.
Pipeline 19c1f7fcc69342f0a4d6562abdba0bb5 completed.
Pipeline 14df060277284ea1b3777aaa4945b350 completed.
Pipeline 33607a1c965d44088ee6b1cbbf4def10 completed.
Pipeline 21ad37bba6374edebd6cb0ce13ee5beb completed.
Pipeline 491fe4a4848f4d709eb787fb037dcf40 completed.
Pipeline 7a395be160f64a7aa22598f9bd64554f completed.
Pipeline e1d4640335a745ed981cbeefa321d0c9 completed.
Pipeline 5002ac954b2c41ceb48c5ba27a92b143 completed.
Pipeline b1f3dd95c60748f381dd6d3eccd7c6fb completed.
Pipeline 5c4c2235783b446fb651b7478e48babf completed.
Pipeline d6e137a355c74fe9a2876e573031e800 completed.
Pipeline 5cb7b17e343e430c95fd213fdce92d96 completed.
Pipeline 21c9727b5740469f8016d4cf27ff1991 completed.
Pipeline 87393af8010b4a61a444724d77dc6c79 comp

In [6]:
# Register the hand-built `pipeline` object from In [3] as one repository
# entry alongside the plain task instances.
repository = Repository([
    ("data_loader", DataLoader()),
    ("processor_1", Processor()),
    ("pipeline", pipeline),
    ("classifier_1", Classifier()),
    ("classifier_2", Classifier()),
])
pipeline_dict = repository.build_pipelines()
# The loop variable must not be called `pipeline`: that would overwrite the
# object registered above, so re-running this cell would nest a different
# pipeline than the one built in In [3].
for built_pipeline in pipeline_dict.values():
    print(built_pipeline)


Pipeline(id=a2800a9d7ed6476a85568b001d1de552, tasks=['data_loader', 'processor_1', 'classifier_1'])
Pipeline(id=b787152f63db427a94ea8387abab7cc9, tasks=['data_loader', 'processor_1', 'classifier_1', 'classifier_2'])
Pipeline(id=0cd552a9bfda4062a46653ec8c28abb9, tasks=['data_loader', 'processor_1', 'classifier_2'])
Pipeline(id=769ba9e30fc04de0bf4bd3c8c211f936, tasks=['data_loader', 'processor_1', 'classifier_2', 'classifier_1'])
Pipeline(id=e1f39f949c624533aebf61bd31a4b6bc, tasks=['data_loader', 'classifier_1'])
Pipeline(id=c763caa1ccf5411eabd54e63f06df786, tasks=['data_loader', 'classifier_1', 'processor_1', 'classifier_2'])
Pipeline(id=f194193978134e5c89dab8e0cf22b46f, tasks=['data_loader', 'classifier_1', 'classifier_2'])
Pipeline(id=40f10cdfe57a41288be18544da438a9a, tasks=['data_loader', 'classifier_2'])
Pipeline(id=943eb7fda3da41388782ab2d619dd2a0, tasks=['data_loader', 'classifier_2', 'processor_1', 'classifier_1'])
Pipeline(id=1b60e32fb9894ad68eb6eeca7287d6f1, tasks=['data_loader