In [1]:
from pypekit import Task, Repository, CachedExecutor, Pipeline

class DataLoader(Task):
    """Demo task that declares a "source" input and a "raw" output."""

    # Type tags this task accepts and emits.
    input_types = ["source"]
    output_types = ["raw"]

    def run(self, _):
        """Announce the call on stdout and return a placeholder result.

        The single positional argument is ignored.
        """
        # Plain literal: the message has no placeholders, so no f-string needed.
        print("Running DataLoader")
        return "output"

class Processor(Task):
    """Demo task that accepts "raw" or "processed" input and emits "processed"."""

    # Type tags this task accepts and emits.
    input_types = ["raw", "processed"]
    output_types = ["processed"]

    def run(self, _):
        """Announce the call on stdout and return a placeholder result.

        The single positional argument is ignored.
        """
        # Plain literal: the message has no placeholders, so no f-string needed.
        print("Running Processor")
        return "output"

class Classifier(Task):
    """Demo task that accepts "raw" or "processed" input and emits "sink" or "processed"."""

    # Type tags this task accepts and emits.
    input_types = ["raw", "processed"]
    output_types = ["sink", "processed"]

    def run(self, _):
        """Announce the call on stdout and return a placeholder result.

        The single positional argument is ignored.
        """
        # Plain literal: the message has no placeholders, so no f-string needed.
        print("Running Classifier")
        return "output"
    
# Register one loader, two processors and two classifiers, each under a
# unique name.
repository = Repository([
    ("data_loader", DataLoader()),
    ("processor_1", Processor()),
    ("processor_2", Processor()),
    ("classifier_1", Classifier()),
    ("classifier_2", Classifier()),
])
# build_pipelines() returns a mapping (iterated through .values() below).
# Presumably it enumerates the task chains allowed by the declared
# input/output types — confirm against the pypekit documentation.
pipeline_dict = repository.build_pipelines()
# NOTE: the loop variable stays bound in the kernel namespace after the loop.
for pipeline in pipeline_dict.values():
    print(pipeline)

# Execute every generated pipeline with verbose reporting switched on.
executor = CachedExecutor(pipeline_dict, verbose=True)
executor.run()

# Last expression of the cell: displays the collected results, a dict keyed
# by pipeline id whose entries hold 'pipeline_id', 'output' and 'tasks'.
executor.results

Pipeline(id=ac59d9a343154ade869a7dbe15deccb2, tasks=['data_loader', 'processor_1', 'processor_2', 'classifier_1'])
Pipeline(id=10f0b3ae81a949299018b20f345c77e6, tasks=['data_loader', 'processor_1', 'processor_2', 'classifier_1', 'classifier_2'])
Pipeline(id=3efd6116653a4c67840fea186da5be51, tasks=['data_loader', 'processor_1', 'processor_2', 'classifier_2'])
Pipeline(id=9b8272935bde4f8ba09e945a203bb61b, tasks=['data_loader', 'processor_1', 'processor_2', 'classifier_2', 'classifier_1'])
Pipeline(id=af40ee75a5f049d1949abb1c7a430bce, tasks=['data_loader', 'processor_1', 'classifier_1'])
Pipeline(id=c2b3552b73874406a9219242b27ed215, tasks=['data_loader', 'processor_1', 'classifier_1', 'processor_2', 'classifier_2'])
Pipeline(id=70ae443773c84e7d811868eb3d22afbd, tasks=['data_loader', 'processor_1', 'classifier_1', 'classifier_2'])
Pipeline(id=df8f7391b4b64a2697eeab09ea4b6ffb, tasks=['data_loader', 'processor_1', 'classifier_2'])
Pipeline(id=7910d163140a45ce808d1b1115652efc, tasks=['data_lo

{'ac59d9a343154ade869a7dbe15deccb2': {'pipeline_id': 'ac59d9a343154ade869a7dbe15deccb2',
  'output': 'output',
  'tasks': ['data_loader', 'processor_1', 'processor_2', 'classifier_1']},
 '10f0b3ae81a949299018b20f345c77e6': {'pipeline_id': '10f0b3ae81a949299018b20f345c77e6',
  'output': 'output',
  'tasks': ['data_loader',
   'processor_1',
   'processor_2',
   'classifier_1',
   'classifier_2']},
 '3efd6116653a4c67840fea186da5be51': {'pipeline_id': '3efd6116653a4c67840fea186da5be51',
  'output': 'output',
  'tasks': ['data_loader', 'processor_1', 'processor_2', 'classifier_2']},
 '9b8272935bde4f8ba09e945a203bb61b': {'pipeline_id': '9b8272935bde4f8ba09e945a203bb61b',
  'output': 'output',
  'tasks': ['data_loader',
   'processor_1',
   'processor_2',
   'classifier_2',
   'classifier_1']},
 'af40ee75a5f049d1949abb1c7a430bce': {'pipeline_id': 'af40ee75a5f049d1949abb1c7a430bce',
  'output': 'output',
  'tasks': ['data_loader', 'processor_1', 'classifier_1']},
 'c2b3552b73874406a9219242b27

In [2]:
n = 4

# n instances of each task kind, uniquely named by index (3 * n tasks in total).
repository = Repository(
    [(f"data_loader_{i}", DataLoader()) for i in range(n)] +
    [(f"processor_{i}", Processor()) for i in range(n)] +
    [(f"classifier_{i}", Classifier()) for i in range(n)]
)

# perf_counter() is the monotonic, high-resolution clock intended for measuring
# elapsed time; time() follows the wall clock and can jump if it is adjusted.
# `time` is still imported so the name stays bound for any later cell using it.
from time import perf_counter, time
start = perf_counter()
pipelines = repository.build_pipelines()
print(f"Built {len(pipelines)} pipelines in {perf_counter() - start:.2f} seconds")

Built 219200 pipelines in 3.48 seconds


In [3]:
# Build one pipeline directly from an ordered list of (name, task) pairs
# instead of generating it through Repository.build_pipelines().
pipeline = Pipeline([
    ("data_loader", DataLoader()),
    ("processor_1", Processor()),
    ("processor_2", Processor()),
    ("classifier_1", Classifier()),
    ("classifier_2", Classifier()),
])
# Bare last expression: the notebook displays the pipeline's repr.
pipeline

Pipeline(id=fcbaa2c0f09d453095038f31a4d41c8f, tasks=['data_loader', 'processor_1', 'processor_2', 'classifier_1', 'classifier_2'])

In [4]:
# Show the string form of both objects. At this point `repository` is the
# 12-task repository rebuilt in the `n = 4` cell, while `executor` is still
# the one created in the first cell.
print(repository)
print(executor)

Repository(tasks=['data_loader_0', 'data_loader_1', 'data_loader_2', 'data_loader_3', 'processor_0', 'processor_1', 'processor_2', 'processor_3', 'classifier_0', 'classifier_1', 'classifier_2', 'classifier_3'], pipelines=219200)
CachedExecutor(pipelines=32)


In [5]:
# Second executor over the same pipelines, handed the first executor's cache —
# presumably so task outputs that were already computed are reused rather than
# recomputed (confirm against the pypekit documentation).
executor2 = CachedExecutor(pipeline_dict, cache=executor.cache, verbose=True)
# Trailing semicolon suppresses the notebook's display of run()'s return value.
executor2.run();

Pipeline ac59d9a343154ade869a7dbe15deccb2 completed.
Pipeline 10f0b3ae81a949299018b20f345c77e6 completed.
Pipeline 3efd6116653a4c67840fea186da5be51 completed.
Pipeline 9b8272935bde4f8ba09e945a203bb61b completed.
Pipeline af40ee75a5f049d1949abb1c7a430bce completed.
Pipeline c2b3552b73874406a9219242b27ed215 completed.
Pipeline 70ae443773c84e7d811868eb3d22afbd completed.
Pipeline df8f7391b4b64a2697eeab09ea4b6ffb completed.
Pipeline 7910d163140a45ce808d1b1115652efc completed.
Pipeline af61c71ab8bc4301b4f1631eb037162d completed.
Pipeline 37014712b1e24f5a8aeafd274ad25b28 completed.
Pipeline d5f8c415823b4544807014074d611898 completed.
Pipeline dcd0e07a33984b78bba9ab9b2a2dc8f3 completed.
Pipeline f90ee128f4a84f7393d921beb0f6ee3a completed.
Pipeline a6794428cbd04a48b26f49cfc5d5af5c completed.
Pipeline 6320cb9cac8a45cf904f2387994e5daa completed.
Pipeline 82ba608a72394bbe82120caa6302ec9d completed.
Pipeline 73d1609952a84a7db3cec97cbba9388e completed.
Pipeline fc00f55b7d0240d9a19f899286a940db comp

In [6]:
# Register the `pipeline` object built in an earlier cell as a single entry
# alongside plain tasks.
repository = Repository([
    ("data_loader", DataLoader()),
    ("processor_1", Processor()),
    ("pipeline", pipeline),
    ("classifier_1", Classifier()),
    ("classifier_2", Classifier()),
])
pipeline_dict = repository.build_pipelines()
# Use a dedicated loop name: iterating with `pipeline` would rebind the very
# object registered above, so re-running this cell would nest one of the
# generated pipelines instead of the original one.
for built_pipeline in pipeline_dict.values():
    print(built_pipeline)


Pipeline(id=038b7dbe45474464ad12ad14c28cc9b3, tasks=['data_loader', 'processor_1', 'classifier_1'])
Pipeline(id=2453ffd72928478bb3473fc3162f9b46, tasks=['data_loader', 'processor_1', 'classifier_1', 'classifier_2'])
Pipeline(id=33c2ba6952ba49f492378bfa32452ed5, tasks=['data_loader', 'processor_1', 'classifier_2'])
Pipeline(id=4e487267e16841759ca1018bf927be55, tasks=['data_loader', 'processor_1', 'classifier_2', 'classifier_1'])
Pipeline(id=51ff69e063334a1ab20621a566a8e4ef, tasks=['data_loader', 'classifier_1'])
Pipeline(id=c2fcd75002724027954c9f296c910aa6, tasks=['data_loader', 'classifier_1', 'processor_1', 'classifier_2'])
Pipeline(id=29fb231355a3407383866381bd1affe8, tasks=['data_loader', 'classifier_1', 'classifier_2'])
Pipeline(id=cb4927ee362041b2ba901922431afe7e, tasks=['data_loader', 'classifier_2'])
Pipeline(id=80298546d7244eae901cc508a89009cd, tasks=['data_loader', 'classifier_2', 'processor_1', 'classifier_1'])
Pipeline(id=ce4d6aa936ae4b8c8f0aeab34d27e7b0, tasks=['data_loader