In [13]:
from pypekit import Task, Repository, CachedExecutor

class DataLoader(Task):
    """Stub task that declares input type "source" and output type "raw".

    The implementation does no real work: it prints a progress line and
    returns a fixed placeholder string.
    """

    # Type tags for this task. Presumably Repository matches one task's
    # output tags against another's input tags when chaining -- TODO confirm.
    input_types = ["source"]
    output_types = ["raw"]

    def run(self, input_path, output_path):
        """Announce the task and return the literal string "output_path".

        Neither ``input_path`` nor ``output_path`` is read by this stub.
        """
        print("Running DataLoader")
        return "output_path"

class Processor(Task):
    """Stub task that accepts "raw" or "processed" and declares "processed" output.

    Because "processed" appears in both lists, the declared output type of
    one Processor is also a declared input type of another.
    """

    # Type tags for this task. Presumably used by Repository to decide which
    # tasks may follow one another -- TODO confirm.
    input_types = ["raw", "processed"]
    output_types = ["processed"]

    def run(self, input_path, output_path):
        """Announce the task and return the literal string "output_path".

        Neither ``input_path`` nor ``output_path`` is read by this stub.
        """
        print("Running Processor")
        return "output_path"

class Classifier(Task):
    """Stub task that accepts "raw" or "processed" and declares "sink" and "processed".

    The implementation does no real work: it prints a progress line and
    returns a fixed placeholder string.
    """

    # Type tags for this task. NOTE(review): "sink" looks like the tag that
    # marks a pipeline as complete -- confirm against the pypekit docs.
    input_types = ["raw", "processed"]
    output_types = ["sink", "processed"]

    def run(self, input_path, output_path):
        """Announce the task and return the literal string "output_path".

        Neither ``input_path`` nor ``output_path`` is read by this stub.
        """
        print("Running Classifier")
        return "output_path"
    
# Register one loader plus two processors and two classifiers, each under a
# unique name and in the order loader -> processors -> classifiers.
repository = Repository(
    [("data_loader", DataLoader())]
    + [(f"processor_{idx}", Processor()) for idx in (1, 2)]
    + [(f"classifier_{idx}", Classifier()) for idx in (1, 2)]
)

# Build the pipelines from the registered tasks and list each one.
pipelines = repository.build_pipelines()
for pipeline in pipelines:
    print(pipeline)

# Execute every pipeline with verbose reporting enabled.
# NOTE(review): the "Cached" in the name suggests results of shared task
# prefixes are reused -- confirm against the pypekit docs.
executor = CachedExecutor(pipelines, verbose=True)
executor.run()

# Bare trailing expression so the notebook renders the collected results.
executor.results

Pipeline(id=20ee8d58-3ee4-4990-96fe-bd2401b7f8bc, tasks=['data_loader', 'processor_1', 'processor_2', 'classifier_1'])
Pipeline(id=8938f753-f542-49c8-aa21-488fba757994, tasks=['data_loader', 'processor_1', 'processor_2', 'classifier_1', 'classifier_2'])
Pipeline(id=fa5364e1-1021-4e1e-bfe1-cac49da3d62f, tasks=['data_loader', 'processor_1', 'processor_2', 'classifier_2'])
Pipeline(id=f7442521-5d40-4a09-9ea1-e187bd414d1f, tasks=['data_loader', 'processor_1', 'processor_2', 'classifier_2', 'classifier_1'])
Pipeline(id=3fbb4424-82ed-42dd-8e71-3ce54dff538a, tasks=['data_loader', 'processor_1', 'classifier_1'])
Pipeline(id=fc860626-c9cd-4387-b4b6-97b0e12f9bcd, tasks=['data_loader', 'processor_1', 'classifier_1', 'processor_2', 'classifier_2'])
Pipeline(id=351e7185-7b42-4f26-9bfd-d828e9eb879a, tasks=['data_loader', 'processor_1', 'classifier_1', 'classifier_2'])
Pipeline(id=d369a702-3684-4814-99e7-1ebf0e40c6ff, tasks=['data_loader', 'processor_1', 'classifier_2'])
Pipeline(id=959e5fbf-5577-439

[{'pipeline_id': '20ee8d58-3ee4-4990-96fe-bd2401b7f8bc',
  'output_path': 'output_path',
  'tasks': ['data_loader', 'processor_1', 'processor_2', 'classifier_1']},
 {'pipeline_id': '8938f753-f542-49c8-aa21-488fba757994',
  'output_path': 'output_path',
  'tasks': ['data_loader',
   'processor_1',
   'processor_2',
   'classifier_1',
   'classifier_2']},
 {'pipeline_id': 'fa5364e1-1021-4e1e-bfe1-cac49da3d62f',
  'output_path': 'output_path',
  'tasks': ['data_loader', 'processor_1', 'processor_2', 'classifier_2']},
 {'pipeline_id': 'f7442521-5d40-4a09-9ea1-e187bd414d1f',
  'output_path': 'output_path',
  'tasks': ['data_loader',
   'processor_1',
   'processor_2',
   'classifier_2',
   'classifier_1']},
 {'pipeline_id': '3fbb4424-82ed-42dd-8e71-3ce54dff538a',
  'output_path': 'output_path',
  'tasks': ['data_loader', 'processor_1', 'classifier_1']},
 {'pipeline_id': 'fc860626-c9cd-4387-b4b6-97b0e12f9bcd',
  'output_path': 'output_path',
  'tasks': ['data_loader',
   'processor_1',
   'c

In [14]:
# `time` stays imported alongside `perf_counter` in case other cells use it.
from time import perf_counter, time

# Number of instances registered per task kind (loader, processor, classifier).
# The recorded run of this cell with n = 4 built 219200 pipelines, so raise
# this value with care.
n = 4

repository = Repository(
    [(f"data_loader_{i}", DataLoader()) for i in range(n)] +
    [(f"processor_{i}", Processor()) for i in range(n)] +
    [(f"classifier_{i}", Classifier()) for i in range(n)]
)

# Time only the build step. perf_counter() is a monotonic, high-resolution
# clock meant for measuring intervals, unlike wall-clock time(), which can
# jump when the system clock is adjusted.
start = perf_counter()
pipelines = repository.build_pipelines()
elapsed = perf_counter() - start
print(f"Built {len(pipelines)} pipelines in {elapsed:.2f} seconds")

Built 219200 pipelines in 1.45 seconds
