In [1]:
from pypekit import Task, Repository, CachedExecutor

class DataLoader(Task):
    """Demo task declaring input type "source" and output type "raw".

    No data is actually loaded; `run` only reports that it was invoked.
    """

    input_types = ["source"]
    output_types = ["raw"]

    def run(self, input_path, output_path):
        """Print a progress line and return a placeholder result.

        Args:
            input_path: Accepted but not used by this demo task.
            output_path: Accepted but not used by this demo task.

        Returns:
            The literal string "output_path".
        """
        print("Running DataLoader")
        # NOTE(review): this is the literal "output_path", not the
        # `output_path` argument -- presumably a demo placeholder; confirm.
        return "output_path"

class Processor(Task):
    """Demo task declaring input types "raw"/"processed" and output type "processed".

    No processing is actually performed; `run` only reports that it was invoked.
    """

    input_types = ["raw", "processed"]
    output_types = ["processed"]

    def run(self, input_path, output_path):
        """Print a progress line and return a placeholder result.

        Args:
            input_path: Accepted but not used by this demo task.
            output_path: Accepted but not used by this demo task.

        Returns:
            The literal string "output_path".
        """
        print("Running Processor")
        # NOTE(review): this is the literal "output_path", not the
        # `output_path` argument -- presumably a demo placeholder; confirm.
        return "output_path"

class Classifier(Task):
    """Demo task declaring input types "raw"/"processed" and output types "sink"/"processed".

    No classification is actually performed; `run` only reports that it was invoked.
    """

    input_types = ["raw", "processed"]
    output_types = ["sink", "processed"]

    def run(self, input_path, output_path):
        """Print a progress line and return a placeholder result.

        Args:
            input_path: Accepted but not used by this demo task.
            output_path: Accepted but not used by this demo task.

        Returns:
            The literal string "output_path".
        """
        print("Running Classifier")
        # NOTE(review): this is the literal "output_path", not the
        # `output_path` argument -- presumably a demo placeholder; confirm.
        return "output_path"
    
# Register one loader plus two separately named instances each of the
# processor and the classifier (names: processor_1/2, classifier_1/2).
repository = Repository(
    [("data_loader", DataLoader())]
    + [(f"processor_{k}", Processor()) for k in (1, 2)]
    + [(f"classifier_{k}", Classifier()) for k in (1, 2)]
)

# Enumerate the pipelines the repository can assemble from those tasks and
# list them, one per line.
pipelines = repository.build_pipelines()
for pipeline in pipelines:
    print(pipeline)

# Run every pipeline; "." is passed as the executor's first argument
# (presumably the cache location -- confirm against the pypekit docs).
executor = CachedExecutor(".", pipelines, verbose=True)
executor.run()

# Left as the final expression so the notebook displays the results.
executor.results

Pipeline(id=2980b323-2dee-4d89-844c-95fbc32558bd, tasks=['data_loader', 'processor_1', 'processor_2', 'classifier_1'])
Pipeline(id=3dd4f596-d036-46d5-8bc4-61507cebace7, tasks=['data_loader', 'processor_1', 'processor_2', 'classifier_1', 'classifier_2'])
Pipeline(id=97856ceb-f8e2-47d3-967a-55eacfb947ee, tasks=['data_loader', 'processor_1', 'processor_2', 'classifier_2'])
Pipeline(id=349e437c-1359-4a15-b080-5694b9dd679e, tasks=['data_loader', 'processor_1', 'processor_2', 'classifier_2', 'classifier_1'])
Pipeline(id=e624f793-accc-4ccf-9e01-4b765fdb3c95, tasks=['data_loader', 'processor_1', 'classifier_1'])
Pipeline(id=14747eb0-170d-4140-8b3e-cfeb3a57f924, tasks=['data_loader', 'processor_1', 'classifier_1', 'processor_2', 'classifier_2'])
Pipeline(id=49536d91-10e9-4eca-9645-d0803349efa4, tasks=['data_loader', 'processor_1', 'classifier_1', 'classifier_2'])
Pipeline(id=05851889-bc1f-4b93-8005-e2e09deb3822, tasks=['data_loader', 'processor_1', 'classifier_2'])
Pipeline(id=c9fc9a2b-241f-4e6

[{'pipeline_id': '2980b323-2dee-4d89-844c-95fbc32558bd',
  'output_path': 'output_path',
  'tasks': ['data_loader', 'processor_1', 'processor_2', 'classifier_1']},
 {'pipeline_id': '3dd4f596-d036-46d5-8bc4-61507cebace7',
  'output_path': 'output_path',
  'tasks': ['data_loader',
   'processor_1',
   'processor_2',
   'classifier_1',
   'classifier_2']},
 {'pipeline_id': '97856ceb-f8e2-47d3-967a-55eacfb947ee',
  'output_path': 'output_path',
  'tasks': ['data_loader', 'processor_1', 'processor_2', 'classifier_2']},
 {'pipeline_id': '349e437c-1359-4a15-b080-5694b9dd679e',
  'output_path': 'output_path',
  'tasks': ['data_loader',
   'processor_1',
   'processor_2',
   'classifier_2',
   'classifier_1']},
 {'pipeline_id': 'e624f793-accc-4ccf-9e01-4b765fdb3c95',
  'output_path': 'output_path',
  'tasks': ['data_loader', 'processor_1', 'classifier_1']},
 {'pipeline_id': '14747eb0-170d-4140-8b3e-cfeb3a57f924',
  'output_path': 'output_path',
  'tasks': ['data_loader',
   'processor_1',
   'c

In [2]:
from time import perf_counter, time  # `time` stays imported in case later cells use it

# Number of instances registered for each task kind (loader, processor, classifier).
n = 4

repository = Repository(
    [(f"data_loader_{i}", DataLoader()) for i in range(n)] +
    [(f"processor_{i}", Processor()) for i in range(n)] +
    [(f"classifier_{i}", Classifier()) for i in range(n)]
)

# Time only the pipeline enumeration. perf_counter() is a monotonic,
# high-resolution clock, so the measured interval cannot be skewed by
# wall-clock adjustments the way an interval taken with time() can.
start = perf_counter()
pipelines = repository.build_pipelines()
elapsed = perf_counter() - start
print(f"Built {len(pipelines)} pipelines in {elapsed:.2f} seconds")

Built 219200 pipelines in 1.54 seconds
