In [None]:
from pypekit import Task, Repository, CachedExecutor

class DataLoader(Task):
    """Demo task that accepts a ``"source"`` input and produces ``"raw"`` output.

    The task does no real work: ``run`` only announces itself on stdout and
    returns a fixed string.
    """

    # Type tags for this task. Presumably the repository uses them to decide
    # which tasks may follow each other -- confirm against pypekit.
    input_types = ["source"]
    output_types = ["raw"]

    def run(self, input_path, output_path):
        """Print a progress line and return a placeholder result.

        Args:
            input_path: Accepted but not used by this stub.
            output_path: Accepted but not used by this stub.

        Returns:
            str: The literal string ``"output_path"``.
        """
        print("Running DataLoader")
        # NOTE(review): this returns the literal text, not the ``output_path``
        # argument -- looks like a demo placeholder; confirm it is intended.
        return "output_path"

class Processor(Task):
    """Demo task that accepts ``"raw"`` or ``"processed"`` input and produces ``"processed"`` output.

    The task does no real work: ``run`` only announces itself on stdout and
    returns a fixed string.
    """

    # Type tags for this task. Because "processed" appears on both sides, the
    # output tag of one Processor matches the input tag of another; how the
    # repository exploits that is presumably defined by pypekit -- confirm.
    input_types = ["raw", "processed"]
    output_types = ["processed"]

    def run(self, input_path, output_path):
        """Print a progress line and return a placeholder result.

        Args:
            input_path: Accepted but not used by this stub.
            output_path: Accepted but not used by this stub.

        Returns:
            str: The literal string ``"output_path"``.
        """
        print("Running Processor")
        # NOTE(review): this returns the literal text, not the ``output_path``
        # argument -- looks like a demo placeholder; confirm it is intended.
        return "output_path"

class Classifier(Task):
    """Demo task that accepts ``"raw"`` or ``"processed"`` input and produces ``"sink"`` or ``"processed"`` output.

    The task does no real work: ``run`` only announces itself on stdout and
    returns a fixed string.
    """

    # Type tags for this task. "sink" presumably marks a valid pipeline end,
    # while "processed" lets further tasks follow -- confirm against pypekit.
    input_types = ["raw", "processed"]
    output_types = ["sink", "processed"]

    def run(self, input_path, output_path):
        """Print a progress line and return a placeholder result.

        Args:
            input_path: Accepted but not used by this stub.
            output_path: Accepted but not used by this stub.

        Returns:
            str: The literal string ``"output_path"``.
        """
        print("Running Classifier")
        # NOTE(review): this returns the literal text, not the ``output_path``
        # argument -- looks like a demo placeholder; confirm it is intended.
        return "output_path"
    
# Named task instances to register: one loader, two processors, two classifiers.
task_entries = [
    ("data_loader", DataLoader()),
    ("processor_1", Processor()),
    ("processor_2", Processor()),
    ("classifier_1", Classifier()),
    ("classifier_2", Classifier()),
]
repository = Repository(task_entries)

# Ask the repository for the pipelines it can assemble from the registered
# tasks, then list each one.
pipelines = repository.build_pipelines()
for pipeline in pipelines:
    print(pipeline)

# Run every pipeline through the executor. The first argument is "." --
# presumably the cache location (current working directory); confirm
# against the CachedExecutor signature.
executor = CachedExecutor(".", pipelines, verbose=True)
executor.run()

# Last expression of the cell, so the notebook displays the collected results.
executor.results

Pipeline(id=7fdae6ff-8f1a-4671-bf07-45ebc0861e31, tasks=['data_loader', 'processor_1', 'processor_2', 'classifier_1'])
Pipeline(id=cd8cec42-ac36-4c18-ab10-5a1bc53e0737, tasks=['data_loader', 'processor_1', 'processor_2', 'classifier_1', 'classifier_2'])
Pipeline(id=a97e4085-81c5-44e4-8c6a-30d5a8f5e204, tasks=['data_loader', 'processor_1', 'processor_2', 'classifier_2'])
Pipeline(id=0a0bcbbd-95c8-4591-82bd-cb90b6112a29, tasks=['data_loader', 'processor_1', 'processor_2', 'classifier_2', 'classifier_1'])
Pipeline(id=14eff1b7-8931-4717-9e9c-48d5185980aa, tasks=['data_loader', 'processor_1', 'classifier_1'])
Pipeline(id=4fdce5b0-86e6-4356-9111-483c640e1e84, tasks=['data_loader', 'processor_1', 'classifier_1', 'processor_2', 'classifier_2'])
Pipeline(id=d0a412ed-5604-44ba-9373-0c2365d2a9f4, tasks=['data_loader', 'processor_1', 'classifier_1', 'classifier_2'])
Pipeline(id=584decd0-1633-4217-8c66-bf0610e9e7dc, tasks=['data_loader', 'processor_1', 'classifier_2'])
Pipeline(id=7b068ef0-fdec-4e3

[{'pipeline_id': '7fdae6ff-8f1a-4671-bf07-45ebc0861e31',
  'output_path': 'output path 3',
  'tasks': ['data_loader', 'processor_1', 'processor_2', 'classifier_1']},
 {'pipeline_id': 'cd8cec42-ac36-4c18-ab10-5a1bc53e0737',
  'output_path': 'output path 3',
  'tasks': ['data_loader',
   'processor_1',
   'processor_2',
   'classifier_1',
   'classifier_2']},
 {'pipeline_id': 'a97e4085-81c5-44e4-8c6a-30d5a8f5e204',
  'output_path': 'output path 3',
  'tasks': ['data_loader', 'processor_1', 'processor_2', 'classifier_2']},
 {'pipeline_id': '0a0bcbbd-95c8-4591-82bd-cb90b6112a29',
  'output_path': 'output path 3',
  'tasks': ['data_loader',
   'processor_1',
   'processor_2',
   'classifier_2',
   'classifier_1']},
 {'pipeline_id': '14eff1b7-8931-4717-9e9c-48d5185980aa',
  'output_path': 'output path 3',
  'tasks': ['data_loader', 'processor_1', 'classifier_1']},
 {'pipeline_id': '4fdce5b0-86e6-4356-9111-483c640e1e84',
  'output_path': 'output path 3',
  'tasks': ['data_loader',
   'process

In [10]:
# `time` stays imported so any later cell that relies on the name keeps working.
from time import perf_counter, time

# Number of instances registered per task class (3 * n tasks in total).
n = 4

repository = Repository(
    [(f"data_loader_{i}", DataLoader()) for i in range(n)] +
    [(f"processor_{i}", Processor()) for i in range(n)] +
    [(f"classifier_{i}", Classifier()) for i in range(n)]
)

# Time only the pipeline construction. perf_counter() is monotonic and
# high-resolution, whereas time() follows the wall clock and can jump if the
# system clock is adjusted mid-measurement.
start = perf_counter()
pipelines = repository.build_pipelines()
elapsed = perf_counter() - start
print(f"Built {len(pipelines)} pipelines in {elapsed:.2f} seconds")

Built 219200 pipelines in 5.17 seconds
