In [1]:
from pipeline_builder import Task, Repository, CachedExecutor

class ExampleTask(Task):
    """Demo task that declares only an output category ("1").

    In the pipelines printed by this notebook its instances always occupy
    the first position, ahead of tasks whose ``input_category`` is "1".
    """

    output_category = "1"

    def run(self, input_path, output_path):
        """Announce the task on stdout and hand back a placeholder path.

        Both path arguments are accepted but never read; the return value
        is always the fixed string ``"output path 1"``.
        """
        # Plain literal: the message has no placeholders to interpolate.
        print("Running ExampleTask")
        return "output path 1"

class ExampleTask2(Task):
    """Demo task declaring input category "1" and output category "2".

    In the pipelines printed by this notebook its instances sit between an
    ``ExampleTask`` (output "1") and an ``ExampleTask3`` (input "2").
    """

    input_category = "1"
    output_category = "2"

    def run(self, input_path, output_path):
        """Announce the task on stdout and hand back a placeholder path.

        Both path arguments are accepted but never read; the return value
        is always the fixed string ``"output path 2"``.
        """
        # Plain literal: the message has no placeholders to interpolate.
        print("Running ExampleTask2")
        return "output path 2"

class ExampleTask3(Task):
    """Demo task that declares only an input category ("2").

    In the pipelines printed by this notebook its instances always come
    last, and the executor results report its return value as the
    pipeline's ``output_path``.
    """

    input_category = "2"

    def run(self, input_path, output_path):
        """Announce the task on stdout and hand back a placeholder path.

        Both path arguments are accepted but never read; the return value
        is always the fixed string ``"output path 3"``.
        """
        # Plain literal: the message has no placeholders to interpolate.
        print("Running ExampleTask3")
        return "output path 3"
    
# Register two variants of each of the three example stages, named
# "example_task<stage>_<variant>" and listed stage by stage.
repository = Repository([
    (f"example_task{stage}_{variant}", task_type())
    for stage, task_type in enumerate((ExampleTask, ExampleTask2, ExampleTask3), start=1)
    for variant in range(2)
])

# Print every pipeline the repository can assemble from those tasks
# (2 variants x 3 stages -> 8 pipelines in the recorded output).
pipelines = repository.build_pipelines()
for pipeline in pipelines:
    print(pipeline)

# NOTE(review): "." is presumably the executor's cache/working directory --
# confirm against CachedExecutor's signature.
executor = CachedExecutor(".", pipelines)
executor.run()

# Keep the results as the cell's last expression so the notebook renders them.
executor.results

Pipeline(id=05cd1897-364f-4c31-949a-21bdc84c734b, tasks=['example_task1_0', 'example_task2_0', 'example_task3_0'])
Pipeline(id=4223300f-9727-484e-a75d-81caec313f4b, tasks=['example_task1_0', 'example_task2_0', 'example_task3_1'])
Pipeline(id=392d322d-9f4c-41c8-8fe4-d2fd8862a978, tasks=['example_task1_0', 'example_task2_1', 'example_task3_0'])
Pipeline(id=03ce8644-c440-4259-92c9-d1f6abf72db2, tasks=['example_task1_0', 'example_task2_1', 'example_task3_1'])
Pipeline(id=92171c19-f837-49e5-af53-719399e07e91, tasks=['example_task1_1', 'example_task2_0', 'example_task3_0'])
Pipeline(id=7ee62fae-51a0-4fd1-8488-3b8df10e8cfb, tasks=['example_task1_1', 'example_task2_0', 'example_task3_1'])
Pipeline(id=7ae4039f-7e81-48ca-93b2-04b9ae093558, tasks=['example_task1_1', 'example_task2_1', 'example_task3_0'])
Pipeline(id=72086721-6e6d-4b33-88ca-7109e5e89106, tasks=['example_task1_1', 'example_task2_1', 'example_task3_1'])
Running ExampleTask
Running ExampleTask2
Running ExampleTask3
Pipeline 05cd1897-

[{'pipeline_id': '05cd1897-364f-4c31-949a-21bdc84c734b',
  'output_path': 'output path 3',
  'tasks': ['example_task1_0', 'example_task2_0', 'example_task3_0']},
 {'pipeline_id': '4223300f-9727-484e-a75d-81caec313f4b',
  'output_path': 'output path 3',
  'tasks': ['example_task1_0', 'example_task2_0', 'example_task3_1']},
 {'pipeline_id': '392d322d-9f4c-41c8-8fe4-d2fd8862a978',
  'output_path': 'output path 3',
  'tasks': ['example_task1_0', 'example_task2_1', 'example_task3_0']},
 {'pipeline_id': '03ce8644-c440-4259-92c9-d1f6abf72db2',
  'output_path': 'output path 3',
  'tasks': ['example_task1_0', 'example_task2_1', 'example_task3_1']},
 {'pipeline_id': '92171c19-f837-49e5-af53-719399e07e91',
  'output_path': 'output path 3',
  'tasks': ['example_task1_1', 'example_task2_0', 'example_task3_0']},
 {'pipeline_id': '7ee62fae-51a0-4fd1-8488-3b8df10e8cfb',
  'output_path': 'output path 3',
  'tasks': ['example_task1_1', 'example_task2_0', 'example_task3_1']},
 {'pipeline_id': '7ae4039f-7

In [2]:
# Timing helpers are imported at the top of the cell. `time` is still
# imported so any later cell that relies on it keeps working.
from time import perf_counter, time

# Number of task variants registered per stage. With three chained stages the
# builder yields n**3 pipelines (1,000,000 in the recorded run for n = 100).
n = 100

repository = Repository(
    [(f"example_task1_{i}", ExampleTask()) for i in range(n)] +
    [(f"example_task2_{i}", ExampleTask2()) for i in range(n)] +
    [(f"example_task3_{i}", ExampleTask3()) for i in range(n)]
)

# perf_counter() is a monotonic, high-resolution clock meant for measuring
# durations; wall-clock time() can jump if the system clock is adjusted.
start = perf_counter()
pipelines = repository.build_pipelines()
# Capture the duration right after the build so formatting is not timed.
elapsed = perf_counter() - start
print(f"Built {len(pipelines)} pipelines in {elapsed:.2f} seconds")

Built 1000000 pipelines in 4.19 seconds


In [6]:
class ExampleTask0(Task):
    """Demo task that declares only an output category ("2").

    It declares no ``input_category``, and in the pipelines printed by this
    notebook it appears in first position. Its output category matches the
    ``input_category`` of ``ExampleTask3``.
    NOTE(review): the exact chaining rules live in ``pipeline_builder`` --
    confirm there.
    """

    output_category = "2"

    def run(self, input_path, output_path):
        """Announce the task on stdout and hand back a placeholder path.

        Both path arguments are accepted but never read; the return value
        is always the fixed string ``"output path 0"``.
        """
        # The message previously read "Running ExampleTask" (copy-paste from
        # ExampleTask), which made the log misreport which task executed.
        print("Running ExampleTask0")
        return "output path 0"

# (name prefix, task class) for each kind of task, in registration order.
# ExampleTask0 is added last, alongside the three original example tasks.
stage_variants = [
    ("example_task1", ExampleTask),
    ("example_task2", ExampleTask2),
    ("example_task3", ExampleTask3),
    ("example_task0", ExampleTask0),
]

# Two fresh instances per task class, named "<prefix>_0" and "<prefix>_1".
repository = Repository([
    (f"{prefix}_{index}", task_type())
    for prefix, task_type in stage_variants
    for index in range(2)
])

# Print every pipeline the repository can assemble from these tasks.
pipelines = repository.build_pipelines()
for pipeline in pipelines:
    print(pipeline)

Pipeline(id=e324ccfa-3cfd-4fd5-a752-43315e8c8df2, tasks=['example_task1_0', 'example_task2_0', 'example_task3_0'])
Pipeline(id=f4387b1c-15f8-4a25-8e04-feb17f31b1e5, tasks=['example_task1_0', 'example_task2_0', 'example_task3_1'])
Pipeline(id=6bdb06d0-2f61-448a-ba4f-309a68b12d4d, tasks=['example_task1_0', 'example_task2_1', 'example_task3_0'])
Pipeline(id=1f283541-afe4-4c72-aa15-7565c3060f12, tasks=['example_task1_0', 'example_task2_1', 'example_task3_1'])
Pipeline(id=9393f50d-18fd-4a0b-a8c7-9a989e83a17a, tasks=['example_task1_1', 'example_task2_0', 'example_task3_0'])
Pipeline(id=d21b4c81-eb17-4b1e-9e64-07ad8189f873, tasks=['example_task1_1', 'example_task2_0', 'example_task3_1'])
Pipeline(id=5e1ac230-aecf-463e-82b5-1025cfc2fce6, tasks=['example_task1_1', 'example_task2_1', 'example_task3_0'])
Pipeline(id=b1353f6e-1537-4f9c-a090-ba77592c6fca, tasks=['example_task1_1', 'example_task2_1', 'example_task3_1'])
Pipeline(id=7f247e63-eb23-494b-9a5d-8749c97bc8fa, tasks=['example_task0_0', 'exa