In [1]:
from pypekit import Task, Repository, CachedExecutor

class ExampleTask(Task):
    """Toy first-stage task: declares an output category and no input category.

    In the pipelines printed by this notebook, instances of this class always
    appear in the first position.
    """

    output_category = "1"

    def run(self, input_path, output_path):
        """Print a marker line and return a fixed placeholder path.

        Both arguments are accepted but ignored by this toy implementation.
        """
        print("Running ExampleTask")
        return "output path 1"

class ExampleTask2(Task):
    """Toy middle-stage task: input category "1", output category "2".

    In the pipelines printed by this notebook, instances of this class sit
    between an ``ExampleTask`` and an ``ExampleTask3``.
    """

    input_category = "1"
    output_category = "2"

    def run(self, input_path, output_path):
        """Print a marker line and return a fixed placeholder path.

        Both arguments are accepted but ignored by this toy implementation.
        """
        print("Running ExampleTask2")
        return "output path 2"

class ExampleTask3(Task):
    """Toy final-stage task: declares an input category ("2") and no output category.

    In the pipelines printed by this notebook, instances of this class always
    appear in the last position.
    """

    input_category = "2"

    def run(self, input_path, output_path):
        """Print a marker line and return a fixed placeholder path.

        Both arguments are accepted but ignored by this toy implementation.
        The returned string is what shows up as ``output_path`` in the
        executor results displayed in this notebook.
        """
        print("Running ExampleTask3")
        return "output path 3"
    
# Register two instances of each task class under the names
# example_task<stage>_<copy>, in stage order (1, 2, 3).
repository = Repository([
    (f"example_task{stage}_{copy}", task_cls())
    for stage, task_cls in enumerate((ExampleTask, ExampleTask2, ExampleTask3), start=1)
    for copy in range(2)
])

# Enumerate every pipeline the repository can build and list them.
pipelines = repository.build_pipelines()
for pipeline in pipelines:
    print(pipeline)

# NOTE(review): the first argument is presumably the cache/working directory —
# confirm against the pypekit documentation.
executor = CachedExecutor(".", pipelines)
executor.run()

# Last expression of the cell: rendered as the cell output.
executor.results

Pipeline(id=ca637d9a-043b-49c6-a882-0e40ae585f17, tasks=['example_task1_0', 'example_task2_0', 'example_task3_0'])
Pipeline(id=b539254a-5a98-46c2-b1ae-ac9f35150bff, tasks=['example_task1_0', 'example_task2_0', 'example_task3_1'])
Pipeline(id=70b05f57-74d7-43a5-8367-e2f981e11d42, tasks=['example_task1_0', 'example_task2_1', 'example_task3_0'])
Pipeline(id=5798a581-b76f-4f84-a4d2-b68ba97e50bf, tasks=['example_task1_0', 'example_task2_1', 'example_task3_1'])
Pipeline(id=479c0f05-2e88-40fe-92ba-e7f165dde5d0, tasks=['example_task1_1', 'example_task2_0', 'example_task3_0'])
Pipeline(id=18f746e4-3833-4c66-8226-baea06052c92, tasks=['example_task1_1', 'example_task2_0', 'example_task3_1'])
Pipeline(id=a41fcdef-159f-4ff8-830e-39f00d9a0cd9, tasks=['example_task1_1', 'example_task2_1', 'example_task3_0'])
Pipeline(id=4ce90b10-4ecb-4f66-b76a-58cad8c64df8, tasks=['example_task1_1', 'example_task2_1', 'example_task3_1'])
Running ExampleTask
Running ExampleTask2
Running ExampleTask3
Pipeline ca637d9a-

[{'pipeline_id': 'ca637d9a-043b-49c6-a882-0e40ae585f17',
  'output_path': 'output path 3',
  'tasks': ['example_task1_0', 'example_task2_0', 'example_task3_0']},
 {'pipeline_id': 'b539254a-5a98-46c2-b1ae-ac9f35150bff',
  'output_path': 'output path 3',
  'tasks': ['example_task1_0', 'example_task2_0', 'example_task3_1']},
 {'pipeline_id': '70b05f57-74d7-43a5-8367-e2f981e11d42',
  'output_path': 'output path 3',
  'tasks': ['example_task1_0', 'example_task2_1', 'example_task3_0']},
 {'pipeline_id': '5798a581-b76f-4f84-a4d2-b68ba97e50bf',
  'output_path': 'output path 3',
  'tasks': ['example_task1_0', 'example_task2_1', 'example_task3_1']},
 {'pipeline_id': '479c0f05-2e88-40fe-92ba-e7f165dde5d0',
  'output_path': 'output path 3',
  'tasks': ['example_task1_1', 'example_task2_0', 'example_task3_0']},
 {'pipeline_id': '18f746e4-3833-4c66-8226-baea06052c92',
  'output_path': 'output path 3',
  'tasks': ['example_task1_1', 'example_task2_0', 'example_task3_1']},
 {'pipeline_id': 'a41fcdef-1

In [2]:
# `time` stays imported in case another cell still calls it; the measurement
# below uses perf_counter instead.
from time import perf_counter, time

# Number of instances registered per task class. In the recorded run,
# n = 100 produced 1,000,000 pipelines.
n = 100

repository = Repository(
    [(f"example_task1_{i}", ExampleTask()) for i in range(n)] +
    [(f"example_task2_{i}", ExampleTask2()) for i in range(n)] +
    [(f"example_task3_{i}", ExampleTask3()) for i in range(n)]
)

# perf_counter is a monotonic, high-resolution clock intended for measuring
# short durations; time.time() follows the wall clock and can jump if the
# system time is adjusted mid-measurement.
start = perf_counter()
pipelines = repository.build_pipelines()
elapsed = perf_counter() - start  # taken immediately after the build finishes
print(f"Built {len(pipelines)} pipelines in {elapsed:.2f} seconds")

Built 1000000 pipelines in 3.94 seconds


In [3]:
class ExampleTask0(Task):
    """Toy alternative entry task: declares output category "2" and no input category.

    It shares its output category with ``ExampleTask2``, and the pipelines
    printed after it is registered include ones that begin with this task.
    """

    output_category = "2"

    def run(self, input_path, output_path):
        """Print a marker line and return a fixed placeholder path.

        Both arguments are accepted but ignored by this toy implementation.
        """
        # The message names this class (it previously read "Running ExampleTask",
        # which made its log line indistinguishable from ExampleTask's).
        print("Running ExampleTask0")
        return "output path 0"

# Same registry as the first example, plus two ExampleTask0 instances
# appended last. Names follow example_task<stage>_<copy>.
repository = Repository([
    (f"example_task{stage}_{copy}", task_cls())
    for stage, task_cls in (
        (1, ExampleTask),
        (2, ExampleTask2),
        (3, ExampleTask3),
        (0, ExampleTask0),
    )
    for copy in range(2)
])

# List every pipeline the extended repository can build.
pipelines = repository.build_pipelines()
for pipeline in pipelines:
    print(pipeline)

Pipeline(id=fdca4dce-fa82-43e4-80b8-e21511581ad8, tasks=['example_task1_0', 'example_task2_0', 'example_task3_0'])
Pipeline(id=6677181d-6bff-4a6d-ba77-04d303ea4f9b, tasks=['example_task1_0', 'example_task2_0', 'example_task3_1'])
Pipeline(id=66135dba-64df-4ada-bb5e-eb17ff297004, tasks=['example_task1_0', 'example_task2_1', 'example_task3_0'])
Pipeline(id=2dcfd437-9f07-4a12-a4e4-9bf910c305a8, tasks=['example_task1_0', 'example_task2_1', 'example_task3_1'])
Pipeline(id=01500f37-f4c6-42cd-a4be-8092c3ca5840, tasks=['example_task1_1', 'example_task2_0', 'example_task3_0'])
Pipeline(id=53316d7e-b987-4b0f-b898-02b2d9e5467f, tasks=['example_task1_1', 'example_task2_0', 'example_task3_1'])
Pipeline(id=056e0008-be71-4d69-935e-405c2afcb710, tasks=['example_task1_1', 'example_task2_1', 'example_task3_0'])
Pipeline(id=2f7a94cf-7b8e-432c-9651-b1f65885be42, tasks=['example_task1_1', 'example_task2_1', 'example_task3_1'])
Pipeline(id=f64790a7-5a80-42b2-bed1-0b92b4297ef4, tasks=['example_task0_0', 'exa