In [1]:
from pypekit import Task, Repository, CachedExecutor

class ExampleTask(Task):
    """Demo task that declares only an output type ("1").

    In this notebook's printed pipelines it always appears as the first step.
    """

    output_type = "1"

    def run(self, input_path, output_path):
        """Print a progress line and return a fixed placeholder path.

        Neither ``input_path`` nor ``output_path`` is used by this demo.
        """
        message = "Running ExampleTask"
        print(message)
        return "output path 1"

class ExampleTask2(Task):
    """Demo task that consumes type "1" and produces type "2"."""

    input_type = "1"
    output_type = "2"

    def run(self, input_path, output_path):
        """Print a progress line and return a fixed placeholder path.

        Neither ``input_path`` nor ``output_path`` is used by this demo.
        """
        message = "Running ExampleTask2"
        print(message)
        return "output path 2"

class ExampleTask3(Task):
    """Demo task that consumes type "2" and declares no output type.

    In this notebook's printed pipelines it always appears as the last step.
    """

    input_type = "2"

    def run(self, input_path, output_path):
        """Print a progress line and return a fixed placeholder path.

        Neither ``input_path`` nor ``output_path`` is used by this demo.
        """
        message = "Running ExampleTask3"
        print(message)
        return "output path 3"
    
# Register two interchangeable instances of each task class. The registration
# order (task1_*, task2_*, task3_*) is the same as in a hand-written list, so
# the pipelines are enumerated in the same order.
repository = Repository([
    (f"example_task{stage}_{index}", task_class())
    for stage, task_class in enumerate((ExampleTask, ExampleTask2, ExampleTask3), start=1)
    for index in range(2)
])

# build_pipelines() presumably chains tasks by matching output_type to
# input_type (confirm against pypekit); here it yields 2 * 2 * 2 = 8 pipelines.
pipelines = repository.build_pipelines()
for pipeline in pipelines:
    print(pipeline)

# "." is the executor's first argument -- presumably its cache/working
# directory; TODO confirm against the pypekit documentation.
executor = CachedExecutor(".", pipelines)
executor.run()

# Keep the results as the cell's final expression so the notebook renders them.
executor.results

Pipeline(id=6b49d284-100a-45be-b4fb-76d9796e642f, tasks=['example_task1_0', 'example_task2_0', 'example_task3_0'])
Pipeline(id=f530b558-cd5c-42e7-81d3-46a55708922c, tasks=['example_task1_0', 'example_task2_0', 'example_task3_1'])
Pipeline(id=0ff494c9-bd48-4dee-8a68-8c86be5414b3, tasks=['example_task1_0', 'example_task2_1', 'example_task3_0'])
Pipeline(id=156fe9d7-c707-4deb-9251-b1ea87840664, tasks=['example_task1_0', 'example_task2_1', 'example_task3_1'])
Pipeline(id=efa3beb2-b79a-4169-b326-93f90530a76c, tasks=['example_task1_1', 'example_task2_0', 'example_task3_0'])
Pipeline(id=94de6219-6c76-4bb2-bd63-c47f757be685, tasks=['example_task1_1', 'example_task2_0', 'example_task3_1'])
Pipeline(id=984fdcd1-99b5-47e7-b4fc-1e78f35137e4, tasks=['example_task1_1', 'example_task2_1', 'example_task3_0'])
Pipeline(id=9cdf5c8f-1bfc-477d-abed-3e449823ea83, tasks=['example_task1_1', 'example_task2_1', 'example_task3_1'])
Running ExampleTask
Running ExampleTask2
Running ExampleTask3
Running ExampleTas

[{'pipeline_id': '6b49d284-100a-45be-b4fb-76d9796e642f',
  'output_path': 'output path 3',
  'tasks': ['example_task1_0', 'example_task2_0', 'example_task3_0']},
 {'pipeline_id': 'f530b558-cd5c-42e7-81d3-46a55708922c',
  'output_path': 'output path 3',
  'tasks': ['example_task1_0', 'example_task2_0', 'example_task3_1']},
 {'pipeline_id': '0ff494c9-bd48-4dee-8a68-8c86be5414b3',
  'output_path': 'output path 3',
  'tasks': ['example_task1_0', 'example_task2_1', 'example_task3_0']},
 {'pipeline_id': '156fe9d7-c707-4deb-9251-b1ea87840664',
  'output_path': 'output path 3',
  'tasks': ['example_task1_0', 'example_task2_1', 'example_task3_1']},
 {'pipeline_id': 'efa3beb2-b79a-4169-b326-93f90530a76c',
  'output_path': 'output path 3',
  'tasks': ['example_task1_1', 'example_task2_0', 'example_task3_0']},
 {'pipeline_id': '94de6219-6c76-4bb2-bd63-c47f757be685',
  'output_path': 'output path 3',
  'tasks': ['example_task1_1', 'example_task2_0', 'example_task3_1']},
 {'pipeline_id': '984fdcd1-9

In [2]:
# perf_counter() is the clock meant for measuring elapsed intervals (it never
# jumps when the system clock is adjusted). `time` is still imported so that
# any later cell relying on it keeps working.
from time import perf_counter, time

n = 100

# n instances of each of the three task classes -> n**3 pipelines to build.
repository = Repository(
    [(f"example_task1_{i}", ExampleTask()) for i in range(n)] +
    [(f"example_task2_{i}", ExampleTask2()) for i in range(n)] +
    [(f"example_task3_{i}", ExampleTask3()) for i in range(n)]
)

start = perf_counter()
pipelines = repository.build_pipelines()
# Stop the clock before formatting so only build_pipelines() is timed.
elapsed = perf_counter() - start
print(f"Built {len(pipelines)} pipelines in {elapsed:.2f} seconds")

Built 1000000 pipelines in 3.63 seconds


In [3]:
class ExampleTask0(Task):
    """Demo task that declares only an output type ("2").

    It emits the same type as ``ExampleTask2`` without declaring an input
    type of its own.
    """

    output_type = "2"

    def run(self, input_path, output_path):
        """Print a progress line and return a fixed placeholder path.

        Neither ``input_path`` nor ``output_path`` is used by this demo.
        """
        # Report this class's own name. The message used to read
        # "Running ExampleTask", which made this task's log line
        # indistinguishable from ExampleTask's.
        print("Running ExampleTask0")
        return "output path 0"

# Two instances per task class, registered in the order task1_*, task2_*,
# task3_*, task0_* so the enumeration order of the pipelines is unchanged.
repository = Repository(
    [
        (f"example_task{label}_{index}", task_class())
        for label, task_class in (
            (1, ExampleTask),
            (2, ExampleTask2),
            (3, ExampleTask3),
            (0, ExampleTask0),
        )
        for index in range(2)
    ]
)

# Print one line per pipeline that could be built from the registered tasks.
pipelines = repository.build_pipelines()
for pipeline in pipelines:
    print(pipeline)

Pipeline(id=0f0a5418-82d9-4151-bf59-143a6e8c0fd7, tasks=['example_task1_0', 'example_task2_0', 'example_task3_0'])
Pipeline(id=12176cfe-17c7-476e-ac63-5ae64f62aa42, tasks=['example_task1_0', 'example_task2_0', 'example_task3_1'])
Pipeline(id=68579aaa-8565-44f8-be78-a69dac14c445, tasks=['example_task1_0', 'example_task2_1', 'example_task3_0'])
Pipeline(id=d91cc723-ab7b-4646-ae4a-d1112839be10, tasks=['example_task1_0', 'example_task2_1', 'example_task3_1'])
Pipeline(id=403ddb23-e662-40cd-af10-fab536da41e9, tasks=['example_task1_1', 'example_task2_0', 'example_task3_0'])
Pipeline(id=b4f0c1a0-a8c5-487d-b224-5effd4fc6ddd, tasks=['example_task1_1', 'example_task2_0', 'example_task3_1'])
Pipeline(id=0efbdf87-8de0-4089-925f-3a1425162873, tasks=['example_task1_1', 'example_task2_1', 'example_task3_0'])
Pipeline(id=dddf3bb5-bb2f-4bef-ad9d-6f4f879086bb, tasks=['example_task1_1', 'example_task2_1', 'example_task3_1'])
Pipeline(id=8bb4d51b-9ce2-456a-8c6a-4b89950cc756, tasks=['example_task0_0', 'exa