In [4]:
from pypekit import Task, Repository, CachedExecutor

class ExampleTask(Task):
    """First-stage demo task: declares only an output category.

    No ``input_category`` is set here; presumably that is what lets pypekit
    place this task at the start of a pipeline (confirm against the pypekit
    docs).
    """

    # Category tag of this task's output; ExampleTask2 declares the same
    # value as its input_category.
    output_category = "1"

    def run(self, input_path, output_path):
        """Announce the run and return a fixed placeholder path.

        Args:
            input_path: Unused by this demo task.
            output_path: Unused by this demo task.

        Returns:
            The constant string ``"output path 1"``.
        """
        # Plain literal: the message has no placeholders, so no f-prefix.
        print("Running ExampleTask")
        return "output path 1"

class ExampleTask2(Task):
    """Middle-stage demo task: consumes category "1" and produces category "2"."""

    # Category tags; presumably pypekit links tasks whose output_category
    # equals the next task's input_category (confirm against pypekit docs).
    input_category = "1"
    output_category = "2"

    def run(self, input_path, output_path):
        """Announce the run and return a fixed placeholder path.

        Args:
            input_path: Unused by this demo task.
            output_path: Unused by this demo task.

        Returns:
            The constant string ``"output path 2"``.
        """
        # Plain literal: the message has no placeholders, so no f-prefix.
        print("Running ExampleTask2")
        return "output path 2"

class ExampleTask3(Task):
    """Final-stage demo task: consumes category "2" and declares no output category."""

    # Category tag of the input this task accepts. No output_category is set;
    # presumably that marks the task as the end of a pipeline (confirm against
    # pypekit docs).
    input_category = "2"

    def run(self, input_path, output_path):
        """Announce the run and return a fixed placeholder path.

        Args:
            input_path: Unused by this demo task.
            output_path: Unused by this demo task.

        Returns:
            The constant string ``"output path 3"``.
        """
        # Plain literal: the message has no placeholders, so no f-prefix.
        print("Running ExampleTask3")
        return "output path 3"
    
# Register two separately named instances of each demo stage, in stage order
# (example_task1_0, example_task1_1, example_task2_0, ...).
repository = Repository(
    [(f"example_task1_{i}", ExampleTask()) for i in range(2)]
    + [(f"example_task2_{i}", ExampleTask2()) for i in range(2)]
    + [(f"example_task3_{i}", ExampleTask3()) for i in range(2)]
)

# Let the repository assemble its pipelines and list them one per line.
pipelines = repository.build_pipelines()
for pipeline in pipelines:
    print(pipeline)

# "." is the executor's first argument -- presumably the directory it works
# and caches in (confirm against the pypekit docs).
executor = CachedExecutor(".", pipelines)
executor.run()

# Bare last expression so the notebook renders the collected results.
executor.results

Pipeline(id=a18fd417-c2c7-45a9-9db0-528274059115, tasks=['example_task1_0', 'example_task2_0', 'example_task3_0'])
Pipeline(id=6009056f-69e6-45e1-95fb-af8ead9056aa, tasks=['example_task1_0', 'example_task2_0', 'example_task3_1'])
Pipeline(id=0a4d899f-683e-4d52-af65-bbdc986d1db0, tasks=['example_task1_0', 'example_task2_1', 'example_task3_0'])
Pipeline(id=da99a338-8500-4b9f-9203-9ed799ca484b, tasks=['example_task1_0', 'example_task2_1', 'example_task3_1'])
Pipeline(id=e024e615-3448-458b-911c-dc24ba0ee511, tasks=['example_task1_1', 'example_task2_0', 'example_task3_0'])
Pipeline(id=07d80175-044d-473f-a02f-0a9c3e9df96d, tasks=['example_task1_1', 'example_task2_0', 'example_task3_1'])
Pipeline(id=5e77aa98-bbdb-4b76-a879-986d256114db, tasks=['example_task1_1', 'example_task2_1', 'example_task3_0'])
Pipeline(id=75896a09-cda2-471f-85a3-df9cfe747a34, tasks=['example_task1_1', 'example_task2_1', 'example_task3_1'])
Running ExampleTask
Running ExampleTask2
Running ExampleTask3
Pipeline a18fd417-

[{'pipeline_id': 'a18fd417-c2c7-45a9-9db0-528274059115',
  'output_path': 'output path 3',
  'tasks': ['example_task1_0', 'example_task2_0', 'example_task3_0']},
 {'pipeline_id': '6009056f-69e6-45e1-95fb-af8ead9056aa',
  'output_path': 'output path 3',
  'tasks': ['example_task1_0', 'example_task2_0', 'example_task3_1']},
 {'pipeline_id': '0a4d899f-683e-4d52-af65-bbdc986d1db0',
  'output_path': 'output path 3',
  'tasks': ['example_task1_0', 'example_task2_1', 'example_task3_0']},
 {'pipeline_id': 'da99a338-8500-4b9f-9203-9ed799ca484b',
  'output_path': 'output path 3',
  'tasks': ['example_task1_0', 'example_task2_1', 'example_task3_1']},
 {'pipeline_id': 'e024e615-3448-458b-911c-dc24ba0ee511',
  'output_path': 'output path 3',
  'tasks': ['example_task1_1', 'example_task2_0', 'example_task3_0']},
 {'pipeline_id': '07d80175-044d-473f-a02f-0a9c3e9df96d',
  'output_path': 'output path 3',
  'tasks': ['example_task1_1', 'example_task2_0', 'example_task3_1']},
 {'pipeline_id': '5e77aa98-b

In [5]:
# Number of task instances registered per stage. With the three stages below,
# the recorded run of this cell built 1,000,000 (= n**3) pipelines.
n = 100

repository = Repository(
    [(f"example_task1_{i}", ExampleTask()) for i in range(n)] +
    [(f"example_task2_{i}", ExampleTask2()) for i in range(n)] +
    [(f"example_task3_{i}", ExampleTask3()) for i in range(n)]
)

# perf_counter() is the high-resolution clock intended for measuring elapsed
# time; time() follows the wall clock and can jump if the system clock is
# adjusted mid-run. `time` stays imported in case other cells still call it.
from time import perf_counter, time
start = perf_counter()
pipelines = repository.build_pipelines()
print(f"Built {len(pipelines)} pipelines in {perf_counter() - start:.2f} seconds")

Built 1000000 pipelines in 4.62 seconds


In [6]:
class ExampleTask0(Task):
    """Alternative entry task: declares no input category and outputs category "2".

    Its output category matches ExampleTask3's input_category, so it offers a
    second way to produce the category that ExampleTask3 consumes.
    """

    # Same output tag as ExampleTask2.
    output_category = "2"

    def run(self, input_path, output_path):
        """Announce the run and return a fixed placeholder path.

        Args:
            input_path: Unused by this demo task.
            output_path: Unused by this demo task.

        Returns:
            The constant string ``"output path 0"``.
        """
        # Log this class's own name: the message previously read
        # "Running ExampleTask", which made runs of this task look like runs
        # of ExampleTask.
        print("Running ExampleTask0")
        return "output path 0"

# Register two named instances per task class, in the order stages 1, 2, 3
# and then the extra entry stage 0. ExampleTask0 outputs category "2", the
# category ExampleTask3 takes as input.
repository = Repository([
    (f"example_task{stage}_{idx}", task_cls())
    for stage, task_cls in (
        ("1", ExampleTask),
        ("2", ExampleTask2),
        ("3", ExampleTask3),
        ("0", ExampleTask0),
    )
    for idx in range(2)
])

# Rebuild the pipelines for the enlarged repository and list them one per line.
pipelines = repository.build_pipelines()
for pipeline in pipelines:
    print(pipeline)

Pipeline(id=fb26884d-fbd7-4d6e-8373-413a853e6ce2, tasks=['example_task1_0', 'example_task2_0', 'example_task3_0'])
Pipeline(id=02184c5e-ea83-4e9a-ad98-7e529e975f96, tasks=['example_task1_0', 'example_task2_0', 'example_task3_1'])
Pipeline(id=4522d0ee-ccc6-4ba7-9903-1c6a428cfcd9, tasks=['example_task1_0', 'example_task2_1', 'example_task3_0'])
Pipeline(id=d40e950e-492d-4010-aca0-8592563a1256, tasks=['example_task1_0', 'example_task2_1', 'example_task3_1'])
Pipeline(id=aebc546d-9928-48c9-bcd5-00ef0e4e0ba4, tasks=['example_task1_1', 'example_task2_0', 'example_task3_0'])
Pipeline(id=cb7ceef6-eb8f-4e35-92f0-5efda7e29ed2, tasks=['example_task1_1', 'example_task2_0', 'example_task3_1'])
Pipeline(id=8d610de3-5e2f-4045-901c-4ecddf0e0588, tasks=['example_task1_1', 'example_task2_1', 'example_task3_0'])
Pipeline(id=5dd35c6f-0ba7-48ad-9815-37fddca6ca70, tasks=['example_task1_1', 'example_task2_1', 'example_task3_1'])
Pipeline(id=91a69392-46b6-4f58-b3c9-7ce0202c75a9, tasks=['example_task0_0', 'exa