In [1]:
from pypekit import Task, Repository, CachedExecutor, Pipeline

class DataLoader(Task):
    """Demo task standing in for a data-loading step.

    Declares ``"source"`` as its accepted input type and ``"raw"`` as the
    type it produces. (Presumably the repository uses these labels to decide
    which tasks may follow one another; confirm against the pypekit docs.)
    """

    input_types = ["source"]
    output_types = ["raw"]

    def run(self, _):
        """Print a marker line and return the placeholder string ``"output"``.

        The single positional argument is ignored.
        """
        # Plain literal: the message contains no placeholders.
        print("Running DataLoader")
        return "output"

class Processor(Task):
    """Demo task standing in for a processing step.

    Accepts ``"raw"`` or ``"processed"`` input and declares ``"processed"``
    as its output type. (Presumably these labels let a processor follow a
    loader or another processor; confirm against the pypekit docs.)
    """

    input_types = ["raw", "processed"]
    output_types = ["processed"]

    def run(self, _):
        """Print a marker line and return the placeholder string ``"output"``.

        The single positional argument is ignored.
        """
        # Plain literal: the message contains no placeholders.
        print("Running Processor")
        return "output"

class Classifier(Task):
    """Demo task standing in for a classification step.

    Accepts ``"raw"`` or ``"processed"`` input and declares both ``"sink"``
    and ``"processed"`` as output types. (Presumably ``"sink"`` marks a task
    that may end a pipeline; confirm against the pypekit docs.)
    """

    input_types = ["raw", "processed"]
    output_types = ["sink", "processed"]

    def run(self, _):
        """Print a marker line and return the placeholder string ``"output"``.

        The single positional argument is ignored.
        """
        # Plain literal: the message contains no placeholders.
        print("Running Classifier")
        return "output"
    
# Register one loader, two processors and two classifiers under unique names.
repository = Repository(
    [
        ("data_loader", DataLoader()),
        ("processor_1", Processor()),
        ("processor_2", Processor()),
        ("classifier_1", Classifier()),
        ("classifier_2", Classifier()),
    ]
)

# Ask the repository for its pipelines and list each one.
pipeline_dict = repository.build_pipelines()
for pipeline in pipeline_dict.values():
    print(pipeline)

# Run every pipeline through a caching executor (verbose mode on).
executor = CachedExecutor(pipeline_dict, verbose=True)
executor.run()

# Last expression of the cell, so it is rendered as the cell output.
executor.results

Pipeline(id=a7612403579142e5958536942d43f41d, tasks=['data_loader', 'processor_1', 'processor_2', 'classifier_1'])
Pipeline(id=a98e56a0daff48dea1352eaccff40675, tasks=['data_loader', 'processor_1', 'processor_2', 'classifier_1', 'classifier_2'])
Pipeline(id=3040a46f932e4825aa413c634ed8fc6f, tasks=['data_loader', 'processor_1', 'processor_2', 'classifier_2'])
Pipeline(id=30228d4944464c6f9f0f9f7dad0a060b, tasks=['data_loader', 'processor_1', 'processor_2', 'classifier_2', 'classifier_1'])
Pipeline(id=1eb17b8c228b4c77b802f5823b220d07, tasks=['data_loader', 'processor_1', 'classifier_1'])
Pipeline(id=f429dacff5e94526858a03e053eee4f6, tasks=['data_loader', 'processor_1', 'classifier_1', 'processor_2', 'classifier_2'])
Pipeline(id=8071d9bc65f546a7ad9a50b5da29da19, tasks=['data_loader', 'processor_1', 'classifier_1', 'classifier_2'])
Pipeline(id=715e5049b24c46afa80b9e0164f1c3cc, tasks=['data_loader', 'processor_1', 'classifier_2'])
Pipeline(id=3a77e7a3b78a428ea08c8652ba6ff043, tasks=['data_lo

{'a7612403579142e5958536942d43f41d': {'pipeline_id': 'a7612403579142e5958536942d43f41d',
  'output': 'output',
  'tasks': ['data_loader', 'processor_1', 'processor_2', 'classifier_1']},
 'a98e56a0daff48dea1352eaccff40675': {'pipeline_id': 'a98e56a0daff48dea1352eaccff40675',
  'output': 'output',
  'tasks': ['data_loader',
   'processor_1',
   'processor_2',
   'classifier_1',
   'classifier_2']},
 '3040a46f932e4825aa413c634ed8fc6f': {'pipeline_id': '3040a46f932e4825aa413c634ed8fc6f',
  'output': 'output',
  'tasks': ['data_loader', 'processor_1', 'processor_2', 'classifier_2']},
 '30228d4944464c6f9f0f9f7dad0a060b': {'pipeline_id': '30228d4944464c6f9f0f9f7dad0a060b',
  'output': 'output',
  'tasks': ['data_loader',
   'processor_1',
   'processor_2',
   'classifier_2',
   'classifier_1']},
 '1eb17b8c228b4c77b802f5823b220d07': {'pipeline_id': '1eb17b8c228b4c77b802f5823b220d07',
  'output': 'output',
  'tasks': ['data_loader', 'processor_1', 'classifier_1']},
 'f429dacff5e94526858a03e053e

In [2]:
# perf_counter is a monotonic, high-resolution clock intended for measuring
# durations; `time` stays imported in case other cells still call it.
from time import perf_counter, time

# Number of instances registered per task kind.
n = 4

# A larger repository: n loaders, n processors and n classifiers.
repository = Repository(
    [(f"data_loader_{i}", DataLoader()) for i in range(n)] +
    [(f"processor_{i}", Processor()) for i in range(n)] +
    [(f"classifier_{i}", Classifier()) for i in range(n)]
)

# Time only the pipeline construction, and capture the elapsed value before
# formatting so the printing itself is not part of the measurement.
start = perf_counter()
pipelines = repository.build_pipelines()
elapsed = perf_counter() - start
print(f"Built {len(pipelines)} pipelines in {elapsed:.2f} seconds")

Built 219200 pipelines in 3.12 seconds


In [3]:
# Assemble a pipeline by hand from an explicit, ordered list of named tasks.
pipeline = Pipeline(
    [
        ("data_loader", DataLoader()),
        ("processor_1", Processor()),
        ("processor_2", Processor()),
        ("classifier_1", Classifier()),
        ("classifier_2", Classifier()),
    ]
)

# Bare expression: the notebook displays the pipeline's repr.
pipeline

Pipeline(id=5920b0c97b864914a004c6a0a4702011, tasks=['data_loader', 'processor_1', 'processor_2', 'classifier_1', 'classifier_2'])

In [4]:
# Show the string form of the repository and of the executor, one per line.
print(repository, executor, sep="\n")

Repository(tasks=['data_loader_0', 'data_loader_1', 'data_loader_2', 'data_loader_3', 'processor_0', 'processor_1', 'processor_2', 'processor_3', 'classifier_0', 'classifier_1', 'classifier_2', 'classifier_3'], pipelines=219200)
CachedExecutor(pipelines=32)


In [5]:
# Second executor over the same pipelines, handed the first executor's cache.
executor2 = CachedExecutor(
    pipeline_dict,
    cache=executor.cache,
    verbose=True,
)
# The trailing semicolon keeps the call's return value out of the cell output.
executor2.run();

Pipeline a7612403579142e5958536942d43f41d completed.
Pipeline a98e56a0daff48dea1352eaccff40675 completed.
Pipeline 3040a46f932e4825aa413c634ed8fc6f completed.
Pipeline 30228d4944464c6f9f0f9f7dad0a060b completed.
Pipeline 1eb17b8c228b4c77b802f5823b220d07 completed.
Pipeline f429dacff5e94526858a03e053eee4f6 completed.
Pipeline 8071d9bc65f546a7ad9a50b5da29da19 completed.
Pipeline 715e5049b24c46afa80b9e0164f1c3cc completed.
Pipeline 3a77e7a3b78a428ea08c8652ba6ff043 completed.
Pipeline b8f07145484d406ba39dad2f61d71eac completed.
Pipeline a0c8b75f90a64b8fb2055ecb4117fabc completed.
Pipeline f9b626c63e4c43eeac4c66b39aac432c completed.
Pipeline 500bc3032bda44fc87f65dc6e6dfed01 completed.
Pipeline 1b4c0a8d19e64ee2ab480c70e915fa87 completed.
Pipeline 21f1f57064024d4a9bf43c2606f4d170 completed.
Pipeline 5155af4e53134843a0573a9738ea9f88 completed.
Pipeline 39938282f1d144dca011905aba9f385e completed.
Pipeline 629e39fd9ac84a948dd249448886b7f2 completed.
Pipeline a184740fea5c4f2db98ea962492b8b52 comp

In [6]:
# Register the existing `pipeline` object as a task entry next to plain tasks.
repository = Repository([
    ("data_loader", DataLoader()),
    ("processor_1", Processor()),
    ("pipeline", pipeline),
    ("classifier_1", Classifier()),
    ("classifier_2", Classifier()),
])
pipeline_dict = repository.build_pipelines()

# The loop variable must not be called `pipeline`: that would overwrite the
# object registered above, and re-running this cell would then silently nest
# the last generated pipeline instead of the intended one.
for built_pipeline in pipeline_dict.values():
    print(built_pipeline)


Pipeline(id=a0ce7eaf5a514c7a955358cae0e9fb09, tasks=['data_loader', 'processor_1', 'classifier_1'])
Pipeline(id=963cf51d19aa43b0981591b3ae30d1ae, tasks=['data_loader', 'processor_1', 'classifier_1', 'classifier_2'])
Pipeline(id=fefaa0fb04f74c3da6ccb47f564146a3, tasks=['data_loader', 'processor_1', 'classifier_2'])
Pipeline(id=8c26402c526641ceb5f6fed4d379b985, tasks=['data_loader', 'processor_1', 'classifier_2', 'classifier_1'])
Pipeline(id=d0e20815eb2046c8ad1f6a9a16caa3c0, tasks=['data_loader', 'classifier_1'])
Pipeline(id=7c80ccb74c644e3f928b3ab69338ce54, tasks=['data_loader', 'classifier_1', 'processor_1', 'classifier_2'])
Pipeline(id=d5fa9ffd10084c1bb3a4dbe6913b1b50, tasks=['data_loader', 'classifier_1', 'classifier_2'])
Pipeline(id=ec228608b6c34eb49a3ecb0926cc0f28, tasks=['data_loader', 'classifier_2'])
Pipeline(id=38b8c2a3aecc4dea86ced6cfa746f3a8, tasks=['data_loader', 'classifier_2', 'processor_1', 'classifier_1'])
Pipeline(id=2e505ab55b7d432396024aea9deaa4ee, tasks=['data_loader