In [1]:
from pypekit import Task, Repository, CachedExecutor, Pipeline

class DataLoader(Task):
    """Demo task declaring input type ``"source"`` and output type ``"raw"``.

    It performs no real loading: ``run`` prints a marker line and returns a
    fixed placeholder string.
    """

    input_types = ["source"]
    output_types = ["raw"]

    def run(self, input):
        """Print a marker line and return the placeholder ``"output"``.

        ``input`` is accepted but never read. The parameter name shadows the
        builtin ``input``; it is kept unchanged so the method's signature
        stays the same for callers.
        """
        # Plain literal: the original f-string had no placeholders.
        print("Running DataLoader")
        return "output"

class Processor(Task):
    """Demo task declaring input types ``"raw"``/``"processed"`` and output type ``"processed"``.

    It performs no real processing: ``run`` prints a marker line and returns
    a fixed placeholder string.
    """

    input_types = ["raw", "processed"]
    output_types = ["processed"]

    def run(self, input):
        """Print a marker line and return the placeholder ``"output"``.

        ``input`` is accepted but never read. The parameter name shadows the
        builtin ``input``; it is kept unchanged so the method's signature
        stays the same for callers.
        """
        # Plain literal: the original f-string had no placeholders.
        print("Running Processor")
        return "output"

class Classifier(Task):
    """Demo task declaring input types ``"raw"``/``"processed"`` and output types ``"sink"``/``"processed"``.

    It performs no real classification: ``run`` prints a marker line and
    returns a fixed placeholder string.
    """

    input_types = ["raw", "processed"]
    output_types = ["sink", "processed"]

    def run(self, input):
        """Print a marker line and return the placeholder ``"output"``.

        ``input`` is accepted but never read. The parameter name shadows the
        builtin ``input``; it is kept unchanged so the method's signature
        stays the same for callers.
        """
        # Plain literal: the original f-string had no placeholders.
        print("Running Classifier")
        return "output"
    
# Register one loader, two processors and two classifiers under unique names.
repository = Repository([
    ("data_loader", DataLoader()),
    ("processor_1", Processor()),
    ("processor_2", Processor()),
    ("classifier_1", Classifier()),
    ("classifier_2", Classifier()),
])

# Ask the repository for the pipelines it can assemble from those tasks.
pipelines = repository.build_pipelines()

# The loop variable is deliberately NOT named `pipeline`: module-level loop
# variables outlive the loop, and a later cell binds `pipeline` to a
# hand-built Pipeline. Re-running this cell must not overwrite that object.
for built_pipeline in pipelines:
    print(built_pipeline)

# Run every pipeline through the caching executor.
executor = CachedExecutor(pipelines, verbose=True)
executor.run()

# Last expression of the cell: displayed as the cell's output.
executor.results

Pipeline(id=1363a265-af89-4e38-8666-6544ef2e0efc, tasks=['data_loader', 'processor_1', 'processor_2', 'classifier_1'])
Pipeline(id=9f11dfb2-f4bd-49b7-b522-670c8d328057, tasks=['data_loader', 'processor_1', 'processor_2', 'classifier_1', 'classifier_2'])
Pipeline(id=7d971f87-9809-4b12-b3f5-3268c70cf25d, tasks=['data_loader', 'processor_1', 'processor_2', 'classifier_2'])
Pipeline(id=51bedc2a-1a48-4e57-8732-e52491be9839, tasks=['data_loader', 'processor_1', 'processor_2', 'classifier_2', 'classifier_1'])
Pipeline(id=4e608e5f-3802-45c3-98ea-d2ac25035eb4, tasks=['data_loader', 'processor_1', 'classifier_1'])
Pipeline(id=446c8109-1a06-4fb3-ab44-c2f3da89ae13, tasks=['data_loader', 'processor_1', 'classifier_1', 'processor_2', 'classifier_2'])
Pipeline(id=d4b789e0-6c0d-4bda-b1ad-b8ddbffefd22, tasks=['data_loader', 'processor_1', 'classifier_1', 'classifier_2'])
Pipeline(id=08a8634e-d741-4f6e-8401-7bf62e0783dd, tasks=['data_loader', 'processor_1', 'classifier_2'])
Pipeline(id=01d7fad6-1dd2-43b

[{'pipeline_id': '1363a265-af89-4e38-8666-6544ef2e0efc',
  'output': 'output',
  'tasks': ['data_loader', 'processor_1', 'processor_2', 'classifier_1']},
 {'pipeline_id': '9f11dfb2-f4bd-49b7-b522-670c8d328057',
  'output': 'output',
  'tasks': ['data_loader',
   'processor_1',
   'processor_2',
   'classifier_1',
   'classifier_2']},
 {'pipeline_id': '7d971f87-9809-4b12-b3f5-3268c70cf25d',
  'output': 'output',
  'tasks': ['data_loader', 'processor_1', 'processor_2', 'classifier_2']},
 {'pipeline_id': '51bedc2a-1a48-4e57-8732-e52491be9839',
  'output': 'output',
  'tasks': ['data_loader',
   'processor_1',
   'processor_2',
   'classifier_2',
   'classifier_1']},
 {'pipeline_id': '4e608e5f-3802-45c3-98ea-d2ac25035eb4',
  'output': 'output',
  'tasks': ['data_loader', 'processor_1', 'classifier_1']},
 {'pipeline_id': '446c8109-1a06-4fb3-ab44-c2f3da89ae13',
  'output': 'output',
  'tasks': ['data_loader',
   'processor_1',
   'classifier_1',
   'processor_2',
   'classifier_2']},
 {'pipe

In [2]:
# `perf_counter` is the monotonic, high-resolution clock meant for measuring
# elapsed intervals; wall-clock `time()` can jump if the system clock is
# adjusted. `time` stays imported in case other cells rely on the name.
from time import perf_counter, time

# Number of instances of each task kind registered below.
n = 4

repository = Repository(
    [(f"data_loader_{i}", DataLoader()) for i in range(n)] +
    [(f"processor_{i}", Processor()) for i in range(n)] +
    [(f"classifier_{i}", Classifier()) for i in range(n)]
)

# Time only the pipeline construction step.
start = perf_counter()
pipelines = repository.build_pipelines()
print(f"Built {len(pipelines)} pipelines in {perf_counter() - start:.2f} seconds")

Built 219200 pipelines in 3.39 seconds


In [3]:
# Assemble a Pipeline by hand from (name, task) pairs. Each task class is
# instantiated in listing order, one fresh instance per entry.
pipeline = Pipeline([
    (task_name, task_cls())
    for task_name, task_cls in (
        ("data_loader", DataLoader),
        ("processor_1", Processor),
        ("processor_2", Processor),
        ("classifier_1", Classifier),
        ("classifier_2", Classifier),
    )
])
# Bare last expression: the notebook displays the pipeline's repr.
pipeline

Pipeline(id=d45a225f-eb32-4051-832f-f045d55b146d, tasks=['data_loader', 'processor_1', 'processor_2', 'classifier_1', 'classifier_2'])

In [4]:
# Show the string forms of the current `repository` and `executor`, one per
# line. Both names are bound by earlier cells.
print(repository, executor, sep="\n")

Repository(tasks=['data_loader_0', 'data_loader_1', 'data_loader_2', 'data_loader_3', 'processor_0', 'processor_1', 'processor_2', 'processor_3', 'classifier_0', 'classifier_1', 'classifier_2', 'classifier_3'], pipelines=219200)
CachedExecutor(pipelines=32)


In [5]:
# Build a repository that mixes plain tasks with `pipeline`, the Pipeline
# object constructed in an earlier cell, registered here under the name
# "pipeline".
repository = Repository([
    # NOTE(review): this entry is a list while the others are tuples;
    # presumably either sequence form is accepted. Confirm against pypekit.
    ["data_loader", DataLoader()],
    ("processor_1", Processor()),
    ("pipeline", pipeline),
    ("classifier_1", Classifier()),
    ("classifier_2", Classifier()),
])
pipelines = repository.build_pipelines()

# The loop variable must not be called `pipeline`: module-level loop variables
# outlive the loop, so reusing that name would overwrite the object registered
# above and a second run of this cell would register a different pipeline.
for built_pipeline in pipelines:
    print(built_pipeline)

# Run every pipeline through the caching executor.
executor = CachedExecutor(pipelines, verbose=True)
executor.run()

# Last expression of the cell: displayed as the cell's output.
executor.results

Pipeline(id=31104c11-d9c1-4bc7-bf2b-c200d00c0789, tasks=['data_loader', 'processor_1', 'classifier_1'])
Pipeline(id=76212315-1fcf-46b5-b9cb-d01180ffc31e, tasks=['data_loader', 'processor_1', 'classifier_1', 'classifier_2'])
Pipeline(id=8199dfda-8ede-4c16-8146-401b9742bdad, tasks=['data_loader', 'processor_1', 'classifier_2'])
Pipeline(id=750af0d4-ff9b-4eaa-9b19-fd3821cca484, tasks=['data_loader', 'processor_1', 'classifier_2', 'classifier_1'])
Pipeline(id=7f2fb841-c6c0-43c1-a308-a457ecd9b32a, tasks=['data_loader', 'classifier_1'])
Pipeline(id=e75c57f2-3773-405a-8aec-e8c5a4f5e905, tasks=['data_loader', 'classifier_1', 'processor_1', 'classifier_2'])
Pipeline(id=e45ddb53-a0c4-42d5-b874-7872400c00e8, tasks=['data_loader', 'classifier_1', 'classifier_2'])
Pipeline(id=e6e57ea1-865b-4a46-99f7-5ebb93fe08dd, tasks=['data_loader', 'classifier_2'])
Pipeline(id=b59ef5e0-9c2c-4f3c-a25d-1d04537f9fba, tasks=['data_loader', 'classifier_2', 'processor_1', 'classifier_1'])
Pipeline(id=1be3caf3-fc93-412

[{'pipeline_id': '31104c11-d9c1-4bc7-bf2b-c200d00c0789',
  'output': 'output',
  'tasks': ['data_loader', 'processor_1', 'classifier_1']},
 {'pipeline_id': '76212315-1fcf-46b5-b9cb-d01180ffc31e',
  'output': 'output',
  'tasks': ['data_loader', 'processor_1', 'classifier_1', 'classifier_2']},
 {'pipeline_id': '8199dfda-8ede-4c16-8146-401b9742bdad',
  'output': 'output',
  'tasks': ['data_loader', 'processor_1', 'classifier_2']},
 {'pipeline_id': '750af0d4-ff9b-4eaa-9b19-fd3821cca484',
  'output': 'output',
  'tasks': ['data_loader', 'processor_1', 'classifier_2', 'classifier_1']},
 {'pipeline_id': '7f2fb841-c6c0-43c1-a308-a457ecd9b32a',
  'output': 'output',
  'tasks': ['data_loader', 'classifier_1']},
 {'pipeline_id': 'e75c57f2-3773-405a-8aec-e8c5a4f5e905',
  'output': 'output',
  'tasks': ['data_loader', 'classifier_1', 'processor_1', 'classifier_2']},
 {'pipeline_id': 'e45ddb53-a0c4-42d5-b874-7872400c00e8',
  'output': 'output',
  'tasks': ['data_loader', 'classifier_1', 'classifier