In [1]:
from pypekit import Task, Repository, CachedExecutor, Pipeline

class DataLoader(Task):
    """Demo task standing in for a data-loading step.

    Declares ``input_types = ["source"]`` and ``output_types = ["raw"]``.
    Presumably the repository uses these labels to decide which tasks may
    follow one another -- confirm against the pypekit documentation.
    """

    input_types = ["source"]
    output_types = ["raw"]

    def run(self, input_path, output_path):
        """Print a marker line and return a placeholder string.

        Neither ``input_path`` nor ``output_path`` is read.
        """
        print("Running DataLoader")
        # NOTE(review): this is the literal string "output_path", not the
        # `output_path` argument -- looks like a demo placeholder; confirm.
        return "output_path"

class Processor(Task):
    """Demo task standing in for a processing step.

    Declares ``input_types = ["raw", "processed"]`` and
    ``output_types = ["processed"]``. Because "processed" appears on both
    sides, one Processor can presumably follow another -- confirm against
    the pypekit documentation.
    """

    input_types = ["raw", "processed"]
    output_types = ["processed"]

    def run(self, input_path, output_path):
        """Print a marker line and return a placeholder string.

        Neither ``input_path`` nor ``output_path`` is read.
        """
        print("Running Processor")
        # NOTE(review): this is the literal string "output_path", not the
        # `output_path` argument -- looks like a demo placeholder; confirm.
        return "output_path"

class Classifier(Task):
    """Demo task standing in for a classification step.

    Declares ``input_types = ["raw", "processed"]`` and
    ``output_types = ["sink", "processed"]``. Presumably "sink" marks a
    valid pipeline end while "processed" lets further tasks follow --
    confirm against the pypekit documentation.
    """

    input_types = ["raw", "processed"]
    output_types = ["sink", "processed"]

    def run(self, input_path, output_path):
        """Print a marker line and return a placeholder string.

        Neither ``input_path`` nor ``output_path`` is read.
        """
        print("Running Classifier")
        # NOTE(review): this is the literal string "output_path", not the
        # `output_path` argument -- looks like a demo placeholder; confirm.
        return "output_path"
    
# Register one loader plus two processors and two classifiers, each under a
# unique name, in loader -> processors -> classifiers order.
repository = Repository(
    [("data_loader", DataLoader())]
    + [(f"processor_{k}", Processor()) for k in (1, 2)]
    + [(f"classifier_{k}", Classifier()) for k in (1, 2)]
)

# Ask the repository for the pipelines it can assemble and show each one.
pipelines = repository.build_pipelines()
for pipeline in pipelines:
    print(pipeline)

# Hand the built pipelines to the executor and run them.
executor = CachedExecutor(pipelines, verbose=True)
executor.run()

# Bare last expression so the notebook displays the collected results.
executor.results

Pipeline(id=e83d15c1-1c29-4181-a9f6-d6c866a9cdfb, tasks=['data_loader', 'processor_1', 'processor_2', 'classifier_1'])
Pipeline(id=ea96d232-24e7-47b7-aec8-1ee3a8af52cb, tasks=['data_loader', 'processor_1', 'processor_2', 'classifier_1', 'classifier_2'])
Pipeline(id=b8080844-a119-47c7-8413-e0c6e61d0ccc, tasks=['data_loader', 'processor_1', 'processor_2', 'classifier_2'])
Pipeline(id=eb360f32-bfd6-4d81-bcee-918a6fb62d0c, tasks=['data_loader', 'processor_1', 'processor_2', 'classifier_2', 'classifier_1'])
Pipeline(id=3503077b-eb82-49ee-9791-35e55aba96fc, tasks=['data_loader', 'processor_1', 'classifier_1'])
Pipeline(id=fdd1ec41-df0c-4beb-a7df-6e1278912281, tasks=['data_loader', 'processor_1', 'classifier_1', 'processor_2', 'classifier_2'])
Pipeline(id=49dd446d-04f9-402b-b8fb-cc7a77ae4b47, tasks=['data_loader', 'processor_1', 'classifier_1', 'classifier_2'])
Pipeline(id=ba51e5c9-b335-432e-8f69-a09dc4ee11a7, tasks=['data_loader', 'processor_1', 'classifier_2'])
Pipeline(id=30a6d028-2361-492

[{'pipeline_id': 'e83d15c1-1c29-4181-a9f6-d6c866a9cdfb',
  'output_path': 'output_path',
  'tasks': ['data_loader', 'processor_1', 'processor_2', 'classifier_1']},
 {'pipeline_id': 'ea96d232-24e7-47b7-aec8-1ee3a8af52cb',
  'output_path': 'output_path',
  'tasks': ['data_loader',
   'processor_1',
   'processor_2',
   'classifier_1',
   'classifier_2']},
 {'pipeline_id': 'b8080844-a119-47c7-8413-e0c6e61d0ccc',
  'output_path': 'output_path',
  'tasks': ['data_loader', 'processor_1', 'processor_2', 'classifier_2']},
 {'pipeline_id': 'eb360f32-bfd6-4d81-bcee-918a6fb62d0c',
  'output_path': 'output_path',
  'tasks': ['data_loader',
   'processor_1',
   'processor_2',
   'classifier_2',
   'classifier_1']},
 {'pipeline_id': '3503077b-eb82-49ee-9791-35e55aba96fc',
  'output_path': 'output_path',
  'tasks': ['data_loader', 'processor_1', 'classifier_1']},
 {'pipeline_id': 'fdd1ec41-df0c-4beb-a7df-6e1278912281',
  'output_path': 'output_path',
  'tasks': ['data_loader',
   'processor_1',
   'c

In [2]:
# `perf_counter` is the monotonic, high-resolution clock intended for measuring
# elapsed time; wall-clock `time()` can jump if the system clock is adjusted.
# `time` is still imported in case another cell relies on the name.
from time import perf_counter, time

n = 4  # number of instances registered for each task class

repository = Repository(
    [(f"data_loader_{i}", DataLoader()) for i in range(n)] +
    [(f"processor_{i}", Processor()) for i in range(n)] +
    [(f"classifier_{i}", Classifier()) for i in range(n)]
)

# Time only the pipeline enumeration; capture the elapsed value immediately so
# string formatting is not included in the measurement.
start = perf_counter()
pipelines = repository.build_pipelines()
elapsed = perf_counter() - start
print(f"Built {len(pipelines)} pipelines in {elapsed:.2f} seconds")

Built 219200 pipelines in 2.40 seconds


In [3]:
# Hand-assemble a single pipeline: loader, then two processors, then two
# classifiers, each step given as a (name, task) pair.
pipeline = Pipeline([
    ("data_loader", DataLoader()),
    *((f"processor_{k}", Processor()) for k in (1, 2)),
    *((f"classifier_{k}", Classifier()) for k in (1, 2)),
])
# Bare last expression so the notebook displays the pipeline's repr.
pipeline

Pipeline(id=765f3bd4-b9ea-4f8c-bd2a-c4d806bfa955, tasks=['data_loader', 'processor_1', 'processor_2', 'classifier_1', 'classifier_2'])

In [4]:
# Show the string form of the current repository and executor, one per line.
print(repository, executor, sep="\n")

Repository(tasks=['data_loader_0', 'data_loader_1', 'data_loader_2', 'data_loader_3', 'processor_0', 'processor_1', 'processor_2', 'processor_3', 'classifier_0', 'classifier_1', 'classifier_2', 'classifier_3'], pipelines=219200)
CachedExecutor(cache_dir=cache, pipelines=32)


In [10]:
# Build a repository that mixes plain tasks with an already-assembled
# `pipeline` object registered as a single entry.
repository = Repository([
    # NOTE(review): this entry is a list while the others are tuples --
    # presumably deliberate to show both are accepted; confirm.
    ["data_loader", DataLoader()],
    ("processor_1", Processor()),
    ("pipeline", pipeline),
    ("classifier_1", Classifier()),
    ("classifier_2", Classifier()),
])
pipelines = repository.build_pipelines()
# The loop variable must not be called `pipeline`: that would rebind the global
# registered above, so re-running this cell would nest a different pipeline.
for built_pipeline in pipelines:
    print(built_pipeline)

# Hand the built pipelines to the executor and run them.
executor = CachedExecutor(pipelines, verbose=True)
executor.run()

# Bare last expression so the notebook displays the collected results.
executor.results

Pipeline(id=b2b268f5-837c-4e8f-9ec3-9bbb91652354, tasks=['data_loader', 'processor_1', 'classifier_1'])
Pipeline(id=129f0944-eba9-43a5-a732-19a5241f3b21, tasks=['data_loader', 'processor_1', 'classifier_1', 'classifier_2'])
Pipeline(id=d5257e4e-4e66-4585-a6d8-1fa6ce469632, tasks=['data_loader', 'processor_1', 'classifier_2'])
Pipeline(id=a3906828-696f-4162-a505-8fc86dd79399, tasks=['data_loader', 'processor_1', 'classifier_2', 'classifier_1'])
Pipeline(id=378cb42a-c06d-464b-980c-d267fa4fc6bf, tasks=['data_loader', 'classifier_1'])
Pipeline(id=cc9d7bb4-6d3b-4c27-8e35-ce2cae6f91e5, tasks=['data_loader', 'classifier_1', 'processor_1', 'classifier_2'])
Pipeline(id=2394bd45-b103-4fa0-afcd-55d2cb8d5aa3, tasks=['data_loader', 'classifier_1', 'classifier_2'])
Pipeline(id=fbaac1ed-46e9-47a7-94e8-30e5f0bf5948, tasks=['data_loader', 'classifier_2'])
Pipeline(id=01a37221-5008-4d8b-8271-956aa49040c5, tasks=['data_loader', 'classifier_2', 'processor_1', 'classifier_1'])
Pipeline(id=a47682ed-4372-42f

[{'pipeline_id': 'b2b268f5-837c-4e8f-9ec3-9bbb91652354',
  'output_path': 'output_path',
  'tasks': ['data_loader', 'processor_1', 'classifier_1']},
 {'pipeline_id': '129f0944-eba9-43a5-a732-19a5241f3b21',
  'output_path': 'output_path',
  'tasks': ['data_loader', 'processor_1', 'classifier_1', 'classifier_2']},
 {'pipeline_id': 'd5257e4e-4e66-4585-a6d8-1fa6ce469632',
  'output_path': 'output_path',
  'tasks': ['data_loader', 'processor_1', 'classifier_2']},
 {'pipeline_id': 'a3906828-696f-4162-a505-8fc86dd79399',
  'output_path': 'output_path',
  'tasks': ['data_loader', 'processor_1', 'classifier_2', 'classifier_1']},
 {'pipeline_id': '378cb42a-c06d-464b-980c-d267fa4fc6bf',
  'output_path': 'output_path',
  'tasks': ['data_loader', 'classifier_1']},
 {'pipeline_id': 'cc9d7bb4-6d3b-4c27-8e35-ce2cae6f91e5',
  'output_path': 'output_path',
  'tasks': ['data_loader', 'classifier_1', 'processor_1', 'classifier_2']},
 {'pipeline_id': '2394bd45-b103-4fa0-afcd-55d2cb8d5aa3',
  'output_path':