In [1]:
from pypekit import Task, Repository, CachedExecutor, Pipeline

class DataLoader(Task):
    """Demo task that declares a "source" input and a "raw" output.

    ``input_types`` / ``output_types`` are class attributes consumed by
    pypekit; presumably a task can follow another when one of its input types
    matches one of the predecessor's output types (confirm in pypekit docs).
    """

    input_types = ["source"]
    output_types = ["raw"]

    def run(self, _) -> str:
        """Print a trace line and return the placeholder string ``"output"``.

        The single positional argument is ignored.
        """
        # Plain literal: the message has no placeholders, so no f-string is needed.
        print("Running DataLoader")
        return "output"

class Processor(Task):
    """Demo task that declares "raw" or "processed" input and "processed" output.

    Because "processed" appears in both lists, the declared types allow this
    task to be placed after another task that emits "processed" (how pypekit
    uses the lists is not shown here — confirm in pypekit docs).
    """

    input_types = ["raw", "processed"]
    output_types = ["processed"]

    def run(self, _) -> str:
        """Print a trace line and return the placeholder string ``"output"``.

        The single positional argument is ignored.
        """
        # Plain literal: the message has no placeholders, so no f-string is needed.
        print("Running Processor")
        return "output"

class Classifier(Task):
    """Demo task that declares "raw"/"processed" input and "sink"/"processed" output.

    It is the only task in this notebook that lists "sink" as an output type;
    presumably "sink" marks a valid pipeline end (confirm in pypekit docs).
    """

    input_types = ["raw", "processed"]
    output_types = ["sink", "processed"]

    def run(self, _) -> str:
        """Print a trace line and return the placeholder string ``"output"``.

        The single positional argument is ignored.
        """
        # Plain literal: the message has no placeholders, so no f-string is needed.
        print("Running Classifier")
        return "output"

# Register one loader, two processors and two classifiers, each under a
# unique task name.
repository = Repository(
    [
        ("data_loader", DataLoader()),
        ("processor_1", Processor()),
        ("processor_2", Processor()),
        ("classifier_1", Classifier()),
        ("classifier_2", Classifier()),
    ]
)

# build_pipelines() returns a mapping; print each pipeline it contains.
pipeline_dict = repository.build_pipelines()
for pipeline in pipeline_dict.values():
    print(pipeline)

# Execute all pipelines with the caching executor.
executor = CachedExecutor(pipeline_dict, verbose=True)
executor.run()

# Last expression of the cell, so the notebook displays the results mapping.
executor.results

Pipeline(id=b30c3f44e0364eb1a2cbe015e8793f7f, tasks=['data_loader', 'processor_1', 'processor_2', 'classifier_1'])
Pipeline(id=19c24dd370854609aacdc680aadb116f, tasks=['data_loader', 'processor_1', 'processor_2', 'classifier_1', 'classifier_2'])
Pipeline(id=cd3cabd994a9453786d6eb916b25825e, tasks=['data_loader', 'processor_1', 'processor_2', 'classifier_2'])
Pipeline(id=41128882c4dd417ca0ee6254b03fe6db, tasks=['data_loader', 'processor_1', 'processor_2', 'classifier_2', 'classifier_1'])
Pipeline(id=555e70f5a04f4b318acc4b76a8f72c45, tasks=['data_loader', 'processor_1', 'classifier_1'])
Pipeline(id=763677e34a2f498e981cd989046f8897, tasks=['data_loader', 'processor_1', 'classifier_1', 'processor_2', 'classifier_2'])
Pipeline(id=4b13161dde754f9f8ffc0d25efb73e4b, tasks=['data_loader', 'processor_1', 'classifier_1', 'classifier_2'])
Pipeline(id=d138b9f45463493eb2047471673580b3, tasks=['data_loader', 'processor_1', 'classifier_2'])
Pipeline(id=6111fd4163324586aacba13fb2da592e, tasks=['data_lo

{'b30c3f44e0364eb1a2cbe015e8793f7f': {'pipeline_id': 'b30c3f44e0364eb1a2cbe015e8793f7f',
  'output': 'output',
  'runtime': 3.3500000000075136e-05,
  'tasks': ['data_loader', 'processor_1', 'processor_2', 'classifier_1']},
 '19c24dd370854609aacdc680aadb116f': {'pipeline_id': '19c24dd370854609aacdc680aadb116f',
  'output': 'output',
  'runtime': 3.9100000000069635e-05,
  'tasks': ['data_loader',
   'processor_1',
   'processor_2',
   'classifier_1',
   'classifier_2']},
 'cd3cabd994a9453786d6eb916b25825e': {'pipeline_id': 'cd3cabd994a9453786d6eb916b25825e',
  'output': 'output',
  'runtime': 3.3200000000011e-05,
  'tasks': ['data_loader', 'processor_1', 'processor_2', 'classifier_2']},
 '41128882c4dd417ca0ee6254b03fe6db': {'pipeline_id': '41128882c4dd417ca0ee6254b03fe6db',
  'output': 'output',
  'runtime': 3.790000000003513e-05,
  'tasks': ['data_loader',
   'processor_1',
   'processor_2',
   'classifier_2',
   'classifier_1']},
 '555e70f5a04f4b318acc4b76a8f72c45': {'pipeline_id': '55

In [2]:
# Number of instances registered per task class (3 * n tasks in total).
n = 4

repository = Repository(
    [(f"data_loader_{i}", DataLoader()) for i in range(n)]
    + [(f"processor_{i}", Processor()) for i in range(n)]
    + [(f"classifier_{i}", Classifier()) for i in range(n)]
)

# perf_counter() is monotonic and high-resolution, so the measured interval
# cannot be skewed by system clock adjustments the way time() can.
# `time` stays imported in case other cells still call it.
from time import perf_counter, time
start = perf_counter()
pipelines = repository.build_pipelines()
print(f"Built {len(pipelines)} pipelines in {perf_counter() - start:.2f} seconds")

Built 219200 pipelines in 2.85 seconds


In [3]:
# Assemble a pipeline by hand from an explicit, ordered list of
# (name, task) pairs instead of letting a Repository enumerate them.
pipeline = Pipeline(
    [
        ("data_loader", DataLoader()),
        ("processor_1", Processor()),
        ("processor_2", Processor()),
        ("classifier_1", Classifier()),
        ("classifier_2", Classifier()),
    ]
)
# Bare expression: the notebook displays the pipeline's repr.
pipeline

Pipeline(id=8975dd906cb24f51ae51b8d5486fc94f, tasks=['data_loader', 'processor_1', 'processor_2', 'classifier_1', 'classifier_2'])

In [4]:
# Show the string form of the current repository and of the first executor,
# one per line.
print(repository, executor, sep="\n")

Repository(tasks=['data_loader_0', 'data_loader_1', 'data_loader_2', 'data_loader_3', 'processor_0', 'processor_1', 'processor_2', 'processor_3', 'classifier_0', 'classifier_1', 'classifier_2', 'classifier_3'], pipelines=219200)
CachedExecutor(pipelines=32)


In [5]:
# Second executor over the same pipelines, handed the first executor's cache.
# NOTE(review): presumably this lets cached task outputs be reused instead of
# re-running the tasks — confirm against pypekit's CachedExecutor docs.
executor2 = CachedExecutor(
    pipeline_dict,
    cache=executor.cache,
    verbose=True,
)
# Trailing semicolon suppresses the notebook's display of run()'s return value.
executor2.run();

Pipeline b30c3f44e0364eb1a2cbe015e8793f7f completed.
Pipeline 19c24dd370854609aacdc680aadb116f completed.
Pipeline cd3cabd994a9453786d6eb916b25825e completed.
Pipeline 41128882c4dd417ca0ee6254b03fe6db completed.
Pipeline 555e70f5a04f4b318acc4b76a8f72c45 completed.
Pipeline 763677e34a2f498e981cd989046f8897 completed.
Pipeline 4b13161dde754f9f8ffc0d25efb73e4b completed.
Pipeline d138b9f45463493eb2047471673580b3 completed.
Pipeline 6111fd4163324586aacba13fb2da592e completed.
Pipeline b5bbbc5702f34480929abe6022acea95 completed.
Pipeline 98ca0542223144388503e18735fd7bc7 completed.
Pipeline a41e1133ed234994983b202e0446025f completed.
Pipeline a75bd2294473433aa9f9c88b51d664d6 completed.
Pipeline f4a99535f6d243b1bc2b7768b2957f5e completed.
Pipeline 41b1e384e2074b70a8da3f1a9c6b0aaf completed.
Pipeline 6e029851dd314256b39f27c623250eff completed.
Pipeline 4e99894d042e4d96bf6fddd0bdf4d2bc completed.
Pipeline 5b3b6642976746ecbefcac713bc0037b completed.
Pipeline e32220921c504477a53f8f63090a02cb comp

In [6]:
# Register an existing Pipeline object alongside plain tasks.
repository = Repository([
    ("data_loader", DataLoader()),
    ("processor_1", Processor()),
    ("pipeline", pipeline),
    ("classifier_1", Classifier()),
    ("classifier_2", Classifier()),
])
pipeline_dict = repository.build_pipelines()
# Use a distinct loop variable so the global `pipeline` registered above is
# not rebound; otherwise re-running this cell would register the last pipeline
# printed here instead of the original object.
for built_pipeline in pipeline_dict.values():
    print(built_pipeline)


Pipeline(id=e309286563574e3b86727e293a4eb8bf, tasks=['data_loader', 'processor_1', 'classifier_1'])
Pipeline(id=26950be8752245aa97be3854bc47c9ce, tasks=['data_loader', 'processor_1', 'classifier_1', 'classifier_2'])
Pipeline(id=521a87bda75f48378f4766e266d1de27, tasks=['data_loader', 'processor_1', 'classifier_2'])
Pipeline(id=8c20fc91a49244b8822562584efbb765, tasks=['data_loader', 'processor_1', 'classifier_2', 'classifier_1'])
Pipeline(id=fa53b49140d04388be6b712f091d00a5, tasks=['data_loader', 'classifier_1'])
Pipeline(id=57511eef01804b7cb62f83cca3affe98, tasks=['data_loader', 'classifier_1', 'processor_1', 'classifier_2'])
Pipeline(id=ccdc5cf0ddae49898f14df082c85da09, tasks=['data_loader', 'classifier_1', 'classifier_2'])
Pipeline(id=49d569340b394b34b8dd2a9351e51cc8, tasks=['data_loader', 'classifier_2'])
Pipeline(id=b78d3f0057a84ec6b9e88e8afb460e44, tasks=['data_loader', 'classifier_2', 'processor_1', 'classifier_1'])
Pipeline(id=ba1193acee584b24bb8de607959847d7, tasks=['data_loader