# Basic Example

In [1]:
from pypekit import Task, Repository, CachedExecutor, Pipeline

class DataLoader(Task):
    """Example task that declares input type "source" and output type "raw"."""

    input_types = ["source"]
    output_types = ["raw"]

    def run(self, _):
        """Announce that the task ran and return a placeholder result.

        The single positional argument is ignored.

        Returns:
            str: The literal ``"output"``.
        """
        # Plain literal: nothing is interpolated, so no f-string prefix is needed.
        print("Running DataLoader")
        return "output"

class Processor(Task):
    """Example task that declares input types "raw" and "processed" and output type "processed".

    Because "processed" appears on both sides, the generated pipelines shown in
    this notebook contain Processor tasks placed one after another.
    """

    input_types = ["raw", "processed"]
    output_types = ["processed"]

    def run(self, _):
        """Announce that the task ran and return a placeholder result.

        The single positional argument is ignored.

        Returns:
            str: The literal ``"output"``.
        """
        # Plain literal: nothing is interpolated, so no f-string prefix is needed.
        print("Running Processor")
        return "output"

class Classifier(Task):
    """Example task that declares input type "processed" and output type "sink"."""

    input_types = ["processed"]
    output_types = ["sink"]

    def run(self, _):
        """Announce that the task ran and return a placeholder result.

        The single positional argument is ignored.

        Returns:
            str: The literal ``"output"``.
        """
        # Plain literal: nothing is interpolated, so no f-string prefix is needed.
        print("Running Classifier")
        return "output"

# Register five task instances under unique names. The two Processor and the two
# Classifier entries are separate instances of the same class.
repository = Repository([
    ("data_loader", DataLoader()),
    ("processor_1", Processor()),
    ("processor_2", Processor()),
    ("classifier_1", Classifier()),
    ("classifier_2", Classifier()),
])
# build_pipelines() returns a mapping whose values are Pipeline objects.
# NOTE(review): presumably tasks are chained by matching output_types to
# input_types ("source" ... "sink") -- confirm against the pypekit docs.
pipeline_dict = repository.build_pipelines()
# Print one line per generated pipeline (its id and ordered task names).
for p in pipeline_dict.values():
    print(p)

Pipeline(id=851f7a068beb44ebb7e1ca5bed590ccf, tasks=['data_loader', 'processor_1', 'processor_2', 'classifier_1'])
Pipeline(id=cac1f35a60734e38a2a1b6a9215271a0, tasks=['data_loader', 'processor_1', 'processor_2', 'classifier_2'])
Pipeline(id=d8e0b0d74bb64b1883b304ea4cc2959b, tasks=['data_loader', 'processor_1', 'classifier_1'])
Pipeline(id=bd6e2124971d4c1fb1d0794ad86eebde, tasks=['data_loader', 'processor_1', 'classifier_2'])
Pipeline(id=4f7780626741448a987f52159c350510, tasks=['data_loader', 'processor_2', 'processor_1', 'classifier_1'])
Pipeline(id=735dc9ba9fb147b09c5be710c5c74eed, tasks=['data_loader', 'processor_2', 'processor_1', 'classifier_2'])
Pipeline(id=7f66a8d8f8124d4c88e33d34bb9ac7df, tasks=['data_loader', 'processor_2', 'classifier_1'])
Pipeline(id=fee76cda622849ff89d5f1d000a55418, tasks=['data_loader', 'processor_2', 'classifier_2'])


In [2]:
# Execute every generated pipeline. In the recorded output below, "Running DataLoader"
# is printed only once even though all eight pipelines start with data_loader.
# NOTE(review): verbose=True is presumably what emits the "Ran pipeline ..." progress lines.
executor = CachedExecutor(pipeline_dict, verbose=True)
# `results` is a mapping; the next cell iterates over its values.
results = executor.run()

Running DataLoader
Running Processor
Running Processor
Running Classifier
Ran pipeline 851f7a068beb44ebb7e1ca5bed590ccf. Runtime: 0.00s. 1/8 pipelines completed.
Running Classifier
Ran pipeline cac1f35a60734e38a2a1b6a9215271a0. Runtime: 0.00s. 2/8 pipelines completed.
Running Classifier
Ran pipeline d8e0b0d74bb64b1883b304ea4cc2959b. Runtime: 0.00s. 3/8 pipelines completed.
Running Classifier
Ran pipeline bd6e2124971d4c1fb1d0794ad86eebde. Runtime: 0.00s. 4/8 pipelines completed.
Running Processor
Running Processor
Running Classifier
Ran pipeline 4f7780626741448a987f52159c350510. Runtime: 0.00s. 5/8 pipelines completed.
Running Classifier
Ran pipeline 735dc9ba9fb147b09c5be710c5c74eed. Runtime: 0.00s. 6/8 pipelines completed.
Running Classifier
Ran pipeline 7f66a8d8f8124d4c88e33d34bb9ac7df. Runtime: 0.00s. 7/8 pipelines completed.
Running Classifier
Ran pipeline fee76cda622849ff89d5f1d000a55418. Runtime: 0.00s. 8/8 pipelines completed.


In [3]:
# Each result printed below is a dict with the keys
# 'pipeline_id', 'output', 'runtime' and 'tasks'.
for r in results.values():
    print(r)

{'pipeline_id': '851f7a068beb44ebb7e1ca5bed590ccf', 'output': 'output', 'runtime': 0.00010149999999997661, 'tasks': ['data_loader', 'processor_1', 'processor_2', 'classifier_1']}
{'pipeline_id': 'cac1f35a60734e38a2a1b6a9215271a0', 'output': 'output', 'runtime': 0.0001006999999999536, 'tasks': ['data_loader', 'processor_1', 'processor_2', 'classifier_2']}
{'pipeline_id': 'd8e0b0d74bb64b1883b304ea4cc2959b', 'output': 'output', 'runtime': 9.860000000005975e-05, 'tasks': ['data_loader', 'processor_1', 'classifier_1']}
{'pipeline_id': 'bd6e2124971d4c1fb1d0794ad86eebde', 'output': 'output', 'runtime': 9.230000000004512e-05, 'tasks': ['data_loader', 'processor_1', 'classifier_2']}
{'pipeline_id': '4f7780626741448a987f52159c350510', 'output': 'output', 'runtime': 8.010000000002737e-05, 'tasks': ['data_loader', 'processor_2', 'processor_1', 'classifier_1']}
{'pipeline_id': '735dc9ba9fb147b09c5be710c5c74eed', 'output': 'output', 'runtime': 8.039999999998049e-05, 'tasks': ['data_loader', 'processor_2', 'processor_1', 'classifier_2']}

# Reusing Cache

In [4]:
# Hand the first executor's cache to a fresh executor over the same pipelines.
# In the recorded output below, no task prints its "Running ..." line again.
new_executor = CachedExecutor(pipeline_dict, cache=executor.cache, verbose=True)
# The trailing semicolon keeps the notebook from echoing run()'s return value.
new_executor.run();

Ran pipeline 851f7a068beb44ebb7e1ca5bed590ccf. Runtime: 0.00s. 1/8 pipelines completed.
Ran pipeline cac1f35a60734e38a2a1b6a9215271a0. Runtime: 0.00s. 2/8 pipelines completed.
Ran pipeline d8e0b0d74bb64b1883b304ea4cc2959b. Runtime: 0.00s. 3/8 pipelines completed.
Ran pipeline bd6e2124971d4c1fb1d0794ad86eebde. Runtime: 0.00s. 4/8 pipelines completed.
Ran pipeline 4f7780626741448a987f52159c350510. Runtime: 0.00s. 5/8 pipelines completed.
Ran pipeline 735dc9ba9fb147b09c5be710c5c74eed. Runtime: 0.00s. 6/8 pipelines completed.
Ran pipeline 7f66a8d8f8124d4c88e33d34bb9ac7df. Runtime: 0.00s. 7/8 pipelines completed.
Ran pipeline fee76cda622849ff89d5f1d000a55418. Runtime: 0.00s. 8/8 pipelines completed.


# Custom Pipelines

In [5]:
# Assemble a pipeline by hand from (name, task) pairs instead of having a
# Repository generate it.
pipeline = Pipeline([
    ("processor_1", Processor()),
    ("processor_2", Processor()),
])
# A bare name as the cell's last expression displays the object's repr.
pipeline

Pipeline(id=d1567f7a10cf4496bfd55958da6a2d54, tasks=['processor_1', 'processor_2'])

# Pipelines as Tasks

In [6]:
# Register the hand-built `pipeline` next to ordinary tasks; it is passed to the
# Repository as a (name, object) pair exactly like the Task instances.
repository = Repository([
    ("data_loader", DataLoader()),
    ("processor_1", Processor()),
    ("pipeline", pipeline),
    ("classifier_1", Classifier()),
    ("classifier_2", Classifier()),
])
pipeline_dict = repository.build_pipelines()
# Iterate with a throwaway name: looping with `pipeline` would rebind the custom
# pipeline registered above, so re-running this cell would register a different object.
for p in pipeline_dict.values():
    print(p)


Pipeline(id=f67bedf726d74e2f89f36b3ed3c549e3, tasks=['data_loader', 'processor_1', 'pipeline', 'classifier_1'])
Pipeline(id=59b2c38eee81427d97cf1e32ec293510, tasks=['data_loader', 'processor_1', 'pipeline', 'classifier_2'])
Pipeline(id=b5b7acfbfe894299aa7deb912ee6c287, tasks=['data_loader', 'processor_1', 'classifier_1'])
Pipeline(id=e5d5bd0b627e4ae1b149a5ad6406b237, tasks=['data_loader', 'processor_1', 'classifier_2'])
Pipeline(id=b6a6d60757ec4561a66bd07583211726, tasks=['data_loader', 'pipeline', 'processor_1', 'classifier_1'])
Pipeline(id=f4ca7a1f60d646c4bd8d562a33f84a5d, tasks=['data_loader', 'pipeline', 'processor_1', 'classifier_2'])
Pipeline(id=706ed12ffed745b5bb419c8b23666bd3, tasks=['data_loader', 'pipeline', 'classifier_1'])
Pipeline(id=4f7c5662959c49fe917b491edfc64c6d, tasks=['data_loader', 'pipeline', 'classifier_2'])
