# Basic Example

In [1]:
from pypekit import Task, Repository, CachedExecutor, Pipeline

class DataLoader(Task):
    """Toy entry-point task for the example: declared as "source" -> "raw"."""

    # Type tags for this task. Presumably Repository.build_pipelines() uses
    # them to decide which tasks may follow one another — confirm in pypekit.
    input_types = ["source"]
    output_types = ["raw"]

    def run(self, _):
        """Announce that the task ran and return a placeholder result.

        The single positional argument is ignored (hence the name ``_``).

        Returns:
            The constant string ``"output"``.
        """
        # Plain string literal: there is nothing to interpolate, so no f-prefix.
        print("Running DataLoader")
        return "output"

class Processor(Task):
    """Toy intermediate task for the example: "raw" or "processed" -> "processed"."""

    # "processed" appears in both lists, so a Processor declares that it can
    # consume the kind of value it produces. Presumably this is what lets two
    # processors be chained back to back — confirm against pypekit's matching rules.
    input_types = ["raw", "processed"]
    output_types = ["processed"]

    def run(self, _):
        """Announce that the task ran and return a placeholder result.

        The single positional argument is ignored (hence the name ``_``).

        Returns:
            The constant string ``"output"``.
        """
        # Plain string literal: there is nothing to interpolate, so no f-prefix.
        print("Running Processor")
        return "output"

class Classifier(Task):
    """Toy terminal task for the example: declared as "processed" -> "sink"."""

    # Type tags for this task. Presumably "sink" marks the end of a pipeline
    # for Repository.build_pipelines() — confirm in pypekit.
    input_types = ["processed"]
    output_types = ["sink"]

    def run(self, _):
        """Announce that the task ran and return a placeholder result.

        The single positional argument is ignored (hence the name ``_``).

        Returns:
            The constant string ``"output"``.
        """
        # Plain string literal: there is nothing to interpolate, so no f-prefix.
        print("Running Classifier")
        return "output"

# Register each task instance under its own name. Processor and Classifier are
# each instantiated twice and registered as separate entries.
repository = Repository([
    ("data_loader", DataLoader()),
    ("processor_1", Processor()),
    ("processor_2", Processor()),
    ("classifier_1", Classifier()),
    ("classifier_2", Classifier()),
])
# build_pipelines() returns a mapping (its .values() are iterated below);
# `pipeline_dict` is reused by the executor cells further down.
pipeline_dict = repository.build_pipelines()
# Print one line per generated pipeline (its id and ordered task names).
for p in pipeline_dict.values():
    print(p)

Pipeline(id=156fa02e87a143c2b2f05415d8caaa38, tasks=['data_loader', 'processor_1', 'processor_2', 'classifier_1'])
Pipeline(id=e49e46649b89496db0208e835b198f9b, tasks=['data_loader', 'processor_1', 'processor_2', 'classifier_2'])
Pipeline(id=ad7829cc92964286b33545281314c8dc, tasks=['data_loader', 'processor_1', 'classifier_1'])
Pipeline(id=6c28d3ea887d4d18b5754fffad9c49b7, tasks=['data_loader', 'processor_1', 'classifier_2'])
Pipeline(id=0ae5e9b6441844439b2b375c96040827, tasks=['data_loader', 'processor_2', 'processor_1', 'classifier_1'])
Pipeline(id=2eb9025d2626413495314ca4dbc167e2, tasks=['data_loader', 'processor_2', 'processor_1', 'classifier_2'])
Pipeline(id=79c7dc52ac594306adfa5490426a73d9, tasks=['data_loader', 'processor_2', 'classifier_1'])
Pipeline(id=a85c94f0f77e48feab4b4c109cb10ed5, tasks=['data_loader', 'processor_2', 'classifier_2'])


In [2]:
# Run every pipeline in `pipeline_dict` through a CachedExecutor.
# verbose=True presumably enables the per-pipeline "Ran pipeline ..." progress
# lines seen in this cell's output — confirm in pypekit.
executor = CachedExecutor(pipeline_dict, verbose=True)
# `results` is a mapping; its values are inspected in the next cell.
results = executor.run()

Running DataLoader
Running Processor
Running Processor
Running Classifier
Ran pipeline 156fa02e87a143c2b2f05415d8caaa38. Runtime: 0.00s. 1/8 pipelines completed.
Running Classifier
Ran pipeline e49e46649b89496db0208e835b198f9b. Runtime: 0.00s. 2/8 pipelines completed.
Running Classifier
Ran pipeline ad7829cc92964286b33545281314c8dc. Runtime: 0.00s. 3/8 pipelines completed.
Running Classifier
Ran pipeline 6c28d3ea887d4d18b5754fffad9c49b7. Runtime: 0.00s. 4/8 pipelines completed.
Running Processor
Running Processor
Running Classifier
Ran pipeline 0ae5e9b6441844439b2b375c96040827. Runtime: 0.00s. 5/8 pipelines completed.
Running Classifier
Ran pipeline 2eb9025d2626413495314ca4dbc167e2. Runtime: 0.00s. 6/8 pipelines completed.
Running Classifier
Ran pipeline 79c7dc52ac594306adfa5490426a73d9. Runtime: 0.00s. 7/8 pipelines completed.
Running Classifier
Ran pipeline a85c94f0f77e48feab4b4c109cb10ed5. Runtime: 0.00s. 8/8 pipelines completed.


In [3]:
# Print one result record per pipeline. In the captured output each record is
# a dict with the keys 'pipeline_id', 'output', 'runtime' and 'tasks'.
for r in results.values():
    print(r)

{'pipeline_id': '156fa02e87a143c2b2f05415d8caaa38', 'output': 'output', 'runtime': 0.00011470000000002312, 'tasks': ['data_loader', 'processor_1', 'processor_2', 'classifier_1']}
{'pipeline_id': 'e49e46649b89496db0208e835b198f9b', 'output': 'output', 'runtime': 0.00011230000000006513, 'tasks': ['data_loader', 'processor_1', 'processor_2', 'classifier_2']}
{'pipeline_id': 'ad7829cc92964286b33545281314c8dc', 'output': 'output', 'runtime': 8.729999999990135e-05, 'tasks': ['data_loader', 'processor_1', 'classifier_1']}
{'pipeline_id': '6c28d3ea887d4d18b5754fffad9c49b7', 'output': 'output', 'runtime': 8.779999999997123e-05, 'tasks': ['data_loader', 'processor_1', 'classifier_2']}
{'pipeline_id': '0ae5e9b6441844439b2b375c96040827', 'output': 'output', 'runtime': 7.61000000001344e-05, 'tasks': ['data_loader', 'processor_2', 'processor_1', 'classifier_1']}
{'pipeline_id': '2eb9025d2626413495314ca4dbc167e2', 'output': 'output', 'runtime': 7.680000000009901e-05, 'tasks': ['data_loader', 'process

# Reusing Cache

In [5]:
# Build a second executor over the same pipelines, handing it the cache of the
# executor that already ran. In the captured output no "Running ..." line
# appears, i.e. no task's run() printed anything during this second run.
new_executor = CachedExecutor(pipeline_dict, cache=executor.cache, verbose=True)
# Trailing ';' keeps Jupyter from echoing the return value of run().
new_executor.run();

Ran pipeline 156fa02e87a143c2b2f05415d8caaa38. Runtime: 0.00s. 1/8 pipelines completed.
Ran pipeline e49e46649b89496db0208e835b198f9b. Runtime: 0.00s. 2/8 pipelines completed.
Ran pipeline ad7829cc92964286b33545281314c8dc. Runtime: 0.00s. 3/8 pipelines completed.
Ran pipeline 6c28d3ea887d4d18b5754fffad9c49b7. Runtime: 0.00s. 4/8 pipelines completed.
Ran pipeline 0ae5e9b6441844439b2b375c96040827. Runtime: 0.00s. 5/8 pipelines completed.
Ran pipeline 2eb9025d2626413495314ca4dbc167e2. Runtime: 0.00s. 6/8 pipelines completed.
Ran pipeline 79c7dc52ac594306adfa5490426a73d9. Runtime: 0.00s. 7/8 pipelines completed.
Ran pipeline a85c94f0f77e48feab4b4c109cb10ed5. Runtime: 0.00s. 8/8 pipelines completed.


# Custom Pipelines

In [8]:
# Build a pipeline by hand from (name, task) pairs instead of generating it
# from a Repository.
pipeline = Pipeline([
    ("processor_1", Processor()),
    ("processor_2", Processor()),
])
# Bare last expression: Jupyter displays the pipeline's repr as the cell output.
pipeline

Pipeline(id=1cecd257294f48a092c34ad05b72f497, tasks=['processor_1', 'processor_2'])

# Pipelines as Tasks

In [9]:
# Register the hand-built `pipeline` alongside ordinary tasks: it is passed to
# Repository in the same (name, task) form as the Task instances.
repository = Repository([
    ("data_loader", DataLoader()),
    ("processor_1", Processor()),
    ("pipeline", pipeline),
    ("classifier_1", Classifier()),
    ("classifier_2", Classifier()),
])
pipeline_dict = repository.build_pipelines()
# Use a loop name distinct from `pipeline`: a `for` target stays bound after
# the loop, so reusing `pipeline` would replace the custom pipeline with the
# last generated one and make this cell behave differently when re-run.
for built_pipeline in pipeline_dict.values():
    print(built_pipeline)


Pipeline(id=8e71b22fbcfe4a1eab90873b59fe0767, tasks=['data_loader', 'processor_1', 'pipeline', 'classifier_1'])
Pipeline(id=484685e395734b1b9c7f884c7efd47e3, tasks=['data_loader', 'processor_1', 'pipeline', 'classifier_2'])
Pipeline(id=c1caa1847d624344be609e049392a9fc, tasks=['data_loader', 'processor_1', 'classifier_1'])
Pipeline(id=bf6074364986466e9296aafd67812295, tasks=['data_loader', 'processor_1', 'classifier_2'])
Pipeline(id=9c823d172b944367aec1e32a63c78db6, tasks=['data_loader', 'pipeline', 'processor_1', 'classifier_1'])
Pipeline(id=b1c079b8031441a7b1d95b669c8e6386, tasks=['data_loader', 'pipeline', 'processor_1', 'classifier_2'])
Pipeline(id=1316e597d0e14868a57dc96f720b6c5e, tasks=['data_loader', 'pipeline', 'classifier_1'])
Pipeline(id=aacbb7660b324f0a82708b92144b25b9, tasks=['data_loader', 'pipeline', 'classifier_2'])
