# 03 Building A Pipeline From Zero

This notebook builds the same scaffold used in all templates.

In [None]:
from dataclasses import dataclass, field
from typing import Any, Protocol

@dataclass
class PipelineContext:
    """State object handed from one pipeline step to the next.

    Only ``config`` must be supplied; ``artifacts`` and ``metrics`` each
    default to a new empty dict per instance.
    """

    # Run configuration supplied by the caller.
    config: dict[str, Any]

    # Named outputs recorded by steps.
    artifacts: dict[str, Any] = field(default_factory=dict)

    # Named float measurements recorded by steps.
    metrics: dict[str, float] = field(default_factory=dict)

class PipelineStep(Protocol):
    """Structural type for a pipeline step.

    Any object with a ``name`` string and a ``run`` method that takes a
    ``PipelineContext`` and returns one matches this protocol.
    """

    name: str

    def run(self, ctx: PipelineContext) -> PipelineContext:
        """Take the current context and return the context for the next step."""
        ...

class AddArtifactStep:
    """Demo step that records a fixed result path as an artifact."""

    name = "add_artifact"

    def run(self, ctx: PipelineContext) -> PipelineContext:
        """Store the demo path under the ``"result"`` artifact key.

        The context is modified in place and the same object is returned.
        """
        artifacts = ctx.artifacts
        artifacts["result"] = "outputs/demo/result.txt"
        return ctx

class AddMetricsStep:
    """Demo step that records a fixed ``"steps"`` metric."""

    name = "add_metrics"

    def run(self, ctx: PipelineContext) -> PipelineContext:
        """Set the ``"steps"`` metric to ``2.0`` and return the same context."""
        metrics = ctx.metrics
        # The value is a hard-coded constant, not computed from the pipeline.
        metrics["steps"] = 2.0
        return ctx

def run_pipeline(config: dict[str, Any], steps: list[PipelineStep]) -> PipelineContext:
    """Run ``steps`` in order, threading a single context through them.

    A new ``PipelineContext`` is built from ``config``. Each step's ``run``
    receives whatever the previous step returned, and the value returned by
    the last step is the result. With no steps, the fresh context is returned.
    """
    context = PipelineContext(config=config)
    for stage in steps:
        # Rebind rather than assume in-place mutation: a step may return a new object.
        context = stage.run(context)
    return context

# Demo run: the artifact step first, then the metrics step.
run_pipeline(
    config={"run_name": "demo"},
    steps=[AddArtifactStep(), AddMetricsStep()],
)

## Adaptation practice
Apply the same step pattern in three settings: an app, a library utility, and a research notebook flow.
Keep the contract (`run(ctx) -> ctx`) unchanged.

## Practice Lab

Add one new step that updates metrics and run the pipeline with three steps.


In [None]:
from dataclasses import dataclass, field
from typing import Any, Protocol

@dataclass
class Ctx:
    """Context object passed between the practice pipeline's steps.

    ``config`` is required; ``artifacts`` and ``metrics`` each start as a
    new empty dict for every instance.
    """

    # Run configuration for this pipeline invocation.
    config: dict[str, Any]

    # Named outputs written by steps.
    artifacts: dict[str, Any] = field(default_factory=dict)

    # Named float measurements written by steps.
    metrics: dict[str, float] = field(default_factory=dict)

class Step(Protocol):
    """Structural type for a practice pipeline step.

    Matches any object exposing a ``name`` string and a ``run`` method that
    accepts a ``Ctx`` and returns a ``Ctx``.
    """

    name: str

    def run(self, ctx: Ctx) -> Ctx:
        """Take the current context and return the context for the next step."""
        ...

class AddArtifact:
    """Practice step that records a fixed result path as an artifact."""

    name = "artifact"

    def run(self, ctx: Ctx) -> Ctx:
        """Store the practice path under the ``"path"`` artifact key.

        The context is modified in place and the same object is returned.
        """
        artifacts = ctx.artifacts
        artifacts["path"] = "outputs/practice/result.txt"
        return ctx

class AddMetric:
    """Practice step that records a fixed ``"quality"`` metric."""

    name = "metric"

    def run(self, ctx: Ctx) -> Ctx:
        """Set the ``"quality"`` metric to ``0.9`` and return the same context."""
        metrics = ctx.metrics
        # Placeholder constant; nothing is measured here.
        metrics["quality"] = 0.9
        return ctx

class AddDuration:
    """Practice step that records a fixed ``"duration_seconds"`` metric."""

    name = "duration"

    def run(self, ctx: Ctx) -> Ctx:
        """Set ``"duration_seconds"`` to ``0.01`` and return the same context."""
        metrics = ctx.metrics
        # Placeholder constant; no timing is actually performed.
        metrics["duration_seconds"] = 0.01
        return ctx

def run(steps: list[Step]) -> Ctx:
    """Run ``steps`` in order over a fresh practice context.

    The context starts with ``config={"run_name": "practice"}``. Each step's
    ``run`` receives the value returned by the previous step, and the last
    returned value is the result.
    """
    context = Ctx(config={"run_name": "practice"})
    for stage in steps:
        # Rebind so a step is free to return a different context object.
        context = stage.run(context)
    return context

# Practice run with three steps: artifact, quality metric, duration metric.
run(
    steps=[
        AddArtifact(),
        AddMetric(),
        AddDuration(),
    ]
)


## Active Learning Practice

1. Add one new step role from memory.
2. Validate your order with the checker (`validate_order`) by editing `candidate_order` and re-running the cell.
3. Move one step and predict what breaks.
4. State one reason context passing beats global state.


In [None]:
# Step names in the order they are meant to run; edit this list to experiment.
candidate_order = [
    "load_config",
    "build_features",
    "train",
    "evaluate",
    "report",
]

def validate_order(order: list[str]) -> dict[str, bool]:
    """Check a list of step names against three ordering rules.

    Args:
        order: Step names in their proposed execution order.

    Returns:
        A dict with one boolean per rule:

        * ``"starts_with_config"``: the first step is ``"load_config"``.
        * ``"ends_with_report"``: the last step is ``"report"``.
        * ``"has_evaluate_after_train"``: both ``"train"`` and ``"evaluate"``
          are present and the first ``"evaluate"`` comes after the first
          ``"train"``.

        An empty list, or one missing ``"train"`` or ``"evaluate"``, yields
        ``False`` for the affected rules instead of raising.
    """
    try:
        # list.index raises ValueError for a missing name; treat that as a
        # failed check so the checker reports the problem rather than crashing.
        train_at = order.index("train")
        evaluate_at = order.index("evaluate")
    except ValueError:
        evaluate_after_train = False
    else:
        evaluate_after_train = evaluate_at > train_at

    return {
        "starts_with_config": bool(order) and order[0] == "load_config",
        "ends_with_report": bool(order) and order[-1] == "report",
        "has_evaluate_after_train": evaluate_after_train,
    }

# Run the checker on the candidate order and show a summary as the cell result.
checks = validate_order(candidate_order)
{
    "order": candidate_order,
    "checks": checks,
    "all_good": all(passed for passed in checks.values()),
}
