# Creating Custom Pipelines

Here we explain how to create custom pipelines in nerfactory. Pipelines are composed of two components, namely a Dataloader and a Model. We'll show how to make an incremental dataloader with multiple scenes, a model with scene conditioning, and importance sampling at pixels with high loss.

The extensible features that we show in this tutorial are the following:
- a Dataloader that incrementally adds cameras
- a Dataloader that uses multiple scenes
- a Model that is conditioned on the scene
- a Dataloader PixelSampler that samples more densely in regions with high loss

Our goal is to enable Nerfactory users to create an entire custom Pipeline by only editing one file.

```text
#TODO(ethan): show a figure depicting the different modules that go into a pipeline
```

In [1]:
# COLLAPSED
%load_ext autoreload
%autoreload 2

from __future__ import annotations

from typing import Dict, Optional, Tuple, List
import torch

from nerfactory.utils.config import DataloaderConfig, ModelConfig, PipelineConfig, ViewerConfig

from nerfactory.dataloaders.base import Dataloader
from nerfactory.dataloaders.structs import DatasetInputs
from nerfactory.models.base import Model
from nerfactory.pipelines.base import Pipeline

from nerfactory.models.instant_ngp import NGPModel
from nerfactory.cameras.rays import RayBundle

### RayBundle class

In the RayBundle class, we allow users to add non-default attributes for conditioning.


In [None]:
class CustomDataloader(Dataloader):
    """A custom dataloader that incrementally adds cameras and has multiple scenes.

    One image sampler is kept per scene listed in ``config.scene_names``. Batches are
    produced by stringing together image sampling, pixel sampling and ray generation.

    NOTE: the incremental camera schedule described by ``steps_per_add_cameras`` and
    ``num_cameras_per_add`` is not implemented in this class yet.
    """

    # Resolved lazily (postponed annotations); the config class is defined below
    # because it has to reference this class in its body.
    config: CustomDataloaderConfig

    def populate_train_modules(self):
        """Populate the train dataloader modules."""
        self.scene_name_to_image_sampler = {}
        self.scene_name_to_collider = {}  # TODO(ethan): move colliders to the dataloader
        for scene_name in self.config.scene_names:
            dataset_inputs: DatasetInputs = None  # TODO
            image_dataset = ImageDataset(dataset_inputs)
            image_sampler = CacheImageSampler(image_dataset=image_dataset)  # a torch dataloader
            self.scene_name_to_image_sampler[scene_name] = image_sampler
        # NOTE(review): these are assigned as classes, not instances -- confirm whether
        # they should be constructed here.
        self.train_pixel_sampler = PixelSampler
        self.train_ray_generator = RayGenerator  # nn.Module

    def populate_eval_modules(self):
        """Populate the eval dataloader modules."""
        # we'll mostly rely on the train dataloader modules
        self.eval_pixel_sampler = EvalPixelSampler  # this will sample entire images

    def sample_images(self) -> List[Image]:
        """Draw one sample from every scene's image sampler.

        Returns:
            One entry per scene, in the iteration order of ``scene_name_to_image_sampler``.
        """
        # note that empty forward calls next()
        return [image_sampler.forward() for image_sampler in self.scene_name_to_image_sampler.values()]

    def next_train(self, step: int) -> Tuple[RayBundle, Dict]:
        """Get the next batch of training data by stringing together the train modules.

        Args:
            step: the current training iteration (currently unused).

        Returns:
            The generated ray bundle and an (currently empty) batch dictionary.
        """
        # grab some images from each scene
        images = self.sample_images()
        pixels = self.train_pixel_sampler.forward(images)
        ray_bundle = self.train_ray_generator(pixels)
        # shape should be (H, W, :)
        ray_bundle.metadata["scene_indices"] = None  # TODO(ethan): populate this with a tensor of scene indices
        dict_ = {}
        return ray_bundle, dict_

    def next_eval(self, step: int) -> Tuple[RayBundle, Dict]:
        """Get the next batch of eval data by stringing together the eval modules.

        Args:
            step: the current iteration (currently unused).

        Returns:
            The generated ray bundle and an (currently empty) batch dictionary.
        """
        # grab some images from each scene
        images = self.sample_images()
        # notice this is the eval pixel sampler and not the train one
        pixels = self.eval_pixel_sampler.forward(images)
        # populate_eval_modules creates no eval-specific ray generator, so the
        # train ray generator is reused here.
        ray_bundle = self.train_ray_generator(pixels)
        # shape should be (H, W, :)
        ray_bundle.metadata["scene_indices"] = None  # TODO(ethan): populate this with a tensor of scene indices
        dict_ = {}
        return ray_bundle, dict_


class CustomDataloaderConfig(DataloaderConfig):
    """Configuration for :class:`CustomDataloader`.

    Defined after the dataloader because ``_target`` is evaluated when this class
    body runs and therefore needs ``CustomDataloader`` to exist already.
    """

    _target = CustomDataloader
    scene_names: List[str] = ["chair", "drums", "fern", "lego"]
    steps_per_add_cameras: int = 100  # every X iterations, add more cameras
    num_cameras_per_add: int = 10  # number of cameras to add


class CustomNGPModel(Model):
    """An instant ngp model modified slightly to output semantics."""

    # Resolved lazily (postponed annotations); the config class is defined below
    # because it has to reference this class in its body.
    config: CustomNGPModelConfig

    def populate_modules(self):
        """Create the wrapped NGP model from the field names given in the config."""
        self.ngp_model = NGPModel(coarse_field=self.config.coarse_field, fine_field=self.config.fine_field)

    def get_outputs(self, ray_bundle: RayBundle) -> Dict[str, torch.Tensor]:
        """Run the wrapped NGP model and attach a placeholder "semantics" output.

        Args:
            ray_bundle: the rays to pass to the wrapped model.

        Returns:
            The wrapped model's outputs plus a "semantics" entry.
        """
        # TODO(ethan): pass in batch from the forward function
        # TODO(ethan): rename batch to something else
        outputs = self.ngp_model.forward(ray_bundle)
        # placeholder: random values with the same shape/dtype as the "rgb" output
        outputs["semantics"] = torch.rand_like(outputs["rgb"])
        return outputs

    def get_loss_dict(self):
        """Return the loss dictionary (currently always empty)."""
        return {}


class CustomNGPModelConfig(ModelConfig):
    """Configuration for :class:`CustomNGPModel`.

    Defined after the model because ``_target`` is evaluated when this class body
    runs and therefore needs ``CustomNGPModel`` to exist already.
    """

    _target = CustomNGPModel
    coarse_field: str = "temp"
    fine_field: str = "temp2"


class CustomPipeline(Pipeline):
    """The Instant NGP pipeline."""

    config: PipelineConfig

    def train_step(self, step: int):
        """Run one training step.

        Args:
            step: the current training iteration, forwarded to the dataloader.

        Returns:
            The ``(model_outputs, loss_dict, metrics_dict)`` tuple produced by the model.
        """
        ray_bundle, batch = self.dataloader.next_train(step=step)
        # TODO: maybe run a CNN on the data before passing into model
        model_outputs, loss_dict, metrics_dict = self.model(ray_bundle, batch)
        # TODO: update pixel sampler state with loss map to show the flexibility of our Pipeline
        # NOTE(review): CustomDataloader in this file exposes `train_pixel_sampler`, not
        # `pixel_sampler` -- confirm the attribute name against the dataloader in use.
        self.dataloader.pixel_sampler.update_loss_map()
        return model_outputs, loss_dict, metrics_dict

    def eval_step(self, step: int = 0):
        """Fetch the next eval batch from the dataloader.

        Args:
            step: the current iteration, forwarded to the dataloader. Defaults to 0
                so that existing zero-argument calls keep working.
        """
        # TODO: run the model on the eval batch; nothing is evaluated or returned yet
        ray_bundle, batch = self.dataloader.next_eval(step=step)

In [None]:
# Build the custom dataloader and model defined above and assemble them into a pipeline.
# NOTE(review): constructor arguments (e.g. configs) are not shown in this tutorial -- confirm.
dataloader = CustomDataloader()
model = CustomNGPModel()
pipeline = CustomPipeline.from_dataloader_and_model(dataloader=dataloader, model=model)

# Creating pipelines from a config

Now we show how to create a pipeline from a config, which has the following form:

```python
@dataclass
class PipelineConfig:
    """Configuration for pipeline instantiation"""

    _target: ClassVar[Type] = Pipeline
    dataloader: DataloaderConfig = MISSING
    model: ModelConfig = MISSING
```

See `nerfactory/utils/config.py` for more details. In this example, we will simply load an existing configuration from `configs/graph_instant_ngp.yaml`.

In [None]:
import pprint
import hydra

# Clear Hydra's global state before initializing it below
# (presumably so that this cell can be re-run in the same kernel -- confirm).
hydra.core.global_hydra.GlobalHydra.instance().clear()
from hydra import compose, initialize

# Initialize Hydra with the config directory (relative path) and compose the named config.
initialize(version_base="1.2", config_path="../configs/")
config_name = "graph_instant_ngp.yaml"
config = compose(config_name)
# Only the `pipeline` section of the composed config is needed here.
pipeline_config = config.pipeline
pprint.pprint(pipeline_config)
print("----------------------------------------------------")

# Alternative (disabled): build the pipeline through the setup_pipeline helper.
# from nerfactory.pipelines.base import setup_pipeline
# pipeline = setup_pipeline(pipeline_config, device="cuda")
# Build the pipeline directly from its config.
pipeline = pipeline_config.setup()

In [None]:
# Call get_train_loss_dict() on the pipeline built in the previous cell; as the last
# expression of a notebook cell, its return value is shown as the cell output.
pipeline.get_train_loss_dict()