# Pipeline Orchestrator
Execute the end-to-end workflow with Papermill across the modular notebooks.


This notebook stitches together the full pipeline: data preparation, feature engineering, model building, and evaluation.
It demonstrates how Papermill parameters propagate through each stage.


In [None]:
# Default run configuration. The orchestration cell reads every entry through
# params.get(), so any key may be omitted by a caller that overrides `params`.
params = {
    # Dataframe backend settings, forwarded to the data-preparation and
    # feature-engineering notebooks.
    'engine': 'pandas',
    'modin_engine': 'ray',

    # Source dataset handed to the data-preparation notebook.
    'dataset_url': 'https://raw.githubusercontent.com/datasciencedojo/datasets/master/titanic.csv',

    # Executed notebook copies are written here (resolved against the CWD).
    'run_directory': 'runs/latest',
    # Base directory for the data/ and models/ artifact paths.
    'project_root': '..',

    # Split settings for the feature-engineering notebook; random_state is
    # also forwarded to the model-building notebook.
    'test_size': 0.2,
    'random_state': 42,

    # Search budget forwarded to the model-building notebook. A timeout of
    # None (or the string 'None') is passed on as None.
    'n_trials': 20,
    'timeout': None,
}


In [None]:
from pathlib import Path
import json

import papermill as pm

# --- Unpack the run configuration ------------------------------------------
# Backend selection is passed through unchanged; dataset_url is None when the
# key is absent.
engine, modin_engine = params.get('engine', 'pandas'), params.get('modin_engine', 'ray')
dataset_url = params.get('dataset_url')

# Both locations become absolute paths. A relative run_directory is anchored
# at the current working directory; an absolute one is kept as given.
run_directory = (Path.cwd() / Path(params.get('run_directory', 'runs/latest'))).resolve()
project_root = Path(params.get('project_root', '..')).resolve()

# Numeric settings are coerced explicitly, so string values are accepted too.
test_size = float(params.get('test_size', 0.2))
random_state = int(params.get('random_state', 42))
n_trials = int(params.get('n_trials', 20))

# A missing timeout, None, or the literal string 'None' all mean "no timeout";
# anything else must be convertible to float.
timeout = params.get('timeout')
if timeout in (None, 'None'):
    timeout = None
else:
    timeout = float(timeout)

# Create the run directory only after every setting has parsed successfully.
run_directory.mkdir(parents=True, exist_ok=True)

# --- Artifact locations -----------------------------------------------------
# Every artifact lives under project_root, split between data/ and models/.
data_dir = project_root / 'data'
models_dir = project_root / 'models'

# Data-preparation and feature-engineering artifacts.
processed_path = data_dir / 'processed.csv'
summary_path = data_dir / 'data_prep_summary.json'
train_path = data_dir / 'train_features.csv'
test_path = data_dir / 'test_features.csv'
metadata_path = data_dir / 'feature_metadata.json'

# Model-building and evaluation artifacts.
model_path = models_dir / 'random_forest.pkl'
study_path = models_dir / 'optuna_trials.csv'
best_params_path = models_dir / 'best_params.json'
metrics_path = models_dir / 'metrics.json'

# --- Execute the pipeline ---------------------------------------------------
# Ordered stage table: (input notebook, parameters injected by Papermill).
# Input notebooks are referenced by bare file name, i.e. relative to the
# current working directory.
pipeline_stages = [
    (
        '01_data_preparation.ipynb',
        {
            'engine': engine,
            'modin_engine': modin_engine,
            'dataset_url': dataset_url,
            'output_path': str(processed_path),
            'summary_path': str(summary_path),
        },
    ),
    (
        '02_feature_engineering.ipynb',
        {
            'engine': engine,
            'modin_engine': modin_engine,
            'input_path': str(processed_path),
            'train_output_path': str(train_path),
            'test_output_path': str(test_path),
            'feature_metadata_path': str(metadata_path),
            'target_column': 'Survived',
            'test_size': test_size,
            'random_state': random_state,
        },
    ),
    (
        '03_model_building.ipynb',
        {
            'train_path': str(train_path),
            'feature_metadata_path': str(metadata_path),
            'model_output_path': str(model_path),
            'study_output_path': str(study_path),
            'best_params_path': str(best_params_path),
            'n_trials': n_trials,
            'timeout': timeout,
            'random_state': random_state,
        },
    ),
    (
        '04_model_evaluation.ipynb',
        {
            'test_path': str(test_path),
            'feature_metadata_path': str(metadata_path),
            'model_path': str(model_path),
            'metrics_output_path': str(metrics_path),
        },
    ),
]

executed_notebooks = []
for notebook_name, stage_parameters in pipeline_stages:
    # The executed copy keeps the input's file name inside the run directory.
    executed_copy = str(run_directory / notebook_name)
    pm.execute_notebook(
        notebook_name,
        executed_copy,
        parameters=stage_parameters,
    )
    # Appended only after the call returns, so the list holds the stages whose
    # execute_notebook call came back without raising.
    executed_notebooks.append(executed_copy)

# Summary of the run: backend settings, executed notebook copies, and the
# locations of the main artifacts.
run_summary = {
    'engine': engine,
    'modin_engine': modin_engine,
    'executed_notebooks': executed_notebooks,
    'run_directory': str(run_directory),
    'outputs': {
        'processed_data': str(processed_path),
        'train_data': str(train_path),
        'test_data': str(test_path),
        'model': str(model_path),
        'metrics': str(metrics_path),
    },
}

# Left as the cell's final bare expression so the notebook shows it as output.
run_summary
