# rtpipeline Part 2: CPU Analysis (DVH & Radiomics)

This notebook handles downstream analysis tasks that run efficiently on CPU.

In [None]:
#@title 1. Mount Google Drive
# Attach Google Drive to the Colab filesystem. The data paths configured later
# in this notebook are located underneath this mount point.
from google.colab import drive

DRIVE_MOUNT_POINT = "/content/drive"
drive.mount(DRIVE_MOUNT_POINT)

In [None]:
#@title 2. Install Dependencies
!apt-get update && apt-get install -y dcm2niix
import os
if os.path.exists("/content/rtpipeline"):
    %cd /content/rtpipeline
    !pip install -e .[radiomics]
else:
    !git clone https://github.com/kstawiski/rtpipeline.git
    %cd rtpipeline
    !pip install -e .[radiomics]

In [None]:
#@title 3. Configure Paths
# Must match Part 1
# All three locations live under the Google Drive mount point (/content/drive)
# and are exposed as editable Colab form fields via the `#@param` markers.
# Passed to the rtpipeline commands below as --dicom-root.
INPUT_DIR = "/content/drive/MyDrive/RTPipeline_Data/Input" #@param {type:"string"}
# Passed as --outdir; the robustness cell also scans this folder for course directories.
OUTPUT_DIR = "/content/drive/MyDrive/RTPipeline_Data/Output" #@param {type:"string"}
# Passed as --logs.
LOGS_DIR = "/content/drive/MyDrive/RTPipeline_Data/Logs" #@param {type:"string"}

## 4. Run Analysis Stages

In [None]:
# DVH stage: run the `rtpipeline dvh` subcommand with 4 workers. IPython substitutes the
# Python variables INPUT_DIR, OUTPUT_DIR and LOGS_DIR into the command line before the
# shell executes it; the double quotes keep paths containing spaces intact.
!rtpipeline dvh     --dicom-root "$INPUT_DIR"     --outdir "$OUTPUT_DIR"     --logs "$LOGS_DIR"     --max-workers 4

In [None]:
# Radiomics stage: run the `rtpipeline radiomics` subcommand with 8 workers. IPython
# substitutes the Python variables INPUT_DIR, OUTPUT_DIR and LOGS_DIR into the command
# line before the shell executes it; the double quotes keep paths containing spaces intact.
!rtpipeline radiomics     --dicom-root "$INPUT_DIR"     --outdir "$OUTPUT_DIR"     --logs "$LOGS_DIR"     --max-workers 8

## 5. Radiomics Robustness (Optional)

This stage performs segmentation perturbation to assess feature stability. It can be slow.

In [None]:
#@title Optional: Enable Radiomics Robustness
# Turns on the `radiomics_robustness` section of config.yaml, creating the file
# with a minimal configuration when it does not exist yet.
# NOTE: the file is re-serialised by PyYAML, so comments in an existing
# config.yaml are not preserved.
import os

import yaml

# Relative path: resolved against the kernel's current working directory.
config_path = "config.yaml"


def enable_radiomics_robustness(path):
    """Enable radiomics robustness in the YAML config at *path* and save it.

    If the file exists, its content is loaded, ``radiomics_robustness.enabled``
    is set to ``True`` and ``radiomics_robustness.modes`` defaults to
    ``['segmentation_perturbation']`` when absent; every other setting is left
    untouched and the original key order is kept. If the file does not exist,
    a minimal config containing only the robustness section is written.

    Args:
        path: Location of the YAML config file.

    Returns:
        The configuration dictionary that was written to *path*.

    Raises:
        ValueError: If the file's top level, or its ``radiomics_robustness``
            entry, is present but is not a mapping.
    """
    if os.path.exists(path):
        with open(path, 'r') as f:
            config = yaml.safe_load(f) or {}
        if not isinstance(config, dict):
            raise ValueError(f"{path} must contain a YAML mapping at the top level")

        # A bare `radiomics_robustness:` key parses as None; treat it like a
        # missing section instead of failing on item assignment.
        section = config.get('radiomics_robustness')
        if section is None:
            section = {}
            config['radiomics_robustness'] = section
        elif not isinstance(section, dict):
            raise ValueError(f"'radiomics_robustness' in {path} must be a mapping")

        # Enable robustness
        section['enabled'] = True
        # Default settings if not present
        section.setdefault('modes', ['segmentation_perturbation'])
        created = False
    else:
        print("config.yaml not found, creating basic config with robustness enabled")
        config = {
            'radiomics_robustness': {
                'enabled': True,
                'modes': ['segmentation_perturbation'],
                'segmentation_perturbation': {
                    'intensity': 'standard'
                }
            }
        }
        created = True

    # sort_keys=False keeps the existing key order instead of alphabetising
    # the whole file on every run.
    with open(path, 'w') as f:
        yaml.dump(config, f, sort_keys=False)
    if not created:
        print("Radiomics robustness enabled in config.yaml")
    return config


config = enable_radiomics_robustness(config_path)

In [None]:
#@title Run Radiomics Robustness Analysis
# Runs `rtpipeline radiomics-robustness` once for every course directory found
# under OUTPUT_DIR, then merges the per-course parquet files into one Excel
# summary with `rtpipeline radiomics-robustness-aggregate`.
# A failure for one course is reported and does not stop the remaining courses.
import os
import subprocess
from pathlib import Path

# Configuration
input_dir_path = Path(INPUT_DIR)  # not referenced below; kept so the name stays defined
output_dir_path = Path(OUTPUT_DIR)
config_file = "config.yaml"  # relative: resolved against the current working directory

# Top-level folders under OUTPUT_DIR that are skipped when looking for patients.
NON_PATIENT_DIRS = ('_RESULTS', 'Data', 'Logs')


def _visible_subdirs(parent):
    """Return the non-hidden sub-directories of *parent* in sorted order."""
    return sorted(
        entry for entry in parent.iterdir()
        if entry.is_dir() and not entry.name.startswith('.')
    )


def find_course_dirs(output_root):
    """Collect every ``<patient>/<course>`` directory below *output_root*.

    Hidden directories and the folders listed in ``NON_PATIENT_DIRS`` are
    ignored. The result is sorted so that processing and aggregation order is
    the same on every run (``iterdir`` alone gives no ordering guarantee).

    Args:
        output_root: ``Path`` of the pipeline output folder.

    Returns:
        A list of course directory ``Path`` objects; empty when *output_root*
        does not exist.
    """
    if not output_root.exists():
        return []
    return [
        course
        for patient in _visible_subdirs(output_root)
        if patient.name not in NON_PATIENT_DIRS
        for course in _visible_subdirs(patient)
    ]


# 1. Find all course directories in the output folder
if not output_dir_path.exists():
    print(f"Output directory not found: {output_dir_path}")
courses = find_course_dirs(output_dir_path)

print(f"Found {len(courses)} courses for robustness analysis.")

# 2. Run Robustness for each course
parquet_files = []
for course in courses:
    print(f"Processing {course.name}...")
    out_parquet = course / "radiomics_robustness_ct.parquet"

    # Skip if exists
    if out_parquet.exists():
        print(f"  Skipping (already exists): {out_parquet.name}")
        parquet_files.append(str(out_parquet))
        continue

    cmd = [
        "rtpipeline", "radiomics-robustness",
        "--course-dir", str(course),
        "--config", config_file,
        "--output", str(out_parquet)
    ]
    try:
        subprocess.run(cmd, check=True)
    except (subprocess.CalledProcessError, OSError) as e:
        # OSError covers the case where the `rtpipeline` executable cannot be
        # started at all (e.g. it is not installed / not on PATH).
        print(f"  Failed for {course.name}: {e}")
        continue

    # Only hand files to the aggregator that were actually written.
    if out_parquet.exists():
        parquet_files.append(str(out_parquet))
    else:
        print(f"  No output produced for {course.name}; excluded from aggregation.")

# 3. Aggregate results
if parquet_files:
    print("\nAggregating results...")
    results_dir = output_dir_path / "_RESULTS"
    results_dir.mkdir(parents=True, exist_ok=True)
    agg_out = results_dir / "radiomics_robustness_summary.xlsx"

    cmd_agg = [
        "rtpipeline", "radiomics-robustness-aggregate",
        "--output", str(agg_out),
        "--config", config_file,
        "--inputs"
    ] + parquet_files

    try:
        subprocess.run(cmd_agg, check=True)
        print(f"Done! Summary saved to {agg_out}")
    except (subprocess.CalledProcessError, OSError) as e:
        print(f"Aggregation failed: {e}")
else:
    print("No robustness results to aggregate.")