# Experiment 02: TileDB Deep Dive

## 1. Hypothesis & Rationale

**Research Question:** How do TileDB configuration parameters (tiling, compression, caching, threading) affect I/O performance?

**Hypothesis:** TileDB configuration has compound effects on I/O performance. Tiling strategy must match access pattern for optimal performance.

In [None]:
# Parameters (papermill)
# Default values for a papermill-parameterized run; papermill may inject overrides.
# NOTE(review): of these, only N_WARMUP, N_RUNS and TILING_STRATEGIES feed the
# benchmark cells visible in this notebook (PATCH_SIZE is only re-parsed). The
# remaining values look shared with sibling experiments -- confirm before removing.
BATCH_SIZE = 4
PATCH_SIZE = (64, 64, 64)  # converted back from a string in the next cell if needed
NUM_WORKERS = 0
N_WARMUP = 5  # forwarded to benchmark_operation as n_warmup
N_RUNS = 10  # forwarded to benchmark_operation as n_runs
N_BATCHES = 20
N_SUBJECTS = 20
RANDOM_SEED = 42
S3_BUCKET = "souzy-scratch"
TILING_STRATEGIES = ["axial", "isotropic"]  # one "radiobject-<strategy>" dataset is opened per entry

In [None]:
# Normalize papermill-injected parameters: tuple/list values can arrive as
# strings, so any string value is turned back into the Python literal it spells.
import ast

PATCH_SIZE = ast.literal_eval(PATCH_SIZE) if isinstance(PATCH_SIZE, str) else PATCH_SIZE
TILING_STRATEGIES = (
    ast.literal_eval(TILING_STRATEGIES)
    if isinstance(TILING_STRATEGIES, str)
    else TILING_STRATEGIES
)

In [None]:
import sys

import pandas as pd

# Derive project root from absolute config paths
# (benchmarks.config is imported before sys.path is modified, so the
# `benchmarks` package must already be importable at this point.)
from benchmarks.config import _BENCHMARKS_DIR, BENCHMARK_DIR, FIGURES_DIR

# The project root is taken to be the directory that contains _BENCHMARKS_DIR.
project_root = _BENCHMARKS_DIR.parent
# Put <project_root>/src first on the import path before the imports below run.
# NOTE(review): presumably this is what makes `radiobject` importable -- confirm.
sys.path.insert(0, str(project_root / "src"))

from benchmarks.infrastructure import (
    benchmark_operation,
    plot_heatmap,
)
from radiobject import RadiObject

## 2. Load Datasets

In [None]:
# Open one RadiObject per configured tiling strategy, keyed by strategy name.
datasets = {}
for strategy in TILING_STRATEGIES:
    # Each dataset is addressed as <BENCHMARK_DIR>/radiobject-<strategy>.
    uri = str(BENCHMARK_DIR / f"radiobject-{strategy}")
    opened = RadiObject(uri)
    datasets[strategy] = opened
    print(f"Loaded {strategy}: {len(datasets[strategy])} subjects")

## 3. Tiling Strategy vs Access Pattern

In [None]:
# Time every access pattern against every tiling strategy and collect one
# flat record per (tiling, access_pattern) pair.
tiling_results = []


def _corner_roi(edge):
    """Build an accessor reading the cubic region [0, edge) along x, y and z."""
    extent = slice(0, edge)
    return lambda vol: vol.slice(x=extent, y=extent, z=extent)


access_patterns = {
    # Single planes taken at the midpoint of the corresponding shape axis.
    "axial_slice": lambda vol: vol.axial(vol.shape[2] // 2),
    "coronal_slice": lambda vol: vol.coronal(vol.shape[1] // 2),
    "sagittal_slice": lambda vol: vol.sagittal(vol.shape[0] // 2),
    # Cubic regions of increasing edge length: roi_32, roi_64, roi_128.
    **{f"roi_{edge}": _corner_roi(edge) for edge in (32, 64, 128)},
}

for strategy, radi in datasets.items():
    # Only the first volume of the first collection is benchmarked.
    first_collection = list(radi.collection_names)[0]
    vol = radi.collection(first_collection).iloc[0]
    print(f"\n--- {strategy.upper()} Tiling ---")

    for pattern_name, accessor in access_patterns.items():
        result = benchmark_operation(
            # Defaults bind the current volume and accessor to this callable.
            lambda v=vol, a=accessor: a(v),
            "RadiObject",
            f"tiling_{pattern_name}",
            "local",
            tiling=strategy,
            n_warmup=N_WARMUP,
            n_runs=N_RUNS,
        )

        record = {
            "tiling": strategy,
            "access_pattern": pattern_name,
            "time_ms": result.time_mean_ms,
            "std_ms": result.time_std_ms,
        }
        tiling_results.append(record)

        print(f"  {pattern_name}: {result.time_mean_ms:.2f} +/- {result.time_std_ms:.2f} ms")

## 4. Results (Tidy Format)

In [None]:
# Tiling results as tidy DataFrame
# One row per (tiling, access_pattern) measurement, rendered without the row index.
df = pd.DataFrame(tiling_results)
tidy_text = df.to_string(index=False)
print(tidy_text)

In [None]:
# Pivot for heatmap view
# DataFrame.pivot returns its row and column labels sorted, which would list
# "roi_128" before "roi_32". Reindex to first-appearance order so the table
# follows the order in which patterns and strategies were benchmarked.
df = pd.DataFrame(tiling_results)
pattern_order = df["access_pattern"].unique().tolist()
tiling_order = df["tiling"].unique().tolist()
pivot = df.pivot(index="access_pattern", columns="tiling", values="time_ms").reindex(
    index=pattern_order, columns=tiling_order
)
print("\nTime (ms) by Access Pattern x Tiling Strategy:")
print(pivot.round(2).to_string())

## 5. Visualizations

In [None]:
# Render the access-pattern x tiling timing matrix as a heatmap image.
FIGURES_DIR.mkdir(parents=True, exist_ok=True)

df = pd.DataFrame(tiling_results)

# DataFrame.pivot returns sorted labels, which would put the "roi_128" row
# ahead of "roi_32". Reindex to first-appearance order so heatmap rows and
# columns follow the order in which they were benchmarked.
pattern_order = df["access_pattern"].unique().tolist()
tiling_order = df["tiling"].unique().tolist()
pivot = df.pivot(index="access_pattern", columns="tiling", values="time_ms").reindex(
    index=pattern_order, columns=tiling_order
)

plot_heatmap(
    pivot.values,
    list(pivot.index),
    list(pivot.columns),
    "Tiling Strategy vs Access Pattern (ms)",
    FIGURES_DIR / "tiling_heatmap.png",
)

## 6. Key Findings

1. **AXIAL tiling:** Optimal for 2D axial slices (each slice is read from a single tile rather than many).
2. **ISOTROPIC tiling:** Optimal for 3D ROI extraction.
3. **Mismatch penalty:** A tiling strategy that does not match the access pattern can cause a 5-10x slowdown.

In [None]:
# Export results
# Writes the collected tiling measurements, plus a timestamp and experiment id,
# to a JSON file under RESULTS_DIR.
import json
from datetime import datetime

from benchmarks.config import RESULTS_DIR

results_json = {
    "timestamp": datetime.now().isoformat(),
    "experiment": "02_tiledb_deep_dive",
    "tiling_results": tiling_results,
}

# Serialize before touching the file: if a value is not JSON-serializable this
# raises here, instead of truncating an existing results file and leaving
# partially written JSON behind.
serialized = json.dumps(results_json, indent=2)

output_path = RESULTS_DIR / "02_tiledb_deep_dive_results.json"
output_path.parent.mkdir(parents=True, exist_ok=True)
# Explicit encoding keeps the output independent of the platform default.
output_path.write_text(serialized, encoding="utf-8")
print(f"Results saved to {output_path}")