# playNano - Processing Explorer Notebook

This notebook focuses on **loading**, **processing** (filters, masks, stages, and provenance), and **exporting** (image stacks + GIFs) HS-AFM data programmatically using `playNano`.

> Tip: Run the cells from top to bottom. If you're developing locally inside the repo, install in editable mode first.

In [None]:
# If running inside the playNano repo and playNano is not yet installed, uncomment the next line
# to install it in editable mode (%pip installs into the environment of the running kernel)
# %pip install -e .

from pathlib import Path
import numpy as np
import matplotlib.pyplot as plt

from playNano.afm_stack import AFMImageStack
from playNano.processing.pipeline import ProcessingPipeline
from playNano.processing.filters import register_filters
from playNano.processing.mask_generators import register_masking
from playNano.processing.masked_filters import register_mask_filters
from playNano.io.export_data import save_ome_tiff_stack, save_npz_bundle, save_h5_bundle
from playNano.io.gif_export import export_gif

%matplotlib inline

## 1) Configure input & load
Set your data path and channel, then load an `AFMImageStack`.

In [None]:
from pathlib import Path

# This block finds the repo root so that the bundled test data can be accessed
def find_repo_root(marker=".git"):
    """Return the closest directory, starting at the CWD, that contains ``marker``.

    The current working directory is checked first, then each of its parents
    in turn.

    Parameters
    ----------
    marker : str, optional
        Name of a file or directory that identifies the repository root.
        Defaults to ``".git"``.

    Returns
    -------
    pathlib.Path
        The first directory on the way up that contains ``marker``.

    Raises
    ------
    FileNotFoundError
        If neither the working directory nor any of its parents contains
        ``marker``.
    """
    start = Path.cwd()
    for candidate in (start, *start.parents):
        if (candidate / marker).exists():
            return candidate
    # Name the marker that was actually searched for (not a fixed ".git") and
    # where the search started, so the failure is easy to diagnose.
    raise FileNotFoundError(
        f"Could not find the repository root (no {marker!r} found in {start} or its parents)."
    )

repo_root = find_repo_root()

# By default this cell opens a demo file that ships with the test suite.
# To use your own data instead: uncomment the `your_path` line, comment out the
# `load_data(demo_path, ...)` call and uncomment the `load_data(your_path, ...)` call.

demo_path = repo_root.joinpath("tests", "resources", "sample_0.h5-jpk")
# your_path = Path(r"\path\to\your\data.h5-jpk")

channel = "height_trace"  # common JPK height channel; change it to load a different channel

stack = AFMImageStack.load_data(demo_path, channel=channel)
# stack = AFMImageStack.load_data(your_path, channel=channel)  # uncomment this to use your_path

Print summary information about the loaded stack and preview its first frame.

In [None]:
# Summary of the loaded stack (printed once; the original cell repeated it).
print(f"Loaded {stack.n_frames} frames; each frame shape: {stack.image_shape}")
print(f"Pixel size: {stack.pixel_size_nm} nm")
print(stack.frame_metadata)

# Peek at the first frame using the explicit figure/axes interface.
fig, ax = plt.subplots(figsize=(4, 4))
im = ax.imshow(stack.get_frame(0), cmap='afmhot', origin='lower')
ax.set_title('Raw Frame 0')
fig.colorbar(im, ax=ax, label='Height (nm)')
fig.tight_layout()
plt.show()

## 2) Discover available filters & masks
The registry functions list everything that can be added to a `ProcessingPipeline`.

In [None]:
# Build the three registries; the cells below treat each one as a mapping
# (they call .items() on it and test membership by step name).
FILTERS, MASKS, MASKED = register_filters(), register_masking(), register_mask_filters()
print(f'Registered filters: {len(FILTERS)}; masks: {len(MASKS)}; masked filters: {len(MASKED)}')

def brief_doc(fn, width=90):
    """Return the first docstring line of ``fn``, limited to ``width`` characters.

    An empty string is returned when ``fn`` has no docstring. When the first
    line is longer than ``width`` it is cut to ``width`` characters and
    ``'...'`` is appended.
    """
    docstring = fn.__doc__ or ''
    lines = docstring.strip().splitlines()
    summary = lines[0] if lines else ''
    if len(summary) > width:
        return summary[:width] + '...'
    return summary

# Print one "name : summary" line per registered callable, grouped by registry
# and sorted by name within each group.
for heading, registry in (
    ('- Filters -', FILTERS),
    ('- Masks -', MASKS),
    ('- Masked filters -', MASKED),
):
    print(heading)
    for name, fn in sorted(registry.items()):
        print(f'  * {name:24s} : {brief_doc(fn)}')

## 3) Build a Processing Explorer
Utilities to run a pipeline, then **inspect every stage** on a chosen frame: side-by-side, differences, histograms, and a quick roughness metric.

In [None]:
import math
import re

# Default processing recipe: (step name, keyword arguments) pairs in the order
# they are applied. Edit or extend the list to experiment.
DEFAULT_STEPS = [
    ('remove_plane', dict()),
    ('mask_mean_offset', dict(factor=1.0)),
    ('row_median_align', dict()),
    ('polynomial_flatten', dict(order=2)),
    ('gaussian_filter', dict(sigma=1.5)),
]

def run_pipeline(stack, steps=DEFAULT_STEPS):
    """Run ``steps`` on ``stack`` and return its processed-stage keys in step order.

    Parameters
    ----------
    stack
        Stack to process; it is handed to ``ProcessingPipeline`` and modified
        by the run.
    steps : sequence of (str, dict), optional
        ``(name, kwargs)`` pairs. A name found in the ``MASKS`` registry is
        added as a mask step, every other name as a filter step.

    Returns
    -------
    list
        Result of ``ordered_stage_keys(stack)`` after the pipeline has run.
    """
    # Best effort: reset to the raw data when possible so repeated runs start
    # clean; any failure of the reset is deliberately ignored.
    try:
        stack.restore_raw()
    except Exception:
        pass

    pipeline = ProcessingPipeline(stack)
    for step_name, params in steps:
        add_step = pipeline.add_mask if step_name in MASKS else pipeline.add_filter
        add_step(step_name, **params)
    pipeline.run()
    return ordered_stage_keys(stack)

def step_index_from_key(k):
    """Return the sort index of a stage key.

    Keys of the form ``'step_<N>_...'`` map to ``N``, the key ``'raw'`` maps
    to ``0`` and every other key maps to ``10**9`` so that it sorts last.
    """
    match = re.match(r'^step_(\d+)_', k)
    if match:
        return int(match.group(1))
    if k == 'raw':
        return 0
    return 10**9

def ordered_stage_keys(stack):
    """Return the keys of ``stack.processed`` sorted by ``step_index_from_key``."""
    stage_names = list(stack.processed.keys())
    stage_names.sort(key=step_index_from_key)
    return stage_names

def find_latest_mask_key(stack, name_contains):
    candidates = [k for k in stack.masks.keys() if name_contains in k]
    if not candidates:
        return None
    return sorted(candidates, key=step_index_from_key)[-1]

def plot_stages(stack, frame_idx=0, keys=None, cmap='afmhot'):
    """Show one frame of every processing stage side by side.

    Parameters
    ----------
    stack
        Object whose ``processed`` mapping holds the per-stage data.
    frame_idx : int, optional
        Index of the frame taken from each stage.
    keys : sequence of str, optional
        Stage keys to plot; when empty or ``None`` all keys returned by
        ``ordered_stage_keys(stack)`` are used.
    cmap : str, optional
        Matplotlib colormap name.

    Returns
    -------
    matplotlib.figure.Figure
        Figure with at most four panels per row, each with its own colorbar.

    Raises
    ------
    ValueError
        If there is no stage to plot (previously an opaque ZeroDivisionError).
    """
    keys = keys or ordered_stage_keys(stack)
    if not keys:
        raise ValueError("No processed stages to plot; run a pipeline first.")
    n = len(keys)
    ncols = min(4, n)
    nrows = math.ceil(n / ncols)
    fig, axes = plt.subplots(nrows, ncols, figsize=(3.2*ncols, 3.2*nrows), squeeze=False)
    # Row-major flattening keeps panel i at row i // ncols, column i % ncols.
    flat_axes = axes.ravel()
    for ax, k in zip(flat_axes, keys):
        img = stack.processed[k][frame_idx]
        im = ax.imshow(img, cmap=cmap, origin='lower')
        ax.set_title(k)
        ax.axis('off')
        fig.colorbar(im, ax=ax, fraction=0.046, pad=0.03)
    # Hide the unused panels of the last row.
    for ax in flat_axes[n:]:
        ax.axis('off')
    fig.tight_layout()
    return fig


def plot_differences(
    stack,
    frame_idx=0,
    key_pairs=None,
    cmap='coolwarm'
):
    """Plot the per-pixel difference ``B - A`` for pairs of processing stages.

    Parameters
    ----------
    stack
        Object whose ``processed`` mapping holds the per-stage data.
    frame_idx : int, optional
        Index of the frame taken from each stage.
    key_pairs : sequence of (str, str)
        ``(key_a, key_b)`` pairs; each panel shows ``processed[key_b]`` minus
        ``processed[key_a]`` for the chosen frame.
    cmap : str, optional
        Matplotlib colormap name.

    Returns
    -------
    matplotlib.figure.Figure
        Figure with at most four panels per row, each with its own colorbar.

    Raises
    ------
    ValueError
        If ``key_pairs`` is ``None`` or empty. An empty list previously
        failed with a ZeroDivisionError while computing the grid layout.
    """
    if key_pairs is None:
        raise ValueError("Provide a list of (key_a, key_b) pairs to compare.")
    key_pairs = list(key_pairs)
    if not key_pairs:
        raise ValueError("key_pairs is empty; at least one (key_a, key_b) pair is required.")
    n = len(key_pairs)
    ncols = min(4, n)
    nrows = math.ceil(n / ncols)
    fig, axes = plt.subplots(nrows, ncols, figsize=(3.2*ncols, 3.2*nrows), squeeze=False)
    # Row-major flattening keeps panel i at row i // ncols, column i % ncols.
    flat_axes = axes.ravel()
    for ax, (key_a, key_b) in zip(flat_axes, key_pairs):
        A = stack.processed[key_a][frame_idx]
        B = stack.processed[key_b][frame_idx]
        D = B - A
        im = ax.imshow(D, cmap=cmap, origin='lower')
        ax.set_title(f"{key_b} - {key_a}")
        ax.axis('off')
        fig.colorbar(im, ax=ax, fraction=0.046, pad=0.03)
    # Hide the unused panels of the last row.
    for ax in flat_axes[n:]:
        ax.axis('off')
    fig.tight_layout()
    return fig


def roughness_nm(arr):
    """Return the NaN-ignoring standard deviation of ``arr`` as a Python float."""
    spread = np.nanstd(arr)
    return float(spread)

def summarize_metrics(stack, frame_idx=0, keys=None):
    """Return ``(stage key, roughness)`` tuples for one frame of each stage.

    When ``keys`` is empty or ``None`` every key from
    ``ordered_stage_keys(stack)`` is used. The roughness of a stage is
    ``roughness_nm`` applied to ``stack.processed[key][frame_idx]``.
    """
    if not keys:
        keys = ordered_stage_keys(stack)
    return [(key, roughness_nm(stack.processed[key][frame_idx])) for key in keys]

def show_histograms(stack, frame_idx=0, keys=None, bins=128):
    """Plot a value histogram of one frame for every processing stage.

    Parameters
    ----------
    stack
        Object whose ``processed`` mapping holds the per-stage data.
    frame_idx : int, optional
        Index of the frame taken from each stage.
    keys : sequence of str, optional
        Stage keys to plot; when empty or ``None`` all keys returned by
        ``ordered_stage_keys(stack)`` are used.
    bins : int, optional
        Number of histogram bins.

    Returns
    -------
    matplotlib.figure.Figure
        Figure with at most four histograms per row.

    Raises
    ------
    ValueError
        If there is no stage to plot (previously an opaque ZeroDivisionError).
    """
    keys = keys or ordered_stage_keys(stack)
    if not keys:
        raise ValueError("No processed stages to plot; run a pipeline first.")
    n = len(keys)
    ncols = min(4, n)
    nrows = math.ceil(n / ncols)
    fig, axes = plt.subplots(nrows, ncols, figsize=(3.2*ncols, 2.6*nrows), squeeze=False)
    # Row-major flattening keeps panel i at row i // ncols, column i % ncols.
    flat_axes = axes.ravel()
    for ax, k in zip(flat_axes, keys):
        vals = stack.processed[k][frame_idx].ravel()
        # Drop NaN/inf so the histogram range stays finite; this matches the
        # NaN tolerance of roughness_nm.
        vals = vals[np.isfinite(vals)]
        ax.hist(vals, bins=bins, color='steelblue', alpha=0.8)
        ax.set_title(k)
    # Hide the unused panels of the last row.
    for ax in flat_axes[n:]:
        ax.axis('off')
    fig.tight_layout()
    return fig


### 3.1 Run the default pipeline & inspect all stages

In [None]:
# Run the default recipe; run_pipeline returns the processed-stage keys in step order.
stage_keys = run_pipeline(stack, DEFAULT_STEPS)
# Bare last expression so the notebook displays the list of keys.
stage_keys


In [None]:
frame_idx = 0  # choose a representative frame
_ = plot_stages(stack, frame_idx, stage_keys)

# Difference between adjacent stages (the change introduced by each step).
# The pair list is empty when fewer than two stages exist, so guard the call.
key_pairs = list(zip(stage_keys[:-1], stage_keys[1:], strict=False))
if key_pairs:
    # Use the frame chosen above rather than a hard-coded 0.
    _ = plot_differences(stack, frame_idx=frame_idx, key_pairs=key_pairs)

# quick roughness table (std of heights)
rows = summarize_metrics(stack, frame_idx, stage_keys)
for k, r in rows:
    print(f'{k:24s}  RMS ~= {r:.3f} nm')

# per-stage histograms
_ = show_histograms(stack, frame_idx, stage_keys)

### 3.2 Parameter sweeps (e.g., Gaussian sigma)
Sweep a parameter and preview the effect (single frame) while keeping the earlier steps fixed.

In [None]:
sigmas = [0.5, 1.0, 1.5, 2.0, 3.0]
sweep_frame = 0  # frame previewed for every sigma

# Keep the earlier steps of the default recipe fixed and swap only the final
# Gaussian step, instead of repeating the whole recipe by hand.
base_steps = [step for step in DEFAULT_STEPS if step[0] != 'gaussian_filter']

previews = []
for s in sigmas:
    run_pipeline(stack, base_steps + [('gaussian_filter', {'sigma': s})])
    final_key = ordered_stage_keys(stack)[-1]
    # Copy the frame so a later run cannot alter an already captured preview.
    previews.append(np.array(stack.processed[final_key][sweep_frame], copy=True))

# One colour scale for all panels; otherwise the shared colorbar would only
# be correct for the last panel.
vmin = min(float(np.nanmin(p)) for p in previews)
vmax = max(float(np.nanmax(p)) for p in previews)

# squeeze=False keeps `axs` two-dimensional even for a single sigma.
fig, axs = plt.subplots(1, len(sigmas), figsize=(3.2*len(sigmas), 3.2), squeeze=False)
for ax, s, img in zip(axs[0], sigmas, previews):
    im = ax.imshow(img, cmap='afmhot', origin='lower', vmin=vmin, vmax=vmax)
    ax.set_title(f'sigma = {s}')
    ax.axis('off')

# Reserve space on the right and add the shared colorbar there
fig.subplots_adjust(right=0.88, wspace=0.02)
cbar = fig.colorbar(im, ax=axs.ravel().tolist(), fraction=0.03, pad=0.02)
cbar.set_label('Height (nm)')
plt.show()


### 3.3 Exploring masks
Mask steps generate boolean arrays in `stack.masks`. Visualize a mask and how a masked filter changes the frame.

In [None]:
# Example: run the default recipe, then add a 'mask_below_threshold' step and
# inspect the result on a different example dataset.

# The demo path opens demo data from the test suite. To use your own data,
# uncomment the mask_your_path line, comment out the line that loads
# mask_demo_path and uncomment the line that loads mask_your_path.

# Load demo data (or switch to your own path)
mask_demo_path = repo_root / "tests" / "resources" / "jpk_folder_0"
# mask_your_path = Path(r"\path\to\your\data")

channel = 'height_trace'
frame_idx = 0

mask_stack = AFMImageStack.load_data(mask_demo_path, channel=channel)
# mask_stack = AFMImageStack.load_data(mask_your_path, channel=channel)

# Clear the mask used for flattening, then add a different mask step.
# mask_below_threshold masks the area below a set threshold.
mask_proc_steps = DEFAULT_STEPS + [('clear', {}), ('mask_below_threshold', {'threshold': -0.2})]

run_pipeline(mask_stack, mask_proc_steps)

# Collect the output key recorded for each executed step: the processed-data
# key when present, otherwise the mask key.
mask_step_keys = []
for step in mask_stack.provenance['processing']['steps']:
    step_key = step.get('processed_key') or step.get('mask_key')
    if step_key:
        mask_step_keys.append(step_key)
print(mask_step_keys)

# origin='lower' matches every other image in this notebook, so this frame
# and its mask are shown with the same orientation.
plt.imshow(mask_stack.data[frame_idx], cmap='afmhot', origin='lower')
plt.title('Processed frame'); plt.axis('off'); plt.show()

In [None]:
# Look the mask up by step name rather than by a hard-coded step index, so the
# cell keeps working when steps are added to or removed from the recipe.
mask_name = 'mask_below_threshold'
mask_key = find_latest_mask_key(mask_stack, mask_name)
mask = mask_stack.masks.get(mask_key, None) if mask_key is not None else None

if mask is not None:
    plt.figure(figsize=(4,4)); plt.imshow(mask[frame_idx], origin='lower', cmap='afmhot')
    plt.title(f'Mask: {mask_key}'); plt.axis('off'); plt.show()
else:
    print('No mask found for step', mask_name)

## 4) Provenance: what ran & with which parameters?
Every pipeline run appends to `stack.provenance['processing']['steps']`.

In [None]:
import json

# List every executed step with the parameters recorded in the provenance.
steps = mask_stack.provenance.get('processing', {}).get('steps', [])
print('Processing steps executed:')
for s in steps:
    print(' * {} - params: {}'.format(s.get('name'), s.get('params')))

# Optional: write a lightweight processing log
out_dir = Path('output'); out_dir.mkdir(exist_ok=True, parents=True)
log_path = out_dir / 'processing_log.json'
# Explicit encoding keeps the file identical across platforms (same as the YAML cell).
with open(log_path, 'w', encoding='utf8') as f:
    # default=str: values json cannot encode natively are written in their
    # text form instead of aborting the dump with a TypeError.
    json.dump({'processing': mask_stack.provenance.get('processing', {})}, f, indent=2, default=str)
print('Wrote', log_path)

## 5) Export processed data & animations
Export to OME-TIFF / NPZ / HDF5, and create GIFs for raw/processed views.

In [None]:
out_dir = Path('output')
base = 'processing_explorer'
out_dir.mkdir(exist_ok=True, parents=True)

# Ensure the stack that is exported below has been processed (run DEFAULT_STEPS if needed).
# The check must look at mask_stack (the stack being exported), and stage keys
# carry a 'step_<N>_' prefix, so match on the step-name suffix instead of the
# bare name.
if not any(key.endswith('gaussian_filter') for key in mask_stack.processed):
    _ = run_pipeline(mask_stack, DEFAULT_STEPS)

# --- Bundles (processed) ---
save_ome_tiff_stack(out_dir/f'{base}.ome.tif', mask_stack, raw=False)
save_npz_bundle(out_dir/base, mask_stack, raw=False)
save_h5_bundle(out_dir/base, mask_stack, raw=False)
print('Wrote OME-TIFF, NPZ, H5 bundles to', out_dir)

# --- GIFs ---
export_gif(afm_stack=mask_stack, make_gif=True, output_folder=out_dir, output_name=f'{base}_processed', scale_bar_nm=100, raw=False)
export_gif(afm_stack=mask_stack, make_gif=True, output_folder=out_dir, output_name=f'{base}_raw', scale_bar_nm=100, raw=True)
print('Wrote GIFs to', out_dir)

## 6) Reproducible pipeline file (YAML)
Save the current steps in a CLI-compatible YAML so you can run the same processing outside notebooks.

In [None]:
import yaml

# One mapping per step: the step name under 'name' followed by its keyword arguments.
yaml_path = Path('output') / 'processing.yaml'
step_entries = [{'name': n, **kwargs} for (n, kwargs) in DEFAULT_STEPS]
proc_yaml = {'filters': step_entries}
with open(yaml_path, 'w', encoding='utf8') as f:
    yaml.safe_dump(proc_yaml, f, sort_keys=False)
print('Saved', yaml_path)
print('Example CLI:')
cli_args = (stack.file_path, channel, yaml_path)
print('  playnano process  "%s" --channel %s --processing-file %s --export tif,npz,h5 --make-gif' % cli_args)