# Extended DINOv3 Experiments

This notebook accompanies the extended neurOS evaluation of **DINOv3** backbones
on synthetic datasets inspired by a range of neuroscience modalities.  It
demonstrates how to generate synthetic images, extract placeholder
features via the neurOS DINOv3 plugin, train simple segmentation
classifiers and evaluate cross‑modality performance.  We also assess
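translation‑based registration via patch correlations.  All code
illustrated here runs offline and does not depend on external internet
resources; the dataset and feature helpers come from the companion
script, which must be loaded into the session before running the cells
below.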


In [None]:

import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, f1_score

from neuros.plugins.cv.dinov3_backbone import DINOv3Backbone
from neuros.plugins.cv.feature_matching import patch_correlation, estimate_translation


In [None]:

# Helpers provided by the companion script.  When this runs as a notebook,
# `__main__` is the kernel's own namespace, so these imports only succeed
# once the script's definitions have been loaded into the session.  Every
# helper that later cells call is listed so that a missing definition fails
# here rather than midway through an experiment.
from __main__ import (
    estimate_shift_error,
    flatten_for_segmentation,
    generate_dataset,
)

# Names of the modalities for which datasets are generated and evaluated.
modalities = ["em", "mri", "histology", "connectomics", "atlas", "calcium"]



## Dataset generation

We synthesise images and masks for each of six modalities.  The
functions `generate_dataset` and associated helpers are implemented in
the companion script.  Below we generate a few samples per modality and
display them.


In [None]:

import matplotlib.pyplot as plt

# Grid of examples: one row per modality, four samples per row.  Each panel
# shows the image with its mask drawn on top in translucent red.
fig, axes = plt.subplots(len(modalities), 4, figsize=(8, 2 * len(modalities)))
for row, modality in enumerate(modalities):
    imgs, masks = generate_dataset(modality, n_samples=4)
    for col, sample in enumerate(zip(imgs, masks)):
        image, mask = sample
        panel = axes[row][col]
        panel.imshow(image)
        panel.imshow(mask, cmap="Reds", alpha=0.3)
        panel.set_title(f"{modality} #{col}")
        panel.set_axis_off()
fig.tight_layout()



## Segmentation experiments

We now evaluate segmentation on each modality.  For each backbone
(`cnx-tiny` and `vit-large`), we train a logistic regression on
patch embeddings to classify patches as foreground (mask=1) or
background (mask=0).  We compute accuracy and F1 score on a held‑out
test set.  Finally, we explore cross‑modality transfer by training on
one modality and testing on every modality (including the training one,
which serves as the in‑domain reference).


In [None]:

from sklearn.metrics import accuracy_score, f1_score

def evaluate_segmentation(modality, backbone_name):
    """Score a patch-level foreground/background classifier on one modality.

    Generates 10 training and 5 test samples with ``generate_dataset``, turns
    them into feature/label arrays with ``flatten_for_segmentation`` and fits
    a ``LogisticRegression`` (``max_iter=500``) on the training arrays.

    Args:
        modality: Modality name forwarded to ``generate_dataset``.
        backbone_name: Backbone identifier forwarded to
            ``flatten_for_segmentation``.

    Returns:
        Tuple ``(accuracy, f1)`` computed on the test labels.  If the
        training or test labels contain no positives, the all-zero
        prediction is scored and F1 is reported as ``0.0``.  If the training
        labels consist of a single (non-zero) class, that constant
        prediction is scored instead of fitting a classifier.
    """
    # NOTE(review): train and test come from two independent calls; whether
    # the two sets are actually disjoint depends on how generate_dataset
    # seeds its randomness -- confirm against the companion script.
    train_imgs, train_masks = generate_dataset(modality, n_samples=10)
    test_imgs, test_masks = generate_dataset(modality, n_samples=5)
    X_train, y_train = flatten_for_segmentation(train_imgs, train_masks, backbone_name)
    X_test, y_test = flatten_for_segmentation(test_imgs, test_masks, backbone_name)

    # No positives on either side: F1 is not meaningful, so report the
    # all-background baseline.
    if np.sum(y_train) == 0 or np.sum(y_test) == 0:
        y_pred = np.zeros_like(y_test)
        return accuracy_score(y_test, y_pred), 0.0

    # LogisticRegression.fit rejects a single-class target, so an
    # all-foreground training set cannot be fitted; the only prediction it
    # supports is that one class.
    train_classes = np.unique(y_train)
    if train_classes.size < 2:
        y_pred = np.full_like(y_test, train_classes[0])
        return accuracy_score(y_test, y_pred), f1_score(y_test, y_pred)

    clf = LogisticRegression(max_iter=500)
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)
    return accuracy_score(y_test, y_pred), f1_score(y_test, y_pred)

# Within-modality benchmark: evaluate every (modality, backbone) pair and keep
# the (accuracy, F1) tuples in `results`, keyed by modality then backbone.
results = {}
for modality in modalities:
    per_backbone = {}
    results[modality] = per_backbone
    for backbone_name in ("cnx-tiny", "vit-large"):
        acc, f1 = evaluate_segmentation(modality, backbone_name)
        per_backbone[backbone_name] = (acc, f1)
        summary = f"{modality}-{backbone_name}: accuracy={acc:.3f}, F1={f1:.3f}"
        print(summary)


In [None]:

import pandas as pd

# One row per (modality, backbone) pair with "Accuracy" and "F1" columns:
# the dict is built with the pairs as columns and then transposed.
df_results = pd.DataFrame({
    (modality, backbone): results[modality][backbone]
    for modality in modalities
    for backbone in ["cnx-tiny", "vit-large"]
}, index=["Accuracy", "F1"]).T

# `caas_jupyter_tools` is not a general-purpose package, so an unconditional
# import would abort this cell wherever it is not installed.  Use it when it
# is present; the bare `df_results` expression below renders the table
# either way.
try:
    import caas_jupyter_tools
except ImportError:
    pass
else:
    caas_jupyter_tools.display_dataframe_to_user(name="Segmentation results", dataframe=df_results)
df_results


In [None]:

# Cross‑modality generalisation

def evaluate_cross_modal(train_mod, test_mod, backbone_name):
    """Train a patch classifier on one modality and score it on another.

    Generates 10 samples of ``train_mod`` and 5 samples of ``test_mod`` with
    ``generate_dataset``, turns them into feature/label arrays with
    ``flatten_for_segmentation`` and fits a ``LogisticRegression``
    (``max_iter=500``) on the training arrays.

    Args:
        train_mod: Modality name used to generate the training samples.
        test_mod: Modality name used to generate the test samples; may equal
            ``train_mod``.
        backbone_name: Backbone identifier forwarded to
            ``flatten_for_segmentation``.

    Returns:
        Tuple ``(accuracy, f1)`` computed on the test labels.  If the
        training or test labels contain no positives, the all-zero
        prediction is scored and F1 is reported as ``0.0``.  If the training
        labels consist of a single (non-zero) class, that constant
        prediction is scored instead of fitting a classifier.
    """
    train_imgs, train_masks = generate_dataset(train_mod, n_samples=10)
    test_imgs, test_masks = generate_dataset(test_mod, n_samples=5)
    X_train, y_train = flatten_for_segmentation(train_imgs, train_masks, backbone_name)
    X_test, y_test = flatten_for_segmentation(test_imgs, test_masks, backbone_name)

    # No positives on either side: F1 is not meaningful, so report the
    # all-background baseline.
    if np.sum(y_train) == 0 or np.sum(y_test) == 0:
        y_pred = np.zeros_like(y_test)
        return accuracy_score(y_test, y_pred), 0.0

    # LogisticRegression.fit rejects a single-class target, so an
    # all-foreground training set cannot be fitted; the only prediction it
    # supports is that one class.
    train_classes = np.unique(y_train)
    if train_classes.size < 2:
        y_pred = np.full_like(y_test, train_classes[0])
        return accuracy_score(y_test, y_pred), f1_score(y_test, y_pred)

    clf = LogisticRegression(max_iter=500)
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)
    return accuracy_score(y_test, y_pred), f1_score(y_test, y_pred)

# Evaluate every (train, test) modality pair for each backbone.  The nesting
# is cross_results[backbone][train_mod][test_mod] -> (accuracy, F1).
cross_results = {}
for backbone_name in ["cnx-tiny", "vit-large"]:
    cross_results[backbone_name] = {}
    for train_mod in modalities:
        cross_results[backbone_name][train_mod] = {}
        for test_mod in modalities:
            acc, f1 = evaluate_cross_modal(train_mod, test_mod, backbone_name)
            cross_results[backbone_name][train_mod][test_mod] = (acc, f1)

# Display one accuracy matrix and one F1 matrix per backbone.  The frames are
# built from a dict of dicts, so the outer keys (training modality) become the
# columns and the inner keys (test modality) become the rows.
for backbone_name in ["cnx-tiny", "vit-large"]:
    data_acc = pd.DataFrame(
        {train_mod: {test_mod: cross_results[backbone_name][train_mod][test_mod][0]
                     for test_mod in modalities}
         for train_mod in modalities}
    )
    data_f1 = pd.DataFrame(
        {train_mod: {test_mod: cross_results[backbone_name][train_mod][test_mod][1]
                     for test_mod in modalities}
         for train_mod in modalities}
    )
    # The leading blank line must be written as an escape: a raw line break
    # inside a single-quoted f-string is a syntax error.
    print(f"\nCross‑modality accuracy matrix for {backbone_name}:")
    display(data_acc)
    print(f"\nCross‑modality F1 matrix for {backbone_name}:")
    display(data_f1)



## Registration experiments

To approximate slice‑to‑slice alignment we shift images by multiples of
the 16×16 patch size and attempt to recover the translation from patch
correlations.  For each modality and backbone we compute the mean
absolute error (in patch units) across three shifts: (1, 1), (0, 2)
and (−2, −1).


In [None]:

def evaluate_registration(modality, backbone_name):
    """Return the registration error for one modality/backbone pair.

    Thin wrapper: forwards both arguments unchanged to
    ``estimate_shift_error`` and hands back its result as-is.
    """
    shift_error = estimate_shift_error(modality, backbone_name)
    return shift_error

# Registration benchmark: one error value per (modality, backbone) pair,
# stored as reg_results[modality][backbone].
reg_results = {}
for modality in modalities:
    reg_results[modality] = {}
    for backbone_name in ["cnx-tiny", "vit-large"]:
        err = evaluate_registration(modality, backbone_name)
        reg_results[modality][backbone_name] = err
        print(f"{modality}-{backbone_name}: mean shift error = {err:.3f} patches")

# Transposed so that modalities are the rows and backbones the columns.
df_reg = pd.DataFrame(reg_results).T

# Import the optional display helper here rather than relying on an earlier
# cell having done so, and tolerate its absence: the bare `df_reg` expression
# below renders the table either way.
try:
    import caas_jupyter_tools
except ImportError:
    pass
else:
    caas_jupyter_tools.display_dataframe_to_user(name="Registration errors", dataframe=df_reg)
df_reg
