# OpenProblems Denoising Benchmark

Reproducing and comparing denoising methods on:
- **PBMC** (1k Peripheral Blood Mononuclear Cells)
- **Tabula Muris Senis Lung**

Each raw metric is rescaled as `(no_denoising - score) / (no_denoising - perfect)`, so higher is better and:
- **No denoising** = 0.0 (baseline: just use training data)
- **Perfect denoising** = 1.0 (oracle: use test data)


In [None]:
import os
# Limit OpenBLAS and OpenMP to two threads each. These variables are set in
# their own cell, ahead of the cell that imports the numeric libraries.
os.environ.update(
    OPENBLAS_NUM_THREADS="2",
    OMP_NUM_THREADS="2",
)

In [None]:
import numpy as np
import pandas as pd
import scprep

from openproblems.tasks.denoising.datasets.pbmc import pbmc
from openproblems.tasks.denoising.datasets.tabula_muris_senis import tabula_muris_senis_lung_random
from openproblems.tasks.denoising.methods.magic import magic_approx_reverse_norm
from openproblems.tasks.denoising.methods.baseline import no_denoising, perfect_denoising
from openproblems.tasks.denoising.metrics.mse import mse
from openproblems.tasks.denoising.metrics.poisson import poisson

from denoise_ttt import magic_denoise


In [None]:
def ttt_denoise(adata, verbose=False):
    """Denoise ``adata`` with ``magic_denoise`` and store the result on it.

    The matrix in ``adata.obsm["train"]`` is converted with
    ``scprep.utils.toarray``, handed to ``magic_denoise``, and the output is
    written to ``adata.obsm["denoised"]``.

    Args:
        adata: Object exposing an ``obsm`` mapping that has a ``"train"``
            entry. It is modified in place.
        verbose: Forwarded unchanged to ``magic_denoise``.

    Returns:
        The same ``adata`` object, now carrying ``obsm["denoised"]``.
    """
    train_matrix = scprep.utils.toarray(adata.obsm["train"])
    adata.obsm["denoised"] = magic_denoise(train_matrix, verbose=verbose)
    return adata

def normalize_score(score, no_denoise_score, perfect_score):
    """Rescale a raw metric relative to the two baseline scores.

    The result is 0.0 when ``score`` equals ``no_denoise_score`` and 1.0 when
    it equals ``perfect_score``. The two baseline scores must differ,
    otherwise the division below is a division by zero.

    Args:
        score: Raw metric value of the method being evaluated.
        no_denoise_score: Raw metric value of the no-denoising baseline.
        perfect_score: Raw metric value of the perfect-denoising baseline.

    Returns:
        ``(no_denoise_score - score) / (no_denoise_score - perfect_score)``.
    """
    gain_over_baseline = no_denoise_score - score
    baseline_span = no_denoise_score - perfect_score
    return gain_over_baseline / baseline_span

# Reference MAGIC results that the MAGIC cells print next to the measured
# values. Per dataset: raw MSE / Poisson losses, their normalized
# counterparts, and the mean of the two normalized scores.
# NOTE(review): the three normalized figures are identical for both datasets
# and the origin of these numbers is not recorded here -- confirm them.
EXPECTED_MAGIC = dict(
    pbmc=dict(
        mse_raw=0.1888,
        poisson_raw=0.0495,
        mse_norm=0.30,
        poisson_norm=0.98,
        mean_score=0.64,
    ),
    tabula=dict(
        mse_raw=0.1841,
        poisson_raw=0.0297,
        mse_norm=0.30,
        poisson_norm=0.98,
        mean_score=0.64,
    ),
)
print("ok")

---
## 1. PBMC Dataset


In [None]:
# Load the PBMC dataset with test=False and report its dimensions.
# NOTE(review): test=False presumably selects the full dataset rather than a
# reduced test subset -- confirm against the loader.
adata_pbmc = pbmc(test=False)
pbmc_dims = adata_pbmc.shape
print(f"PBMC: {pbmc_dims[0]} cells × {pbmc_dims[1]} genes")


### 1.1 Baselines (No Denoising & Perfect Denoising)


In [None]:
# Run both reference methods; each receives its own copy of the PBMC data
# rather than the loaded object itself.
pbmc_no_denoise = no_denoising(adata_pbmc.copy())
pbmc_perfect = perfect_denoising(adata_pbmc.copy())

# Raw metrics of the two references. Later cells pass these to
# normalize_score as the 0.0 and 1.0 anchors.
mse_pbmc_none, mse_pbmc_perfect = mse(pbmc_no_denoise), mse(pbmc_perfect)
poisson_pbmc_none, poisson_pbmc_perfect = (
    poisson(pbmc_no_denoise),
    poisson(pbmc_perfect),
)

print(
    f"No denoising:      MSE={mse_pbmc_none:.6f}, Poisson={poisson_pbmc_none:.6f}",
    f"Perfect denoising: MSE={mse_pbmc_perfect:.6f}, Poisson={poisson_pbmc_perfect:.6f}",
    sep="\n",
)


### 1.2 MAGIC (approximate, reversed normalization)


In [None]:
# Run MAGIC on a copy of the PBMC data and take both raw metrics.
pbmc_magic = magic_approx_reverse_norm(adata_pbmc.copy())
mse_pbmc_magic, poisson_pbmc_magic = mse(pbmc_magic), poisson(pbmc_magic)

# Rescale each raw metric against the two baselines, then average the pair.
mse_norm_pbmc_magic = normalize_score(
    mse_pbmc_magic, mse_pbmc_none, mse_pbmc_perfect
)
poisson_norm_pbmc_magic = normalize_score(
    poisson_pbmc_magic, poisson_pbmc_none, poisson_pbmc_perfect
)
mean_score_pbmc_magic = (mse_norm_pbmc_magic + poisson_norm_pbmc_magic) / 2

# Print the measured values beside the stored reference numbers.
exp = EXPECTED_MAGIC['pbmc']
print("MAGIC (approx, rev norm) - PBMC:")
print(
    f"  MSE:     {mse_pbmc_magic:.4f} (expected: {exp['mse_raw']:.4f})"
    f" | norm: {mse_norm_pbmc_magic:.2f} (expected: {exp['mse_norm']:.2f})"
)
print(
    f"  Poisson: {poisson_pbmc_magic:.4f} (expected: {exp['poisson_raw']:.4f})"
    f" | norm: {poisson_norm_pbmc_magic:.2f} (expected: {exp['poisson_norm']:.2f})"
)
print(f"  Mean score: {mean_score_pbmc_magic:.2f} (expected: {exp['mean_score']:.2f})")


### 1.3 TTT Denoiser


In [None]:
# Run the TTT wrapper (verbose=True) on a copy of the PBMC data and take both
# raw metrics.
pbmc_ttt = ttt_denoise(adata_pbmc.copy(), verbose=True)
mse_pbmc_ttt, poisson_pbmc_ttt = mse(pbmc_ttt), poisson(pbmc_ttt)

# Rescale each raw metric against the two baselines, then average the pair.
mse_norm_pbmc_ttt = normalize_score(
    mse_pbmc_ttt, mse_pbmc_none, mse_pbmc_perfect
)
poisson_norm_pbmc_ttt = normalize_score(
    poisson_pbmc_ttt, poisson_pbmc_none, poisson_pbmc_perfect
)
mean_score_pbmc_ttt = (mse_norm_pbmc_ttt + poisson_norm_pbmc_ttt) / 2

print(
    "TTT (MSE mode) - PBMC:",
    f"  MSE:     {mse_pbmc_ttt:.4f} | norm: {mse_norm_pbmc_ttt:.2f}",
    f"  Poisson: {poisson_pbmc_ttt:.4f} | norm: {poisson_norm_pbmc_ttt:.2f}",
    f"  Mean score: {mean_score_pbmc_ttt:.2f}",
    sep="\n",
)


### 1.4 PBMC Results


In [None]:
# PBMC summary table, one row per method. The normalized and mean columns of
# the two baselines are fixed at 0.0 and 1.0, which is what normalize_score
# yields for them.
pbmc_columns = {
    "Method": [
        "No denoising",
        "Perfect denoising",
        "MAGIC (approx, rev norm)",
        "TTT (MSE)",
    ],
    "MSE (raw)": [
        mse_pbmc_none,
        mse_pbmc_perfect,
        mse_pbmc_magic,
        mse_pbmc_ttt,
    ],
    "Poisson (raw)": [
        poisson_pbmc_none,
        poisson_pbmc_perfect,
        poisson_pbmc_magic,
        poisson_pbmc_ttt,
    ],
    "MSE (norm)": [0.0, 1.0, mse_norm_pbmc_magic, mse_norm_pbmc_ttt],
    "Poisson (norm)": [0.0, 1.0, poisson_norm_pbmc_magic, poisson_norm_pbmc_ttt],
    "Mean Score": [0.0, 1.0, mean_score_pbmc_magic, mean_score_pbmc_ttt],
}
pbmc_results = pd.DataFrame(pbmc_columns)
# Last expression of the cell: the rounded table is what the notebook displays.
pbmc_results.round(5)


---
## 2. Tabula Muris Senis Lung Dataset


In [None]:
# Load the Tabula Muris Senis lung dataset with test=False and report its
# dimensions.
# NOTE(review): test=False presumably selects the full dataset rather than a
# reduced test subset -- confirm against the loader.
adata_tabula = tabula_muris_senis_lung_random(test=False)
tabula_dims = adata_tabula.shape
print(f"Tabula Muris: {tabula_dims[0]} cells × {tabula_dims[1]} genes")


### 2.1 Baselines


In [None]:
# Run both reference methods; each receives its own copy of the Tabula Muris
# data rather than the loaded object itself.
tabula_no_denoise = no_denoising(adata_tabula.copy())
tabula_perfect = perfect_denoising(adata_tabula.copy())

# Raw metrics of the two references. Later cells pass these to
# normalize_score as the 0.0 and 1.0 anchors.
mse_tabula_none, mse_tabula_perfect = mse(tabula_no_denoise), mse(tabula_perfect)
poisson_tabula_none, poisson_tabula_perfect = (
    poisson(tabula_no_denoise),
    poisson(tabula_perfect),
)

print(
    f"No denoising:      MSE={mse_tabula_none:.6f}, Poisson={poisson_tabula_none:.6f}",
    f"Perfect denoising: MSE={mse_tabula_perfect:.6f}, Poisson={poisson_tabula_perfect:.6f}",
    sep="\n",
)


### 2.2 MAGIC (approximate, reversed normalization)


In [None]:
# Run MAGIC on a copy of the Tabula Muris data and take both raw metrics.
tabula_magic = magic_approx_reverse_norm(adata_tabula.copy())
mse_tabula_magic, poisson_tabula_magic = mse(tabula_magic), poisson(tabula_magic)

# Rescale each raw metric against the two baselines, then average the pair.
mse_norm_tabula_magic = normalize_score(
    mse_tabula_magic, mse_tabula_none, mse_tabula_perfect
)
poisson_norm_tabula_magic = normalize_score(
    poisson_tabula_magic, poisson_tabula_none, poisson_tabula_perfect
)
mean_score_tabula_magic = (mse_norm_tabula_magic + poisson_norm_tabula_magic) / 2

# Print the measured values beside the stored reference numbers.
exp = EXPECTED_MAGIC['tabula']
print("MAGIC (approx, rev norm) - Tabula Muris:")
print(
    f"  MSE:     {mse_tabula_magic:.4f} (expected: {exp['mse_raw']:.4f})"
    f" | norm: {mse_norm_tabula_magic:.2f} (expected: {exp['mse_norm']:.2f})"
)
print(
    f"  Poisson: {poisson_tabula_magic:.4f} (expected: {exp['poisson_raw']:.4f})"
    f" | norm: {poisson_norm_tabula_magic:.2f} (expected: {exp['poisson_norm']:.2f})"
)
print(f"  Mean score: {mean_score_tabula_magic:.2f} (expected: {exp['mean_score']:.2f})")


### 2.3 TTT Denoiser


In [None]:
# Run the TTT wrapper (verbose=True) on a copy of the Tabula Muris data and
# take both raw metrics. Normalization is done in the next cell.
tabula_ttt = ttt_denoise(adata_tabula.copy(), verbose=True)
mse_tabula_ttt, poisson_tabula_ttt = mse(tabula_ttt), poisson(tabula_ttt)


In [None]:
# Rescale the raw TTT metrics against the Tabula Muris baselines, then
# average the pair.
mse_norm_tabula_ttt = normalize_score(
    mse_tabula_ttt, mse_tabula_none, mse_tabula_perfect
)
poisson_norm_tabula_ttt = normalize_score(
    poisson_tabula_ttt, poisson_tabula_none, poisson_tabula_perfect
)
mean_score_tabula_ttt = (mse_norm_tabula_ttt + poisson_norm_tabula_ttt) / 2

print(
    "TTT (MSE mode) - Tabula Muris:",
    f"  MSE:     {mse_tabula_ttt:.4f} | norm: {mse_norm_tabula_ttt:.2f}",
    f"  Poisson: {poisson_tabula_ttt:.4f} | norm: {poisson_norm_tabula_ttt:.2f}",
    f"  Mean score: {mean_score_tabula_ttt:.2f}",
    sep="\n",
)


### 2.4 Tabula Muris Results


In [None]:
# Tabula Muris summary table, one row per method. The normalized and mean
# columns of the two baselines are fixed at 0.0 and 1.0, which is what
# normalize_score yields for them.
tabula_columns = {
    "Method": [
        "No denoising",
        "Perfect denoising",
        "MAGIC (approx, rev norm)",
        "TTT (MSE)",
    ],
    "MSE (raw)": [
        mse_tabula_none,
        mse_tabula_perfect,
        mse_tabula_magic,
        mse_tabula_ttt,
    ],
    "Poisson (raw)": [
        poisson_tabula_none,
        poisson_tabula_perfect,
        poisson_tabula_magic,
        poisson_tabula_ttt,
    ],
    "MSE (norm)": [0.0, 1.0, mse_norm_tabula_magic, mse_norm_tabula_ttt],
    "Poisson (norm)": [0.0, 1.0, poisson_norm_tabula_magic, poisson_norm_tabula_ttt],
    "Mean Score": [0.0, 1.0, mean_score_tabula_magic, mean_score_tabula_ttt],
}
tabula_results = pd.DataFrame(tabula_columns)
# Last expression of the cell: the rounded table is what the notebook displays.
tabula_results.round(5)


---
## 3. Final Comparison


In [None]:
# Side-by-side table of the normalized scores of MAGIC and TTT on both
# datasets (the baselines are omitted here).
final_columns = {
    "Dataset": ["PBMC", "PBMC", "Tabula Muris", "Tabula Muris"],
    "Method": ["MAGIC", "TTT", "MAGIC", "TTT"],
    "MSE (norm)": [
        mse_norm_pbmc_magic,
        mse_norm_pbmc_ttt,
        mse_norm_tabula_magic,
        mse_norm_tabula_ttt,
    ],
    "Poisson (norm)": [
        poisson_norm_pbmc_magic,
        poisson_norm_pbmc_ttt,
        poisson_norm_tabula_magic,
        poisson_norm_tabula_ttt,
    ],
    "Mean Score": [
        mean_score_pbmc_magic,
        mean_score_pbmc_ttt,
        mean_score_tabula_magic,
        mean_score_tabula_ttt,
    ],
}
final_results = pd.DataFrame(final_columns)

print(
    "=" * 70,
    "FINAL COMPARISON (higher score = better, 0=no denoising, 1=perfect)",
    "=" * 70,
    sep="\n",
)
# Last expression of the cell: the rounded table is what the notebook displays.
final_results.round(5)


In [None]:
# Average each method's per-dataset mean score over the two datasets.
magic_mean = (mean_score_pbmc_magic + mean_score_tabula_magic) / 2
ttt_mean = (mean_score_pbmc_ttt + mean_score_tabula_ttt) / 2

print(
    "\n" + "=" * 70,
    "OVERALL MEAN SCORE (averaged across datasets)",
    "=" * 70,
    f"  MAGIC (approx, rev norm): {magic_mean:.4f}",
    f"  TTT (MSE mode):           {ttt_mean:.4f}",
    sep="\n",
)
