[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/openscilabs/isda/blob/main/dtlz.ipynb)

# MISDA Benchmark: DTLZ Suite

This notebook evaluates MISDA on standard Multi-Objective Optimization benchmarks:
*   **DTLZ2**: Spherical Pareto Front (Non-degenerate). Dimensionality should be preserved.
*   **DTLZ5**: Degenerate Pareto Front (Curve). Dimensionality should be reduced.
*   **DTLZ2 + Redundancy**: Evaluation of noise/redundancy removal capability.

In [None]:
# Install misda from repository
!pip install git+https://github.com/openscilabs/isda.git

import numpy as np
import pandas as pd
import misda
import math

print("Libraries loaded.")

In [None]:
# === MOP Generators (DTLZ) ===

def generate_dtlz2(N=1000, M=3, n_vars=12, seed=None):
    """
    Generates N uniformly random samples of DTLZ2 with M objectives.

    DTLZ2 Geometry: Spherical front. Every sample satisfies
    sum(F**2, axis=1) == (1 + g)**2, so points with g == 0 lie on the
    unit sphere.
    Intrinsic Dimension: M-1.

    Args:
        N: Number of samples to draw.
        M: Number of objectives.
        n_vars: Number of decision variables. The first M-1 are the
            position (angle) variables, the remaining n_vars - M + 1 are
            the distance variables entering g. Must be at least M-1.
        seed: Forwarded to np.random.default_rng. The default (None)
            keeps the original behaviour of drawing fresh entropy on
            every call; pass an int for reproducible samples.

    Returns:
        (F, X): F is the (N, M) array of objective values, X the
        (N, n_vars) array of decision variables sampled from [0, 1).
    """
    rng = np.random.default_rng(seed)

    # Generate X
    X = rng.uniform(0.0, 1.0, size=(N, n_vars))

    # Distance variables -> DTLZ2 g(x)
    g = np.sum((X[:, (M - 1):] - 0.5) ** 2, axis=1)

    # The trig terms do not depend on the objective index, so evaluate
    # them once instead of once per objective.
    angles = X[:, :M - 1] * math.pi / 2.0
    cos_a = np.cos(angles)
    sin_a = np.sin(angles)

    F = np.empty((N, M))
    for i in range(M):
        # (1.0 + g) allocates a new array, so the in-place products below
        # never modify g.
        f = 1.0 + g
        for j in range(M - 1 - i):
            f *= cos_a[:, j]
        if i > 0:
            f *= sin_a[:, M - 1 - i]
        F[:, i] = f

    return F, X

def generate_dtlz5(N=1000, M=3, n_vars=12, seed=None):
    """
    Generates N uniformly random samples of DTLZ5 (Degenerate curve).

    DTLZ5 Geometry: Curve on the sphere. Only the first angle is driven
    directly by X[:, 0]; the remaining angles are
    pi / (4 * (1 + g)) * (1 + 2 * g * X[:, i]), which equals pi/4 whenever
    g == 0. At g == 0 the samples therefore collapse onto a curve
    parameterised by X[:, 0] alone. As in DTLZ2, every sample satisfies
    sum(F**2, axis=1) == (1 + g)**2.

    Args:
        N: Number of samples to draw.
        M: Number of objectives (the code requires M >= 2).
        n_vars: Number of decision variables; the last n_vars - M + 1 are
            the distance variables entering g. Must be at least M-1.
        seed: Forwarded to np.random.default_rng. The default (None)
            keeps the original behaviour of drawing fresh entropy on
            every call; pass an int for reproducible samples.

    Returns:
        (F, X): F is the (N, M) array of objective values, X the
        (N, n_vars) array of decision variables sampled from [0, 1).
    """
    rng = np.random.default_rng(seed)
    X = rng.uniform(0.0, 1.0, size=(N, n_vars))

    # Distance variables -> g(x), same definition as in DTLZ2
    g = np.sum((X[:, (M - 1):] - 0.5) ** 2, axis=1)

    theta = np.zeros((N, M - 1))
    theta[:, 0] = X[:, 0] * math.pi / 2.0

    # Common factor of every remapped angle; independent of i.
    scale = math.pi / (4.0 * (1.0 + g))
    for i in range(1, M - 1):
        theta[:, i] = scale * (1.0 + 2.0 * g * X[:, i])

    F = np.zeros((N, M))
    for i in range(M):
        # (1.0 + g) allocates a new array, so the in-place products below
        # never modify g.
        f = 1.0 + g
        for j in range(M - 1 - i):
            f *= np.cos(theta[:, j])
        if i > 0:
            f *= np.sin(theta[:, M - 1 - i])
        F[:, i] = f

    return F, X

In [None]:
# === Validation Utilities ===

# Column order of the summary table returned by
# evaluate_reduced_model_fidelity.
_SUMMARY_COLUMNS = (
    "Case", "Regime", "a_min", "a_max", "Exp",
    "Fnd", "Fidel", "SES", "Comp", "Homog", "Diag", "Stat",
)


def _classify_status(fidelity, expected_dim, mis_size, regime_name, homog_ratio):
    """Map the metrics of one case to a status label (OK / Noise / Bad / WARN)."""
    if fidelity >= 0.9:
        # If F_real is high, it's good
        status = "OK"
    elif expected_dim and mis_size == expected_dim:
        # Dimensionality matches exactly (even if fidelity is low, e.g. noise)
        status = "OK"
    elif str(regime_name) == "SIGNAL_BELOW_NOISE":
        status = "Noise"
    else:
        status = "Bad"

    # Quality check override: low homogeneity wins over every other label.
    if homog_ratio < 0.6:
        status = "WARN"
    return status


def evaluate_reduced_model_fidelity(results_dict):
    """
    Evaluates reconstruction fidelity and summarizes MISDA performance.
    Adapted from benchmark.ipynb.

    Args:
        results_dict: Mapping of case name -> dict with the keys
            "result_obj" (an object exposing best_mis, ses_results, regime,
            alpha_min, alpha_max and, for the fallback path, Y; presumably
            the return value of misda.analyze) and, optionally, "truth"
            (a dict that may hold "intrinsic_dim_expected").

    Returns:
        pandas.DataFrame with one row per case and the columns listed in
        _SUMMARY_COLUMNS, in that order. An empty results_dict yields an
        empty frame that still carries those columns.
    """
    results_summary = []
    for name, data in results_dict.items():
        result_obj = data.get("result_obj")
        truth = data.get("truth", {})

        mis = result_obj.best_mis
        mis_indices = mis["mis_indices"] if mis else []
        # Use len() rather than truthiness: an index *array* with more than
        # one element raises ValueError when evaluated as a boolean.
        mis_size = len(mis_indices)

        # Calculate fidelity (F_real)
        if mis_size == 0:
            fidelity = 0.0
            ses = 0.0
        elif result_obj.ses_results:
            # Use stored SES results if available
            fidelity = result_obj.ses_results["F_real"]
            ses = result_obj.ses_results.get("ses", 0.0)
        else:
            # No stored SES results: recalculate from the data
            ses_out = misda.calculate_ses(
                result_obj.Y, mis_indices, n_perm=1, return_details=True
            )
            fidelity = ses_out["F_real"]
            ses = ses_out.get("ses", 0.0)

        expected_dim = truth.get("intrinsic_dim_expected", None)

        # Metrics from MISDA result; the getattr defaults cover result
        # objects that do not define these optional attributes.
        regime_name = result_obj.regime.name if result_obj.regime else "N/A"
        compactness = getattr(result_obj, "min_compactness", 1.0)
        homog_ratio = getattr(result_obj, "homogeneity_ratio", 1.0)
        diagnosis = getattr(result_obj, "diagnosis", "N/A")

        results_summary.append({
            "Case": name,
            "a_min": f"{result_obj.alpha_min:.2e}",
            "a_max": f"{result_obj.alpha_max:.2e}",
            "Regime": regime_name,
            "Exp": expected_dim,
            "Fnd": mis_size,
            "Fidel": f"{fidelity:.4f}",
            "SES": f"{ses:.4f}",
            "Comp": f"{compactness:.4f}",
            "Homog": f"{homog_ratio:.4f}",
            "Diag": diagnosis,
            "Stat": _classify_status(
                fidelity, expected_dim, mis_size, regime_name, homog_ratio
            ),
        })

    # Passing `columns` fixes the column order and keeps the header even
    # when there are no rows.
    return pd.DataFrame(results_summary, columns=list(_SUMMARY_COLUMNS))

# Notebook-wide registry: case name -> {"result_obj": ..., "truth": {...}}.
# Each benchmark cell adds one entry; evaluate_reduced_model_fidelity reads it.
results = {}

## 1. DTLZ2 (Irreducible)
M=3 objectives. The Pareto front is a 2-dimensional manifold (a spherical patch), but all 3 objectives conflict with one another, so all 3 are required to describe the front.
Expectation: MISDA should **retain all 3 objectives** (No reduction).

In [None]:
# Sample DTLZ2 objective vectors; the decision variables (second return
# value) are not needed for the analysis.
Y, _ = generate_dtlz2(N=500, M=3)
df = pd.DataFrame(Y, columns=['f1', 'f2', 'f3'])

name = "DTLZ2 (M=3)"
# Run MISDA on the objective table, requesting the SES computation.
res = misda.analyze(df, caution=0.5, run_ses=True, name=name)
print(res.summary())
res.plot()

# Register the run for the final summary. "intrinsic_dim_expected" is the
# number of objectives MISDA is expected to keep: the summary compares it
# against the size of the selected subset.
results[name] = {
    "result_obj": res,
    "truth": {"intrinsic_dim_expected": 3}
}

## 2. DTLZ5 (Degenerate)
M=3 objectives, but defined on a curve (1D manifold). 
Expectation: MISDA should detect the strong dependence between the objectives and reduce the set (2 objectives expected), although DTLZ5 is tricky because the relationships are nonlinear.

In [None]:
# Sample DTLZ5 objective vectors; the decision variables (second return
# value) are not needed for the analysis.
Y, _ = generate_dtlz5(N=500, M=3)
df = pd.DataFrame(Y, columns=['f1', 'f2', 'f3'])

name = "DTLZ5 (Degenerate)"
# Run MISDA on the objective table, requesting the SES computation.
res = misda.analyze(df, caution=0.5, run_ses=True, name=name)
print(res.summary())
res.plot()

# Register the run for the final summary. "intrinsic_dim_expected" is the
# number of objectives MISDA is expected to keep: the summary compares it
# against the size of the selected subset.
results[name] = {
    "result_obj": res,
    "truth": {"intrinsic_dim_expected": 2}
}

## 3. DTLZ2 with Linear Redundancy
We take DTLZ2 (3 objectives) and add 3 noisy copies of each objective (additive Gaussian noise, σ = 0.05), for a total of 12 objectives.
Expectation: MISDA should reduce it back to **3 objectives**.

In [None]:
# Base DTLZ2 sample. Note that only the noise generator below is seeded;
# the base sample itself is drawn without a fixed seed.
Y_base, _ = generate_dtlz2(N=500, M=3)
rng = np.random.default_rng(42)

# Build the 12-column table: each original objective followed by its
# three noisy copies.
all_feats = []
names = []
for i in range(3):
    orig = Y_base[:, i]
    all_feats.append(orig)
    names.append(f"f{i+1}")
    # Add 3 copies, each with independent Gaussian noise (std 0.05)
    for k in range(3):
        # Named noisy_copy rather than copy so the stdlib module name is
        # not shadowed in the notebook namespace.
        noisy_copy = orig + 0.05 * rng.normal(size=len(orig))
        all_feats.append(noisy_copy)
        names.append(f"f{i+1}_copy{k+1}")

Y_redundant = np.column_stack(all_feats)
df_red = pd.DataFrame(Y_redundant, columns=names)

name = "DTLZ2 + Redundancy"
res = misda.analyze(df_red, caution=0.5, run_ses=True, name=name)
print(res.summary())
# best_mis is treated as possibly empty by evaluate_reduced_model_fidelity,
# so guard the lookup here too instead of failing on a missing selection.
print("Selected:", res.best_mis['mis_labels'] if res.best_mis else [])
res.plot()

# Register the run for the final summary; 3 is the number of objectives
# MISDA is expected to keep after removing the noisy copies.
results[name] = {
    "result_obj": res,
    "truth": {"intrinsic_dim_expected": 3}
}

## 4. Final Summary

In [None]:
print("\n--- MISDA Performance Summary for DTLZ ---")
# Build one summary row per case registered in `results`.
df_summary = evaluate_reduced_model_fidelity(results)
# Output as plain text table (row index omitted)
print(df_summary.to_string(index=False))
