In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import mplcursors


import json
import re

def norm_min_max(df: pd.DataFrame, col: str) -> pd.Series:
    """Min-max scale one column of ``df`` onto ``[0, 1]``.

    Args:
        df: Frame that holds the column.
        col: Name of the numeric column to scale.

    Returns:
        A float Series aligned with ``df``: the column minimum maps to 0 and
        the maximum to 1. A column with no spread (max == min) maps to all
        zeros rather than the NaNs a 0/0 division would produce.
    """
    values = df[col]
    low = values.min()
    span = values.max() - low
    if span == 0:
        # Every entry equals the minimum, so the shifted column is already
        # the scaled result; dividing by the zero span would give NaN.
        return (values - low).astype(float)
    return (values - low) / span


def load_task_and_preprocess(results_file, task_name):
    """Load one task's rows from a JSON-lines results file into a tidy frame.

    Args:
        results_file: Path to a file with one JSON object per line. Each
            object is expected to provide ``model``, ``task_name``,
            ``acc_values`` (a mapping of metric name to value) and
            ``energy_consumed``.
        task_name: Task to keep; must be a key of ``acc_keys`` below whenever
            the file contains rows for it.

    Returns:
        A DataFrame with columns ``model`` (the part after the first "/" of
        the model id, or the whole id when it has no "/"), ``params`` (first
        size token such as ``7B`` or ``350M`` found in the upper-cased model
        name), ``task_name``, ``acc_values`` (the single metric selected for
        the task) and ``energy_consumed``, with a fresh 0..n-1 index.

    Raises:
        IndexError: If a model name contains no size token.
        KeyError: If ``task_name`` has rows but no entry in ``acc_keys``.
    """
    acc_keys = {"livecodebench": "acc", "code2text_python": "smoothed_bleu_4,create_output"}

    # Blank lines (for example a trailing newline) are skipped instead of
    # being handed to json.loads, which would raise on them.
    with open(results_file, "r", encoding="utf-8") as f:
        records = [json.loads(line) for line in f if line.strip()]
    df = pd.DataFrame(records)

    # Copy the filtered rows so the column assignments below write to an
    # independent frame rather than to a slice of the full table.
    df = df[df["task_name"] == task_name].copy()

    def short_name(model_id):
        # "org/name" -> "name"; ids without a slash are kept unchanged.
        parts = model_id.split("/")
        return parts[1] if len(parts) > 1 else parts[0]

    df["model"] = df["model"].apply(short_name)
    df["params"] = df["model"].apply(lambda x: re.findall(r"(\d+(?:\.\d+)?[bBmM])", x.upper())[0])
    df["acc_values"] = df["acc_values"].apply(lambda x: x[acc_keys[task_name]])
    df = df.reset_index(drop=True)
    return df[["model", "params", "task_name", "acc_values", "energy_consumed"]]

# Both task frames come from the same results file; only the task filter differs.
df_lcb, df_c2t = (
    load_task_and_preprocess("../../lm_eval/results/final_results.jsonl", task)
    for task in ("livecodebench", "code2text_python")
)

In [None]:
# Each frame holds a single task, so the task_name column carries no information.
# errors="ignore" keeps the cell re-runnable: a second execution finds the
# column already gone and would otherwise raise KeyError.
df_lcb = df_lcb.drop(columns=["task_name"], errors="ignore")
df_c2t = df_c2t.drop(columns=["task_name"], errors="ignore")

In [None]:
def normalize(df):
    """Add the scaled objective columns to ``df`` in place.

    Writes three columns: ``energy_norm`` (min-max scaled
    ``energy_consumed``), ``ene_eff`` (``1 - energy_norm``, so lower energy
    scores higher) and ``perf`` (min-max scaled ``acc_values``).
    Returns ``None``; the frame passed in is modified.
    """
    energy_scaled = norm_min_max(df, "energy_consumed")
    df["energy_norm"] = energy_scaled
    df["ene_eff"] = 1 - energy_scaled
    df["perf"] = norm_min_max(df, "acc_values")

In [None]:
normalize(df_lcb)
normalize(df_c2t)

In [None]:
import numpy as np
from matplotlib.colors import ListedColormap, BoundaryNorm, LinearSegmentedColormap


# Presumably the ideal (perf, ene_eff) corner that distances are measured from;
# calculate_euc_formula hard-codes the same 1s and nothing in the visible
# notebook reads this name — TODO confirm before removing.
center = (1, 1)

def calculate_euc_formula(df):
    """Return each row's Euclidean distance from the point perf = 1, ene_eff = 1.

    Reads the ``perf`` and ``ene_eff`` columns of ``df`` and returns a Series
    on the same index.
    """
    perf_gap = 1 - df["perf"]
    efficiency_gap = 1 - df["ene_eff"]
    squared_distance = perf_gap ** 2 + efficiency_gap ** 2
    return squared_distance ** 0.5
    
def fill_distance_ranking(df, distance):
    """Turn distances from the ideal point into CIRC ranks 5 (best) .. 1 (worst).

    The range ``[0, sqrt(2)]`` is split into five equally wide rings. A row
    gets rank 5 when its distance is below the first ring's outer radius,
    rank 4 when below the second, and so on.

    The outermost ring takes every remaining row with a defined distance. With
    a strict ``<`` on the last radius, a row at distance exactly ``sqrt(2)``
    (perf = 0 and ene_eff = 0) matched no ring and kept the placeholder rank 0.

    Args:
        df: Frame whose index the ranks are reported on; it is not modified.
        distance: Per-row distances aligned with ``df`` (as returned by
            ``calculate_euc_formula``).

    Returns:
        An integer Series named ``CIRC_rank`` on ``df.index``. Rows whose
        distance is NaN keep rank 0.
    """
    ranks = pd.Series(0, index=df.index, name="CIRC_rank")
    # Outer radius of each ring, innermost first (the 0 edge is dropped).
    outer_radii = np.linspace(0, np.sqrt(2), 6)[1:]
    for rank, radius in zip(range(5, 0, -1), outer_radii):
        if rank == 1:
            # Last ring: catch-all for anything not ranked yet.
            inside = pd.notna(distance)
        else:
            inside = distance < radius
        # Rank 0 marks "not assigned yet", so inner rings are never overwritten.
        ranks[inside & (ranks == 0)] = rank
    return ranks

In [None]:
# Distance of every model from the perf = 1, ene_eff = 1 corner, one Series per task.
distance_lcb = calculate_euc_formula(df_lcb)
distance_c2t = calculate_euc_formula(df_c2t)

# Bucket those distances into CIRC ranks with fill_distance_ranking.
circ_lcb_ranks = fill_distance_ranking(df_lcb, distance_lcb)
circ_c2t_ranks = fill_distance_ranking(df_c2t, distance_c2t)

In [None]:
# Store the CIRC ranks on the task frames; later cells look the column up by
# name ("CIRC_rank") for the Kruskal-Wallis and leave-one-out checks.
df_lcb["CIRC_rank"] = circ_lcb_ranks
df_c2t["CIRC_rank"] = circ_c2t_ranks

In [None]:
from sklearn.covariance import MinCovDet
from scipy.stats import chi2

def remove_outliers(df, percentile=0.95, random_state=0):
    """Return the (ene_eff, perf) points that a robust covariance fit treats as inliers.

    A Minimum Covariance Determinant estimator is fitted to the two columns,
    and a point is kept unless its squared Mahalanobis distance exceeds the
    chi-square quantile for two degrees of freedom.

    Args:
        df: Frame with ``ene_eff`` and ``perf`` columns.
        percentile: Probability passed to ``chi2.ppf`` (0.95 keeps points
            inside the 95% quantile). Despite the name it is a fraction in
            (0, 1), not a value in 0..100.
        random_state: Seed forwarded to ``MinCovDet``. The estimator draws
            random subsets, so without a seed the inlier set can change from
            run to run; pass ``None`` to get unseeded behaviour.

    Returns:
        A 2-column NumPy array (``ene_eff``, ``perf``) with the retained rows.
    """
    X = df[["ene_eff", "perf"]].to_numpy()
    mcd = MinCovDet(random_state=random_state).fit(X)
    D2 = mcd.mahalanobis(X)
    cut = chi2.ppf(percentile, df=2)

    # Written as "not an outlier" so a NaN distance is kept, as before.
    inliers = ~(D2 > cut)
    return X[inliers]

def create_all_possible_derivatives(ene_eff, acc):
    """Collect the slope of every pair of points that trade one objective for the other.

    For each unordered pair (in positional order), the pair counts only when
    one point is strictly higher in ``ene_eff`` and strictly lower in ``acc``
    than the other. Its slope is stored as a non-positive number,
    ``-abs(delta_acc / delta_ene_eff)``.

    Args:
        ene_eff: Series of x values.
        acc: Series of y values, positionally matched with ``ene_eff``.

    Returns:
        A list of slopes, ordered by first index and then second index.
    """
    xs = ene_eff.to_numpy()
    ys = acc.to_numpy()
    count = len(ene_eff)
    slopes = []
    for first in range(count):
        for second in range(first + 1, count):
            dx = xs[second] - xs[first]
            dy = ys[second] - ys[first]
            # Opposite signs: one coordinate rose while the other fell.
            trades_off = (dx < 0 and dy > 0) or (dx > 0 and dy < 0)
            if trades_off:
                slopes.append(-abs(dy / dx))
    return slopes

import numpy as np
from sklearn.covariance import MinCovDet
from scipy.stats import chi2

def remove_derivative_outliers(all_possible_derivates, random_state=0):
    """Drop slope values that a robust 1-D covariance fit flags as outliers.

    Args:
        all_possible_derivates: Sequence of slopes, e.g. the list returned by
            ``create_all_possible_derivatives``.
        random_state: Seed forwarded to ``MinCovDet`` so repeated runs keep
            the same inliers; pass ``None`` for unseeded behaviour.

    Returns:
        A 1-D NumPy array with the slopes whose squared robust Mahalanobis
        distance is at most the 95% chi-square quantile (1 degree of freedom).

    Raises:
        ValueError: If no slopes are supplied, i.e. the data contain no pair
            of points that trade one objective against the other.
    """
    deriv = np.array(all_possible_derivates)
    if deriv.size == 0:
        # Fail here with a message that names the cause instead of deep
        # inside the estimator.
        raise ValueError(
            "remove_derivative_outliers received no slopes: the data contain no "
            "pair of points that trade energy efficiency against performance"
        )
    column = deriv.reshape(-1, 1)
    mcd = MinCovDet(random_state=random_state).fit(column)
    # squared Mahalanobis distances under robust location/covariance:
    d2 = mcd.mahalanobis(column)

    thr = chi2.ppf(0.95, df=1)
    return deriv[d2 <= thr]


import numpy as np
import cvxpy as cp
import matplotlib.pyplot as plt

def approximate_regression_function(df, X_clean, deriv_inliers_all, quantile=75, degree=5, intercept=1e-2):
    """Fit a slope-bounded polynomial perf ~ f(ene_eff) by constrained least squares.

    The polynomial ``f(x) = b0 + b1*x + ... + bd*x**d`` minimises the squared
    error on the inlier points under two constraints:

    * ``f'(z) <= s`` at 50 evenly spaced ``z`` in ``[0, 1]``, where ``s`` is
      the ``quantile``-th percentile of ``deriv_inliers_all``. The bound is
      that percentile, not zero; the curve is non-increasing only when the
      percentile is itself non-positive.
    * ``f(1) = sum(b) >= intercept``.

    Args:
        df: Not read; kept so existing calls keep working.
        X_clean: 2-column array, column 0 = ene_eff, column 1 = perf.
        deriv_inliers_all: Slopes from which the derivative bound is taken.
        quantile: Percentile (0..100) of the slopes used as the upper bound
            on ``f'``.
        degree: Polynomial degree ``d``.
        intercept: Lower bound on ``f(1)``.

    Returns:
        ``(b, d)``: the solved ``cvxpy.Variable`` of length ``d + 1``
        (coefficients in increasing power order, read via ``b.value``) and
        the degree.

    Raises:
        RuntimeError: If the solver finishes without producing coefficients
            (for example an infeasible problem). Without this check the
            failure would only surface later as an error on ``b.value``.
    """
    x_raw, y = X_clean[:, 0], X_clean[:, 1]

    d = degree
    b = cp.Variable(d + 1)

    # Least-squares objective on the Vandermonde design matrix [1, x, ..., x**d].
    x_transformed = np.vander(x_raw, N=d + 1, increasing=True)
    objective = cp.Minimize(cp.sum_squares(x_transformed @ b - y))

    # Row j of D holds d/dz [1, z, ..., z**d] at z_j, so D @ b is f'(z_j).
    # Column 0 stays zero because the constant term has no derivative.
    z = np.linspace(0, 1, 50)
    D = np.zeros((len(z), d + 1))
    for k in range(1, d + 1):
        D[:, k] = k * z ** (k - 1)

    max_slope = np.percentile(deriv_inliers_all, quantile)
    constraints = [D @ b <= max_slope, cp.sum(b) >= intercept]
    prob = cp.Problem(objective, constraints)
    prob.solve()
    if b.value is None:
        raise RuntimeError(
            f"Constrained polynomial fit produced no solution (solver status: {prob.status})"
        )
    return b, d

def regression_rank(df, b, d):
    """Rank rows 1..5 by the ratio of observed to predicted performance.

    The fitted polynomial is evaluated at each row's ``ene_eff``; the score is
    ``perf / predicted``. The score range is cut into five equal intervals,
    and a row's rank is the interval it falls in: the minimum score gets 1
    and the maximum gets 5.

    Args:
        df: Frame with ``ene_eff`` and ``perf`` columns.
        b: Object exposing the polynomial coefficients as ``b.value``
            (increasing power order, length ``d + 1``).
        d: Polynomial degree.

    Returns:
        An integer Series on ``df.index`` with values in 1..5. When every row
        has the same score there is only one interval and all rows get rank 1.
    """
    predicted_perf = np.vander(df["ene_eff"], N=d + 1, increasing=True) @ b.value
    score = df["perf"] / predicted_perf
    min_score, max_score = score.min(), score.max()
    span = max_score - min_score
    if span == 0:
        # No spread: the interval width would be 0 and the division below
        # would yield NaN, which cannot be cast to int.
        return pd.Series(1, index=score.index, name=score.name, dtype=int)
    oter_rank = np.ceil((score - min_score) / (span / 5))
    # The minimum lands on 0 and is lifted to 1; the upper clip guards against
    # floating-point round-off pushing the maximum just above 5.
    oter_rank = oter_rank.clip(lower=1, upper=5)
    return oter_rank.astype(int)



In [None]:
# OTER pipeline, run once per task frame.
# 1) (ene_eff, perf) points that survive the robust-covariance outlier filter.
X_clean_lcb = remove_outliers(df_lcb)
X_clean_c2t = remove_outliers(df_c2t)
# 2) Slopes of all point pairs that trade efficiency against performance.
all_possible_derivates_lcb = create_all_possible_derivatives(df_lcb["ene_eff"], df_lcb["perf"])
all_possible_derivates_c2t = create_all_possible_derivatives(df_c2t["ene_eff"], df_c2t["perf"])
# 3) The same slopes with outlying values removed.
deriv_inliers_all_lcb = remove_derivative_outliers(all_possible_derivates_lcb)
deriv_inliers_all_c2t = remove_derivative_outliers(all_possible_derivates_c2t)
# 4) Constrained polynomial fit with the function's default hyperparameters.
coefficients_lcb, degree_lcb = approximate_regression_function(df_lcb, X_clean_lcb, deriv_inliers_all_lcb)
coefficients_c2t, degree_c2t = approximate_regression_function(df_c2t, X_clean_c2t, deriv_inliers_all_c2t)
# 5) Ranks from the ratio of observed to predicted performance.
oter_rank_lcb = regression_rank(df_lcb, coefficients_lcb, degree_lcb)
oter_rank_c2t = regression_rank(df_c2t, coefficients_c2t, degree_c2t)

In [None]:
# Store the OTER ranks on the task frames; later cells use "OTER_rank" as the
# baseline column for the leave-one-out and sensitivity comparisons.
df_lcb["OTER_rank"] = oter_rank_lcb
df_c2t["OTER_rank"] = oter_rank_c2t

## Parameter count does not correlate with rank

In [None]:
def categorize(x):
    """Map a numeric size to a bucket: 0 below 3, 1 for 3 up to (not including) 7, else 2."""
    if x < 3:
        return 0
    return 1 if 3 <= x < 7 else 2

from scipy.stats import kruskal
def _params_in_billions(label):
    """Convert a size label such as "7B" or "350M" to billions of parameters."""
    if "B" in label:
        return float(label.replace("B", ""))
    # 1000 M = 1 B. Dividing by 100 would turn 500M into 5.0 and put it in
    # the 3-7B bucket.
    return float(label.replace("M", "")) / 1000


# Kruskal-Wallis test: does the rank distribution differ between size buckets?
for i, df in {"lcb": df_lcb, "cxg": df_c2t}.items():
    for method in ["CIRC", "OTER"]:
        new_df = df.copy()
        # After this line 'params' holds the size bucket (0, 1 or 2) from categorize.
        new_df["params"] = new_df["params"].apply(_params_in_billions).apply(categorize)

        # One sample of ranks per size bucket present in the data.
        groups = [new_df[new_df['params'] == g][f'{method}_rank'] for g in new_df['params'].unique()]
        stat, p = kruskal(*groups)
        print(f"{i}-{method} Kruskal-Wallis H-statistic: {stat:.4f}, p-value: {p:.4f}")


In [None]:
# NOTE(review): `new_df` is the loop variable left over from the Kruskal-Wallis
# cell above, i.e. the bucketed copy of the last frame iterated there (df_c2t).
# This cell fails on a fresh kernel unless that cell has been run first.
new_df["params"].value_counts()

# ---------------------------

# Leave One Out

In [None]:
import numpy as np
from scipy.stats import kendalltau

# Leave-one-out stability of the CIRC ranks: drop each model in turn,
# re-normalise and re-rank the rest, then compare with the full-data ranks.
# NOTE: the per-fold columns ("_leftcirc_<i>") are written onto df_lcb / df_c2t
# themselves, so those frames grow by one column per model.
for task, df in {"lcb": df_lcb, "cxg": df_c2t}.items():
    base = "CIRC_rank"
    for i in range(len(df)):
        new_df = df.drop(df.index[i]).copy()     # fold frame without row i; copy so df is untouched
        normalize(new_df)                         # rescale perf / ene_eff on the remaining rows only
        dist = calculate_euc_formula(new_df)
        circ_ranks = fill_distance_ranking(new_df, dist)  # index = new_df.index

        # Held-out row stays NaN; every other row gets its rank from this fold.
        col = f"_leftcirc_{i}"
        df[col] = np.nan
        df.loc[circ_ranks.index, col] = circ_ranks

    loo_cols = [c for c in df.columns if c.startswith("_leftcirc_")]
    # mean abs rank drift per fold (ignore the held-out NaN)
    drifts = {c: (df[c] - df[base]).abs().mean(skipna=True) for c in loo_cols}

    # optional: Kendall-tau vs base per fold
    taus = {}
    base_order = df[base].dropna()  # not read again in this cell
    for c in loo_cols:
        both = df[[base, c]].dropna()
        taus[c] = kendalltau(both[base], both[c]).correlation
    print("-" * 30)
    print(task)

    print("Mean LOO mean-rank-drift:", np.mean(list(drifts.values())))
    print("Worst LOO mean-rank-drift:", max(drifts.items(), key=lambda x: x[1]))
    print("Mean Kendall-τ across LOO:", np.nanmean(list(taus.values())))


In [None]:
import numpy as np
from scipy.stats import kendalltau

# Leave-one-out stability of the OTER ranks: drop each model in turn, rerun
# the whole OTER pipeline on the rest, then compare with the full-data ranks.
# NOTE: the per-fold columns ("left_<i>") are written onto df_lcb / df_c2t
# themselves, so those frames grow by one column per model.
for task, df in {"lcb": df_lcb, "cxg": df_c2t}.items():
    base = "OTER_rank"
    for i in range(len(df)):
        new_df = df.drop(df.index[i]).copy()     # fold frame without row i; copy so df is untouched
        normalize(new_df)                         # rescale perf / ene_eff on the remaining rows only
        X_clean = remove_outliers(new_df)
        all_possible_derivates = create_all_possible_derivatives(new_df["ene_eff"], new_df["perf"])
        deriv_inliers_all = remove_derivative_outliers(all_possible_derivates)
        coefficients, degree = approximate_regression_function(new_df, X_clean, deriv_inliers_all)
        oter_rank = regression_rank(new_df, coefficients, degree)

        # Held-out row stays NaN; every other row gets its rank from this fold.
        col = f"left_{i}"
        df[col] = np.nan
        df.loc[oter_rank.index, col] = oter_rank

    loo_cols = [c for c in df.columns if c.startswith("left_")]
    # mean abs rank drift per fold (ignore the held-out NaN)
    drifts = {c: (df[c] - df[base]).abs().mean(skipna=True) for c in loo_cols}

    # optional: Kendall-tau vs base per fold
    taus = {}
    base_order = df[base].dropna()  # not read again in this cell
    for c in loo_cols:
        both = df[[base, c]].dropna()
        taus[c] = kendalltau(both[base], both[c]).correlation

    print("-"*30)
    print(task)
    print("Mean LOO mean-rank-drift:", np.mean(list(drifts.values())))
    print("Worst LOO mean-rank-drift:", max(drifts.items(), key=lambda x: x[1]))
    print("Mean Kendall-τ across LOO:", np.nanmean(list(taus.values())))


## Further Tests

In [None]:
import numpy as np
import pandas as pd
from scipy.stats import kendalltau

# === Edit these once ===
DF = df_c2t          # or df_lcb (run the suite twice to cover both tasks)
X_COL = "perf"            # first perturbed column; noise_robustness clips it to [0, 1]
Y_COL = "ene_eff"            # second perturbed column; noise_robustness clips it to [0, 1]

# Optional: if your rating isn’t in DF yet, provide a callable:
def rate_fn_OTER(new_df):
    """Run the full OTER pipeline on ``new_df`` and return its ranks.

    Steps: outlier-filtered points, pairwise trade-off slopes, slope outlier
    removal, constrained polynomial fit with default hyperparameters, then
    ranking. ``new_df`` must already carry ``ene_eff`` and ``perf``.
    """
    inlier_points = remove_outliers(new_df)
    pair_slopes = create_all_possible_derivatives(new_df["ene_eff"], new_df["perf"])
    fitted, poly_degree = approximate_regression_function(
        new_df, inlier_points, remove_derivative_outliers(pair_slopes)
    )
    return regression_rank(new_df, fitted, poly_degree)

def rate_fn_CIRC(new_df):
    """Return the CIRC ranks of ``new_df``: distance to the ideal corner, then ring bucketing."""
    return fill_distance_ranking(new_df, calculate_euc_formula(new_df))

def get_base_rating(df, method="circ"):
    """Rate ``df`` with OTER when ``method == "oter"``, otherwise with CIRC.

    Returns the ratings as an integer Series on ``df.index``.
    """
    if method == "oter":
        ratings = rate_fn_OTER(df)
    else:
        ratings = rate_fn_CIRC(df)
    return pd.Series(ratings, index=df.index).astype(int)

# Baseline ratings for DF using get_base_rating's default method ("circ").
R_BASE = get_base_rating(DF)
# Rank transform of those ratings; tied ratings share their average rank.
RANK_BASE = R_BASE.rank(method="average", ascending=True)

In [None]:
def noise_robustness(df, eps=0.03, trials=20, seed=0):
    """Measure how much the ratings move when both objectives are jittered.

    Each trial adds independent uniform noise from ``[-eps, eps]`` to the
    ``X_COL`` and ``Y_COL`` columns (clipped back to ``[0, 1]``), re-rates the
    frame with ``get_base_rating`` and compares against the unperturbed
    ratings.

    Args:
        df: Frame to perturb; it is copied per trial and left unchanged.
        eps: Half-width of the uniform noise.
        trials: Number of perturbed copies to rate.
        seed: Seed for the NumPy generator, so runs are repeatable.

    Returns:
        ``(mean_change, worst_change)``: the mean over trials of the mean
        absolute rating change, and the largest single change seen.
    """
    rng = np.random.default_rng(seed)
    reference = get_base_rating(df)
    n_rows = len(df)
    mean_shifts = []
    largest_shift = 0
    for _ in range(trials):
        # Draw the X noise before the Y noise to keep the random stream order.
        noise_x = rng.uniform(-eps, eps, n_rows)
        noise_y = rng.uniform(-eps, eps, n_rows)
        perturbed = df.copy()
        perturbed[X_COL] = np.clip(df[X_COL].values + noise_x, 0, 1)
        perturbed[Y_COL] = np.clip(df[Y_COL].values + noise_y, 0, 1)
        shift = (get_base_rating(perturbed) - reference).abs()
        mean_shifts.append(shift.mean())
        largest_shift = max(largest_shift, shift.max())
    return float(np.mean(mean_shifts)), int(largest_shift)

# Keep the noise level in one place so the printed label cannot drift from the
# value actually used (the label previously said 0.05 while eps was 0.03).
noise_eps = 0.03
mean_abs, worst = noise_robustness(DF, eps=noise_eps, trials=20)
print(f"Mean |Δrating| @ ε={noise_eps}: {mean_abs:.3f}  |  Worst-case Δrating: {worst}")


## OTER with different hyperparameters

In [None]:
# Grid of OTER hyperparameters; every combination adds one rank column named
# "deg_<d>_mcd_<p>_ls_<q>_icpt_<c>" to each task frame.
degrees = [3,4,5,6,7]
MCD_percentile = [0.90, 0.95, 0.975]
LES_quantile = [65, 70, 75, 80]
intercept_at_one = [1e-1, 1e-2, 1e-3]
# Receives every column name once per frame, so each name appears twice.
perm_cols = []
for df in [df_lcb, df_c2t]:
    # These inputs depend only on the frame (and, for the inlier points, on the
    # MCD cut-off), so they are computed once here rather than in each of the
    # 180 inner iterations. Every configuration sharing an MCD cut-off is then
    # fitted on the same inlier set, which keeps the comparison between
    # configurations free of estimator randomness.
    all_possible_derivates = create_all_possible_derivatives(df["ene_eff"], df["perf"])
    deriv_inliers_all = remove_derivative_outliers(all_possible_derivates)
    clean_by_mcd = {mcd_p: remove_outliers(df, mcd_p) for mcd_p in MCD_percentile}
    for deg in degrees:
        for mcd_p in MCD_percentile:
            X_clean = clean_by_mcd[mcd_p]
            for lq in LES_quantile:
                for icpt in intercept_at_one:
                    coefficients, degree = approximate_regression_function(df, X_clean, deriv_inliers_all, lq, deg, icpt)
                    oter_rank = regression_rank(df, coefficients, deg)
                    col_name = f"deg_{deg}_mcd_{mcd_p}_ls_{lq}_icpt_{icpt}"
                    perm_cols.append(col_name)
                    df[col_name] = oter_rank
                    


In [None]:
# Display the LiveCodeBench frame; by this point it also carries the
# leave-one-out and hyperparameter-sweep columns added by earlier cells.
df_lcb

In [None]:
# Display the code2text frame; by this point it also carries the
# leave-one-out and hyperparameter-sweep columns added by earlier cells.
df_c2t

In [None]:
import pandas as pd
import numpy as np
from scipy.stats import spearmanr
 
class OTERSensitivityAnalysis:
    """Compare OTER ranks obtained under alternative hyperparameters with a baseline.

    Each task frame is expected to hold the baseline ranks in ``OTER_rank``
    and one column per tested configuration, named
    ``deg_<degree>_mcd_<mcd>_ls_<les>_icpt_<intercept>``.
    """

    def __init__(self, df_lcb, df_c2t, baseline_config=(5, 0.95, 75, 0.01)):
        """Store the two task frames and collect their configuration columns.

        Parameters:
        -----------
        df_lcb, df_c2t : pd.DataFrame
            Dataframes with OTER rank columns
        baseline_config : tuple
            (degree, mcd_percentile, les_quantile, intercept). Only its length
            is checked here; the baseline column is always ``OTER_rank``.
        """
        self.df_lcb = df_lcb
        self.df_c2t = df_c2t
        # Unpacking insists on exactly four entries; the values are not used.
        _deg, _mcd, _lq, _icpt = baseline_config
        self.baseline_col = "OTER_rank"
        # Every column produced by the hyperparameter sweep starts with 'deg_'.
        self.rank_cols_lcb = [name for name in df_lcb.columns if name.startswith('deg_')]
        self.rank_cols_c2t = [name for name in df_c2t.columns if name.startswith('deg_')]

    def _parse_config(self, col_name):
        """Extract config from column name: deg_5_mcd_0.95_ls_75_icpt_0.01"""
        tokens = col_name.split('_')
        # (key, token position, converter); values sit at the odd positions.
        layout = (
            ('degree', 1, int),
            ('mcd', 3, float),
            ('les', 5, int),
            ('intercept', 7, float),
        )
        return {key: convert(tokens[position]) for key, position, convert in layout}

    def compute_correlations(self, df, rank_cols, task_name):
        """Compare every configuration column of one task with the baseline.

        Returns a DataFrame with one row per column in ``rank_cols``: task
        label, column name, parsed hyperparameters, Spearman rho and p-value
        against the baseline, mean absolute rank change, and the percentage
        of rows whose rank did not change.
        """
        reference = df[self.baseline_col].values
        rows = []
        for name in rank_cols:
            if name == self.baseline_col:
                continue
            settings = self._parse_config(name)
            candidate = df[name].values
            rho, p_val = spearmanr(reference, candidate)
            delta = np.abs(reference - candidate)

            row = {'task': task_name, 'config_name': name}
            row.update(settings)  # degree, mcd, les, intercept (in that order)
            row.update({
                'spearman_rho': rho,
                'p_value': p_val,
                'mean_abs_change': delta.mean(),
                'pct_no_change': (delta < 1).mean() * 100,
                'pct_change_0': (delta == 0).mean() * 100,
            })
            rows.append(row)
        return pd.DataFrame(rows)

    def analyze_all(self):
        """Run ``compute_correlations`` for both tasks and stack the results (LCB first)."""
        per_task = [
            self.compute_correlations(self.df_lcb, self.rank_cols_lcb, 'LCB'),
            self.compute_correlations(self.df_c2t, self.rank_cols_c2t, 'CXG'),
        ]
        return pd.concat(per_task, ignore_index=True)

    def analyze_by_parameter(self, results_df, baseline_config=(5, 0.95, 75, 0.01)):
        """Analyze sensitivity for each parameter independently.

        For each hyperparameter, keeps the rows where the other three equal
        their baseline value, groups by the varied one and aggregates rho,
        p-value and stability (rounded to 4 decimals).

        Returns a dict keyed 'degree', 'mcd', 'les', 'intercept'.
        """
        deg_base, mcd_base, les_base, icpt_base = baseline_config
        baseline = {
            'degree': deg_base,
            'mcd': mcd_base,
            'les': les_base,
            'intercept': icpt_base,
        }
        analysis = {}
        for varied in baseline:
            # Hold the three other parameters at their baseline values.
            held = [
                results_df[name] == value
                for name, value in baseline.items()
                if name != varied
            ]
            mask = held[0] & held[1] & held[2]
            summary = results_df[mask].groupby(varied).agg({
                'spearman_rho': ['mean', 'min', 'max'],
                'p_value': ['mean', 'max'],
                'pct_no_change': ['mean', 'min'],
            })
            analysis[varied] = summary.round(4)
        return analysis

    def compute_summary_statistics(self, results_df):
        """Overall statistics across all configurations, plus a per-task breakdown."""
        rho = results_df['spearman_rho']
        unchanged = results_df['pct_no_change']
        p_value = results_df['p_value']
        stats = {
            'total_configs': len(results_df),
            'mean_rho': rho.mean(),
            'min_rho': rho.min(),
            'max_rho': rho.max(),
            'std_rho': rho.std(),
            'configs_rho_above_085': (rho > 0.85).sum(),
            'configs_rho_above_080': (rho > 0.80).sum(),
            'mean_stability': unchanged.mean(),
            'min_stability': unchanged.min(),
            'configs_stability_above_80': (unchanged > 80).sum(),
            # Statistical significance metrics
            'configs_significant_p005': (p_value < 0.05).sum(),
            'configs_significant_p001': (p_value < 0.01).sum(),
            'configs_significant_p0001': (p_value < 0.001).sum(),
            'mean_p_value': p_value.mean(),
            'max_p_value': p_value.max(),
        }
        # Per-task breakdown
        for task in ('LCB', 'CXG'):
            subset = results_df[results_df['task'] == task]
            stats[f'{task}_mean_rho'] = subset['spearman_rho'].mean()
            stats[f'{task}_min_rho'] = subset['spearman_rho'].min()
            stats[f'{task}_mean_stability'] = subset['pct_no_change'].mean()
            stats[f'{task}_significant_p001'] = (subset['p_value'] < 0.01).sum()
        return stats

    def find_most_unstable_config_pairs(self, df, rank_cols, top_n=5):
        """
        Find the config pairs whose within-one-class stability differs most.

        For each pair of columns, stability is the percentage of rows within
        one rank of the baseline; pairs are sorted by the absolute difference
        of the two stabilities.
        Returns: DataFrame with the top ``top_n`` pairs and the models whose
        rank differs between the two configs.
        """
        from itertools import combinations
        reference = df[self.baseline_col].values
        rows = []
        for first, second in combinations(rank_cols, 2):
            ranks_a = df[first].values
            ranks_b = df[second].values
            # Stability vs baseline
            stability_a = (np.abs(ranks_a - reference) <= 1).mean() * 100
            stability_b = (np.abs(ranks_b - reference) <= 1).mean() * 100
            # Changes between configs
            delta = np.abs(ranks_a - ranks_b)
            moved = df.loc[delta > 0, 'model'].tolist()
            rows.append({
                'config1': first,
                'config2': second,
                'stability_diff': abs(stability_a - stability_b),
                'models_changed': len(moved),
                'changed_model_ids': ', '.join(moved),
                'max_rating_change': delta.max()
            })
        ranked = pd.DataFrame(rows).sort_values('stability_diff', ascending=False)
        return ranked.head(top_n)

    def print_summary(self, results_df, param_analysis, stats):
        """Print formatted summary"""
        total = stats['total_configs']

        def share(key):
            # "<count>/<total> (<percent>%)" for one of the counting statistics.
            count = stats[key]
            return f"{count}/{total} ({count/total*100:.1f}%)"

        rule = "=" * 70
        print(rule)
        print("OTER PARAMETER SENSITIVITY ANALYSIS - SUMMARY")
        print(rule)
        print(f"\n{'OVERALL STATISTICS':-^70}")
        print(f"Total configurations tested: {total}")
        print(f"Baseline: {self.baseline_col}")
        print(f"\n{'Spearman Correlation:':-^70}")
        print(f"  Mean ρ:  {stats['mean_rho']:.3f}")
        print(f"  Min ρ:   {stats['min_rho']:.3f}")
        print(f"  Max ρ:   {stats['max_rho']:.3f}")
        print(f"  Std ρ:   {stats['std_rho']:.3f}")
        print(f"  Configs with ρ > 0.85: {share('configs_rho_above_085')}")
        print(f"  Configs with ρ > 0.80: {share('configs_rho_above_080')}")
        print(f"\n{'Statistical Significance:':-^70}")
        print(f"  Configs with p < 0.05:   {share('configs_significant_p005')}")
        print(f"  Configs with p < 0.01:   {share('configs_significant_p001')}")
        print(f"  Configs with p < 0.001:  {share('configs_significant_p0001')}")
        print(f"  Mean p-value: {stats['mean_p_value']:.6f}")
        print(f"  Max p-value:  {stats['max_p_value']:.6f}")
        print(f"\n{'Rating Stability:':-^70}")
        print(f"  Mean % no change: {stats['mean_stability']:.1f}%")
        print(f"  Min % no change:  {stats['min_stability']:.1f}%")
        print(f"  Configs with >80% stability: {share('configs_stability_above_80')}")
        print(f"\n{'Per-Task Breakdown:':-^70}")
        for task in ('LCB', 'CXG'):
            mean_rho = stats[task + '_mean_rho']
            min_rho = stats[task + '_min_rho']
            stability = stats[task + '_mean_stability']
            significant = stats[task + '_significant_p001']
            print(f"  {task} - Mean ρ: {mean_rho:.3f}, Min ρ: {min_rho:.3f}, Stability: {stability:.1f}%, p<0.01: {significant}")
        print(f"\n{'PARAMETER-SPECIFIC SENSITIVITY':-^70}")
        for param_name, param_df in param_analysis.items():
            print(f"\n{param_name.upper()}:")
            print(param_df)
 
# ============================================================================
# USAGE
# ============================================================================
 
# Initialize
# Initialize. The frames must already contain "OTER_rank" and the "deg_..."
# columns written by the hyperparameter sweep cell.
analyzer = OTERSensitivityAnalysis(
    df_lcb, 
    df_c2t, 
    baseline_config=(5, 0.95, 75, 0.01)  # deg, mcd, les, intercept
)
 
# Run analysis: one row per (task, configuration) compared with the baseline.
print("Computing correlations...")
all_results = analyzer.analyze_all()
 
# One-at-a-time tables: vary a single hyperparameter, hold the rest at baseline.
print("Analyzing by parameter...")
param_analysis = analyzer.analyze_by_parameter(all_results)
 
print("Computing summary statistics...")
stats = analyzer.compute_summary_statistics(all_results)
 
# Print summary
analyzer.print_summary(all_results, param_analysis, stats)
 
# Export results. All three files are written to the current working directory.
all_results.to_csv('oter_sensitivity_results.csv', index=False)
print(f"\n✓ Detailed results saved to 'oter_sensitivity_results.csv'")
 
# Summary statistics to DataFrame for easy viewing (one row per statistic).
summary_df = pd.DataFrame([stats]).T
summary_df.columns = ['Value']
summary_df.to_csv('oter_sensitivity_summary.csv')
print(f"✓ Summary statistics saved to 'oter_sensitivity_summary.csv'")
 
# Parameter analysis to Excel (multiple sheets, one per hyperparameter).
# NOTE(review): writing .xlsx presumably needs an Excel engine such as
# openpyxl in the environment — confirm it is installed.
with pd.ExcelWriter('oter_parameter_analysis.xlsx') as writer:
    for param_name, param_df in param_analysis.items():
        param_df.to_excel(writer, sheet_name=param_name)
print(f"✓ Parameter analysis saved to 'oter_parameter_analysis.xlsx'")