In [9]:
import pandas as pd
from scipy.stats import ttest_ind
import os

In [10]:
# Read the phase-wise correlation summary table from the working directory.
df = pd.read_csv("Good_and_Bad_Coils_Phasewise_correlation_summary.csv")

# Coil IDs treated as "bad" in the comparison below.
bad_coils = [
    5514681, 5514677, 5514672, 5514670, 5514668, 5514674,
    5513807, 5512575, 5503973, 5503968, 5504079, 5499471,
    5522770, 5520684, 5519680, 5520680, 5519376,
]

# Coil IDs treated as "good" in the comparison below.
good_coils = [
    5484972, 5484981, 5485581, 5487008, 5488171, 5486250,
    5487948, 5487932, 5487937, 5487949, 5487953, 5487951,
    5490707, 5492525, 5493226, 5493229, 5493231, 5493809,
    5499738, 5499642, 5499278, 5500841, 5504100, 5504098,
    5504094, 5510195,
]

In [11]:
# Label every row by coil quality, then compare "Contribution Factor %"
# between good and bad coils with Welch's t-test for each
# (Phase, Signal Feature, Shape Feature) combination.

# Add label column.
# Vectorised membership test instead of a per-row lambda over Python lists.
# "bad" is written last so it wins if an ID is present in both lists,
# which keeps the original bad-before-good precedence.
coil_ids = df["Coil ID"]
labels = pd.Series("unknown", index=df.index, dtype=object)
labels.loc[coil_ids.isin(good_coils)] = "good"
labels.loc[coil_ids.isin(bad_coils)] = "bad"
df["label"] = labels

# Keep only labeled coils. The explicit copy makes `df` an independent frame,
# so re-running this cell does not assign into a filtered slice
# (SettingWithCopyWarning).
df = df[df["label"] != "unknown"].copy()

# Fixed output schema: guarantees the columns exist (and "p_value" can be
# sorted on) even when no group has enough samples to be tested.
RESULT_COLUMNS = [
    "Phase",
    "Signal Feature",
    "Shape Feature",
    "Good_Mean",
    "Bad_Mean",
    "t_stat",
    "p_value",
]

results = []

# Group by Phase + Signal Feature + Shape Feature
for (phase, signal, shape), group in df.groupby(["Phase", "Signal Feature", "Shape Feature"]):
    # Drop missing values up front: Series.mean() already ignores NaN, while
    # ttest_ind propagates it by default, so without this a single NaN would
    # yield a NaN p-value next to perfectly valid means.
    good_vals = group.loc[group["label"] == "good", "Contribution Factor %"].dropna()
    bad_vals = group.loc[group["label"] == "bad", "Contribution Factor %"].dropna()

    # A variance estimate needs at least two observations on each side.
    if len(good_vals) < 2 or len(bad_vals) < 2:
        continue

    # equal_var=False selects Welch's t-test (no equal-variance assumption).
    t_stat, p_val = ttest_ind(good_vals, bad_vals, equal_var=False)

    results.append({
        "Phase": phase,
        "Signal Feature": signal,
        "Shape Feature": shape,
        "Good_Mean": good_vals.mean(),
        "Bad_Mean": bad_vals.mean(),
        "t_stat": t_stat,
        "p_value": p_val,
    })

# Collect results, smallest p-value first (NaN p-values sort last).
results_df = pd.DataFrame(results, columns=RESULT_COLUMNS).sort_values("p_value")

In [12]:
# Persist results_df as a CSV file (row index omitted).
output_csv = "t_test_results.csv"

# Delete any file already present at the target path before writing.
stale_file_present = os.path.exists(output_csv)
if stale_file_present:
    os.remove(output_csv)

results_df.to_csv(output_csv, index=False)