## Bucket wise

In [1]:
import numpy as np
import pandas as pd
from dtw import dtw
from scipy.spatial.distance import euclidean

# Load your dataframe
df = pd.read_csv(
    "bucket_coil_signal_averages_allsignals_allphase.csv"
)
target_buckets = [6, 5, 1, 8, 12, 20, 10, 2]
df = df[df["Bucket_ID"].isin(target_buckets)]

# Ensure required columns
assert (
    "Bucket_ID" in df.columns and "cluster_label" in df.columns
), "Missing required columns!"

# Identify signal columns
exclude_cols = [
    "coil_id",
    "Phase",
    "Width_Bin",
    "Gauge_Bin",
    "Reduction_Bin",
    "Bucket_ID",
    "Bucket_Name",
    "rollingmode",
    "cluster_label",
]
signal_cols = [c for c in df.columns if c not in exclude_cols]

bucket_top_signals = {}

for bucket, bucket_df in df.groupby("Bucket_ID"):
    print(f"Processing bucket: {bucket}")

    feature_dtw = {}

    # Average signal pattern across clusters
    cluster_means = bucket_df.groupby("cluster_label")[signal_cols].mean()

    for signal in signal_cols:
        signal_pattern = cluster_means[signal].values
        overall_pattern = cluster_means.mean(axis=1).values

        # Compute DTW distance using dtw-python
        alignment = dtw(
            signal_pattern, overall_pattern, keep_internals=True, dist_method=euclidean
        )
        feature_dtw[signal] = alignment.distance

    # Top 30 lowest DTW distance signals
    top_30 = sorted(feature_dtw, key=feature_dtw.get)[:30]
    bucket_top_signals[bucket] = top_30

# Find 10 common signals across all buckets
common_signals = list(set.intersection(*map(set, bucket_top_signals.values())))

# Create summary DataFrame
top_df = pd.DataFrame(dict([(f"Bucket_{b}", s) for b, s in bucket_top_signals.items()]))

print("\nTop 30 signals per bucket saved in top_df")
print("\nCommon 10 signals across all buckets:")
print(common_signals)

Importing the dtw module. When using in academic works please cite:
  T. Giorgino. Computing and Visualizing Dynamic Time Warping Alignments in R: The dtw Package.
  J. Stat. Soft., doi:10.18637/jss.v031.i07.



AssertionError: Missing required columns!

## Using individual csv files

In [1]:
import glob
import os
from itertools import combinations

import numpy as np
import pandas as pd

# ===============================
# Configuration
# ===============================
folder_path = (
    r"D:\Stelco\Work\Dynamic Correlation\Key\Whole2"  # change this to your folder path
)
output_path = "dtw_signal_ranking.csv"
signal_names = [
    "Stand 1 Predicted Run Force",
    "Stand 1 Gap Stick Offset",
    "Tension Reel Calculated Tension",
    "Tension To Gap 2 In Limit",
    "Stand 1-2 Total Tension Feedback",
    "Ramp Greater Than Thread",
    "Stand 3 - Operator Side Force",
    "Flatness Control - Bending In Limit",
    "Stand 1 Run Gap Setpoint",
    "Stand 1 Gap Bite Offset",
    "S1 Operating Bending Trim",
    "Stand 2-3 Tension Reference",
    "Neet Oil Concentration",
    "Morgoil DriveTop Bearing Outflow Temp Stand1",
    "Stand 4 Top Current Feedback",
    "Morgoil DriveTop Bearing Outflow Temp Stand3",
    "Stand 3 Run Gap Setpoint",
    "Stand 2 Total Bending Feedback",
    "Stand 2 Gap Bite Offset",
    "Morgoil OperBottom Bearing Outflow Temp Stand3",
    "Stand 4 Thread Gap Setpoint",
    "X4 Gauge Deviation",
    "Stand 4 DS Total Bending Feedback",
    "Laser 0 Data Valid",
    "Stand 4 - Operator Side Force",
    "Stand 2 Gap Eccentricity Trim",
    "Stand 4 Gap Operator Offset",
    "Stand 3 Total Bending Feedback",
    "Strip In Stand 3",
    "Morgoil OperTop Bearing Outflow Temp Stand1",
    "Strip In Stand 1",
    "X1 Gauge Deviation",
    "Stand 3 Drive Speed Feedback",
    "Stand 2 Gap Thread Offset",
    "Stand 2 Drive Speed Feedback",
    "Stand 1-3 Solution System Pressure",
    "Stand 2 Top Current Feedback",
    "Stand 1-3 Solution Temperature",
    "AGC GE Feedforward Hardness Number",
    "Stand 1 Total Bending Feedback",
    "X0 Gauge Deviation",
    "Stand 3 Bottom Current Feedback",
    "Stand 4 Gap Eccentricity Trim",
    "Stand 2 Gap Stick Offset",
    "Stand 3-4 Tension Reference",
    "Stand 4 Bottom Current Feedback",
    "Stand 1 Bottom Current Feedback",
    "Stand 3 Gap Thread Offset",
    "Stand 2 Bottom Current Feedback",
    "Stand 4 Solution System Pressure",
    "Stand 3 Gap Eccentricity Trim",
    "Stand 4 OS Total Bending Feedback",
    "Stand 1 Gap Thread Offset",
    "AGC Alex Dynamic Feedforward Hardness Number",
    "Stand 3 Top Current Feedback",
    "S2 Operating Bending Trim",
    "Roll Force Hydraulic Tank Level Inches",
    "Roll Force Hydraulics Pressure Feedback",
    "Stand 1 Roll Force Increase Limit (based on predicted run force)",
    "Stand 4 OS Bending Shape Trim",
    "Stand 4 DS Bending Shape Trim",
]

# ===============================
# Helper functions
# ===============================
def safe_read_csv(path):
    try:
        return pd.read_csv(path)
    except:
        return pd.read_csv(path, encoding="latin1")


def dtw_distance(a, b):
    na, nb = len(a), len(b)
    if na == 0 or nb == 0:
        return float("inf")
    D = np.full((na + 1, nb + 1), np.inf)
    D[0, 0] = 0.0
    for i in range(1, na + 1):
        for j in range(1, nb + 1):
            cost = (a[i - 1] - b[j - 1]) ** 2
            D[i, j] = cost + min(D[i - 1, j], D[i, j - 1], D[i - 1, j - 1])
    return float(np.sqrt(D[na, nb]))


def znormalize(x):
    x = np.asarray(x, dtype=float)
    if len(x) == 0:
        return x
    m, s = np.nanmean(x), np.nanstd(x)
    if s == 0 or np.isnan(s):
        return x - m
    return (x - m) / s


# ===============================
# Load coil CSVs
# ===============================
csv_paths = sorted(glob.glob(os.path.join(folder_path, "*.csv")))
if not csv_paths:
    raise SystemExit("No CSV files found in the given folder path.")

coil_dfs = []
for p in csv_paths:
    df = safe_read_csv(p)
    coil_id = (
        df["STD4_ID"].dropna().unique()[0]
        if "STD4_ID" in df.columns
        else os.path.basename(p)
    )
    coil_dfs.append((coil_id, df))

# ===============================
# Build sequences per signal
# ===============================
signal_sequences = {sig: [] for sig in signal_names}
for coil_id, df in coil_dfs:
    for sig in signal_names:
        if sig in df.columns:
            seq = pd.to_numeric(df[sig], errors="coerce").dropna().values
            if len(seq) >= 3:
                signal_sequences[sig].append((coil_id, znormalize(seq)))

# ===============================
# Compute DTW distance across coils
# ===============================
rows = []
for sig, seqs in signal_sequences.items():
    n = len(seqs)
    if n < 2:
        avg = mind = maxd = np.nan
    else:
        dists = []
        for (i, (cid_i, s_i)), (j, (cid_j, s_j)) in combinations(
            list(enumerate(seqs)), 2
        ):
            d = dtw_distance(s_i, s_j)
            dists.append(d)
        avg, mind, maxd = np.mean(dists), np.min(dists), np.max(dists)
    rows.append(
        {`0.
            "Signal": sig,
            "Coil_Count": n,
            "Avg_Pairwise_DTW": avg,
            "Min_Pairwise_DTW": mind,
            "Max_Pairwise_DTW": maxd,
        }
    )

# ===============================
# Save and show results
# ===============================
result_df = (
    pd.DataFrame(rows)
    .sort_values(by="Avg_Pairwise_DTW", na_position="last")
    .reset_index(drop=True)
)
result_df["Rank"] = result_df["Avg_Pairwise_DTW"].rank(method="min", na_option="keep")

result_df.to_csv(output_path, index=False)
print(f"Processed {len(coil_dfs)} coil files.") 
print(f"Results saved to: {output_path}")
print(result_df.head(15))

Processed 7 coil files.
Results saved to: dtw_signal_ranking.csv
                                Signal  Coil_Count  Avg_Pairwise_DTW  \
0      Stand 4 Bottom Current Feedback           5          8.013859   
1         Stand 3 Drive Speed Feedback           5          8.647728   
2         Stand 4 Top Current Feedback           5          8.672697   
3         Stand 2 Drive Speed Feedback           5          8.677180   
4      Tension Reel Calculated Tension           5         11.112124   
5      Stand 1 Bottom Current Feedback           5         11.812983   
6         Stand 2 Top Current Feedback           5         11.906186   
7      Stand 2 Bottom Current Feedback           5         11.941720   
8      Stand 3 Bottom Current Feedback           5         12.599543   
9          Stand 2-3 Tension Reference           5         13.011650   
10   Stand 4 OS Total Bending Feedback           5         13.198116   
11       Stand 4 OS Bending Shape Trim           5         13.810837   

## Using merged csv

In [None]:
import numpy as np
import pandas as pd
from itertools import combinations

# ===============================
# Configuration
# ===============================
merged_csv_path = r"combined_good_coil_data_filtered_added gauge target and phases_Gauge lst 0.08_std4.csv"  # your merged file
output_path = "dtw_signal_ranking.csv"

signal_names = [
    "Stand 1 Predicted Run Force",
    "Stand 1 Gap Stick Offset",
    "Tension Reel Calculated Tension",
    "Tension To Gap 2 In Limit",
    "Stand 1-2 Total Tension Feedback",
    "Ramp Greater Than Thread",
    "Stand 3 - Operator Side Force",
    "Flatness Control - Bending In Limit",
    "Stand 1 Run Gap Setpoint",
    "Stand 1 Gap Bite Offset",
    "S1 Operating Bending Trim",
    "Stand 2-3 Tension Reference",
    "Neet Oil Concentration",
    "Morgoil DriveTop Bearing Outflow Temp Stand1",
    "Stand 4 Top Current Feedback",
    "Morgoil DriveTop Bearing Outflow Temp Stand3",
    "Stand 3 Run Gap Setpoint",
    "Stand 2 Total Bending Feedback",
    "Stand 2 Gap Bite Offset",
    "Morgoil OperBottom Bearing Outflow Temp Stand3",
    "Stand 4 Thread Gap Setpoint",
    "X4 Gauge Deviation",
    "Stand 4 DS Total Bending Feedback",
    "Laser 0 Data Valid",
    "Stand 4 - Operator Side Force",
    "Stand 2 Gap Eccentricity Trim",
    "Stand 4 Gap Operator Offset",
    "Stand 3 Total Bending Feedback",
    "Strip In Stand 3",
    "Morgoil OperTop Bearing Outflow Temp Stand1",
    "Strip In Stand 1",
    "X1 Gauge Deviation",
    "Stand 3 Drive Speed Feedback",
    "Stand 2 Gap Thread Offset",
    "Stand 2 Drive Speed Feedback",
    "Stand 1-3 Solution System Pressure",
    "Stand 2 Top Current Feedback",
    "Stand 1-3 Solution Temperature",
    "AGC GE Feedforward Hardness Number",
    "Stand 1 Total Bending Feedback",
    "X0 Gauge Deviation",
    "Stand 3 Bottom Current Feedback",
    "Stand 4 Gap Eccentricity Trim",
    "Stand 2 Gap Stick Offset",
    "Stand 3-4 Tension Reference",
    "Stand 4 Bottom Current Feedback",
    "Stand 1 Bottom Current Feedback",
    "Stand 3 Gap Thread Offset",
    "Stand 2 Bottom Current Feedback",
    "Stand 4 Solution System Pressure",
    "Stand 3 Gap Eccentricity Trim",
    "Stand 4 OS Total Bending Feedback",
    "Stand 1 Gap Thread Offset",
    "AGC Alex Dynamic Feedforward Hardness Number",
    "Stand 3 Top Current Feedback",
    "S2 Operating Bending Trim",
    "Roll Force Hydraulic Tank Level Inches",
    "Roll Force Hydraulics Pressure Feedback",
    "Stand 1 Roll Force Increase Limit (based on predicted run force)",
    "Stand 4 OS Bending Shape Trim",
    "Stand 4 DS Bending Shape Trim",
]

# ===============================
# Helper functions
# ===============================
def dtw_distance(a, b):
    na, nb = len(a), len(b)
    if na == 0 or nb == 0:
        return float("inf")
    D = np.full((na + 1, nb + 1), np.inf)
    D[0, 0] = 0.0
    for i in range(1, na + 1):
        for j in range(1, nb + 1):
            cost = (a[i - 1] - b[j - 1]) ** 2
            D[i, j] = cost + min(D[i - 1, j], D[i, j - 1], D[i - 1, j - 1])
    return float(np.sqrt(D[na, nb]))


def znormalize(x):
    x = np.asarray(x, dtype=float)
    if len(x) == 0:
        return x
    m, s = np.nanmean(x), np.nanstd(x)
    if s == 0 or np.isnan(s):
        return x - m
    return (x - m) / s


# ===============================
# Load merged CSV
# ===============================
df = pd.read_csv(merged_csv_path)
if "STD4_ID" not in df.columns:
    raise SystemExit("Column 'STD4_ID' not found in merged CSV.")

# ===============================
# Build sequences per signal
# ===============================
signal_sequences = {sig: [] for sig in signal_names}
for coil_id, coil_df in df.groupby("STD4_ID"):
    for sig in signal_names:
        if sig in coil_df.columns:
            seq = pd.to_numeric(coil_df[sig], errors="coerce").dropna().values
            if len(seq) >= 3:
                signal_sequences[sig].append((coil_id, znormalize(seq)))

# ===============================
# Compute DTW distance across coils
# ===============================
rows = []
for sig, seqs in signal_sequences.items():
    n = len(seqs)
    if n < 2:
        avg = mind = maxd = np.nan
    else:
        dists = [
            dtw_distance(s_i, s_j)
            for (cid_i, s_i), (cid_j, s_j) in combinations(seqs, 2)
        ]
        avg, mind, maxd = np.mean(dists), np.min(dists), np.max(dists)
    rows.append(
        {
            "Signal": sig,
            "Coil_Count": n,
            "Avg_Pairwise_DTW": avg,
            "Min_Pairwise_DTW": mind,
            "Max_Pairwise_DTW": maxd,
        }
    )

# ===============================
# Save and show results
# ===============================
result_df = (
    pd.DataFrame(rows)
    .sort_values(by="Avg_Pairwise_DTW", na_position="last")
    .reset_index(drop=True)
)
result_df["Rank"] = result_df["Avg_Pairwise_DTW"].rank(method="min", na_option="keep")

result_df.to_csv(output_path, index=False)
print(f"Processed {df['STD4_ID'].nunique()} coils.")
print(f"Results saved to: {output_path}")
print(result_df.head(15))

## Signal Wise

In [None]:
import numpy as np
import pandas as pd
from itertools import combinations

# ===============================
# Configuration
# ===============================
merged_csv_path = r"combined_good_coil_data_filtered_added gauge target and phases_Gauge lst 0.08_std4.csv"
# output_path = "dtw_single_signal_result.csv"


# Give the signal name manually here
signal_name = "Stand 4 Bottom Current Feedback"  # change this as needed

# ===============================
# Helper functions
# ===============================
def dtw_distance(a, b):
    na, nb = len(a), len(b)
    if na == 0 or nb == 0:
        return float("inf")
    D = np.full((na + 1, nb + 1), np.inf)
    D[0, 0] = 0.0
    for i in range(1, na + 1):
        for j in range(1, nb + 1):
            cost = (a[i - 1] - b[j - 1]) ** 2
            D[i, j] = cost + min(D[i - 1, j], D[i, j - 1], D[i - 1, j - 1])
    return float(np.sqrt(D[na, nb]))


def znormalize(x):
    x = np.asarray(x, dtype=float)
    if len(x) == 0:
        return x
    m, s = np.nanmean(x), np.nanstd(x)
    if s == 0 or np.isnan(s):
        return x - m
    return (x - m) / s


# ===============================
# Load merged CSV
# ===============================
df = pd.read_csv(merged_csv_path)
if "STD4_ID" not in df.columns:
    raise SystemExit("Column 'STD4_ID' not found in merged CSV.")
if signal_name not in df.columns:
    raise SystemExit(f"Signal '{signal_name}' not found in CSV.")

# ===============================
# Build sequences per coil
# ===============================
signal_sequences = []
for coil_id, coil_df in df.groupby("STD4_ID"):
    seq = pd.to_numeric(coil_df[signal_name], errors="coerce").dropna().values
    if len(seq) >= 3:
        signal_sequences.append((coil_id, znormalize(seq)))

# ===============================
# Compute DTW distances
# ===============================
n = len(signal_sequences)
if n < 2:
    print(f"Not enough coils with valid data for signal '{signal_name}'.")
else:
    dists = [
        dtw_distance(s_i, s_j)
        for (cid_i, s_i), (cid_j, s_j) in combinations(signal_sequences, 2)
    ]
    avg_dtw = np.mean(dists)
    min_dtw = np.min(dists)
    max_dtw = np.max(dists)

    result = pd.DataFrame(
        [
            {
                "Signal": signal_name,
                "Coil_Count": n,
                "Avg_Pairwise_DTW": avg_dtw,
                "Min_Pairwise_DTW": min_dtw,
                "Max_Pairwise_DTW": max_dtw,
            }
        ]
    )

    # result.to_csv(output_path, index=False)
    print(f"Processed {n} coils for signal: {signal_name}")
    # print(f"Results saved to: {output_path}")
    print(result)

## GeeksforGeeks

In [None]:
import pandas as pd
import numpy as np
from itertools import combinations
from dtaidistance import dtw

# ===============================
# Configuration
# ===============================
merged_csv_path = r"combined_good_coil_data_filtered_added gauge target and phases_Gauge lst 0.08_std4.csv"
output_path = "dtw_single_signal_result.csv"

# Input signal to analyze (change manually as needed)
signal_name = "Stand 1 Predicted Run Force"

# ===============================
# Load merged CSV
# ===============================
df = pd.read_csv(merged_csv_path)
# df = df[df['Coil ID'].isin([5399575,5400215,5400236])]

if "STD4_ID" not in df.columns:
    raise SystemExit("Column 'STD4_ID' not found in merged CSV.")
if signal_name not in df.columns:
    raise SystemExit(f"Signal '{signal_name}' not found in CSV.")

# ===============================
# Helper: Normalize series
# ===============================
def znormalize(series):
    x = np.asarray(series, dtype=float)
    if len(x) == 0:
        return x
    m, s = np.nanmean(x), np.nanstd(x)
    if s == 0 or np.isnan(s):
        return x - m
    return (x - m) / s

# ===============================
# Build sequences for this signal per coil
# ===============================
signal_sequences = []
for coil_id, coil_df in df.groupby("STD4_ID"):
    seq = pd.to_numeric(coil_df[signal_name], errors="coerce").ffill().bfill().dropna().values
    if len(seq) >= 3:
        seq = znormalize(seq)
        signal_sequences.append((coil_id, seq))

# ===============================
# Compute DTW distances using dtaidistance
# ===============================
n = len(signal_sequences)
if n < 2:
    print(f"Not enough coils with valid data for signal '{signal_name}'.")
else:
    distances = []
    for (cid_i, s_i), (cid_j, s_j) in combinations(signal_sequences, 2):
        # Align lengths if needed (truncate to smallest)
        min_len = min(len(s_i), len(s_j))
        s_i, s_j = s_i[:min_len], s_j[:min_len]
        dist = dtw.distance(s_i, s_j)
        distances.append(dist)

    avg_dtw = np.mean(distances)
    min_dtw = np.min(distances)
    max_dtw = np.max(distances)

    result = pd.DataFrame(
        [
            {
                "Signal": signal_name,
                "Coil_Count": n,
                "Avg_Pairwise_DTW": avg_dtw,
                "Min_Pairwise_DTW": min_dtw,
                "Max_Pairwise_DTW": max_dtw,
            }
        ]
    )

    result.to_csv(output_path, index=False)
    print(f"Processed {n} coils for signal: {signal_name}")
    print(f"Results saved to: {output_path}")
    print(result)

In [2]:
import pandas as pd
import numpy as np

# ===============================
# Load data
# ===============================
df = pd.read_csv("bucket_coil_signal_averages_allsignals_allphase.csv")

# Signals = all numeric columns except metadata

signal_names = [
    "Stand 1 Predicted Run Force",
    "Stand 1 Gap Stick Offset",
    "Tension Reel Calculated Tension",
    "Tension To Gap 2 In Limit",
    "Stand 1-2 Total Tension Feedback",
    "Ramp Greater Than Thread",
    "Stand 3 - Operator Side Force",
    "Flatness Control - Bending In Limit",
    "Stand 1 Run Gap Setpoint",
    "Stand 1 Gap Bite Offset",
    "S1 Operating Bending Trim",
    "Stand 2-3 Tension Reference",
    "Neet Oil Concentration",
    "Morgoil DriveTop Bearing Outflow Temp Stand1",
    "Stand 4 Top Current Feedback",
    "Morgoil DriveTop Bearing Outflow Temp Stand3",
    "Stand 3 Run Gap Setpoint",
    "Stand 2 Total Bending Feedback",
    "Stand 2 Gap Bite Offset",
    "Morgoil OperBottom Bearing Outflow Temp Stand3",
    "Stand 4 Thread Gap Setpoint",
    "X4 Gauge Deviation",
    "Stand 4 DS Total Bending Feedback",
    "Laser 0 Data Valid",
    "Stand 4 - Operator Side Force",
    "Stand 2 Gap Eccentricity Trim",
    "Stand 4 Gap Operator Offset",
    "Stand 3 Total Bending Feedback",
    "Strip In Stand 3",
    "Morgoil OperTop Bearing Outflow Temp Stand1",
    "Strip In Stand 1",
    "X1 Gauge Deviation",
    "Stand 3 Drive Speed Feedback",
    "Stand 2 Gap Thread Offset",
    "Stand 2 Drive Speed Feedback",
    "Stand 1-3 Solution System Pressure",
    "Stand 2 Top Current Feedback",
    "Stand 1-3 Solution Temperature",
    "AGC GE Feedforward Hardness Number",
    "Stand 1 Total Bending Feedback",
    "X0 Gauge Deviation",
    "Stand 3 Bottom Current Feedback",
    "Stand 4 Gap Eccentricity Trim",
    "Stand 2 Gap Stick Offset",
    "Stand 3-4 Tension Reference",
    "Stand 4 Bottom Current Feedback",
    "Stand 1 Bottom Current Feedback",
    "Stand 3 Gap Thread Offset",
    "Stand 2 Bottom Current Feedback",
    "Stand 4 Solution System Pressure",
    "Stand 3 Gap Eccentricity Trim",
    "Stand 4 OS Total Bending Feedback",
    "Stand 1 Gap Thread Offset",
    "AGC Alex Dynamic Feedforward Hardness Number",
    "Stand 3 Top Current Feedback",
    "S2 Operating Bending Trim",
    "Roll Force Hydraulic Tank Level Inches",
    "Roll Force Hydraulics Pressure Feedback",
    "Stand 1 Roll Force Increase Limit (based on predicted run force)",
    "Stand 4 OS Bending Shape Trim",
    "Stand 4 DS Bending Shape Trim",
]

# ===============================
# Compute per-bucket signal STD (consistency)
# ===============================
rows = []

for bucket_id, g in df.groupby("Bucket_ID"):
    bucket_name = g["Bucket_Name"].iloc[0]

    for sig in signal_names:
        vals = g[sig].astype(float).dropna()
        if len(vals) >= 2:
            std_val = vals.std()
        else:
            std_val = np.nan

        rows.append({
            "Bucket_ID": bucket_id,
            "Bucket_Name": bucket_name,
            "Signal": sig,
            "STD": std_val
        })

res = pd.DataFrame(rows)

# ===============================
# Top N signals inside each bucket
# ===============================
TOP_N = 30

res["Rank_in_bucket"] = res.groupby("Bucket_ID")["STD"].rank(method="min", ascending=True)

top_per_bucket = res[res["Rank_in_bucket"] <= TOP_N]

# ===============================
# Common signals across buckets
# (sorted by average rank)
# ===============================
common = (
    top_per_bucket
    .groupby("Signal")["Rank_in_bucket"]
    .mean()
    .sort_values()
    .reset_index()
)

# ===============================
# Save output
# ===============================
top_per_bucket.to_csv("bucketwise_top_signals.csv", index=False)
common.to_csv("common_top_signals_across_buckets.csv", index=False)

print("Top signals per bucket saved → bucketwise_top_signals.csv")
print("Common best signals across buckets saved → common_top_signals_across_buckets.csv")

print("\nTop 20 Common Signals:")
print(common.head(20))

Top signals per bucket saved → bucketwise_top_signals.csv
Common best signals across buckets saved → common_top_signals_across_buckets.csv

Top 20 Common Signals:
                                            Signal  Rank_in_bucket
0                        S1 Operating Bending Trim        1.000000
1   Morgoil OperBottom Bearing Outflow Temp Stand3        2.416667
2                        Tension To Gap 2 In Limit        2.571429
3     Morgoil DriveTop Bearing Outflow Temp Stand1        3.250000
4      Morgoil OperTop Bearing Outflow Temp Stand1        3.500000
5     Morgoil DriveTop Bearing Outflow Temp Stand3        3.750000
6                                 Strip In Stand 3        4.333333
7                    Stand 3 Gap Eccentricity Trim        4.428571
8                    Stand 4 Gap Eccentricity Trim        4.761905
9                    Stand 2 Gap Eccentricity Trim        5.000000
10                                Strip In Stand 1        8.619048
11              AGC GE Feedforwar