In [1]:
import pandas as pd
from sklearn.cluster import KMeans

In [2]:
# Read the previously generated signal-averages CSV into the working DataFrame.
input_csv = "bucket_coil_signal_averages_allsignals_phasewise_rollingmode.csv"
df = pd.read_csv(input_csv)

In [5]:
# Columns that identify or describe a coil/bucket rather than carry a signal
# value (replace with your actual column names if they differ).
# 'cluster_label' is excluded too: this cell adds that column to df, so without
# the exclusion a re-run of the cell would feed the previous run's labels back
# into K-means as if they were a signal feature.
non_signal_columns = ['coil_id','CM_WIDTH','CP_X4GAUGE','Reduction','Width_Bin','Gauge_Bin','Reduction_Bin','Bucket_ID','Bucket_Name','rollingmode','Phase','cluster_label']
signal_columns = [col for col in df.columns if col not in non_signal_columns]

# Number of clusters fitted inside every bucket (fixed here; could be chosen
# dynamically per bucket instead).
k = 2

# One label per row. -1 means "never assigned to a cluster": after the loop it
# remains only on rows whose bucket was clustered but which were themselves
# left out because at least one signal value is missing.
df['cluster_label'] = -1

# Cluster each bucket independently so that labels are only comparable within
# a single Bucket_ID.
for bucket in df['Bucket_ID'].unique():
    bucket_data = df[df['Bucket_ID'] == bucket]

    # K-means cannot handle NaN, so only rows with a complete set of signal
    # values are used as feature vectors.
    X = bucket_data[signal_columns].dropna()

    if len(X) >= k:  # K-means needs at least as many samples as clusters
        # n_init is pinned because the library default differs between
        # scikit-learn releases; random_state makes the run repeatable.
        kmeans = KMeans(n_clusters=k, n_init=10, random_state=42)
        cluster_labels = kmeans.fit_predict(X)

        # Write back through X.index, not bucket_data.index: dropna() may have
        # removed rows, and cluster_labels has exactly one entry per row of X.
        df.loc[X.index, 'cluster_label'] = cluster_labels

    else:
        # Not enough complete rows to cluster: every row of the bucket gets
        # the default label 0.
        df.loc[bucket_data.index, 'cluster_label'] = 0



In [6]:
# Persist the DataFrame, cluster labels included, to a new CSV without the row index.
output_csv = "bucket_coil_signal_averages_allsignals_phasewise_rollingmode_with_clusters.csv"
df.to_csv(output_csv, index=False)