In [8]:
import pandas as pd

def get_all_signals_by_cluster(file_path):
    df = pd.read_csv(file_path)
    # Group by Cluster and Signal, count occurrences (all signals included)
    counts = df.groupby(['K means Cluster', 'Signal']).size().reset_index(name='count')
    counts['Signal'] = counts['Signal'].str.split('_').str[0]  # keep only the signal name
    return counts[['K means Cluster', 'Signal']]

# Get all signals cluster-wise for each file
df1_signals = get_all_signals_by_cluster('anomalies_outside_threshold_isolationforest_clustering_Data.csv')
df2_signals = get_all_signals_by_cluster('anomalies_outside_threshold_volatility_clustering_Data.csv')
df3_signals = get_all_signals_by_cluster('anomalies_outside_threshold_arima_clustering_Data.csv')

# Find common signals cluster-wise
common_signals = []
common_clusters = set(df1_signals['K means Cluster']) & set(df2_signals['K means Cluster']) & set(df3_signals['K means Cluster'])
for cluster in common_clusters:
    set1 = set(df1_signals[df1_signals['K means Cluster'] == cluster]['Signal'])
    set2 = set(df2_signals[df2_signals['K means Cluster'] == cluster]['Signal'])
    set3 = set(df3_signals[df3_signals['K means Cluster'] == cluster]['Signal'])
    common = set1 & set2 & set3
    common_signals.extend([(cluster, sig) for sig in common])

common_df = pd.DataFrame(common_signals, columns=['K means Cluster', 'Signal'])
common_df.to_csv('Common_Signals_Clusterwise.csv', index=False)

In [9]:
import pandas as pd

def get_top_signals_by_cluster(file_path, top_n=5):
    df = pd.read_csv(file_path)
    counts = df.groupby(['K means Cluster', 'Signal']).size().reset_index(name='count')
    counts['Signal'] = counts['Signal'].str.split('_').str[0]
    top_signals_list = []
    for cluster in counts['K means Cluster'].unique():
        top_signals = counts[counts['K means Cluster'] == cluster].sort_values(by='count', ascending=False)
        top_signals_list.append(top_signals)
    return pd.concat(top_signals_list)

# Example usage
top_10_df1 = get_top_signals_by_cluster('anomalies_outside_threshold_isolationforest_clustering_Data.csv', top_n=5)
top_10_df2 = get_top_signals_by_cluster('anomalies_outside_threshold_volatility_clustering_Data.csv', top_n=5)
top_10_df3 = get_top_signals_by_cluster('anomalies_outside_threshold_arima_clustering_Data.csv', top_n=5)

# To print the results cluster-wise per df
for cluster in sorted(top_10_df1['K means Cluster'].unique()):
    print(f"Cluster {cluster} top signals in Isolation Forest:")
    print(top_10_df1[top_10_df1['K means Cluster'] == cluster][['Signal', 'count']])
    print()

for cluster in sorted(top_10_df2['K means Cluster'].unique()):
    print(f"Cluster {cluster} top signals in Volatility:")
    print(top_10_df2[top_10_df2['K means Cluster'] == cluster][['Signal', 'count']])
    print()

for cluster in sorted(top_10_df3['K means Cluster'].unique()):
    print(f"Cluster {cluster} top signals in ARIMA:")
    print(top_10_df3[top_10_df3['K means Cluster'] == cluster][['Signal', 'count']])
    print()

Cluster 0 top signals in Isolation Forest:
                                            Signal  count
75                              Stand 4 Backup RPM     18
8     Morgoil DriveTop Bearing Outflow Temp Stand2     16
12  Morgoil OperBottom Bearing Outflow Temp Stand2     16
7     Morgoil DriveTop Bearing Outflow Temp Stand1     15
15     Morgoil OperTop Bearing Outflow Temp Stand1     15
..                                             ...    ...
24         Roll Force Hydraulics Pressure Feedback      2
26                       S2 Operating Bending Trim      2
25                       S1 Operating Bending Trim      2
2                                    Master Ramp.1      1
35                       Stand 1 Gap Thread Offset      1

[90 rows x 2 columns]

Cluster 1 top signals in Isolation Forest:
                                              Signal  count
97      Morgoil DriveTop Bearing Outflow Temp Stand1      9
98      Morgoil DriveTop Bearing Outflow Temp Stand2      9
155           

In [11]:
import pandas as pd

def get_top_signals_by_cluster(file_path, top_n=5):
    df = pd.read_csv(file_path)
    counts = df.groupby(['K means Cluster', 'Signal']).size().reset_index(name='count')
    counts['Signal'] = counts['Signal'].str.split('_').str[0]
    top_signals_list = []
    for cluster in counts['K means Cluster'].unique():
        top_signals = counts[counts['K means Cluster'] == cluster].sort_values(by='count', ascending=False)
        top_signals_list.append(top_signals)
    return pd.concat(top_signals_list)

# Example usage
top_10_df1 = get_top_signals_by_cluster('anomalies_outside_threshold_isolationforest_clustering_Data.csv', top_n=5)
top_10_df2 = get_top_signals_by_cluster('anomalies_outside_threshold_volatility_clustering_Data.csv', top_n=5)
top_10_df3 = get_top_signals_by_cluster('anomalies_outside_threshold_arima_clustering_Data.csv', top_n=5)

output_text = []
for cluster in sorted(top_10_df1['K means Cluster'].unique()):
    output_text.append(f"Cluster {cluster} top signals in Isolation Forest:")
    df_sub = top_10_df1[top_10_df1['K means Cluster'] == cluster][['Signal', 'count']]
    output_text.extend(df_sub.to_string(index=False).split('\n'))
    output_text.append("")
for cluster in sorted(top_10_df2['K means Cluster'].unique()):
    output_text.append(f"Cluster {cluster} top signals in Volatility:")
    df_sub = top_10_df2[top_10_df2['K means Cluster'] == cluster][['Signal', 'count']]
    output_text.extend(df_sub.to_string(index=False).split('\n'))
    output_text.append("")
for cluster in sorted(top_10_df3['K means Cluster'].unique()):
    output_text.append(f"Cluster {cluster} top signals in ARIMA:")
    df_sub = top_10_df3[top_10_df3['K means Cluster'] == cluster][['Signal', 'count']]
    output_text.extend(df_sub.to_string(index=False).split('\n'))
    output_text.append("")

# Save to text file
with open('top_signals_by_cluster.txt', 'w') as f:
    f.write('\n'.join(output_text))
