In [1]:
import pandas as pd
import numpy as np
from scipy.spatial.distance import pdist, squareform
import seaborn as sns
import matplotlib.pyplot as plt
from scipy.stats import kruskal

In [2]:
# Read the input table from CSV.
df = pd.read_csv("bucket_coil_signal_averages_allsignals.csv")

# Treat every column that is not an ID, bucket, or binning column as a signal,
# keeping the columns in their original file order.
signal_cols = [
    name
    for name in df.columns
    if name not in (
        'coil_id', 'Bucket_ID', 'Bucket_Name',
        'CM_WIDTH', 'CP_X4GAUGE', 'Width_Bin', 'Gauge_Bin',
    )
]

## ANOVA

In [3]:
from scipy.stats import f_oneway
import numpy as np
import pandas as pd

def compare_signal_distributions_anova(df, bucket_col, signal_cols, alpha=0.05):
    """Run a one-way ANOVA per bucket and signal on a median split of the values.

    For every group of ``df`` formed by ``bucket_col`` and every column in
    ``signal_cols``, the non-null values are split into a lower half
    (``<= median``) and an upper half (``> median``), and ``f_oneway`` is
    applied to the two halves.

    NOTE(review): the two halves are disjoint by construction (every value in
    the upper half exceeds every value in the lower half), so this compares a
    bucket's sample against itself rather than against another bucket.
    Confirm this is the intended comparison.

    Parameters
    ----------
    df : pandas.DataFrame
        Table containing ``bucket_col`` and every column in ``signal_cols``.
    bucket_col : str
        Column used to group the rows.
    signal_cols : iterable of str
        Columns to test within each bucket.
    alpha : float, default 0.05
        Significance threshold; a row is flagged when ``p_value < alpha``.

    Returns
    -------
    pandas.DataFrame
        One row per tested (bucket, signal) pair with the columns
        ``Bucket_ID``, ``Signal``, ``F_statistic``, ``p_value`` and
        ``Significant``. The group key is always stored under ``Bucket_ID``,
        whatever ``bucket_col`` is called. Pairs where either half has fewer
        than two values are omitted. The columns are present even when no
        pair qualified, so downstream code can rely on the schema.
    """
    result_columns = ['Bucket_ID', 'Signal', 'F_statistic', 'p_value', 'Significant']
    results = []

    for bucket, bucket_df in df.groupby(bucket_col):
        for signal in signal_cols:
            values = bucket_df[signal].dropna().values

            # Guard before np.median so it is never called on an empty array.
            if len(values) < 2:
                continue

            median_val = np.median(values)
            lower_half = values[values <= median_val]
            upper_half = values[values > median_val]

            # Each half needs at least two observations for the test.
            if len(lower_half) < 2 or len(upper_half) < 2:
                continue

            F, p = f_oneway(lower_half, upper_half)

            results.append({
                'Bucket_ID': bucket,
                'Signal': signal,
                'F_statistic': F,
                'p_value': p,
                # A NaN p-value never compares below alpha, so it is flagged False.
                'Significant': p < alpha
            })

    # Passing the columns explicitly keeps the schema stable for an empty result.
    return pd.DataFrame(results, columns=result_columns)

In [4]:
# Apply the ANOVA comparison to every bucket / signal combination.
summary_df = compare_signal_distributions_anova(
    df=df,
    bucket_col='Bucket_ID',
    signal_cols=signal_cols,
)

# Preview the first rows of the result.
print(summary_df.head())

  res = hypotest_fun_out(*samples, **kwds)


   Bucket_ID                           Signal   F_statistic       p_value  \
0          1  Shape Target Second Coefficient  4.353914e+06  0.000000e+00   
1          1      Stand 1 Predicted Run Force  4.262523e+02  6.273365e-60   
2          1         Stand 1 Gap Stick Offset  1.164743e+02  3.182880e-23   
3          1  Tension Reel Calculated Tension  5.517261e+02  1.848417e-70   
4          1        Tension To Gap 2 In Limit  7.869836e+02  1.370855e-86   

   Significant  
0         True  
1         True  
2         True  
3         True  
4         True  


In [5]:
# Save the ANOVA summary to CSV. The confirmation message is built from the
# same path that is written, so it can no longer name a different file.
anova_summary_path = 'bucket_t_test_summary_anova.csv'
summary_df.to_csv(anova_summary_path, index=False)

print(f"Summary CSV '{anova_summary_path}' created successfully.")

Summary CSV 't_test_summary.csv' created successfully.


## Twin Test (Welch's t-test)

In [6]:
from scipy.stats import ttest_ind
import numpy as np
import pandas as pd

def compare_signal_distributions_ttest(df, bucket_col, signal_cols, alpha=0.05):
    """Run Welch's t-test per bucket and signal on a median split of the values.

    For every group of ``df`` formed by ``bucket_col`` and every column in
    ``signal_cols``, the non-null values are split into a lower half
    (``<= median``) and an upper half (``> median``), and ``ttest_ind`` with
    ``equal_var=False`` is applied to the two halves.

    NOTE(review): the two halves are disjoint by construction (every value in
    the upper half exceeds every value in the lower half), so this compares a
    bucket's sample against itself rather than against another bucket.
    Confirm this is the intended comparison.

    Parameters
    ----------
    df : pandas.DataFrame
        Table containing ``bucket_col`` and every column in ``signal_cols``.
    bucket_col : str
        Column used to group the rows.
    signal_cols : iterable of str
        Columns to test within each bucket.
    alpha : float, default 0.05
        Significance threshold; a row is flagged when ``p_value < alpha``.

    Returns
    -------
    pandas.DataFrame
        One row per tested (bucket, signal) pair with the columns
        ``Bucket_ID``, ``Signal``, ``T_statistic``, ``p_value`` and
        ``Significant``. The group key is always stored under ``Bucket_ID``,
        whatever ``bucket_col`` is called. Pairs where either half has fewer
        than two values are omitted. The columns are present even when no
        pair qualified, so downstream code can rely on the schema.
    """
    result_columns = ['Bucket_ID', 'Signal', 'T_statistic', 'p_value', 'Significant']
    results = []

    for bucket, bucket_df in df.groupby(bucket_col):
        for signal in signal_cols:
            values = bucket_df[signal].dropna().values

            # Guard before np.median so it is never called on an empty array.
            if len(values) < 2:
                continue

            median_val = np.median(values)
            lower_half = values[values <= median_val]
            upper_half = values[values > median_val]

            # Each half needs at least two observations for the test.
            if len(lower_half) < 2 or len(upper_half) < 2:
                continue

            # equal_var=False selects Welch's t-test (no equal-variance assumption).
            t_stat, p = ttest_ind(lower_half, upper_half, equal_var=False)

            results.append({
                'Bucket_ID': bucket,
                'Signal': signal,
                'T_statistic': t_stat,
                'p_value': p,
                # A NaN p-value never compares below alpha, so it is flagged False.
                'Significant': p < alpha
            })

    # Passing the columns explicitly keeps the schema stable for an empty result.
    return pd.DataFrame(results, columns=result_columns)

In [7]:
# Apply the Welch t-test comparison to every bucket / signal combination.
summary_df = compare_signal_distributions_ttest(
    df=df,
    bucket_col='Bucket_ID',
    signal_cols=signal_cols,
)

# Preview the first rows of the result.
print(summary_df.head())

  res = hypotest_fun_out(*samples, **kwds)


   Bucket_ID                           Signal  T_statistic       p_value  \
0          1  Shape Target Second Coefficient -4136.312009  0.000000e+00   
1          1      Stand 1 Predicted Run Force   -20.645878  2.302267e-59   
2          1         Stand 1 Gap Stick Offset    -8.707882  2.991308e-14   
3          1  Tension Reel Calculated Tension   -23.488850  8.954245e-65   
4          1        Tension To Gap 2 In Limit   -10.258033  3.131485e-12   

   Significant  
0         True  
1         True  
2         True  
3         True  
4         True  


In [8]:
# Save the t-test summary to CSV. The confirmation message is built from the
# same path that is written, so it can no longer name a different file.
ttest_summary_path = 'bucket_t_test_summary_twintest.csv'
summary_df.to_csv(ttest_summary_path, index=False)

print(f"Summary CSV '{ttest_summary_path}' created successfully.")

Summary CSV 't_test_summary.csv' created successfully.


## Counts of significant results per bucket

In [16]:
# Load the saved t-test summary.
# NOTE(review): this rebinds `df`, which the earlier cells use for the raw
# signal table; re-running those cells after this one will operate on the
# summary instead. Consider a distinct name if nothing later depends on `df`.
df = pd.read_csv('bucket_t_test_summary_twintest.csv')

# Normalize 'Significant' to real booleans, whether the CSV round-trip
# produced bools or strings such as "True" / "TRUE".
df['Significant'] = df['Significant'].astype(str).str.upper().eq('TRUE')

# Group by 'Bucket_ID' and count TRUE/FALSE rows per bucket.
summary = (
    df.groupby('Bucket_ID')['Significant']
    .value_counts()
    .unstack(fill_value=0)
    # Force both outcome columns to exist, in a fixed order. Without this,
    # a summary in which every row is True (or every row is False) yields a
    # single column and the rename below fails with a length mismatch.
    .reindex(columns=[True, False], fill_value=0)
)

# Rename columns for clarity; the order matches the reindex above.
summary.columns = ['TRUE_count', 'FALSE_count']
print(summary)

           TRUE_count  FALSE_count
Bucket_ID                         
1                 139            3
2                 128            6
3                 119           11
5                 123            9
6                 116           11
7                 123           10
9                 114            8
10                102           19


In [17]:
# Write the per-bucket TRUE/FALSE counts to CSV, keeping the index as the first column.
summary.to_csv(path_or_buf="bucket_t_test_summary_counts_twintest.csv")