In [17]:
import pandas as pd
import numpy as np
from scipy import stats
import matplotlib.pyplot as plt
import seaborn as sns

# Load the Excel file
file_path = 'VeryLow.xlsx'
#file_path = 'Low.xlsx'
#file_path = 'Medium.xlsx'
#file_path = 'High.xlsx'
#df = pd.read_excel(file_path, sheet_name='Pushing')
df = pd.read_excel(file_path, sheet_name='Rolling')
#df = pd.read_excel(file_path, sheet_name='PushingEdges')

# Create a binary matrix where 1 indicates 'Different' and 0 otherwise
binary_df = df.set_index('RefreshRate').applymap(lambda x: 1 if x == 'Different' else 0)

# Parameters for Fisher's Exact Test
alpha = 0.05  # Significance level

# Reference data at 1000 Hz
reference_freq = 1000
reference_data = binary_df.loc[reference_freq]
reference_different = reference_data.astype(int)  # 1 if "Different", 0 otherwise
ref_prop_different = reference_different.mean()
print(f"\nReference proportion of 'Different' at {reference_freq} Hz: {ref_prop_different:.3f}")
print(f"Reference sample size: {len(reference_different)}")

# Storage for results
frequencies = []
proportions = []
fisher_p_values = []
all_different = []

# Test frequencies (excluding reference)
test_frequencies = sorted([f for f in binary_df.index if f != reference_freq], reverse=True)

for freq in test_frequencies:
    print(f"\nTesting refresh rate {freq} Hz against reference {reference_freq} Hz")
    data_at_freq = binary_df.loc[freq]
    current_different = data_at_freq.astype(int)
    
    curr_prop_different = current_different.mean()
    print(f"Proportion of 'Different' feedback at {freq} Hz: {curr_prop_different:.3f}")
    print(f"Sample size at {freq} Hz: {len(current_different)}")
    
    # Perform Fisher's Exact Test
    if len(current_different) > 0 and len(reference_different) > 0:
        # Create 2x2 contingency table: [Different, Not Different] x [Test Freq, Reference Freq]
        table = [
            [np.sum(current_different), len(current_different) - np.sum(current_different)],  # Test freq
            [np.sum(reference_different), len(reference_different) - np.sum(reference_different)]  # Reference freq
        ]
        # Perform Fisher's Exact Test
        odds_ratio, p_value = stats.fisher_exact(table)
        
        print(f"Fisher's Exact Test Details at {freq} Hz:")
        print(f"  - Contingency Table:")
        print(f"    Test Freq: Different={table[0][0]}, Not Different={table[0][1]}")
        print(f"    Ref Freq:  Different={table[1][0]}, Not Different={table[1][1]}")
        print(f"  - Odds Ratio: {odds_ratio:.3f}")
        print(f"  - P-value: {p_value:.5f}")
        
        # Decision
        if p_value > alpha:
            print(f"  - Decision: Fail to reject H0, no significant difference detected")
        else:
            print(f"  - Decision: Reject H0, significant difference detected")
    else:
        print(f"Fisher's Exact Test skipped: Insufficient sample size at {freq} Hz or reference.")
        p_value = np.nan
    
    # Store results
    frequencies.append(freq)
    proportions.append(curr_prop_different)
    fisher_p_values.append(p_value)
    all_different.append(current_different)

# Summarize frequencies with no significant difference
similar_frequencies = [freq for freq, p in zip(frequencies, fisher_p_values) if p > alpha]
print(f"\nFrequencies with no significant difference from {reference_freq} Hz (p > {alpha}): {similar_frequencies if similar_frequencies else 'None'}")

# --- Visualizations ---
frequencies = np.array(frequencies)
proportions = np.array(proportions)
fisher_p_values = np.array(fisher_p_values)
all_frequencies = np.array(binary_df.index)

# 1. Proportion Plot
plt.figure(figsize=(10, 6))
similar_mask = fisher_p_values > alpha
plt.plot(frequencies[similar_mask], proportions[similar_mask], 'go', label='No Significant Difference')
plt.plot(frequencies[~similar_mask], proportions[~similar_mask], 'ro', label='Significant Difference')
plt.axhline(y=ref_prop_different, color='r', linestyle='--', label=f'Reference ({reference_freq} Hz)')
plt.xlabel('Refresh Rate (Hz)')
plt.ylabel('Proportion of "Different" Feedback')
plt.title('Proportion of "Different" Feedback vs. Reference')
plt.xticks(all_frequencies)
plt.legend()
plt.savefig('proportion_fisher.png')
plt.close()

# 2. Fisher's P-value Plot
plt.figure(figsize=(10, 6))
plt.plot(frequencies, fisher_p_values, 'o-', label="Fisher's p-value")
plt.axhline(y=alpha, color='r', linestyle='--', label=f'α = {alpha}')
plt.yscale('log')
plt.xlabel('Refresh Rate (Hz)')
plt.ylabel("Fisher's P-value (log scale)")
plt.title("Fisher's Exact Test P-values Across Refresh Rates")
plt.xticks(all_frequencies)
plt.legend()
plt.savefig('pvalue_fisher.png')
plt.close()

# 3. Box Plot
plot_data = pd.DataFrame({
    'Frequency': np.concatenate([np.full(len(d), f) for f, d in zip(frequencies, all_different)] + 
                                [np.full(len(reference_different), reference_freq)]),
    'Different': np.concatenate(all_different + [reference_different])
})
plt.figure(figsize=(10, 6))
ax = sns.boxplot(x='Frequency', y='Different', data=plot_data, palette='Set2')
ax.set_xticks(range(len(all_frequencies)))
ax.set_xticklabels(all_frequencies)
plt.xlabel('Refresh Rate (Hz)')
plt.ylabel('"Different" Feedback (1 = Yes, 0 = No)')
plt.title('Distribution of "Different" Feedback')
plt.savefig('boxplot_different.png')
plt.close()

print("\nVisualizations saved: 'proportion_fisher.png', 'pvalue_fisher.png', 'boxplot_different.png'")



Reference proportion of 'Different' at 1000 Hz: 0.000
Reference sample size: 21

Testing refresh rate 950 Hz against reference 1000 Hz
Proportion of 'Different' feedback at 950 Hz: 0.000
Sample size at 950 Hz: 21
Fisher's Exact Test Details at 950 Hz:
  - Contingency Table:
    Test Freq: Different=0, Not Different=21
    Ref Freq:  Different=0, Not Different=21
  - Odds Ratio: nan
  - P-value: 1.00000
  - Decision: Fail to reject H0, no significant difference detected

Testing refresh rate 900 Hz against reference 1000 Hz
Proportion of 'Different' feedback at 900 Hz: 0.000
Sample size at 900 Hz: 21
Fisher's Exact Test Details at 900 Hz:
  - Contingency Table:
    Test Freq: Different=0, Not Different=21
    Ref Freq:  Different=0, Not Different=21
  - Odds Ratio: nan
  - P-value: 1.00000
  - Decision: Fail to reject H0, no significant difference detected

Testing refresh rate 800 Hz against reference 1000 Hz
Proportion of 'Different' feedback at 800 Hz: 0.000
Sample size at 800 Hz: 2