In [52]:
import pandas as pd
import numpy as np
from scipy import stats
import matplotlib.pyplot as plt
import seaborn as sns

# Load the response table for one condition.
# Alternative workbooks: 'VeryLow.xlsx', 'Low.xlsx', 'Medium.xlsx'.
# Alternative sheets:    'Rolling', 'PushingEdges'.
file_path = 'High.xlsx'
sheet_name = 'Pushing'
df = pd.read_excel(file_path, sheet_name=sheet_name)

print(df)
print(df['RefreshRate'].unique())

# Build a binary matrix indexed by refresh rate: 1 where the response is
# 'Different' (ignoring surrounding whitespace and letter case), 0 otherwise.
# NOTE: every other value (e.g. 'Indifferent', 'VS', or an empty cell) is
# coded 0 and therefore still counts towards the per-frequency sample size.
# The comparison is done column-wise with vectorised string methods instead of
# DataFrame.applymap, which is deprecated since pandas 2.1.
responses = df.set_index('RefreshRate')
binary_df = responses.apply(
    lambda column: column.astype(str).str.strip().str.lower().eq('different')
).astype(int)
print(binary_df)
print(binary_df.index)
# --- TOST settings ---
alpha = 0.05  # significance level of each one-sided test (yields a 90% CI)
delta = 0.3   # equivalence margin on the difference in proportions; adjust to context

# --- Reference condition: the row recorded at 1000 Hz ---
reference_freq = 1000
reference_data = binary_df.loc[reference_freq]
reference_different = reference_data.astype(int)  # 0/1 flag per response column
ref_prop_different = reference_different.mean()   # share of 'Different' responses
print(
    f"\nReference proportion of 'Different' at {reference_freq} Hz: {ref_prop_different:.3f}",
    f"Reference sample size: {len(reference_different)}",
    sep="\n",
)

# --- TOST (two one-sided tests): each refresh rate vs. the reference ---
# Per-frequency results, appended in the order the frequencies are tested.
frequencies = []
proportions = []
differences = []
ci_lowers = []
ci_uppers = []
equivalents = []
all_different = []
# Frequencies whose Wald standard error is exactly 0 (degenerate interval).
degenerate_frequencies = []

# Test frequencies (excluding reference), highest first
test_frequencies = sorted((f for f in binary_df.index if f != reference_freq), reverse=True)

# Loop invariants: the reference sample and the critical value do not depend
# on the frequency under test.
n2 = len(reference_different)
p2 = ref_prop_different
z = stats.norm.ppf(1 - alpha)  # one-sided critical value (1.645 for alpha = 0.05)

for freq in test_frequencies:
    print(f"\nTesting refresh rate {freq} Hz against reference {reference_freq} Hz")
    data_at_freq = binary_df.loc[freq]
    current_different = data_at_freq.astype(int)

    n1 = len(current_different)
    p1 = current_different.mean()

    print(f"Proportion at {freq} Hz: {p1:.3f}, Sample size: {n1}")
    print(f"Proportion at reference: {p2:.3f}, Sample size: {n2}")

    # Perform TOST for proportions via the (1 - 2*alpha) Wald confidence
    # interval of the difference: equivalence is declared when the whole
    # interval lies strictly inside (-delta, delta).
    if n1 > 0 and n2 > 0:
        d = p1 - p2  # Difference in proportions
        se = np.sqrt(p1 * (1 - p1) / n1 + p2 * (1 - p2) / n2)  # Unpooled Wald standard error
        ci_lower = d - z * se
        ci_upper = d + z * se

        equivalent = (ci_lower > -delta) and (ci_upper < delta)

        print(f"Difference in proportions: {d:.3f}")
        print(f"90% CI for difference: ({ci_lower:.3f}, {ci_upper:.3f})")
        if se == 0:
            # Both proportions are exactly 0 or 1, so the Wald variance
            # estimate vanishes and the interval collapses to a single point.
            # The decision below is then driven by the point estimate alone;
            # flag it instead of reporting it silently.
            degenerate_frequencies.append(freq)
            print("Warning: standard error is 0 (both proportions are exactly 0 or 1); "
                  "the Wald CI has zero width, so this decision is not statistically reliable.")
        print(f"Decision: {'Equivalent' if equivalent else 'Not equivalent'} within margin δ={delta}")
    else:
        print("TOST skipped: Insufficient sample size.")
        d, ci_lower, ci_upper, equivalent = np.nan, np.nan, np.nan, False

    # Store results
    frequencies.append(freq)
    proportions.append(p1)
    differences.append(d)
    ci_lowers.append(ci_lower)
    ci_uppers.append(ci_upper)
    equivalents.append(equivalent)
    all_different.append(current_different)

# Summarize equivalent frequencies
equivalent_frequencies = [freq for freq, eq in zip(frequencies, equivalents) if eq]
print(f"\nFrequencies equivalent to {reference_freq} Hz within margin δ={delta}: {equivalent_frequencies if equivalent_frequencies else 'None'}")
if degenerate_frequencies:
    print(f"Frequencies with a zero-width (degenerate) Wald CI: {degenerate_frequencies}")

# --- Visualizations ---
frequencies = np.array(frequencies)
proportions = np.array(proportions)
all_frequencies = np.array(binary_df.index)
# Explicit bool dtype so the mask stays valid for indexing and `~` even when
# the result list is empty.
equivalent_mask = np.array(equivalents, dtype=bool)

# 1. Difference Plot with 90% CI
# Lower and upper error-bar lengths are computed separately so the plot stays
# correct even if the interval is not symmetric around the point estimate.
diff_values = np.asarray(differences, dtype=float)
lower_err = diff_values - np.asarray(ci_lowers, dtype=float)
upper_err = np.asarray(ci_uppers, dtype=float) - diff_values
plt.figure(figsize=(10, 6))
plt.errorbar(x=frequencies, y=diff_values, yerr=np.vstack([lower_err, upper_err]), fmt='ko', label='Difference')
plt.axhline(y=0, color='k', linestyle='--')
plt.axhline(y=delta, color='r', linestyle='--', label='Equivalence bounds')
plt.axhline(y=-delta, color='r', linestyle='--')
if frequencies.size:
    # Shade the equivalence region slightly beyond the tested frequency range.
    plt.fill_between([min(frequencies)-100, max(frequencies)+100], -delta, delta, color='gray', alpha=0.2, label='Equivalence region')
plt.xlabel('Refresh Rate (Hz)')
plt.ylabel('Difference in Proportion of "Different" Feedback')
plt.title('Difference in Proportions with 90% CI vs. Reference')
plt.legend()
plt.savefig('tost_difference.png')
plt.close()

# 2. Proportion Plot
plt.figure(figsize=(10, 6))
plt.plot(frequencies[equivalent_mask], proportions[equivalent_mask], 'go', label=f'Equivalent within δ={delta}')
plt.plot(frequencies[~equivalent_mask], proportions[~equivalent_mask], 'ro', label='Not Equivalent')
plt.axhline(y=ref_prop_different, color='r', linestyle='--', label=f'Reference ({reference_freq} Hz)')
plt.xlabel('Refresh Rate (Hz)')
plt.ylabel('Proportion of "Different" Feedback')
plt.title('Proportion of "Different" Feedback vs. Reference')
plt.xticks(all_frequencies)
plt.legend()
plt.savefig('proportion_tost.png')
plt.close()

# 3. Box Plot
# Long-format table: one row per individual response, tagged with its
# refresh rate; the reference responses are appended last.
plot_data = pd.DataFrame({
    'Frequency': np.concatenate([np.full(len(d), f) for f, d in zip(frequencies, all_different)] +
                                [np.full(len(reference_different), reference_freq)]),
    'Different': np.concatenate(all_different + [reference_different])
})
# Fix the category order explicitly and reuse it for the tick labels, so the
# labels always match the boxes regardless of the row order in the sheet.
category_order = sorted(plot_data['Frequency'].unique())
plt.figure(figsize=(10, 6))
ax = sns.boxplot(x='Frequency', y='Different', data=plot_data, order=category_order, palette='Set2')
ax.set_xticks(range(len(category_order)))
ax.set_xticklabels(category_order)
plt.xlabel('Refresh Rate (Hz)')
plt.ylabel('"Different" Feedback (1 = Yes, 0 = No)')
plt.title('Distribution of "Different" Feedback')
plt.savefig('boxplot_different.png')
plt.close()

print("\nVisualizations saved: 'tost_difference.png', 'proportion_tost.png', 'boxplot_different.png'")

    RefreshRate        User1        User2        User3        User4  \
0            50    Different    Different    Different    Different   
1           100    Different    Different    Different    Different   
2           200    Different    Different    Different    Different   
3           300    Different    Different    Different    Different   
4           400    Different    Different    Different  Indifferent   
5           500    Different  Indifferent    Different           VS   
6           600    Different  Indifferent  Indifferent           VS   
7           700  Indifferent  Indifferent  Indifferent  Indifferent   
8           800  Indifferent  Indifferent  Indifferent  Indifferent   
9           900  Indifferent  Indifferent  Indifferent  Indifferent   
10          950  Indifferent  Indifferent  Indifferent  Indifferent   
11         1000  Indifferent  Indifferent  Indifferent  Indifferent   

          User5        User6        User7        User8        User9  ...  \



Visualizations saved: 'tost_difference.png', 'proportion_tost.png', 'boxplot_different.png'
