## <B>Statistical validation after a normality check: test statistic, p-value, and Cohen's d (all data)</B>

In [None]:
import pandas as pd
from scipy import stats
import warnings

# Suppress all warnings. Note that this also hides SciPy notices (for
# example about ties or very small samples).
warnings.filterwarnings("ignore")

# File path and sheet name
file_path = '실험 결과.xlsx'
sheet_name = '시선 영역'

# Shapiro-Wilk needs at least three observations per sample.
MIN_PAIRS = 3
# Significance level used for the normality decision.
NORMALITY_ALPHA = 0.05


def clean_pair(frame, first_name, second_name):
    """Return the two columns restricted to rows where both are numeric.

    Values that cannot be converted to numbers are treated as missing.
    Rows are dropped pairwise, so the returned Series always share the
    same index and stay correctly paired for the paired tests.
    """
    first = pd.to_numeric(frame[first_name], errors='coerce')
    second = pd.to_numeric(frame[second_name], errors='coerce')
    valid = first.notna() & second.notna()
    return first[valid], second[valid]


def cohens_d_paired(first, second):
    """Return Cohen's d of the paired differences (mean / sample std)."""
    diff = first - second
    return diff.mean() / diff.std(ddof=1)


def format_p_value(p_value):
    """Format a p-value as 'p<0.001' or 'p=0.xxx'."""
    return "p<0.001" if p_value < 0.001 else f"p={p_value:.3f}"


def run_paired_test(first, second, alpha=NORMALITY_ALPHA):
    """Choose and run the paired test for two equally long samples.

    A paired t-test is used when both samples pass the Shapiro-Wilk test
    at level ``alpha``; otherwise the Wilcoxon signed-rank test is used.

    Returns a tuple ``(test_name, symbol, statistic, p_value)``.
    """
    # NOTE(review): the paired t-test assumes normality of the paired
    # differences; checking each column separately is kept here to
    # preserve the original decision rule -- confirm with the analyst.
    _, first_p = stats.shapiro(first)
    _, second_p = stats.shapiro(second)

    if first_p > alpha and second_p > alpha:
        statistic, p_value = stats.ttest_rel(first, second)
        return 'T-test', 'T', statistic, p_value

    statistic, p_value = stats.wilcoxon(first, second)
    return 'Wilcoxon', 'W', statistic, p_value


def compare_pair(first, second, label):
    """Run the paired comparison, print it, and return the summary row."""
    test_name, symbol, statistic, p_value = run_paired_test(first, second)
    cohen_d = cohens_d_paired(first, second)
    p_value_str = format_p_value(p_value)

    # Output the results
    print(f"\n{label}")
    print(f"{symbol}={statistic:.3f}, {p_value_str}, d={cohen_d:.3f}")

    return {
        'Comparison': label,
        'Test': test_name,
        'Statistic': round(statistic, 3),
        'p-value': p_value_str,
        "Cohen's d": round(cohen_d, 3),
    }


# The analysis itself runs when this is executed as a notebook cell or a
# script; the guard only keeps the helpers importable without the Excel file.
if __name__ == "__main__":
    # Read data
    df = pd.read_excel(file_path, sheet_name=sheet_name)
    df_subset = df.iloc[14:39]

    # List for saving results
    results = []

    # Process original/blurred video pairs 1 through 5
    for i in range(1, 6):
        column1_name = f'원본영상{i}'
        column2_name = f'블러영상{i}'

        # Keep only rows where both videos have a valid measurement
        column1, column2 = clean_pair(df_subset, column1_name, column2_name)

        # Too few complete pairs for the normality test: skip this pair
        if len(column1) < MIN_PAIRS:
            print(f"{column1_name} or {column2_name} columns don't have valid data.")
            continue

        results.append(
            compare_pair(column1, column2, f'{column1_name} vs {column2_name}')
        )

    # Output a summary of the final results
    summary_df = pd.DataFrame(results)
    print("\nSummary of Results:")
    print(summary_df)

## <B>Statistical validation after a normality check: test statistic, p-value, and Cohen's d (OriginalFirst/BlurredFirst data)</B>

In [None]:
import pandas as pd
from scipy import stats
from scipy.stats import wilcoxon
import warnings
import numpy as np

# Suppress all warnings. Note that this also hides SciPy notices (for
# example about ties or very small samples).
warnings.filterwarnings("ignore")

# File path and sheet name
file_path = '실험 결과.xlsx'
sheet_name = '시선 영역'

# Shapiro-Wilk needs at least three observations per sample.
MIN_PAIRS = 3
# Significance level used for the normality decision.
NORMALITY_ALPHA = 0.05


def clean_pair(frame, first_name, second_name):
    """Return the two columns restricted to rows where both are numeric.

    Values that cannot be converted to numbers are treated as missing.
    Rows are dropped pairwise, so the returned Series always share the
    same index and stay correctly paired for the paired tests.
    """
    first = pd.to_numeric(frame[first_name], errors='coerce')
    second = pd.to_numeric(frame[second_name], errors='coerce')
    valid = first.notna() & second.notna()
    return first[valid], second[valid]


def split_alternating(series):
    """Split a Series by position into (1st/3rd/5th..., 2nd/4th/6th...)."""
    return series.iloc[::2], series.iloc[1::2]


def cohens_d_paired(first, second):
    """Return Cohen's d of the paired differences (mean / sample std)."""
    diff = first - second
    return diff.mean() / diff.std(ddof=1)


def format_p_value(p_value):
    """Format a p-value as 'p<0.001' or 'p=0.xxx'."""
    return "p<0.001" if p_value < 0.001 else f"p={p_value:.3f}"


def run_paired_test(first, second, alpha=NORMALITY_ALPHA):
    """Choose and run the paired test for two equally long samples.

    A paired t-test is used when both samples pass the Shapiro-Wilk test
    at level ``alpha``; otherwise the Wilcoxon signed-rank test is used.

    Returns a tuple ``(test_name, symbol, statistic, p_value)``.
    """
    # NOTE(review): the paired t-test assumes normality of the paired
    # differences; checking each column separately is kept here to
    # preserve the original decision rule -- confirm with the analyst.
    _, first_p = stats.shapiro(first)
    _, second_p = stats.shapiro(second)

    if first_p > alpha and second_p > alpha:
        statistic, p_value = stats.ttest_rel(first, second)
        return 'T-test', 'T', statistic, p_value

    statistic, p_value = stats.wilcoxon(first, second)
    return 'Wilcoxon', 'W', statistic, p_value


def compare_pair(first, second, label):
    """Run the paired comparison, print it, and return the summary row."""
    test_name, symbol, statistic, p_value = run_paired_test(first, second)
    cohen_d = cohens_d_paired(first, second)
    formatted_p = format_p_value(p_value)

    print(f"\n{label}")
    print(f"{symbol}={statistic:.3f}, {formatted_p}, d={cohen_d:.3f}")

    return {
        'Comparison': label,
        'Test': test_name,
        'Statistic': round(statistic, 3),
        'p-value': formatted_p,
        "Cohen's d": round(cohen_d, 3),
    }


# The analysis itself runs when this is executed as a notebook cell or a
# script; the guard only keeps the helpers importable without the Excel file.
if __name__ == "__main__":
    # Read data
    df = pd.read_excel(file_path, sheet_name=sheet_name)
    df_subset = df.iloc[:39]

    # List for saving results
    results = []

    # Process original/blurred video pairs 1 through 5
    for i in range(1, 6):
        column1_name = f'원본영상{i}'  # Original video column name
        column2_name = f'블러영상{i}'  # Blurred video column name

        # Convert to numbers and keep only rows valid in BOTH columns, so a
        # value missing in one column cannot shift the odd/even position of
        # later rows in that column alone.
        column1, column2 = clean_pair(df_subset, column1_name, column2_name)

        # Verify data size
        if len(column1) == 0:
            print(f"{column1_name} or {column2_name} columns don't have valid data.")
            continue

        # Split by position among the valid rows.
        # NOTE(review): parity is counted after invalid rows are removed, as
        # in the original; confirm this matches the presentation-order groups.
        column1_odd, column1_even = split_alternating(column1)
        column2_odd, column2_even = split_alternating(column2)

        groups = [
            ("홀수", column1_odd, column2_odd),
            ("짝수", column1_even, column2_even),
        ]
        for group_name, col1, col2 in groups:
            # Pairwise cleaning guarantees equal lengths; the only remaining
            # failure mode is too few pairs for the normality test.
            if len(col1) < MIN_PAIRS:
                print(
                    f"The check could not be performed because the {group_name} "
                    f"data in {column1_name} has fewer than {MIN_PAIRS} valid pairs."
                )
                continue

            label = f'{column1_name} vs {column2_name} ({group_name})'
            results.append(compare_pair(col1, col2, label))

    # Output a summary of the final results
    summary_df = pd.DataFrame(results)
    print("\nSummary of Results:")
    print(summary_df)