In [5]:
import pandas as pd
from scipy.stats import shapiro, mannwhitneyu
from scripts.utils import rank_biserial
from scipy.stats import chi2_contingency
import numpy as np

In [6]:
# Relative locations of the three interaction CSV files
path_interactions_6h, path_interactions_1b5h, path_interactions_3b3h = (
    "../../data/interactions/nonsem_interactions_6h.csv",
    "../../data/interactions/nonsem_interactions_1b5h.csv",
    "../../data/interactions/nonsem_interactions_3b3h.csv",
)

# Read every CSV into its own DataFrame (same order as the paths above)
df_interactions_6h, df_interactions_1b5h, df_interactions_3b3h = map(
    pd.read_csv,
    (path_interactions_6h, path_interactions_1b5h, path_interactions_3b3h),
)

### Difference in total interactions

In [7]:
# Pull the 'total_interactions' column out of each DataFrame
total_interactions_6h, total_interactions_1b5h, total_interactions_3b3h = (
    frame['total_interactions']
    for frame in (df_interactions_6h, df_interactions_1b5h, df_interactions_3b3h)
)

In [8]:
# Run the Shapiro-Wilk normality test on each sample of total interactions
total_shapiro_6h, total_shapiro_1b5h, total_shapiro_3b3h = map(
    shapiro,
    (total_interactions_6h, total_interactions_1b5h, total_interactions_3b3h),
)

# Index 1 of each result is the p-value
print(f"P-value of Shapiro-Wilk test for normality (6H): {total_shapiro_6h[1]}")
print(f"P-value of Shapiro-Wilk test for normality (1B5H): {total_shapiro_1b5h[1]}")
print(f"P-value of Shapiro-Wilk test for normality (3B3H): {total_shapiro_3b3h[1]}")

P-value of Shapiro-Wilk test for normality (6H): 3.395831040569697e-14
P-value of Shapiro-Wilk test for normality (1B5H): 1.347919472153698e-05
P-value of Shapiro-Wilk test for normality (3B3H): 0.0011686358298843341


In [9]:
# Significance threshold applied to the Shapiro-Wilk p-values
ALPHA = 0.05

# If normality is violated in ANY of the three groups, use the Mann-Whitney U test.
# any() is used rather than chaining `or`/`and`: `and` binds tighter than `or`, so
# `a or b and c` is evaluated as `a or (b and c)`, which would skip the tests when
# only the 6H sample is non-normal.
normality_violated = any(
    result.pvalue < ALPHA
    for result in (total_shapiro_6h, total_shapiro_1b5h, total_shapiro_3b3h)
)

# NOTE: when no group violates normality, this cell runs no test at all.
if normality_violated:
    # One-sided tests: alternative='greater' asks whether the first sample is
    # stochastically greater than the second.
    mannwhitney_test_1b5h_6h = mannwhitneyu(total_interactions_1b5h, total_interactions_6h, alternative='greater')
    mannwhitney_test_3b3h_6h = mannwhitneyu(total_interactions_3b3h, total_interactions_6h, alternative='greater')
    mannwhitney_test_3b3h_1b5h = mannwhitneyu(total_interactions_3b3h, total_interactions_1b5h, alternative='greater')

    print(f"Mann-Whitney U test (1B5H vs. 6H): {mannwhitney_test_1b5h_6h}")
    print(f"Mann-Whitney U test (3B3H vs. 6H): {mannwhitney_test_3b3h_6h}")
    print(f"Mann-Whitney U test (3B3H vs. 1B5H): {mannwhitney_test_3b3h_1b5h}")

Mann-Whitney U test (1B5H vs. 6H): MannwhitneyuResult(statistic=np.float64(20911.0), pvalue=np.float64(0.5897938291725783))
Mann-Whitney U test (3B3H vs. 6H): MannwhitneyuResult(statistic=np.float64(22977.0), pvalue=np.float64(0.09541878613966448))
Mann-Whitney U test (3B3H vs. 1B5H): MannwhitneyuResult(statistic=np.float64(5965.0), pvalue=np.float64(0.09975326601492435))


In [10]:
# Effect size for each pairwise comparison, computed with the project helper
# rank_biserial (first argument is the group listed first in the label).
(
    effect_size_total_interactions_1b5h_6h,
    effect_size_total_interactions_3b3h_6h,
    effect_size_total_interactions_3b3h_1b5h,
) = (
    rank_biserial(first_sample, second_sample)
    for first_sample, second_sample in (
        (total_interactions_1b5h, total_interactions_6h),
        (total_interactions_3b3h, total_interactions_6h),
        (total_interactions_3b3h, total_interactions_1b5h),
    )
)

print(f"Effect Size (r) for total interations (1B5H vs. 6H): {effect_size_total_interactions_1b5h_6h}")
print(f"Effect Size (r) for total interations (3B3H vs. 6H): {effect_size_total_interactions_3b3h_6h}")
print(f"Effect Size (r) for total interations (3B3H vs. 1B5H): {effect_size_total_interactions_3b3h_1b5h}")

Effect Size (r) for total interations (1B5H vs. 6H): -0.014375942684766163
Effect Size (r) for total interations (3B3H vs. 6H): 0.08300339366515841
Effect Size (r) for total interations (3B3H vs. 1B5H): 0.10299556213017746
