In [39]:
import pandas as pd
from scipy.stats import shapiro, mannwhitneyu
from scripts.utils import rank_biserial
from scipy.stats import chi2_contingency
import numpy as np

In [40]:
# File paths
path_interactions_6h = "../../csv_data/interactions/nonsem_interactions_6h.csv"
path_interactions_1b5h = "../../csv_data/interactions/nonsem_interactions_1b5h.csv"
path_interactions_3b3h = "../../csv_data/interactions/nonsem_interactions_3b3h.csv"

# Load the CSV files into DataFrames
df_interactions_6h = pd.read_csv(path_interactions_6h)
df_interactions_1b5h = pd.read_csv(path_interactions_1b5h)
df_interactions_3b3h = pd.read_csv(path_interactions_3b3h)

### General Interactions

In [41]:
# Calculate percentage of people that interacted in 6H
total_count_6h = len(df_interactions_6h)
count_interacted_6h = len(df_interactions_6h[df_interactions_6h['total_interactions'] > 1])
percentage_interacted_6h = (count_interacted_6h / total_count_6h) * 100

print(f"Percentage of people that interacted in 6H: {percentage_interacted_6h:.2f}%")

Percentage of people that interacted in 6H: 44.36%


In [42]:
# Calculate percentage of people that interacted in 1B5H
total_count_1b5h = len(df_interactions_1b5h)
count_interacted_1b5h = len(df_interactions_1b5h[df_interactions_1b5h['total_interactions'] > 1])
percentage_interacted_1b5h = (count_interacted_1b5h / total_count_1b5h) * 100

print(f"Percentage of people that interacted in 1B5H: {percentage_interacted_1b5h:.2f}%")

Percentage of people that interacted in 1B5H: 53.85%


In [43]:
# Calculate percentage of people that interacted in 3B3H
total_count_3b3h = len(df_interactions_3b3h)
count_interacted_3b3h = len(df_interactions_3b3h[df_interactions_3b3h['total_interactions'] > 1])
percentage_interacted_3b3h = (count_interacted_3b3h / total_count_3b3h) * 100

print(f"Percentage of people that interacted in 3B3H: {percentage_interacted_3b3h:.2f}%")

Percentage of people that interacted in 3B3H: 59.62%


### Difference in active players: 1B5H vs. 3B3H

In [44]:
# Add the 'interacted' column
df_interactions_6h['interacted'] = df_interactions_6h['total_interactions'] > 0
df_interactions_1b5h['interacted'] = df_interactions_1b5h['total_interactions'] > 0
df_interactions_3b3h['interacted'] = df_interactions_3b3h['total_interactions'] > 0

# Label the groups
df_interactions_6h['group'] = '6h'
df_interactions_1b5h['group'] = '1b5h'
df_interactions_3b3h['group'] = '3b3h'

# Combine the data frames
df_combined = pd.concat([df_interactions_6h, df_interactions_1b5h, df_interactions_3b3h])

# Create a contingency table
contingency_table = pd.crosstab(df_combined['group'], df_combined['interacted'])

print(contingency_table)

interacted  False  True 
group                   
1b5h           42     62
3b3h           36     68
6h            211    197


In [45]:
# Perform the chi-squared test
chi2, p, dof, expected = chi2_contingency(contingency_table)

# Display the results
print(f"Chi-squared: {chi2}")
print(f"p-value: {p}")
print(f"Degrees of freedom: {dof}")
print("Expected frequencies:")
print(expected)

# Determine if there's a significant difference
alpha = 0.05
if p < alpha:
    print("There is a significant difference in interaction rates between the groups.")
else:
    print("There is no significant difference in interaction rates between the groups.")

Chi-squared: 11.87372886037167
p-value: 0.0026402955160630135
Degrees of freedom: 2
Expected frequencies:
[[ 48.79220779  55.20779221]
 [ 48.79220779  55.20779221]
 [191.41558442 216.58441558]]
There is a significant difference in interaction rates between the groups.


### Difference in total interactions: 1B5H vs. 3B3H (All)

In [47]:
# Extract the 'items_copied' columns
total_interactions_6h = df_interactions_6h['total_interactions']
total_interactions_1b5h = df_interactions_1b5h['total_interactions']
total_interactions_3b3h = df_interactions_3b3h['total_interactions']

In [48]:
# Check normality assumption
total_shapiro_6h = shapiro(total_interactions_6h)
total_shapiro_1b5h = shapiro(total_interactions_1b5h)
total_shapiro_3b3h = shapiro(total_interactions_3b3h)

print(f"P-value of Shapiro-Wilk test for normality (6H): {total_shapiro_6h[1]}")
print(f"P-value of Shapiro-Wilk test for normality (1B5H): {total_shapiro_1b5h[1]}")
print(f"P-value of Shapiro-Wilk test for normality (3B3H): {total_shapiro_3b3h[1]}")

P-value of Shapiro-Wilk test for normality (6H): 7.607557262088692e-23
P-value of Shapiro-Wilk test for normality (1B5H): 1.1001364225356954e-10
P-value of Shapiro-Wilk test for normality (3B3H): 2.396852423987578e-10


In [49]:
# If normality is violated in either group, use Mann-Whitney U test
if total_shapiro_1b5h.pvalue < 0.05 or total_shapiro_3b3h.pvalue < 0.05 and total_shapiro_6h.pvalue < 0.05:
    mannwhitney_test_1b5h_6h = mannwhitneyu(total_interactions_1b5h,total_interactions_6h, alternative='greater')
    mannwhitney_test_3b3h_6h = mannwhitneyu(total_interactions_3b3h,total_interactions_6h, alternative='greater')
    mannwhitney_test_3b3h_1b5h = mannwhitneyu(total_interactions_3b3h, total_interactions_1b5h, alternative='greater')
    
    print(f"Mann-Whitney U test (1B5H vs. 6H): {mannwhitney_test_1b5h_6h}")
    print(f"Mann-Whitney U test (3B3H vs. 6H): {mannwhitney_test_3b3h_6h}")
    print(f"Mann-Whitney U test (3B3H vs. 1B5H): {mannwhitney_test_3b3h_1b5h}")

Mann-Whitney U test (1B5H vs. 6H): MannwhitneyuResult(statistic=25616.5, pvalue=0.0002443063670979666)
Mann-Whitney U test (3B3H vs. 6H): MannwhitneyuResult(statistic=27600.5, pvalue=2.385946819699146e-07)
Mann-Whitney U test (3B3H vs. 1B5H): MannwhitneyuResult(statistic=5917.0, pvalue=0.11419760451380212)


In [50]:
# Calculate Effect Size (r) for Mann-Whitney U test
effect_size_total_interactions_1b5h_6h = rank_biserial(total_interactions_1b5h, total_interactions_6h)
effect_size_total_interactions_3b3h_6h = rank_biserial(total_interactions_3b3h, total_interactions_6h)
effect_size_total_interactions_3b3h_1b5h = rank_biserial(total_interactions_3b3h, total_interactions_1b5h)

print(f"Effect Size (r) for total interations (1B5H vs. 6H): {effect_size_total_interactions_1b5h_6h}")
print(f"Effect Size (r) for total interations (3B3H vs. 6H): {effect_size_total_interactions_3b3h_6h}")
print(f"Effect Size (r) for total interations (3B3H vs. 1B5H): {effect_size_total_interactions_3b3h_1b5h}")

Effect Size (r) for total interations (1B5H vs. 6H): 0.2074142156862746
Effect Size (r) for total interations (3B3H vs. 6H): 0.30092854449472095
Effect Size (r) for total interations (3B3H vs. 1B5H): 0.09411982248520712


### Difference in human interactions: 1B5H vs. 3B3H

In [51]:
human_interactions_1b5h = df_interactions_1b5h['interactions_with_humans']
human_interactions_3b3h = df_interactions_3b3h['interactions_with_humans']

In [52]:
# Normalize the interactions
normalized_interactions_1b5h = human_interactions_1b5h / 5
normalized_interactions_3b3h = human_interactions_3b3h / 3

In [53]:
# Check normality assumption
human_interactions_shapiro_1b5h = shapiro(normalized_interactions_1b5h)
human_interactions_shapiro_3b3h = shapiro(normalized_interactions_3b3h)

print(f"P-value of Shapiro-Wilk test for normality (Human Interaction - 1B5H): {human_interactions_shapiro_1b5h[1]}")
print(f"P-value of Shapiro-Wilk test for normality (Human Interaction - 3B3H): {human_interactions_shapiro_3b3h[1]}")

P-value of Shapiro-Wilk test for normality (Human Interaction - 1B5H): 1.5900731105063647e-13
P-value of Shapiro-Wilk test for normality (Human Interaction - 3B3H): 1.9352382840818392e-16


In [54]:
# If normality is violated in either group, use Mann-Whitney U test
if human_interactions_shapiro_1b5h.pvalue < 0.05 or human_interactions_shapiro_3b3h.pvalue < 0.05:   
    mannwhitney_test = mannwhitneyu(normalized_interactions_3b3h, normalized_interactions_1b5h, alternative='less')
    print(f"Mann-Whitney U test: {mannwhitney_test}")

Mann-Whitney U test: MannwhitneyuResult(statistic=4669.5, pvalue=0.028089787250912823)


In [55]:
# Calculate Effect Size (r) for Mann-Whitney U test
effect_size_human_interactions = rank_biserial(normalized_interactions_1b5h, normalized_interactions_3b3h)
print(f"Effect Size (r) for human interations: {effect_size_human_interactions}")

Effect Size (r) for human interations: 0.13655695266272194


### Difference in bot interactions: 1B5H vs. 3B3H

In [56]:
bot_interactions_1b5h = df_interactions_1b5h['interactions_with_bots']
bot_interactions_3b3h = df_interactions_3b3h['interactions_with_bots']

In [57]:
# Normalize the interactions
normalized_interactions_1b5h = bot_interactions_1b5h
normalized_interactions_3b3h = bot_interactions_3b3h / 3

In [58]:
# Check normality assumption
bot_interactions_shapiro_1b5h = shapiro(normalized_interactions_1b5h)
bot_interactions_shapiro_3b3h = shapiro(normalized_interactions_3b3h)

print(f"P-value of Shapiro-Wilk test for normality (Bot Interaction - 1B5H): {bot_interactions_shapiro_1b5h[1]}")
print(f"P-value of Shapiro-Wilk test for normality (Bot Interaction - 3B3H): {bot_interactions_shapiro_3b3h[1]}")

P-value of Shapiro-Wilk test for normality (Bot Interaction - 1B5H): 3.1457695883968724e-13
P-value of Shapiro-Wilk test for normality (Bot Interaction - 3B3H): 2.1895398544170056e-10


In [59]:
# If normality is violated in either group, use Mann-Whitney U test
if bot_interactions_shapiro_1b5h.pvalue < 0.05 or bot_interactions_shapiro_3b3h.pvalue < 0.05:   
    mannwhitney_test = mannwhitneyu(normalized_interactions_3b3h, normalized_interactions_1b5h, alternative='greater')
    print(f"Mann-Whitney U test: {mannwhitney_test}")

Mann-Whitney U test: MannwhitneyuResult(statistic=5813.5, pvalue=0.16531075224694652)


In [60]:
# Calculate Effect Size (r) for Mann-Whitney U test
effect_size_bot_interactions = rank_biserial(normalized_interactions_1b5h, normalized_interactions_3b3h)
print(f"Effect Size (r) for bot interations: {effect_size_bot_interactions}")

Effect Size (r) for bot interations: -0.07498150887573962
