In [1]:
import pandas as pd
from scipy.stats import shapiro, mannwhitneyu
from scripts.utils import rank_biserial
from scipy.stats import chi2_contingency

In [2]:
# File paths
path_interactions_1b5h = "../../csv_data/interactions/nonsem_interactions_1b5h.csv"
path_interactions_3b3h = "../../csv_data/interactions/nonsem_interactions_3b3h.csv"

# Load the CSV files into DataFrames
df_interactions_1b5h = pd.read_csv(path_interactions_1b5h)
df_interactions_3b3h = pd.read_csv(path_interactions_3b3h)

### General Interactions

In [3]:
# Calculate percentage of people that interacted in 1B5H
total_count_1b5h = len(df_interactions_1b5h)
count_interacted_1b5h = len(df_interactions_1b5h[df_interactions_1b5h['total_interactions'] > 1])
percentage_interacted_1b5h = (count_interacted_1b5h / total_count_1b5h) * 100

print(f"Percentage of people that interacted in 1B5H: {percentage_interacted_1b5h:.2f}%")

Percentage of people that interacted in 1B5H: 54.81%


In [4]:
# Calculate percentage of people that interacted in 3B3H
total_count_3b3h = len(df_interactions_3b3h)
count_interacted_3b3h = len(df_interactions_3b3h[df_interactions_3b3h['total_interactions'] > 1])
percentage_interacted_3b3h = (count_interacted_3b3h / total_count_3b3h) * 100

print(f"Percentage of people that interacted in 3B3H: {percentage_interacted_3b3h:.2f}%")

Percentage of people that interacted in 3B3H: 59.62%


### Difference in active players: 1B5H vs. 3B3H

In [5]:
# Add the 'interacted' column to both dataframes
df_interactions_1b5h['interacted'] = df_interactions_1b5h['total_interactions'] > 0
df_interactions_3b3h['interacted'] = df_interactions_3b3h['total_interactions'] > 0

# Label the groups
df_interactions_1b5h['group'] = '1b5h'
df_interactions_3b3h['group'] = '3b3h'

# Combine the dataframes
combined_df = pd.concat([df_interactions_1b5h, df_interactions_3b3h])

# Create a contingency table
contingency_table = pd.crosstab(combined_df['group'], combined_df['interacted'])

print(contingency_table)

interacted  False  True 
group                   
1b5h           42     62
3b3h           36     68


In [6]:
# Perform the Chi-Square Test of Independence
chi2, p, _, _ = chi2_contingency(contingency_table)

print('Chi-Square statistic: {:.3f}, p-value: {:.3f}'.format(chi2, p))

Chi-Square statistic: 0.513, p-value: 0.474


### Difference in total interactions: 1B5H vs. 3B3H (All)

In [7]:
# Extract the 'items_copied' columns
total_interactions_1b5h = df_interactions_1b5h['total_interactions']
total_interactions_3b3h = df_interactions_3b3h['total_interactions']

In [8]:
# Check normality assumption
total_shapiro_1b5h = shapiro(total_interactions_1b5h)
total_shapiro_3b3h = shapiro(total_interactions_3b3h)

print(f"P-value of Shapiro-Wilk test for normality (1B5H): {total_shapiro_1b5h[1]}")
print(f"P-value of Shapiro-Wilk test for normality (3B3H): {total_shapiro_3b3h[1]}")

P-value of Shapiro-Wilk test for normality (1B5H): 4.2750247722536987e-11
P-value of Shapiro-Wilk test for normality (3B3H): 8.991655573996217e-11


In [9]:
# If normality is violated in either group, use Mann-Whitney U test
if total_shapiro_1b5h.pvalue < 0.05 or total_shapiro_3b3h.pvalue < 0.05:   
    mannwhitney_test = mannwhitneyu(total_interactions_3b3h, total_interactions_1b5h, alternative='greater')
    print(f"Mann-Whitney U test: {mannwhitney_test}")

Mann-Whitney U test: MannwhitneyuResult(statistic=5838.5, pvalue=0.15421694337746855)


In [10]:
# Calculate Effect Size (r) for Mann-Whitney U test
effect_size_total_interactions = rank_biserial(total_interactions_3b3h, total_interactions_1b5h)
print(f"Effect Size (r) for total interations: {effect_size_total_interactions}")

Effect Size (r) for total interations: 0.07960428994082847


### Difference in human interactions: 1B5H vs. 3B3H

In [11]:
human_interactions_1b5h = df_interactions_1b5h['interactions_with_humans']
human_interactions_3b3h = df_interactions_3b3h['interactions_with_humans']

In [12]:
# Normalize the interactions
normalized_interactions_1b5h = human_interactions_1b5h / 5
normalized_interactions_3b3h = human_interactions_3b3h / 3

In [13]:
# Check normality assumption
human_interactions_shapiro_1b5h = shapiro(normalized_interactions_1b5h)
human_interactions_shapiro_3b3h = shapiro(normalized_interactions_3b3h)

print(f"P-value of Shapiro-Wilk test for normality (Human Interaction - 1B5H): {human_interactions_shapiro_1b5h[1]}")
print(f"P-value of Shapiro-Wilk test for normality (Human Interaction - 3B3H): {human_interactions_shapiro_3b3h[1]}")

P-value of Shapiro-Wilk test for normality (Human Interaction - 1B5H): 5.722868126544044e-14
P-value of Shapiro-Wilk test for normality (Human Interaction - 3B3H): 3.828374527462227e-16


In [14]:
# If normality is violated in either group, use Mann-Whitney U test
if human_interactions_shapiro_1b5h.pvalue < 0.05 or human_interactions_shapiro_3b3h.pvalue < 0.05:   
    mannwhitney_test = mannwhitneyu(normalized_interactions_3b3h, normalized_interactions_1b5h, alternative='less')
    print(f"Mann-Whitney U test: {mannwhitney_test}")

Mann-Whitney U test: MannwhitneyuResult(statistic=4662.0, pvalue=0.026866141096817796)


In [15]:
# Calculate Effect Size (r) for Mann-Whitney U test
effect_size_human_interactions = rank_biserial(normalized_interactions_1b5h, normalized_interactions_3b3h)
print(f"Effect Size (r) for human interations: {effect_size_human_interactions}")

Effect Size (r) for human interations: 0.13794378698224863


### Difference in bot interactions: 1B5H vs. 3B3H

In [16]:
bot_interactions_1b5h = df_interactions_1b5h['interactions_with_bots']
bot_interactions_3b3h = df_interactions_3b3h['interactions_with_bots']

In [17]:
# Normalize the interactions
normalized_interactions_1b5h = bot_interactions_1b5h
normalized_interactions_3b3h = bot_interactions_3b3h / 3

In [18]:
# Check normality assumption
bot_interactions_shapiro_1b5h = shapiro(normalized_interactions_1b5h)
bot_interactions_shapiro_3b3h = shapiro(normalized_interactions_3b3h)

print(f"P-value of Shapiro-Wilk test for normality (Bot Interaction - 1B5H): {bot_interactions_shapiro_1b5h[1]}")
print(f"P-value of Shapiro-Wilk test for normality (Bot Interaction - 3B3H): {bot_interactions_shapiro_3b3h[1]}")

P-value of Shapiro-Wilk test for normality (Bot Interaction - 1B5H): 1.6042980956818937e-14
P-value of Shapiro-Wilk test for normality (Bot Interaction - 3B3H): 3.857941034096243e-11


In [19]:
# If normality is violated in either group, use Mann-Whitney U test
if bot_interactions_shapiro_1b5h.pvalue < 0.05 or bot_interactions_shapiro_3b3h.pvalue < 0.05:   
    mannwhitney_test = mannwhitneyu(normalized_interactions_3b3h, normalized_interactions_1b5h, alternative='greater')
    print(f"Mann-Whitney U test: {mannwhitney_test}")

Mann-Whitney U test: MannwhitneyuResult(statistic=5855.5, pvalue=0.1414574466101397)


In [20]:
# Calculate Effect Size (r) for Mann-Whitney U test
effect_size_bot_interactions = rank_biserial(normalized_interactions_1b5h, normalized_interactions_3b3h)
print(f"Effect Size (r) for bot interations: {effect_size_bot_interactions}")

Effect Size (r) for bot interations: -0.08274778106508873
