### Loading final counts from the result folders

In [None]:
import os
import pandas as pd
from scipy.stats import ttest_ind

def get_final_count_from_file(file_path, kind="Prey"):
    """Count the rows of one kind recorded at the last frame of a CSV file.

    The file is expected to provide a ``frame`` column and a ``kind`` column.
    The "last frame" is the largest value found in ``frame``.

    Args:
        file_path: Path of the CSV file to read.
        kind: Value of the ``kind`` column to count (default ``"Prey"``).

    Returns:
        Number of rows whose ``frame`` equals the maximum frame and whose
        ``kind`` equals ``kind``.
    """
    records = pd.read_csv(file_path)
    final_frame = records['frame'].max()
    # One combined mask instead of two successive selections.
    wanted = (records['frame'] == final_frame) & (records['kind'] == kind)
    return len(records.loc[wanted])

def gather_counts_from_folder(folder_path, kind="Prey"):
    """Collect the final-frame count of ``kind`` from every CSV in a folder.

    Args:
        folder_path: Directory to scan; only entries ending in ``.csv`` are
            read.
        kind: Value of the ``kind`` column to count (default ``"Prey"``).

    Returns:
        List with one count per CSV file, in the order ``os.listdir``
        yields the files.
    """
    return [
        get_final_count_from_file(os.path.join(folder_path, entry), kind)
        for entry in os.listdir(folder_path)
        if entry.endswith(".csv")
    ]

# Set your folder paths.
# Each folder is scanned for .csv files by gather_counts_from_folder; the two
# folders are the groups compared by the t-tests below (runs with a protector
# vs. the base case without one). The paths are relative, so they are resolved
# against the current working directory.
protector_folder = "Assignment_2/test_results/protector"
no_protector_folder = "Assignment_2/test_results/base_case"



### Test for prey

In [None]:
import math

# Final-frame prey counts, one value per CSV file, for each of the two groups.
protector_counts = gather_counts_from_folder(protector_folder, kind="Prey")
no_protector_counts = gather_counts_from_folder(no_protector_folder, kind="Prey")
# Perform Welch's t-test (doesn't assume equal variance)
t_stat, p_value = ttest_ind(protector_counts, no_protector_counts, equal_var=False)

print(f"Protector group counts: {protector_counts}")
print(f"No protector group counts: {no_protector_counts}")
print(f"T-statistic: {t_stat:.4f}")
print(f"P-value: {p_value:.4f}")

# A NaN p-value means the test could not be computed (for instance when a
# group has too few runs or the counts have no variance). Any comparison with
# NaN is False, so without this branch an undefined test would be reported as
# "No significant difference".
if math.isnan(p_value):
    print("T-test result is undefined (NaN); check that each group has at least two runs and non-constant counts")
elif p_value < 0.05:
    print("Significant difference between groups (reject null hypothesis)")
else:
    print("No significant difference between groups (fail to reject null hypothesis)")

### Test for predators

In [None]:
import math

# Final-frame predator counts, one value per CSV file, for each of the two groups.
protector_counts = gather_counts_from_folder(protector_folder, kind="Predator")
no_protector_counts = gather_counts_from_folder(no_protector_folder, kind="Predator")
# Perform Welch's t-test (doesn't assume equal variance)
t_stat, p_value = ttest_ind(protector_counts, no_protector_counts, equal_var=False)

print(f"Protector group counts: {protector_counts}")
print(f"No protector group counts: {no_protector_counts}")
print(f"T-statistic: {t_stat:.4f}")
print(f"P-value: {p_value:.4f}")

# A NaN p-value means the test could not be computed (for instance when a
# group has too few runs or the counts have no variance, e.g. every run ends
# with the same number of predators). Any comparison with NaN is False, so
# without this branch an undefined test would be reported as
# "No significant difference".
if math.isnan(p_value):
    print("T-test result is undefined (NaN); check that each group has at least two runs and non-constant counts")
elif p_value < 0.05:
    print("Significant difference between groups (reject null hypothesis)")
else:
    print("No significant difference between groups (fail to reject null hypothesis)")

### Code for heatmap

In [None]:
import os
import polars as pl
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Paths to your folders.
# Maps each group's display label to the folder holding that group's CSV
# files. The keys are used as the heatmap row labels, and the dict's
# iteration order determines the row order of the heatmaps and the order in
# which groups are passed to the ANOVA.
folders = {
    "No Protector": "Assignment_2/test_results/base_case",
    "Protector": "Assignment_2/test_results/protector",
    "Superpredator": "Assignment_2/test_results/attacker",
}

def get_average_population_over_time(folder_path, kind, max_frame=1000):
    """Average, over all runs in a folder, the per-frame count of one kind.

    Every ``.csv`` file in ``folder_path`` is treated as one run. For each
    run, the rows whose ``kind`` column equals ``kind`` are counted for every
    frame from 0 to ``max_frame`` inclusive; a frame with no matching rows
    counts as 0. The per-run count vectors are then averaged element-wise.

    Args:
        folder_path: Directory to scan; entries not ending in ``.csv`` are
            skipped.
        kind: Value of the ``kind`` column to count.
        max_frame: Last frame (inclusive) to include in the result.

    Returns:
        1-D NumPy float array of length ``max_frame + 1`` holding the mean
        count per frame. When the folder contains no CSV files the array is
        filled with NaN, so callers that stack several groups still get rows
        of equal length.
    """
    n_frames = max(max_frame + 1, 0)
    per_run_counts = []

    for filename in os.listdir(folder_path):
        if not filename.endswith(".csv"):
            continue
        df = pl.read_csv(os.path.join(folder_path, filename))
        # NOTE(review): assumes the "frame" column holds integer-valued,
        # non-negative frame indices - confirm against the CSV writer.
        frames = df.filter(pl.col("kind") == kind).get_column("frame").to_numpy()

        # Tally all frames in a single pass instead of re-filtering the
        # frame once per frame number; frames outside 0..max_frame are
        # ignored, exactly as a per-frame equality test would ignore them.
        in_range = (frames >= 0) & (frames <= max_frame)
        per_run_counts.append(
            np.bincount(frames[in_range].astype(np.int64), minlength=n_frames)
        )

    if not per_run_counts:
        # No runs found: there is nothing to average.
        return np.full(n_frames, np.nan)

    # Rows are runs, columns are frames; average over runs (axis=0).
    return np.mean(np.array(per_run_counts), axis=0)

# Parameters
max_frame = 500  # last frame to plot; adjust to your simulation length

# One averaged time series per group; stacked so that rows are groups (in the
# order of `folders`) and columns are frames 0..max_frame.
prey_data = np.array([
    get_average_population_over_time(path, "Prey", max_frame)
    for path in folders.values()
])
predator_data = np.array([
    get_average_population_over_time(path, "Predator", max_frame)
    for path in folders.values()
])

# Draw the prey heatmap first, then the predator heatmap, each in its own
# figure with its own colour map.
for matrix, title, palette in (
    (prey_data, "Average Prey Population Over Time by Group", "YlGnBu"),
    (predator_data, "Average Predator Population Over Time by Group", "YlOrRd"),
):
    plt.figure(figsize=(12, 4))
    sns.heatmap(matrix, xticklabels=50, yticklabels=list(folders.keys()), cmap=palette)
    plt.title(title)
    plt.xlabel("Frame")
    plt.ylabel("Group")
    plt.show()


### One-way ANOVA

In [None]:
import polars as pl
import os
from scipy.stats import f_oneway

def get_final_counts(folder_path, kind, final_frame=500):
    """Count, for every CSV in a folder, the rows of ``kind`` at one frame.

    Args:
        folder_path: Directory to scan; entries not ending in ``.csv`` are
            skipped.
        kind: Value of the ``kind`` column to count.
        final_frame: Frame number at which rows are counted (default 500).

    Returns:
        List with one count per CSV file, in the order ``os.listdir``
        yields the files. A file with no matching rows contributes 0.
    """
    return [
        pl.read_csv(os.path.join(folder_path, name))
        .filter(pl.col("frame") == final_frame)
        .filter(pl.col("kind") == kind)
        .height
        for name in os.listdir(folder_path)
        if name.endswith(".csv")
    ]

# Gather the final-frame counts per group, keeping the three lists aligned
# (same index = same group).
prey_counts_per_group, predator_counts_per_group, group_names = [], [], []

for group_name, folder_path in folders.items():
    prey_counts = get_final_counts(folder_path, "Prey")
    predator_counts = get_final_counts(folder_path, "Predator")
    # Leave out any group for which either list came back empty.
    if not prey_counts or not predator_counts:
        continue
    group_names.append(group_name)
    prey_counts_per_group.append(prey_counts)
    predator_counts_per_group.append(predator_counts)

# One-way ANOVA across the groups: prey first ...
f_stat_prey, p_value_prey = f_oneway(*prey_counts_per_group)
print(f"ANOVA Prey: F = {f_stat_prey:.3f}, p = {p_value_prey:.5f}")

# ... then predators.
f_stat_pred, p_value_pred = f_oneway(*predator_counts_per_group)
print(f"ANOVA Predator: F = {f_stat_pred:.3f}, p = {p_value_pred:.5f}")
