# Pairwise Field Season Comparison
This notebook compares consecutive field seasons using Welch's t-test.

In [None]:
from dataclasses import dataclass
from typing import List

import pandas as pd
from scipy.stats import ttest_ind

@dataclass
class SeasonComparison:
    """Outcome of one two-sample comparison between a pair of field seasons.

    Attributes:
        season_a: Label of the first season in the pair.
        season_b: Label of the second season in the pair.
        mean_a: Mean of the compared values for ``season_a``.
        mean_b: Mean of the compared values for ``season_b``.
        t_stat: Test statistic of the comparison.
        p_value: p-value associated with ``t_stat``.
        significant: Whether the comparison was judged significant.
    """

    # Which seasons were compared.
    season_a: int
    season_b: int

    # Per-season summary of the compared values.
    mean_a: float
    mean_b: float

    # Test outcome.
    t_stat: float
    p_value: float
    significant: bool

def compare_consecutive_seasons(
    df: pd.DataFrame,
    *,
    season_col: str = "Year",
    value_col: str = "Pre: Distance spotted",
    alpha: float = 0.05,
) -> pd.DataFrame:
    """Compare each field season with the next one using Welch's t-test.

    The distinct non-null values of ``season_col`` are sorted and every
    adjacent pair ``(A, B)`` is tested with
    ``ttest_ind(values_B, values_A, equal_var=False)``, so a positive
    t-statistic means season B has the larger mean.

    Args:
        df: Frame containing ``season_col`` and ``value_col``.
        season_col: Column holding the season label of each row.
        value_col: Column holding the measurement to compare; nulls are
            dropped per season before testing.
        alpha: Threshold below which a p-value is flagged as significant.

    Returns:
        A frame rounded to 4 decimals with one row per compared pair and
        the columns ``Field Season A``, ``Field Season B``, ``Mean A``,
        ``Mean B``, ``T-stat``, ``p-value`` and ``Significant (p<{alpha})``.
        The columns are present even when no pair could be compared.

    Pairs in which either season has no non-null values are skipped and a
    message is printed.
    """
    # Derive the header from ``alpha`` so it always states the threshold that
    # was actually applied (identical to the historical name for alpha=0.05).
    significance_label = f"Significant (p<{alpha})"
    columns = [
        "Field Season A",
        "Field Season B",
        "Mean A",
        "Mean B",
        "T-stat",
        "p-value",
        significance_label,
    ]

    field_seasons: List[int] = sorted(df[season_col].dropna().unique())
    results: List[SeasonComparison] = []

    # Walk adjacent (earlier, later) pairs of the sorted seasons.
    for season_a, season_b in zip(field_seasons, field_seasons[1:]):
        group_a = df.loc[df[season_col] == season_a, value_col].dropna()
        group_b = df.loc[df[season_col] == season_b, value_col].dropna()

        if group_a.empty or group_b.empty:
            print(f"Skipping comparison {season_a} vs {season_b} due to missing data")
            continue

        # NOTE(review): a group with a single observation is still tested;
        # Welch's test cannot estimate a variance from one value - confirm
        # whether such pairs should be skipped as well.
        stat, p = ttest_ind(group_b, group_a, equal_var=False)
        results.append(
            SeasonComparison(
                season_a=season_a,
                season_b=season_b,
                mean_a=float(group_a.mean()),
                mean_b=float(group_b.mean()),
                t_stat=float(stat),
                p_value=float(p),
                # Plain bool to match the dataclass annotation.
                significant=bool(p < alpha),
            )
        )

    # Explicit ``columns`` keeps the schema stable when ``results`` is empty.
    comparison_df = pd.DataFrame(
        [
            {
                "Field Season A": r.season_a,
                "Field Season B": r.season_b,
                "Mean A": r.mean_a,
                "Mean B": r.mean_b,
                "T-stat": r.t_stat,
                "p-value": r.p_value,
                significance_label: r.significant,
            }
            for r in results
        ],
        columns=columns,
    )
    return comparison_df.round(4)


In [None]:
# Example usage with a DataFrame `df_cleaned`
# comparison_df = compare_consecutive_seasons(df_cleaned)
# comparison_df
