In [None]:
# Uncomment on first use to install the dependencies into the kernel's environment:
# %pip install scikit-bio scikit-learn


In [None]:
import pandas as pd
import numpy as np
from skbio.diversity import beta_diversity
from skbio.stats.distance import permanova

# Read the two pickled DataFrames this notebook works from.
# NOTE(review): unpickling can execute arbitrary code; only load files from a trusted source.
df_transects = pd.read_pickle(
    filepath_or_buffer='../data/pkl/df_transects.pkl',
)
df_occurrences_with_taxon = pd.read_pickle(
    filepath_or_buffer='../data/pkl/df_occurrences_with_taxon.pkl',
)


In [None]:
# Select the transects and occurrence records used in the analysis (Old Reserve only).

# Survey year, derived from each transect's start time.
df_transects['Year'] = pd.to_datetime(df_transects['start_time']).dt.year

# Drop every 2008 transect; for 2024 keep only transects whose physical
# habitat is 'shrubs closed' (all other years pass this second mask).
mask_not_2008 = df_transects['Year'].ne(2008)
mask_keep_2024 = (
    df_transects['Year'].ne(2024)
    | df_transects['Pre: Transect physical habitat'].eq('shrubs closed')
)
df_filtered_transects = df_transects.loc[mask_not_2008 & mask_keep_2024].copy()

# Restrict to transects flagged as being on the old reserve.
df_old_reserve_transects = df_filtered_transects.loc[
    df_filtered_transects['Pre: On old reserve?'].eq('Yes')
].copy()
valid_transect_uids = df_old_reserve_transects['UID']

# Taxon labels to leave out of the analysis (including the empty label).
exclude = ['Aves (medium)', 'Aves (small)', '']

# Keep occurrences that belong to a retained transect and whose label is not excluded.
df_occ_old_reserve = df_occurrences_with_taxon.loc[
    lambda occ: occ['UID'].isin(valid_transect_uids) & ~occ['Taxon Label'].isin(exclude)
].copy()


In [None]:
# Species-by-transect matrix: one row per transect UID, one column per taxon
# label; each cell counts the non-null `ID` values for that pair, and
# combinations with no records are filled with 0.
df_matrix = pd.pivot_table(
    df_occ_old_reserve,
    values='ID',
    index='UID',
    columns='Taxon Label',
    aggfunc='count',
    fill_value=0,
)

# Physical-habitat label of each transect, looked up by UID in the same
# row order as df_matrix.
habitats = (
    df_old_reserve_transects
    .set_index('UID')['Pre: Transect physical habitat']
    .loc[df_matrix.index]
)


In [None]:
# Log-scale the counts element-wise: log2(x + 1) + 1.
# NOTE(review): under this formula a count of 0 maps to 1 (not 0), so absent
# taxa still contribute to the dissimilarity computed downstream; confirm
# this is the intended transform.
df_log = df_matrix.add(1).pipe(np.log2).add(1)


In [None]:
# Run PERMANOVA (npMANOVA): test whether the Bray-Curtis dissimilarities
# between transects are structured by the habitat grouping.

# The p-value is estimated from random permutations of the group labels.
# Seed NumPy's global RNG first so the result is the same on every
# Restart & Run All. Note this reseeds the global state for any later cell too.
np.random.seed(42)

# Pairwise Bray-Curtis dissimilarity between transects (rows of df_log),
# labelled by transect UID.
dm = beta_diversity('braycurtis', df_log.values, ids=df_log.index)

# `habitats` was built in df_matrix's row order, so it lines up with dm's ids.
permanova_results = permanova(dm, grouping=habitats, permutations=999)
permanova_results
