# Mean distance spotted by habitat and year (old-reserve transects only)

In [None]:
import pandas as pd

# Read the pickled transect and occurrence tables.
# NOTE: pickle files should only ever be loaded from a trusted source.
df_transects = pd.read_pickle('../data/pkl/df_transects.pkl')
df_occurrences = pd.read_pickle('../data/pkl/df_occurrences.pkl')

# Column labels reused by the steps below.
habitat_col = 'Pre: Transect physical habitat'
reserve_col = 'Pre: On old reserve?'
distance_col = 'Pre: Distance spotted'

# Attach the transect attributes to every occurrence. A left join keeps
# occurrences even when their TransectUID has no matching transect UID.
transect_info = df_transects[['UID', habitat_col, reserve_col, 'start_time']]
df = pd.merge(
    df_occurrences,
    transect_info,
    how='left',
    left_on='TransectUID',
    right_on='UID',
)

# Keep only rows whose transect is flagged 'Yes' for the old reserve and
# derive the calendar year from the transect start time. `assign` returns
# a new frame, so `df` itself is left untouched.
on_old_reserve = df[reserve_col] == 'Yes'
df_old = df.loc[on_old_reserve].assign(
    Year=lambda frame: pd.to_datetime(frame['start_time']).dt.year
)

# Per habitat and year: mean, number of non-null distance values, and
# standard deviation (pandas' default sample SD) of the distance spotted.
group_keys = [habitat_col, 'Year']
distance_stats = df_old.groupby(group_keys)[distance_col].agg(['mean', 'count', 'std'])

# Flatten the group index into columns, order the rows, and give the
# statistics presentation-friendly headers.
summary = (
    distance_stats
    .reset_index()
    .sort_values(group_keys)
    .rename(columns={
        'mean': 'Mean distance (m)',
        'count': 'Count of occurrences',
        'std': 'SD',
    })
)

summary
