# Distance spotted: mean, count and SD by habitat and year

In [2]:
import pandas as pd
from pathlib import Path
import xlsxwriter

# Load data
# NOTE: unpickling can execute arbitrary code, so these paths should only ever
# point at pickle files produced by this project, never at untrusted files.
df_transects = pd.read_pickle('../data/pkl/df_transects.pkl')
df_occurrences = pd.read_pickle('../data/pkl/df_occurrences.pkl')

# Merge occurrences with transect info (habitat, reserve flag, start time).
# The left join keeps every occurrence, including those whose TransectUID has
# no matching transect (their transect columns are then missing).
# validate='many_to_one' makes pandas raise a MergeError if a UID appears more
# than once in df_transects, instead of silently duplicating occurrence rows
# and inflating any counts or means computed from df.
df = df_occurrences.merge(
    df_transects[['UID', 'Pre: Transect physical habitat', 'Pre: On old reserve?', 'start_time']],
    how='left',
    left_on='TransectUID',
    right_on='UID',
    validate='many_to_one',
)

# Optional filter: set to True to use only transects on the old reserve.
# It is off by default, so all transects are analysed.
OLD_RESERVE_ONLY = False
if OLD_RESERVE_ONLY:
    # .copy() so the column assignments below act on an independent frame
    # rather than on a slice of the merged one.
    df = df[df['Pre: On old reserve?'] == 'Yes'].copy()

# Values that cannot be parsed as numbers become NaN (errors='coerce').
df['Pre: Distance spotted'] = pd.to_numeric(df['Pre: Distance spotted'], errors='coerce')

# Extract year from start_time
df['Year'] = pd.to_datetime(df['start_time']).dt.year

# Summary statistics of distance spotted for each habitat/year combination.
# 'count' tallies only non-missing distances, and 'std' is pandas' default
# sample standard deviation, which is NaN for a group with one observation.
group_keys = ['Pre: Transect physical habitat', 'Year']
column_labels = {
    'mean': 'Mean distance (m)',
    'count': 'Count of occurrences',
    'std': 'SD',
}

distance_by_group = df.groupby(group_keys)['Pre: Distance spotted']
summary = distance_by_group.agg(['mean', 'count', 'std']).reset_index()
summary = summary.sort_values(group_keys)

# Replace the raw aggregation names with presentation labels.
summary = summary.rename(columns=column_labels)

# Export summary to formatted Excel table
output_path = Path('../data/export/excel/distance_spotted_summary.xlsx')
output_path.parent.mkdir(parents=True, exist_ok=True)

# Columns displayed with two decimals in Excel. They are looked up by name so
# the number format follows the columns if the summary layout changes; a
# missing name raises KeyError instead of formatting the wrong column.
two_decimal_columns = ['Mean distance (m)', 'SD']

with pd.ExcelWriter(output_path, engine='xlsxwriter') as writer:
    summary.to_excel(writer, index=False, sheet_name='Summary')
    workbook = writer.book
    worksheet = writer.sheets['Summary']

    # The table spans the header (row 0) plus one row per summary record, so
    # the last row index equals the number of data rows.
    max_row, max_col = summary.shape
    worksheet.add_table(
        0, 0, max_row, max_col - 1,
        {'columns': [{'header': col} for col in summary.columns]},
    )

    # width=None leaves the column width unchanged; only the number format is set.
    fmt = workbook.add_format({'num_format': '0.00'})
    for column_name in two_decimal_columns:
        col_idx = summary.columns.get_loc(column_name)
        worksheet.set_column(col_idx, col_idx, None, fmt)

# Display the summary as the notebook cell's output.
summary


Unnamed: 0,Pre: Transect physical habitat,Year,Mean distance (m),Count of occurrences,SD
0,grass closed,2003,5.261905,42,6.454567
1,grass closed,2007,8.152542,59,10.663851
2,grass closed,2008,10.371429,70,14.64673
3,grass closed,2011,2.946809,94,2.44671
4,grass closed,2018,3.540816,98,5.453493
5,grass closed,2019,2.320755,106,3.758285
6,grass closed,2024,1.25,32,3.069097
7,shrubs closed,2003,1.230769,26,0.429669
8,shrubs closed,2008,5.166667,6,2.857738
9,shrubs closed,2018,1.16129,31,0.454369
