# AudioMoth Sample Data

This notebook explores the sample data provided by the wildlife trust. The aim is to confirm that the data is as expected.

Format 

In [None]:
import sys
import os
from pathlib import Path

# Go up one level from the working directory to .../audiomoth.
# NOTE(review): assumes the kernel's working directory is the folder that holds
# this notebook - confirm if the notebook is launched from somewhere else.
PROJECT_ROOT = Path(os.getcwd()).resolve().parent

# Put the project root first on sys.path so `src` is importable. Any existing
# entry for the same path is dropped first, so re-running this cell keeps the
# root at the front without stacking duplicate entries.
_project_root_entry = str(PROJECT_ROOT)
sys.path[:] = [_project_root_entry] + [
    entry for entry in sys.path if entry != _project_root_entry
]

In [None]:
import pandas as pd

# Location of the sample workbook inside the project's raw-data folder
EXCEL_PATH = PROJECT_ROOT.joinpath("data_raw", "audiomoth_sample.xlsx")

# Widen pandas' display limits so wide tables stay readable while exploring
pd.set_option("display.width", 120)
pd.set_option("display.max_columns", 50)

# Cell output: the workbook path and whether a file exists at that path
(EXCEL_PATH, EXCEL_PATH.exists())

## Load sheet names

In [None]:
# List available sheets in the Excel file.
# The workbook is opened in a `with` block so it is closed as soon as the sheet
# names have been read, instead of leaving the file handle open until the
# object happens to be garbage-collected.
with pd.ExcelFile(EXCEL_PATH) as xls:
    sheet_names = xls.sheet_names
sheet_names

## Load Deployment Data

In [None]:
# Deployment metadata is read from the workbook's first sheet (position 0);
# change `sheet_name` if the metadata lives on a different sheet.
meta = pd.read_excel(io=EXCEL_PATH, sheet_name=0)
print(meta.shape)
meta.head(n=5)

## Load CWT2 Device Data

In [None]:
# Device sheet to explore, given as a 0-based position in the workbook.
# NOTE(review): presumably position 1 is the CWT2 sheet - confirm against the
# sheet names listed earlier.
device_sheet_index = 1
CWT2_df = pd.read_excel(io=EXCEL_PATH, sheet_name=device_sheet_index)

# Report (rows, columns), then preview the first 5 rows
print(CWT2_df.shape)
CWT2_df.head(n=5)

## Basic normalisation
Standardise column names and parse timestamps if present.


In [None]:
import src.audio_moth_schema as audio_moth_schema
import src.normaliser as normaliser

# Normalise the device frame in one chained expression. `CWT2_df` is rebound
# only after every step has succeeded, so a failure part-way through does not
# leave the name pointing at a half-normalised frame (assuming the helpers do
# not modify their input in place).
# NOTE: after a successful run `CWT2_df` holds the normalised frame; re-running
# this cell then feeds that frame back in, so re-run the load cell first.
CWT2_df = (
    CWT2_df
    # Lowercase/underscore column names (non-destructive copy)
    .pipe(normaliser.clean_column_names)
    # Combine the "date" and "time" columns; the result is written to "time"
    .pipe(
        normaliser.combine_date_and_time,
        date_col="date",
        time_col="time",
        output_col="time",
    )
    # Validate and convert types according to AudioMoth schema
    .pipe(audio_moth_schema.AudioMothSchema.validate)
)
CWT2_df.head()

## Quick Summaries


In [None]:
import matplotlib.pyplot as plt
# Detection counts for the 15 most frequent values in the species column
species_col = "common_name"  # Adjust if needed
top_species = CWT2_df[species_col].value_counts().head(15)
display(top_species)

# Horizontal bar chart of those counts, sorted ascending before plotting
plt.figure()
top_species.sort_values().plot.barh()
plt.gca().set(title="Top detected species (sample)", xlabel="Detections")
plt.tight_layout()
plt.show()

## Hour of day activity

In [None]:
# Timestamp column the hour of day is derived from
ts_col = "time"

# Adds an "hour" column to the frame (re-running simply overwrites it)
CWT2_df["hour"] = CWT2_df[ts_col].dt.hour  # type: ignore[reportAttributeAccessIssue]

# Count detections per hour, then reindex onto all 24 hours so that hours with
# no detections show up as 0. Without this, empty hours are missing from the
# counts and the bar chart's x-axis skips them, distorting the daily profile.
hourly_counts = CWT2_df["hour"].value_counts().reindex(range(24), fill_value=0)
display(hourly_counts)

# Plot hourly activity
plt.figure()
hourly_counts.plot(kind="bar")
plt.title("Detections by hour of day")
plt.xlabel("Hour")
plt.ylabel("Detections")
plt.tight_layout()
plt.show()