# Is that dog really barking too much? 

#### Load Python tools and Jupyter config

In [1]:
import librosa
import subprocess
import numpy as np
import pandas as pd
import jupyter_black
import altair as alt
import altair_stiles as altstiles

In [2]:
# Load the jupyter_black extension for this notebook session
jupyter_black.load()

# Widen pandas' display limits so large frames are not truncated
pd.set_option("display.max_columns", 100)
pd.set_option("display.max_rows", 1000)
pd.set_option("display.max_colwidth", None)

# Lift Altair's row limit, then register and switch on the "stiles" theme
alt.data_transformers.disable_max_rows()
alt.themes.register("stiles", altstiles.theme)
alt.themes.enable("stiles")

ThemeRegistry.enable('stiles')

In [3]:
# Current local date as a compact YYYYMMDD string
today = pd.Timestamp.today().strftime("%Y%m%d")

---

## Analyze

#### Function to convert an audio file, chart its energy and count loud events

In [4]:
def analyze_audio_events(file_path, file_name, window_seconds=0.05, quantile=0.95):
    """Convert a recording to WAV, chart its short-term energy and count loud events.

    The file is transcoded with FFmpeg, loaded with librosa at its native
    sample rate and reduced to a frame-by-frame RMS energy curve. Each upward
    crossing of the energy threshold counts as one potential bark. The chart
    is displayed and the count is printed; nothing is returned.

    Parameters
    ----------
    file_path : str
        Path of the source audio file handed to FFmpeg as input.
    file_name : str
        Label used in the chart title and in the converted file's name
        (``data/raw/converted_{file_name}.wav``, overwritten on every run).
    window_seconds : float, default 0.05
        Length of the RMS window in seconds; consecutive windows overlap by half.
    quantile : float, default 0.95
        Quantile of the energy distribution used as the detection threshold.

    Raises
    ------
    subprocess.CalledProcessError
        If FFmpeg exits with a non-zero status.
    """
    # Convert the input to WAV, overwriting any previous conversion (-y).
    # Global options go before the input: anything placed after the output
    # file is a trailing option that FFmpeg may ignore.
    output_path = f"data/raw/converted_{file_name}.wav"
    subprocess.run(
        [
            "ffmpeg",
            "-hide_banner",
            "-loglevel",
            "error",
            "-y",
            "-i",
            file_path,
            output_path,
        ],
        check=True,
    )

    # Load the converted audio at its native sample rate
    y, sr = librosa.load(output_path, sr=None)

    # Short-term energy: moving RMS with 50% overlap between windows.
    # The floor of 2 samples keeps hop_length from collapsing to zero when a
    # very small window is requested.
    frame_length = max(int(sr * window_seconds), 2)
    hop_length = frame_length // 2
    energy = librosa.feature.rms(
        y=y, frame_length=frame_length, hop_length=hop_length
    )[0]

    # Only the columns the chart needs go into the DataFrame
    df = pd.DataFrame({"Energy": energy, "Frame": range(len(energy))})

    # Dynamic threshold: the median plus the chosen quantile of the deviations
    # from the median (algebraically the same quantile of the energy itself).
    median_energy = np.median(energy)
    threshold = median_energy + np.quantile(energy - median_energy, quantile)

    # An event starts wherever a loud frame follows a quiet one. The frame
    # "before" the recording is treated as quiet, so a recording that opens
    # above the threshold still contributes an event; a plain diff() would
    # yield NaN there and drop it.
    is_loud = df["Energy"] > threshold
    event_starts = is_loud & ~is_loud.shift(fill_value=False)
    event_count = int(event_starts.sum())

    # Energy curve
    line = alt.Chart(df).mark_line(color="blue", size=0.5).encode(x="Frame", y="Energy")

    # The threshold is a single constant, so one row is enough; charting it from
    # the per-frame data would draw one identical rule for every audio frame.
    rule = (
        alt.Chart(pd.DataFrame({"Threshold": [float(threshold)]}))
        .mark_rule(color="red")
        .encode(y="Threshold:Q")
    )

    chart = (line + rule).properties(
        title=f"Recording analysis on {file_name}", width=900, height=300
    )

    # Display the chart
    chart.display()

    print(f"Number of potential barks detected: {event_count}\n\n")

#### Example usage

In [5]:
# Recording to analyze; the path is built from the file name so the two stay in sync
dog_file_name = "dog_20240507_090700.m4a"
dog_file_path = f"data/raw/{dog_file_name}"

#### Analyze audio events

In [6]:
# Run the analysis on the example recording
analyze_audio_events(file_path=dog_file_path, file_name=dog_file_name)

Number of potential barks detected: 118


