In [1]:
import pveagle
import dotenv
import os

# Load variables from a local .env file (if present) into the process environment
# so later cells can read PICOVOICE_ACCESS_KEY via os.getenv. The return value is
# bound to `_` so it is not echoed as this cell's output.
_ = dotenv.load_dotenv()

## Adding a Voice

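Install the Eagle demo package, which provides the `eagle_demo_mic` command-line tool:

```bash
pip install pveagledemo
```

Then enroll a speaker from the microphone and write the resulting voice profile to disk:

```bash
eagle_demo_mic enroll --access_key ${ACCESS_KEY} --output_profile_path ${OUTPUT_PROFILE_PATH}
```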

 

In [None]:
# Read the Picovoice AccessKey from the environment (a .env file is loaded by
# dotenv.load_dotenv() in the first cell).
access_key = os.getenv("PICOVOICE_ACCESS_KEY")
if not access_key:
    # os.getenv returns None when the variable is unset; fail here with an
    # actionable message instead of handing None to the SDK.
    raise RuntimeError(
        "PICOVOICE_ACCESS_KEY is not set; export it or add it to your .env file."
    )

# Profiler object for enrolling a new speaker.
# NOTE(review): nothing visible in this notebook uses or releases it afterwards;
# presumably it should be released with eagle_profiler.delete() when done - confirm.
eagle_profiler = pveagle.create_profiler(access_key)

In [None]:
import os
from pathlib import Path
import wave
import numpy as np
import sounddevice as sd
import pveagle
import dotenv

# Make sure the .env values are in the environment before reading the key.
dotenv.load_dotenv()
ACCESS_KEY = os.getenv("PICOVOICE_ACCESS_KEY")

# Display name -> location of that speaker's serialized Eagle profile.
# These are relative paths, so they resolve against the kernel's working directory.
profile_paths = {
    "Liam": Path("./applications/satellite/src/satellite/audio/recognition/eagle/voices/liam"),
    "Madi": Path("./applications/satellite/src/satellite/audio/recognition/eagle/voices/madi"),
}

# Parallel lists: speaker_names[i] labels speaker_profiles[i], and therefore the
# i-th score that the recognizer reports.
speaker_names = []
speaker_profiles = []
for name, path in profile_paths.items():
    if not path.exists():
        raise FileNotFoundError(f"Profile file missing: {path}")
    speaker_profiles.append(pveagle.EagleProfile.from_bytes(path.read_bytes()))
    speaker_names.append(name)

recognizer = pveagle.create_recognizer(
    access_key=ACCESS_KEY,
    speaker_profiles=speaker_profiles,
)

# Audio format the recognizer expects: samples per process() call and sample rate.
FRAME_LENGTH, SAMPLE_RATE = recognizer.frame_length, recognizer.sample_rate

def _stream_scores(frames: np.ndarray) -> np.ndarray:
    """Feed audio to the recognizer frame by frame and sum the per-speaker scores.

    Args:
        frames: Audio samples. Both callers in this notebook pass a 1-D int16 array.

    Returns:
        A float64 array with one accumulated score per entry in ``speaker_names``.
        All zeros when ``frames`` holds less than one full frame.
    """
    totals = np.zeros(len(speaker_names), dtype=np.float64)

    # Only whole frames can be processed; any trailing partial frame is dropped.
    whole_frames = len(frames) // FRAME_LENGTH
    if not whole_frames:
        return totals

    # Start from a clean recognizer state so earlier audio does not affect this run.
    recognizer.reset()
    usable = frames[: whole_frames * FRAME_LENGTH]
    for frame in usable.reshape(-1, FRAME_LENGTH):
        totals += np.array(recognizer.process(frame))
    return totals

def identify_from_wav(path: str) -> dict:
    """Identify the most likely enrolled speaker in a WAV file.

    Args:
        path: Location of the WAV file to analyse.

    Returns:
        A dict with three keys:
          * ``"winner"``: name of the speaker with the highest accumulated score.
          * ``"confidence"``: the winner's share of the total score, or ``0.0``
            when the total is zero.
          * ``"scores"``: mapping of every speaker name to its accumulated score.
        When every score is zero (for example, audio shorter than one frame),
        ``"winner"`` is the first entry of ``speaker_names``.

    Raises:
        ValueError: If the file is not mono, not sampled at ``SAMPLE_RATE``, or
            does not use 16-bit samples.
    """
    with wave.open(path, "rb") as wav_file:
        # The sample width must be validated as well: the raw bytes are
        # reinterpreted as int16 below, so 8/24/32-bit audio would be silently
        # decoded into garbage samples.
        if (
            wav_file.getframerate() != SAMPLE_RATE
            or wav_file.getnchannels() != 1
            or wav_file.getsampwidth() != 2
        ):
            raise ValueError("Audio must be mono PCM16 at 16 kHz for Eagle.")
        pcm = np.frombuffer(wav_file.readframes(wav_file.getnframes()), dtype=np.int16)
    scores = _stream_scores(pcm)
    idx = int(np.argmax(scores))
    total = scores.sum()
    confidence = float(scores[idx] / total) if total else 0.0
    return {
        "winner": speaker_names[idx],
        "confidence": confidence,
        "scores": dict(zip(speaker_names, scores)),
    }

def identify_from_microphone(seconds: float = 5.0) -> dict:
    """Record from the default input device and identify the most likely speaker.

    Args:
        seconds: Length of the recording, in seconds.

    Returns:
        A dict with ``"winner"`` (highest-scoring speaker name), ``"confidence"``
        (the winner's share of the total score, ``0.0`` when the total is zero)
        and ``"scores"`` (speaker name -> accumulated score).
    """
    sample_count = int(seconds * SAMPLE_RATE)
    # blocking=True makes rec() return only after the whole clip has been captured.
    captured = sd.rec(
        sample_count,
        samplerate=SAMPLE_RATE,
        channels=1,
        dtype=np.int16,
        blocking=True,
    )
    # Flatten the recorded array into a 1-D stream of samples.
    totals = _stream_scores(captured.reshape(-1))

    best = int(np.argmax(totals))
    overall = totals.sum()
    if overall:
        share = float(totals[best] / overall)
    else:
        share = 0.0

    return {
        "winner": speaker_names[best],
        "confidence": share,
        "scores": dict(zip(speaker_names, totals)),
    }

# To score a pre-recorded clip instead of live audio, use this line instead:
# result = identify_from_wav("test_clip.wav")
result = identify_from_microphone(seconds=4)
print(result)

{'winner': 'Liam', 'confidence': 0.999626278828649, 'scores': {'Liam': np.float64(63.8753137588501), 'Madi': np.float64(0.023880481719970703)}}
