Listening for a hot word

Set up Porcupine

In [1]:
import pvporcupine
from pvrecorder import PvRecorder

import os
from dotenv import load_dotenv

# Load environment variables from the .env file
load_dotenv()

# Read the Porcupine access key from the environment and fail fast with a
# clear message when it is missing, instead of handing None to Porcupine.
porcupine_key = os.getenv("PORCUPINE_KEY")
if not porcupine_key:
    raise RuntimeError(
        "PORCUPINE_KEY is not set; add it to your .env file or environment."
    )

# Create the hot-word engine that listens for the "bumblebee" keyword.
porcupine = pvporcupine.create(
    access_key=porcupine_key, keywords=["bumblebee"])

Set up the recorder and create the WAV file

In [2]:
import wave


# Capture audio in frames of exactly the size Porcupine processes at once.
recorder = PvRecorder(frame_length=porcupine.frame_length, device_index=-1)
recorder.start()


# The recording is written to output.wav in the current working directory.
output_path = os.path.join(os.getcwd(), "output.wav")

# Set output_path to None to skip writing the recording to disk; the
# listening loop only writes frames when wav_file is not None.
wav_file = None
if output_path is not None:
    wav_file = wave.open(output_path, "w")
    wav_file.setnchannels(1)  # single channel
    wav_file.setsampwidth(2)  # 2 bytes per sample (frames are packed as "h")
    # Take the rate from the engine instead of hard-coding it so the WAV
    # header always matches the audio Porcupine is fed.
    wav_file.setframerate(porcupine.sample_rate)

Set up recording and Porcupine hot word processing

In [3]:
from pvrecorder import PvRecorder

# Reuse the recorder that was created and started in the previous cell.
# Constructing a second PvRecorder here would rebind `recorder` and leave the
# first one running with no remaining reference to stop or delete it.
pcm = recorder.read()
keyword_index = porcupine.process(pcm)
# Index 0 is "bumblebee", the only keyword the engine was created with.
if keyword_index == 0:
    print("detected")

Start recording and listen for the hot word

In [4]:
import struct

# Read audio frame by frame until the cell is interrupted: every frame is
# checked for the hot word and, when a WAV file is open, appended to it.
try:
    while True:
        pcm = recorder.read()
        result = porcupine.process(pcm)

        if wav_file is not None:
            # Pack the frame's samples as consecutive 16-bit ("h") values.
            wav_file.writeframes(struct.pack("h" * len(pcm), *pcm))

        # A non-negative result means a keyword was detected in this frame.
        if result >= 0:
            print("Bumblebee detected")
except KeyboardInterrupt:
    # Interrupting the kernel is the intended way to end the loop.
    print("Stopping ...")
finally:
    # Always release the recorder, the engine and the output file.
    recorder.delete()
    porcupine.delete()
    if wav_file is not None:
        wav_file.close()

Bubmlebee detected
Stopping ...


In [None]:
import speech_recognition as sr
import pyaudio

init_rec = sr.Recognizer()
print("Let's speak!!")
# Keep the microphone open only while capturing; the recognition request
# below does not need the device.
with sr.Microphone() as source:
    audio_data = init_rec.record(source, duration=5)

print("Recognizing your text.............")
try:
    text = init_rec.recognize_google(audio_data)
except sr.UnknownValueError:
    # The recognizer could not make sense of the recorded audio.
    text = None
    print("Could not understand the audio.")
except sr.RequestError as error:
    # The recognition request itself failed.
    text = None
    print(f"Speech recognition request failed: {error}")
else:
    print(text)

Text to Speech

List languages

In [None]:
from typing import Sequence

import google.cloud.texttospeech as tts


def unique_languages_from_voices(voices: Sequence[tts.Voice]):
    """Return the set of distinct language codes found across ``voices``.

    Each voice contributes every entry of its ``language_codes``; duplicates
    across voices collapse into a single set member.
    """
    return {code for voice in voices for code in voice.language_codes}


def list_languages():
    """Print the language codes of all available voices, five per row.

    Fetches the voice list from a Text-to-Speech client, reduces it to the
    distinct language codes and prints them sorted under a centered header.
    """
    tts_client = tts.TextToSpeechClient()
    available_voices = tts_client.list_voices().voices
    language_codes = sorted(unique_languages_from_voices(available_voices))

    header = f" Languages: {len(language_codes)} "
    print(header.center(60, "-"))
    for position, code in enumerate(language_codes, start=1):
        # End the row after every fifth entry; otherwise stay on the same line.
        row_end = "\n" if position % 5 == 0 else ""
        print(f"{code:>10}", end=row_end)

list_languages()

List voices

In [None]:
import google.cloud.texttospeech as tts


def list_voices(language_code=None):
    """Print a table of the available voices, sorted by voice name.

    Args:
        language_code: Passed through to the client's ``list_voices`` call;
            ``None`` is forwarded unchanged.
    """
    tts_client = tts.TextToSpeechClient()
    listing = tts_client.list_voices(language_code=language_code)
    ordered_voices = sorted(listing.voices, key=lambda entry: entry.name)

    banner = f" Voices: {len(ordered_voices)} "
    print(banner.center(60, "-"))
    for entry in ordered_voices:
        codes = ", ".join(entry.language_codes)
        voice_label = entry.name
        # Convert the stored gender value to its enum member name for display.
        gender_label = tts.SsmlVoiceGender(entry.ssml_gender).name
        sample_rate = entry.natural_sample_rate_hertz
        print(f"{codes:<8} | {voice_label:<24} | {gender_label:<8} | {sample_rate:,} Hz")

list_voices("de")

In [None]:
import google.cloud.texttospeech as tts
from pydub import AudioSegment
from pydub.playback import play


def text_to_wav(voice_name: str, text: str) -> str:
    """Synthesize ``text`` with the given voice and save it as ``<voice_name>.wav``.

    Args:
        voice_name: Full voice name such as "de-DE-Standard-C"; its first two
            dash-separated parts are used as the language code.
        text: The text to synthesize.

    Returns:
        The name of the file that was written, so callers can pass it on
        without rebuilding the name themselves.
    """
    language_code = "-".join(voice_name.split("-")[:2])
    text_input = tts.SynthesisInput(text=text)
    voice_params = tts.VoiceSelectionParams(
        language_code=language_code, name=voice_name
    )
    audio_config = tts.AudioConfig(audio_encoding=tts.AudioEncoding.LINEAR16)

    client = tts.TextToSpeechClient()
    response = client.synthesize_speech(
        input=text_input,
        voice=voice_params,
        audio_config=audio_config,
    )

    filename = f"{voice_name}.wav"
    with open(filename, "wb") as out:
        out.write(response.audio_content)
    print(f'Generated speech saved to "{filename}"')
    return filename

text_to_wav("de-DE-Standard-C", "Hallo, wie geht es Ihnen? Ich hoffe, es geht Ihnen gut.")



def play_wav(filename: str):
    """Load the WAV file at ``filename`` with pydub and play it."""
    play(AudioSegment.from_wav(filename))

# The audio file was already generated by the text_to_wav() call above, so
# play it directly instead of sending a second, identical synthesis request.
play_wav("de-DE-Standard-C.wav")