In [1]:
import speech_recognition as sr
from gtts import gTTS
from transformers import GPT2LMHeadModel, GPT2Tokenizer

In [2]:
def listen_for_wake_word():
    r = sr.Recognizer()
    with sr.Microphone() as source:
        print("Listening for wake-up word...")
        audio = r.listen(source)

    try:
        print("Processing...")
        text = r.recognize_google(audio)
        if "bob" in text.lower():
            return True
    except sr.UnknownValueError:
        pass
    except sr.RequestError as e:
        print("Could not request results from Google Speech Recognition service")
    return False

In [3]:
def transcribe_audio():
    r = sr.Recognizer()
    with sr.Microphone() as source:
        print("Listening for story input...")
        audio = r.listen(source)

    try:
        print("Transcribing...")
        text = r.recognize_google(audio)
        print("Transcription:", text)
        return text
    except sr.UnknownValueError:
        print("Speech Recognition could not understand the audio")
    except sr.RequestError as e:
        print("Could not request results from Google Speech Recognition service")
    return None


In [4]:
# Function to generate story prompt

model_name = "gpt2"
model = GPT2LMHeadModel.from_pretrained(model_name)
tokenizer = GPT2Tokenizer.from_pretrained(model_name)

def generate_story_prompt(prompt):
    # Tokenize the prompt
    input_ids = tokenizer.encode(prompt, return_tensors='pt')

    # Generate story completion
    output = model.generate(input_ids, max_length=150, num_beams=5, no_repeat_ngram_size=2, top_k=50, top_p=0.95)

    # Decode and print the generated story
    generated_story = tokenizer.decode(output[0], skip_special_tokens=True)
    
    return generated_story

In [5]:
# Function to convert text to speech
def text_to_speech(text, output_path):
    tts = gTTS(text=text, lang='en')
    tts.save(output_path)

In [6]:
import os


# Main Program
if __name__ == "__main__":
    # Specify the directory to save the generated audio
    output_directory = r"C:\Users\Krishna Patel\Downloads"

    # Ensure the directory exists, create it if not
    os.makedirs(output_directory, exist_ok=True)

    # Listen for wake-up word
    while not listen_for_wake_word():
        pass

    # Transcribe audio to text
    story_input = transcribe_audio()
    
    if story_input:
        # Generate story prompt
        prompt = generate_story_prompt(story_input)

        # Convert text to speech
        output_path = os.path.join(output_directory, "generated_story_output.mp3")
        text_to_speech(prompt, output_path)

    print("End of Program")


Listening for wake-up word...
Processing...
Listening for story input...
Transcribing...


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Transcription: stop
End of Program
