# Voice ELI5
This notebook uses the Azure Speech APIs and an OpenAI LLM (via LangChain) to answer spoken questions in a way a five-year-old can understand!

Running the last cell will listen for audio input from your microphone.

## Requirements:
### Libraries
- langchain
- azure-cognitiveservices-speech
- openai (needed by LangChain's `OpenAI` LLM wrapper)


### Environment Variables
- OPENAI_API_KEY  : OpenAI API key for the LLM
- AZURE_COGS_KEY  : Azure Cognitive Services or Speech API key
- AZURE_COGS_REGION  : Azure Cognitive Services or Speech API region


# Speech SDK Functions

In [1]:
import os
import azure.cognitiveservices.speech as speechsdk

class SpeechAPI:
    """Thin wrapper around the Azure Speech SDK for one-shot listen/speak.

    The subscription key and region are read from the ``AZURE_COGS_KEY`` and
    ``AZURE_COGS_REGION`` environment variables when the object is built.
    Recognition is configured for ``en-CA`` and synthesis uses the
    ``en-CA-LiamNeural`` voice.
    """

    def __init__(self, verbose=False):
        """Create the shared speech configuration.

        Args:
            verbose: When true, successful recognitions and syntheses are
                echoed to stdout. Failures are always printed.

        Raises:
            KeyError: If either environment variable is not set.
        """
        key = os.environ["AZURE_COGS_KEY"]
        region = os.environ["AZURE_COGS_REGION"]
        config = speechsdk.SpeechConfig(subscription=key, region=region)
        config.speech_recognition_language = "en-CA"
        config.speech_synthesis_voice_name = 'en-CA-LiamNeural'
        self.speech_config = config
        self.verbose = verbose

    def recognize_from_microphone(self):
        """Listen on the default microphone for a single utterance.

        Adapted from the Speech to Text Quickstart:
        https://learn.microsoft.com/en-us/azure/cognitive-services/speech-service/get-started-speech-to-text?tabs=linux%2Cterminal&pivots=programming-language-python

        Returns:
            The recognized text, or ``None`` when nothing was recognized or
            the request was canceled (a diagnostic is printed in those cases).
        """
        mic = speechsdk.audio.AudioConfig(use_default_microphone=True)
        recognizer = speechsdk.SpeechRecognizer(speech_config=self.speech_config, audio_config=mic)

        print("Speak into your microphone.")
        result = recognizer.recognize_once_async().get()
        outcome = result.reason

        # Success path first: hand the transcript back to the caller.
        if outcome == speechsdk.ResultReason.RecognizedSpeech:
            if self.verbose:
                print("Recognized: {}".format(result.text))
            return result.text

        if outcome == speechsdk.ResultReason.NoMatch:
            print("No speech could be recognized: {}".format(result.no_match_details))
            return None

        # Any remaining reason other than Canceled is ignored silently.
        if outcome != speechsdk.ResultReason.Canceled:
            return None

        details = result.cancellation_details
        print("Speech Recognition canceled: {}".format(details.reason))
        if details.reason == speechsdk.CancellationReason.Error:
            print("Error details: {}".format(details.error_details))
            print("Did you set the speech resource key and region values?")
        return None

    def text_to_speech(self, text):
        """Speak ``text`` through the default speaker.

        Adapted from the Text to Speech Quickstart:
        https://learn.microsoft.com/en-us/azure/cognitive-services/speech-service/get-started-text-to-speech?tabs=linux%2Cterminal&pivots=programming-language-python

        Args:
            text: The text to synthesize.

        Returns:
            None. A cancellation is reported on stdout rather than raised.
        """
        speaker = speechsdk.audio.AudioOutputConfig(use_default_speaker=True)
        synthesizer = speechsdk.SpeechSynthesizer(speech_config=self.speech_config, audio_config=speaker)

        result = synthesizer.speak_text_async(text).get()
        outcome = result.reason

        if outcome == speechsdk.ResultReason.SynthesizingAudioCompleted:
            if self.verbose:
                print("Speech synthesized for text [{}]".format(text))
            return

        if outcome != speechsdk.ResultReason.Canceled:
            return

        details = result.cancellation_details
        print("Speech synthesis canceled: {}".format(details.reason))
        # The configuration hint is only shown when the SDK supplied error details.
        if details.reason == speechsdk.CancellationReason.Error and details.error_details:
            print("Error details: {}".format(details.error_details))
            print("Did you set the speech resource key and region values?")

In [2]:
from langchain.llms import OpenAI
from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain


class Eli5Runner():
    """Voice loop: listen for a question, ask the LLM, speak the answer.

    The LLM is prompted to give a short answer understandable by a
    5 year old. Speech input and output go through ``SpeechAPI``.
    """

    def __init__(self, verbose=False):
        """Build the LLM chain and the speech client.

        Args:
            verbose: Forwarded to ``SpeechAPI``; when true, recognized and
                synthesized text is echoed to stdout.
        """
        # temperature=0 keeps the answers as deterministic as the model allows.
        llm = OpenAI(temperature=0)
        prompt = PromptTemplate(
            input_variables=["question"], 
            template="Give a short answer understandable by a 5 year old to the following question: {question}")
        self.chain = LLMChain(llm=llm, prompt=prompt)
        self.speechAPI = SpeechAPI(verbose)
        
    def run(self):
        """Handle one spoken question end to end.

        Returns:
            None. If no question was recognized, nothing is sent to the LLM
            and nothing is spoken.
        """
        input_text = self.speechAPI.recognize_from_microphone()
        # recognize_from_microphone() returns None when recognition fails or
        # is canceled (it has already printed why). Passing that on would
        # send a meaningless question to the LLM, so stop here instead.
        if not input_text:
            return
        response_text = self.chain.run(input_text)
        self.speechAPI.text_to_speech(response_text)

In [3]:
# Build the runner once; this sets up the LLM chain and the speech client.
explainer = Eli5Runner()

In [4]:
# Listen for one question on the microphone, ask the LLM, and speak the answer.
explainer.run()

Speak into your microphone.
