# STT TTS 합치기

In [2]:
from xml.sax.saxutils import escape

import gradio as gr
import requests

  from .autonotebook import tqdm as notebook_tqdm


In [20]:
########## GPT 호출 #########

def request_gpt(prompt):
    """Send ``prompt`` to the chat-completions endpoint and return the reply.

    The request is a POST with a fixed Korean system message followed by the
    user's prompt. ``gpt_endpoint`` and ``gpt_key`` are read from the
    enclosing (module/notebook) scope.

    Args:
        prompt: Text placed in the ``user`` message.

    Returns:
        The reply text from ``choices[0].message.content``, or ``""`` when the
        request fails, the status is not 200, or the response body does not
        have the expected shape.
    """
    endpoint = gpt_endpoint

    headers = {
        "Content-Type": "application/json",
        "api-key": gpt_key,
    }

    body = {
        "messages": [
            {
                "role": "system",
                "content": [
                    {
                        "type": "text",
                        "text": "너는 나에게 도움을 주는 AI 도우미야.",
                    }
                ],
            },
            {
                "role": "user",
                "content": [
                    {
                        "type": "text",
                        "text": prompt,
                    }
                ],
            },
        ],
        "temperature": 0,
        "top_p": 0.9,
        "max_tokens": 800,
    }

    # Without a timeout a stalled connection would block the UI handler forever.
    try:
        response = requests.post(endpoint, headers=headers, json=body, timeout=60)
    except requests.RequestException as error:
        print(f"GPT request failed: {error}")
        return ""

    if response.status_code != 200:
        print(response.status_code, response.text)
        return ""

    # A 200 response may still be malformed or carry no content; callers treat
    # an empty string as "no answer", so normalise every such case to "".
    try:
        content = response.json()["choices"][0]["message"]["content"]
    except (ValueError, KeyError, IndexError, TypeError):
        return ""

    return content or ""



####### STT ########
def request_stt(file_path):
    """Transcribe the audio file at ``file_path`` through the STT endpoint.

    The file's bytes are POSTed as ``audio/wav`` with ``language=ko-KR`` and
    ``format=detailed``. ``end_point`` and ``stt_key`` are read from the
    enclosing (module/notebook) scope.

    Args:
        file_path: Path of the audio file to upload.

    Returns:
        The recognised text, or ``""`` when ``file_path`` is empty, the
        request fails, the status is not 200, or the response contains no
        recognised text.
    """
    # Nothing to transcribe; avoid calling open() on an empty/None path.
    if not file_path:
        return ""

    endpoint = end_point

    query_params = {
        "language": "ko-KR",
        "format": "detailed",
    }
    headers = {
        "Content-Type": "audio/wav",
        "Ocp-Apim-Subscription-Key": stt_key,
    }

    with open(file_path, "rb") as audio:
        audio_data = audio.read()

    try:
        response = requests.post(
            endpoint,
            params=query_params,
            headers=headers,
            data=audio_data,
            timeout=30,
        )
    except requests.RequestException as error:
        print(f"STT request failed: {error}")
        return ""

    print(response.status_code, response.text)

    if response.status_code != 200:
        return ""

    try:
        result = response.json()
    except ValueError:
        return ""
    if not isinstance(result, dict):
        return ""

    # A 200 response can lack "DisplayText", so indexing it directly would
    # raise KeyError. Fall back to the first "NBest" candidate if one exists.
    text = result.get("DisplayText")
    if not text:
        candidates = result.get("NBest") or []
        if candidates and isinstance(candidates[0], dict):
            text = candidates[0].get("Display")

    return text or ""

######### TTS ############

def request_tts(text):
    """Synthesise ``text`` to speech and save the audio to a local file.

    The text is wrapped in SSML and POSTed to the TTS endpoint requesting
    16 kHz mono MP3 output. ``tts_endpoint`` and ``tts_key`` are read from the
    enclosing (module/notebook) scope.

    Args:
        text: Text to speak.

    Returns:
        The path of the written audio file, or ``None`` when ``text`` is
        empty/blank, the request fails, or the status is not 200.
    """
    # An empty <prosody> element has nothing to speak; skip the request.
    if text is None or not str(text).strip():
        return None

    # The requested output format below is MP3, so the extension must match.
    file_name = "response_audio.mp3"
    endpoint = tts_endpoint
    headers = {
        "Content-Type": "application/ssml+xml",
        "X-Microsoft-OutputFormat": "audio-16khz-128kbitrate-mono-mp3",
        "Ocp-Apim-Subscription-Key": tts_key,
    }

    # Escape the text so characters such as "&" or "<" cannot break the SSML.
    safe_text = escape(str(text))
    body = f"""
        <speak version='1.0' xml:lang='ko-KR'>
            <voice xml:lang='ko-KR' xml:gender='Male' name='ko-KR-GookMinNeural'>
                <prosody rate="50%">
                    {safe_text}
                </prosody>
            </voice>
        </speak>
    """

    # Encode explicitly: leaving a str body to the HTTP stack makes the
    # encoding of non-ASCII (Korean) text depend on the installed versions.
    try:
        response = requests.post(
            endpoint,
            headers=headers,
            data=body.encode("utf-8"),
            timeout=30,
        )
    except requests.RequestException as error:
        print(f"TTS request failed: {error}")
        return None

    if response.status_code != 200:
        # Only print the body on failure; on success it is binary audio.
        print(response.status_code, response.text)
        return None

    print(response.status_code)

    # Save the audio to a file.
    with open(file_name, "wb") as audio_file:
        audio_file.write(response.content)
    return file_name
    

#request_tts("안녕 반가워")

#request_stt("data/audio1.wav")

# Gradio UI: a chat panel on the left and STT / TTS tools on the right.
with gr.Blocks() as demo:

    gr.Markdown("# AI Speech World!")

    def change_audio(audio_path):
        """Return the transcription of ``audio_path``, or "" if no audio is set."""
        if not audio_path:
            return ""
        return request_stt(audio_path)

    def click_send(text):
        """Return the path of the synthesised audio for ``text`` (None on failure)."""
        # request_tts already returns either a file path or None.
        return request_tts(text)

    def click_gpt_send(prompt, histories):
        """Ask GPT for a reply and append the exchange to the chat history.

        Returns a pair: "" to clear the prompt textbox, and the updated list
        of (user message, assistant message) tuples for the chatbot.
        """
        histories = histories or []
        content = request_gpt(prompt)

        # On failure keep the user's prompt in the user slot and show the
        # fallback text as the reply, so the transcript stays truthful.
        reply = content if content else "응답을 받지 못했습니다"
        histories.append((prompt, reply))

        return "", histories

    with gr.Row():

        # Left side: chatbot
        with gr.Column(scale=4):
            chatbot = gr.Chatbot()
            with gr.Row():
                prompt = gr.Textbox(label="프롬포트", scale=6)
                send_gpt_button = gr.Button("전송", scale=1)

            # NOTE(review): gpt_audio is created but no event writes to it yet.
            gpt_audio = gr.Audio(interactive=False, autoplay=True)
            send_gpt_button.click(
                click_gpt_send,
                inputs=[prompt, chatbot],
                outputs=[prompt, chatbot],
            )

        # Right side: speech tools
        with gr.Column(scale=1):
            with gr.Column():
                gr.Markdown("### STT ###")

                input_mic = gr.Audio(
                    label="마이크 입력",
                    sources="microphone",
                    type="filepath",
                    show_download_button=True,
                )
                output_textbox = gr.Textbox(label="텍스트", interactive=False)

                # Transcribe whenever the recorded audio changes.
                input_mic.change(
                    change_audio,
                    inputs=[input_mic],
                    outputs=[output_textbox],
                )

            with gr.Column():
                gr.Markdown("### TTS ###")

                tts_input_textbox = gr.Textbox(
                    label="입력",
                    placeholder="음성 변환할 텍스트를 입력하세요",
                )
                send_tts_button = gr.Button('전송')

                output_tts_audio = gr.Audio(interactive=False, autoplay=True)

                send_tts_button.click(
                    fn=click_send,
                    inputs=[tts_input_textbox],
                    outputs=[output_tts_audio],
                )

    demo.launch()

  chatbot = gr.Chatbot()


* Running on local URL:  http://127.0.0.1:7877

To create a public link, set `share=True` in `launch()`.
