A language-practice app built on the OpenAI GPT chat API.

Pipeline: user voice input => speech-to-text (speech_recognition) => GPT chat API => text-to-speech (pyttsx3) => repeat

In [9]:
import os
import re
import openai
import json
from IPython.display import Markdown
import speech_recognition as sr
import time
import pyttsx3


In [10]:
# Configure the OpenAI client from environment variables.
# A missing variable raises KeyError here, before any API call is attempted.
openai.organization = os.environ['OPENAI_ORGANIZATION']
openai.api_key = os.environ['OPENAI_API_KEY']

In [11]:
def color_print(s, color='black', weight='400'):
    """Render ``s`` as Markdown wrapped in a styled HTML ``<span>``.

    ``color`` and ``weight`` are interpolated into the span's inline style as
    the ``color`` and ``font-weight`` values. ``display`` is not imported in
    the visible code; presumably the notebook environment provides it.
    """
    html = f'<span style="color: {color}; font-weight: {weight}">{s}</span>'
    display(Markdown(html))
    
def system_message(s):
    """Show ``s`` as an app/system notice (dark green, weight 500)."""
    color_print(s, weight=500, color='DarkGreen')

def assistant_message(s):
    """Show ``s`` as an assistant reply (coral, weight 500)."""
    color_print(s, weight=500, color='Coral')

def user_message(s):
    """Show ``s`` as the user's own input (black, weight 500)."""
    color_print(s, weight=500, color='Black')
    
def debug_message(s):
    """Show ``s`` as debug output (dark turquoise, weight 500)."""
    color_print(s, weight=500, color='DarkTurquoise')

In [12]:
def remove_non_japanese_letters(text):
    """Return ``text`` with ASCII letters, digits and some punctuation removed.

    The characters removed are: ``a-z``, ``A-Z``, ``0-9``, half-width and
    full-width parentheses, exclamation marks and question marks, the space
    character, and single/double quotes. Every other character (including
    commas and periods) is kept as-is.
    """
    unwanted = re.compile('[a-zA-Z0-9()!?！？（） \'"]')
    return unwanted.sub('', text)

def output(content, output_mode='audio', language='English', rate=200):
    """Speak ``content`` with pyttsx3 when ``output_mode`` is ``'audio'``.

    Args:
        content: Text to speak.
        output_mode: Speech is produced only for ``'audio'``; any other value
            skips the speech engine entirely.
        language: ``'English'`` and ``'Japanese'`` each select a hard-coded
            voice id. For ``'Japanese'`` the text is first passed through
            ``remove_non_japanese_letters``. Any other value leaves the
            engine's voice unchanged.
        rate: Value assigned to the engine's ``'rate'`` property.

    Returns:
        The text that was spoken, or ``content`` unchanged when nothing was
        spoken. In Japanese audio mode this is the filtered text.
    """
    if output_mode != 'audio':
        return content

    engine = pyttsx3.init()
    engine.setProperty('rate', rate)

    # NOTE(review): these voice ids look macOS-specific -- confirm before
    # running on another platform.
    if language == 'English':
        engine.setProperty('voice', 'com.apple.speech.synthesis.voice.samantha')
    elif language == 'Japanese':
        engine.setProperty('voice', 'com.apple.speech.synthesis.voice.kyoko')
        content = remove_non_japanese_letters(content)

    engine.say(content)
    engine.runAndWait()
    return content

def get_input(input_mode='audio', language='English', timeout=5, pause=2):
    """Read one utterance from the user, either typed or spoken.

    Args:
        input_mode: ``'text'`` reads a line with ``input()``; ``'audio'``
            records from the microphone and transcribes it with
            ``recognize_google``.
        language: ``'English'`` or ``'Japanese'``. Only used in audio mode,
            where it selects the prompt text and the recognition language.
        timeout: Passed to ``Recognizer.listen`` as its ``timeout``.
        pause: Seconds to sleep between failed audio attempts.

    Returns:
        The text the user typed or the transcription of what they said.

    Raises:
        ValueError: If ``input_mode`` is not supported, or if ``language`` is
            not supported in audio mode.
    """
    if input_mode == 'text':
        return input()
    if input_mode != 'audio':
        # A bare string cannot be raised in Python 3; use a real exception.
        raise ValueError(f'input_mode {input_mode} not supported')

    # language -> (quit word shown in the prompt, extra recognize_google kwargs)
    language_settings = {
        'English': ('quit', {}),
        'Japanese': ('終了', {'language': 'ja'}),
    }
    if language not in language_settings:
        # Fail fast: otherwise every attempt would fail inside the retry loop
        # below and the loop would never terminate.
        raise ValueError(f'language {language} not supported')
    quit_word, recognize_kwargs = language_settings[language]

    def get_audio_input():
        # Record a single phrase and return its transcription.
        r = sr.Recognizer()
        mic = sr.Microphone()
        with mic as source:
            r.adjust_for_ambient_noise(source)
            system_message(f'waiting for microphone input(say "{quit_word}" to quit):')
            audio = r.listen(source, timeout=timeout)
        return r.recognize_google(audio, **recognize_kwargs)

    text = None
    while text is None:
        try:
            text = get_audio_input()
        except Exception:
            # Deliberate best-effort: any capture/recognition failure (timeout,
            # unintelligible audio, request error, ...) triggers another try.
            system_message('failed to get microphone input. Trying again..')
            time.sleep(pause)
    user_message(text)
    return text


In [13]:
def pick_topic(language='English'):
    """Ask the chat model for topic suggestions and let the user choose one.

    The model is asked for a JSON list of short topics in ``language``. The
    list is shown to the user, who then types either a 1-based index into
    that list or a free-form topic of their own.

    Args:
        language: Language the suggested topics should be written in.

    Returns:
        The selected suggestion when the user typed a valid index, otherwise
        the raw text the user typed.

    Raises:
        json.JSONDecodeError: If the model's reply is not valid JSON.
    """
    system_message("let's pick a topic!")
    prompt = f'''List 5 random topics to talk about. Topics should be in {language}. Each topic should be short like within 3 words in the JSON list format like following:
    ["best vacation spots",
     "favorite book character",
     "go-to comfort food",
     "dream job fantasy",
     "bucket list activities"]
    '''
    completion = openai.ChatCompletion.create(model="gpt-3.5-turbo",
                                              messages=[{"role": "user", "content": prompt}])
    topics_cands = json.loads(completion['choices'][0]['message']['content'])
    system_message(f'topic_list: \n {topics_cands}')
    system_message('select a topic from the list above or input your own topic:')
    user_input = input()

    try:
        num = int(user_input)
    except ValueError:
        # Not a number: treat the input as a user-supplied topic.
        return user_input

    # Bound the index by what the model actually returned rather than assuming
    # exactly five entries; anything out of range is taken as a literal topic.
    if isinstance(topics_cands, list) and 1 <= num <= len(topics_cands):
        return topics_cands[num - 1]
    return user_input

def conversation(topic, language='English', temperature=1.0, len_limit=10, input_mode='audio', output_mode='text', debug=False, pause=2):
    """Hold a practice conversation about ``topic`` until the user quits.

    Each turn sends the message history to the chat model, shows (and in
    audio mode speaks) the reply, then reads the user's answer. The loop ends
    when the answer contains ``quit`` or ``終了``. A final system message
    reports the total tokens used.

    Args:
        topic: Subject used in the opening user message.
        language: Conversation language; ``'Japanese'`` adds instructions for
            an English translation and furigana.
        temperature: Sampling temperature passed to the chat API.
        len_limit: Once the history reaches this many messages, the messages
            at positions 4 and 5 are dropped (the first four are kept).
        input_mode: Forwarded to ``get_input``; ``'audio'`` also adds a note
            about recognition noise to the system prompt.
        output_mode: ``'audio'`` speaks replies via ``output``; otherwise the
            loop sleeps ``pause`` seconds after showing each reply.
        debug: When true, show the message history after every turn.
        pause: Sleep length in seconds; also forwarded to ``get_input``.
    """
    system_prompt = f"""You are a helpful assistant and you are going to have a conversation with the user in {language}!
    Keep your reponses short enough to sound like a natural dialogue.
    User's goal is to practice speaking, so please guide the user in the conversation and try to ask many questions to the user.
    """
    if input_mode == 'audio':
        system_prompt += 'The user uses audio input, so the contents given from the user sometimes are unrecognizable. Just ignore such parts and try to keep the flow of the conversation.'
    if language == 'Japanese':
        system_prompt += 'Give an English translation after your reponse. Please put furigana right after every Kanji you use.'

    history = [
        {'role': 'system', 'content': system_prompt},
        {'role': 'user', 'content': f"Let's talk about {topic}!"},
    ]
    quit_words = ('quit', '終了')
    tokens_used = 0

    system_message(f'topic: {topic}')
    while True:
        completion = openai.ChatCompletion.create(model="gpt-3.5-turbo",
                                                  messages=history, temperature=temperature)
        reply = completion['choices'][0]['message']
        reply_text = reply['content']

        assistant_message(reply_text)
        if output_mode == 'audio':
            output(reply_text, output_mode=output_mode, language=language)
        else:
            # No speech to wait for, so pause before prompting the user.
            time.sleep(pause)

        tokens_used += completion['usage']['total_tokens']

        user_text = get_input(input_mode=input_mode, language=language, pause=pause)
        if any(word in user_text for word in quit_words):
            break

        history.extend([reply, {'role': 'user', 'content': user_text}])
        if debug:
            debug_message(history)
        if len(history) >= len_limit:
            # Keep the first four messages and discard the next two.
            del history[4:6]

    system_message(f'conversation ended. tokens used={tokens_used}')


In [14]:
# Example session: choose a topic, then practice in English using microphone
# input and spoken replies.
language = 'English'
topic = pick_topic(language=language)
conversation(topic, language=language, input_mode='audio', output_mode='audio', debug=False)

<span style="color: DarkGreen; font-weight: 500">let's pick a topic!</span>

<span style="color: DarkGreen; font-weight: 500">topic_list: 
 ['famous landmarks', 'popular TV shows', 'hobbies and interests', 'favorite childhood memory', 'innovative technology']</span>

<span style="color: DarkGreen; font-weight: 500">select a topic from the list above or input your own topic:</span>

5


<span style="color: DarkGreen; font-weight: 500">topic: innovative technology</span>

<span style="color: Coral; font-weight: 500">Great! What kind of innovative technology are you interested in?</span>

<span style="color: DarkGreen; font-weight: 500">waiting for microphone input(say "quit" to quit):</span>

<span style="color: Black; font-weight: 500">I think recent chat chat AIS like Chachi BT I really invited and I think they will change the world</span>

<span style="color: Coral; font-weight: 500">Yes, chatbots and AI assistants like ChattyBot and Siri are certainly innovative technologies that are changing the way we interact with machines. Have you used any of these chatbots before?</span>

<span style="color: DarkGreen; font-weight: 500">waiting for microphone input(say "quit" to quit):</span>

<span style="color: Black; font-weight: 500">Yes actually this is the fact that we are making this conversation is that I'm using the chat GPT now precisely speaking GPT 3.5</span>

<span style="color: Coral; font-weight: 500">Interesting! GPT-3 is one of the most powerful language models out there right now. How do you find using it as a chatbot? Do you think it's a convenient tool for communication?</span>

<span style="color: DarkGreen; font-weight: 500">waiting for microphone input(say "quit" to quit):</span>

<span style="color: Black; font-weight: 500">quit</span>

<span style="color: DarkGreen; font-weight: 500">conversation ended. tokens used=595</span>