In [1]:
from PyQt5.QtWidgets import QApplication, QPushButton, QVBoxLayout, QWidget
from PyQt5.QtCore import QThread, pyqtSignal
import sounddevice as sd
from scipy.io.wavfile import write
import numpy as np

class RecordThread(QThread):
    """Worker thread that records mono microphone audio until ``stop()`` is called.

    Audio is captured in consecutive blocking chunks of ``samplerate`` frames
    (one second each). When the loop ends, the complete recording is published
    through ``data_ready`` as a 1-D float32 array.
    """

    # Emitted once per run() with the full recording, after the loop has ended.
    data_ready = pyqtSignal(np.ndarray)

    def __init__(self, samplerate=16000):
        """Create an idle recorder.

        Args:
            samplerate: Sampling rate in Hz; also the number of frames
                captured per chunk.
        """
        super().__init__()
        self.samplerate = samplerate
        self.record = False
        # float32 matches the dtype requested from sd.rec in run().
        self.mydata = np.array([], dtype=np.float32)

    def run(self):
        """Record chunks until ``self.record`` is cleared, then emit the result."""
        self.record = True
        self.mydata = np.array([], dtype=np.float32)

        # Collect chunks in a list and concatenate once at the end; growing the
        # array with np.append every iteration re-copies the whole recording
        # each second and silently upcasts the samples to float64.
        chunks = []
        while self.record:
            # blocking=True returns only after the chunk is complete, so a
            # stop() request takes effect at the next chunk boundary.
            # NOTE(review): each call is a separate recording, so there may be
            # small gaps between chunks -- confirm whether that matters here.
            chunk = sd.rec(
                int(self.samplerate),
                samplerate=self.samplerate,
                channels=1,
                dtype='float32',
                blocking=True,
            )
            # sd.rec returns (frames, channels); flatten the single channel.
            chunks.append(np.ravel(chunk))

        if chunks:
            self.mydata = np.concatenate(chunks)

        self.data_ready.emit(self.mydata)

    def stop(self):
        """Ask the recording loop to end after the chunk currently in progress."""
        self.record = False


class MyApp(QWidget):
    """Small window with Start/End buttons that drives a ``RecordThread``.

    The finished recording is written to ``user_input_voice_recording.wav``
    in the current working directory.
    """

    def __init__(self):
        super().__init__()

        self.recording_thread = RecordThread()
        self.recording_thread.data_ready.connect(self.save_data)
        # The thread keeps running until its current chunk completes, and
        # QThread.start() is a no-op while it is running. Start is therefore
        # re-enabled from the thread's finished signal, not directly from
        # end_recording, so a restart is never silently dropped.
        self.recording_thread.finished.connect(self.on_recording_finished)

        self.initUI()

    def initUI(self):
        """Build the two-button layout and show the window."""
        vbox = QVBoxLayout()

        self.start_button = QPushButton('Start Recording', self)
        self.start_button.clicked.connect(self.start_recording)

        self.end_button = QPushButton('End Recording', self)
        self.end_button.clicked.connect(self.end_recording)
        # Nothing is being recorded yet, so there is nothing to end.
        self.end_button.setEnabled(False)

        vbox.addWidget(self.start_button)
        vbox.addWidget(self.end_button)

        self.setLayout(vbox)
        self.setWindowTitle('Voice Recorder')
        self.setGeometry(300, 300, 300, 200)
        self.show()

    def start_recording(self):
        """Start the recording thread and switch the buttons to 'recording'."""
        if self.recording_thread.isRunning():
            # A previous recording is still winding down; ignore the click.
            return
        self.start_button.setEnabled(False)
        self.end_button.setEnabled(True)
        self.recording_thread.start()

    def end_recording(self):
        """Request the recording to stop; Start is re-enabled once it has."""
        self.end_button.setEnabled(False)
        self.recording_thread.stop()
        if not self.recording_thread.isRunning():
            # No finished signal will arrive, so restore the button directly.
            self.start_button.setEnabled(True)

    def on_recording_finished(self):
        """Slot for the thread's ``finished`` signal: allow a new recording."""
        self.end_button.setEnabled(False)
        self.start_button.setEnabled(True)

    def save_data(self, data):
        """Write the recorded samples to the WAV file at the thread's sample rate."""
        write('user_input_voice_recording.wav', self.recording_thread.samplerate, data)

    def closeEvent(self, event):
        """Stop the recorder before the window closes.

        Destroying a QThread that is still running aborts the process, so give
        the thread a bounded amount of time to finish its current chunk.
        """
        self.recording_thread.stop()
        self.recording_thread.wait(5000)  # milliseconds
        super().closeEvent(event)


if __name__ == '__main__':
    import sys

    # Only one QApplication may exist per process. Re-running this cell in the
    # same kernel must reuse the existing instance instead of constructing a
    # second one.
    app = QApplication.instance() or QApplication(sys.argv)
    ex = MyApp()  # keep a reference so the window is not garbage-collected
    # NOTE(review): sys.exit raises SystemExit, which a notebook kernel reports
    # as an exception once the window is closed -- confirm whether this should
    # stay when the code is only ever run as a cell.
    sys.exit(app.exec_())


In [None]:
import os

import openai
from gtts import gTTS
from pydub import AudioSegment
from pydub.playback import play
from scipy.io.wavfile import write, read

# Credentials are read from the environment and never stored in the notebook.
# The key that was previously committed in this cell should be treated as
# compromised and revoked.
openai.api_key = os.environ["OPENAI_API_KEY"]  # KeyError if unset: fail fast
openai.organization = os.environ.get("OPENAI_ORGANIZATION")

# Paths are relative to the working directory. The response audio is written
# and read back through the same constant so the two can never drift apart.
RECORDING_PATH = "user_input_voice_recording.wav"
RESPONSE_AUDIO_PATH = "gpt-response-text-to-speech.mp3"

# Transcribe the recorded question; the context manager closes the file handle.
with open(RECORDING_PATH, "rb") as audio_file:
    transcript = openai.Audio.transcribe("whisper-1", audio_file)
user_text = transcript["text"]

# Send the transcribed text to the chat model.
gpt_response = openai.ChatCompletion.create(
    model="gpt-4",
    messages=[
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": user_text},
    ],
)

gpt_res_text = gpt_response['choices'][0]['message']['content']

# Print text response of ChatGPT
print(gpt_res_text)

# Convert the reply to speech and save it as an MP3.
gpt_res_t2s = gTTS(gpt_res_text)
gpt_res_t2s.save(RESPONSE_AUDIO_PATH)

# Load the file that was just written and play it.
audio = AudioSegment.from_mp3(RESPONSE_AUDIO_PATH)
play(audio)
