In [None]:
import io
import wave
import numpy as np
import sounddevice as sd
import PySimpleGUI as sg
import openai
import keyring
import threading
from queue import Queue
from typing import List
from datetime import datetime

openai.api_key =keyring.get_password("system", "openai_key")


# Define a function to save the numpy array as a WAV file in memory
def save_wav_in_memory(audio_data: List[np.ndarray], sample_rate: int) -> bytes:
    byte_io = io.BytesIO()
    with wave.open(byte_io, 'wb') as wav_file:
        wav_file.setnchannels(1)
        wav_file.setsampwidth(2)
        wav_file.setframerate(sample_rate)
        for chunk in audio_data:
            wav_file.writeframes(chunk.astype(np.int16).tobytes())
    byte_io.seek(0)
    return byte_io.read()

# Define audio recording parameters
fs = 44100  # Sample rate

# Callback function for audio recording
def audio_callback(indata, frames, time, status, q):
    q.put(indata.copy())

# Function for recording audio on a separate thread
def record_audio(q, stop_event):
    with sd.InputStream(callback=lambda *args: audio_callback(*args, q), samplerate=fs):
        while not stop_event.is_set():
            sd.sleep(100)  # Sleep for a short duration to reduce CPU usage


# Define GUI layout
layout = [
    [sg.Button('Start Recording'), sg.Button('Stop Recording')],
    [sg.Multiline(size=(60, 20), key='transcription')],
]

# Create GUI window
window = sg.Window('Meeting Transcription', layout)

# Create an event to signal recording thread to stop
stop_event = threading.Event()

# Create a queue to hold audio data chunks
audio_queue = Queue()

# Event loop
recording_thread = None
while True:
    event, values = window.read()
    if event == sg.WIN_CLOSED:
        break
    elif event == 'Start Recording':
        # Start recording on a separate thread
        stop_event.clear()
        recording_thread = threading.Thread(target=record_audio, args=(audio_queue, stop_event))
        recording_thread.start()
    elif event == 'Stop Recording':
        # Signal the recording thread to stop
        stop_event.set()
        if recording_thread is not None:
            recording_thread.join()
        # Collect audio data from the queue
        audio_data = []
        while not audio_queue.empty():
            audio_data.append(audio_queue.get())
        # Convert recording to audio file
        audio_bytes = save_wav_in_memory(audio_data, fs)
        current_timestamp = datetime.now()       
        temp_audio_path = current_timestamp.strftime("temp_audio_%Y%m%d_%H%M%S.wav")

        # Save the audio to a temporary WAV file
        with open(temp_audio_path, "wb") as f:
            f.write(audio_bytes)

        file = open(temp_audio_path, "rb")
        transcription = openai.Audio.transcribe("whisper-1", file)

        print(transcription)
        # Update GUI with transcribed text
        window['transcription'].update(transcription['text'])

# Close GUI window
window.close()


{
  "text": ""
}


In [2]:
import sounddevice as sd

# Get the list of all devices
devices = sd.query_devices()

# Filter input devices
input_devices = [device for device in devices if device['max_input_channels'] > 0]

# Print input devices
for idx, device in enumerate(input_devices):
    print(f"Input Device ID: {device['hostapi']} / {device['name']} / Max Input Channels: {device['max_input_channels']}")

# If 

Input Device ID: 0 / HD-Audio Generic: ALC257 Analog (hw:1,0) / Max Input Channels: 2
Input Device ID: 0 / acp: - (hw:2,0) / Max Input Channels: 2
Input Device ID: 0 / ThinkPad USB-C Dock Gen2 USB Au: Audio (hw:3,0) / Max Input Channels: 1
