In [None]:
import io
import wave
import numpy as np
import sounddevice as sd
import PySimpleGUI as sg
import openai
import keyring
import threading
from queue import Queue
from typing import List
from datetime import datetime

openai.api_key =keyring.get_password("system", "openai_key")


# Define a function to save the numpy array as a WAV file in memory
def save_wav_in_memory(audio_data: List[np.ndarray], sample_rate: int) -> bytes:
    byte_io = io.BytesIO()
    with wave.open(byte_io, 'wb') as wav_file:
        wav_file.setnchannels(1)
        wav_file.setsampwidth(2)
        wav_file.setframerate(sample_rate)
        for chunk in audio_data:
            wav_file.writeframes(chunk.astype(np.int16).tobytes())
    byte_io.seek(0)
    return byte_io.read()

# Define audio recording parameters
fs = 44100  # Sample rate

# Callback function for audio recording
def audio_callback(indata, frames, time, status, q):
    q.put(indata.copy())

# Function for recording audio on a separate thread
def record_audio(q, stop_event):
    with sd.InputStream(callback=lambda *args: audio_callback(*args, q), samplerate=fs):
        while not stop_event.is_set():
            sd.sleep(100)  # Sleep for a short duration to reduce CPU usage


# Define GUI layout
layout = [
    [sg.Button('Start Recording'), sg.Button('Stop Recording')],
    [sg.Multiline(size=(60, 20), key='transcription')],
]

# Create GUI window
window = sg.Window('Meeting Transcription', layout)

# Create an event to signal recording thread to stop
stop_event = threading.Event()

# Create a queue to hold audio data chunks
audio_queue = Queue()

# Event loop
recording_thread = None
while True:
    event, values = window.read()
    if event == sg.WIN_CLOSED:
        break
    elif event == 'Start Recording':
        # Start recording on a separate thread
        stop_event.clear()
        recording_thread = threading.Thread(target=record_audio, args=(audio_queue, stop_event))
        recording_thread.start()
    elif event == 'Stop Recording':
        # Signal the recording thread to stop
        stop_event.set()
        if recording_thread is not None:
            recording_thread.join()
        # Collect audio data from the queue
        audio_data = []
        while not audio_queue.empty():
            audio_data.append(audio_queue.get())
        # Convert recording to audio file
        audio_bytes = save_wav_in_memory(audio_data, fs)
        current_timestamp = datetime.now()       
        temp_audio_path = current_timestamp.strftime("temp_audio_%Y%m%d_%H%M%S.wav")

        # Save the audio to a temporary WAV file
        with open(temp_audio_path, "wb") as f:
            f.write(audio_bytes)

        file = open(temp_audio_path, "rb")
        transcription = openai.Audio.transcribe("whisper-1", file)

        print(transcription)
        # Update GUI with transcribed text
        window['transcription'].update(transcription['text'])

# Close GUI window
window.close()


{
  "text": ""
}


In [2]:
import sounddevice as sd

# Get the list of all devices
devices = sd.query_devices()

# Filter input devices
input_devices = [device for device in devices if device['max_input_channels'] > 0]

# Print input devices
for idx, device in enumerate(input_devices):
    print(f"Input Device ID: {device['hostapi']} / {device['name']} / Max Input Channels: {device['max_input_channels']}")

# If 

Input Device ID: 0 / HD-Audio Generic: ALC257 Analog (hw:1,0) / Max Input Channels: 2
Input Device ID: 0 / acp: - (hw:2,0) / Max Input Channels: 2
Input Device ID: 0 / ThinkPad USB-C Dock Gen2 USB Au: Audio (hw:3,0) / Max Input Channels: 1


In [8]:
import threading
import sounddevice as sd
import numpy as np
import queue
from datetime import datetime
import openai
import keyring
import io
import wave

import PySimpleGUI as sg
# Define audio recording parameters
fs = 44100  # Sample rate
recording_duration = 5  # Duration of recording for noise baseline in seconds
chunk_duration = 0.5  # Duration of each chunk in seconds
chunk_samples = int(fs * chunk_duration)  # Number of samples in each chunk
threshold_multiplier = 1.5  # Multiplier for noise threshold

# Define a function to save the numpy array as a WAV file in memory
def save_wav_in_memory(audio_data, sample_rate):
    byte_io = io.BytesIO()
    with wave.open(byte_io, 'wb') as wav_file:
        wav_file.setnchannels(1)
        wav_file.setsampwidth(2)
        wav_file.setframerate(sample_rate)
        wav_file.writeframes(audio_data.astype(np.int16).tobytes())
    byte_io.seek(0)
    return byte_io.read()

# Calculate root-mean-square (RMS) energy of a chunk
def calculate_rms(chunk):
    return np.sqrt(np.mean(np.square(chunk)))

# Simple Voice Activity Detection (VAD)
def is_voice_active(chunk, threshold):
    return calculate_rms(chunk) > threshold

# Define a function for recording audio on a separate thread
def record_audio(audio_queue, stop_event, threshold):
    buffer = np.array(([],[]))
    active_chunk = np.array(([],[]))
    with sd.InputStream(samplerate=fs) as stream:
        while not stop_event.is_set():
            chunk = stream.read(chunk_samples)[0]
            if is_voice_active(chunk, threshold):
                # If active voice detected, append chunk to active_chunk
                active_chunk = np.concatenate((active_chunk, chunk))
            elif len(active_chunk) > 0:
                # If pause detected, put the active_chunk in queue for transcription
                audio_queue.put(active_chunk)
                active_chunk = np.array(([],[]))
            # Append chunk to buffer for noise threshold calculation
            buffer = np.concatenate((buffer, chunk))
            if len(buffer) > fs * recording_duration:
                # Calculate threshold based on RMS energy of recorded buffer
                threshold = threshold_multiplier * calculate_rms(buffer)
                buffer = np.array(([],[]))

# OpenAI API key
openai.api_key = keyring.get_password("system", "openai_key")

# Define GUI layout
layout = [
    [sg.Button('Start Recording'), sg.Button('Stop Recording')],
    [sg.Multiline(size=(60, 20), key='transcription')],
]

# Create GUI window
window = sg.Window('Meeting Transcription', layout)

# Create an event to signal recording thread to stop
stop_event = threading.Event()

# Create a queue to hold audio data chunks
audio_queue = queue.Queue()

# Set an initial noise threshold
noise_threshold = 0.01

# Event loop
recording_thread = None
while True:
    event, values = window.read()
    if event == sg.WIN_CLOSED:
        break
    elif event == 'Start Recording':
        # Start recording on a separate thread
        stop_event.clear()
        recording_thread = threading.Thread(target=record_audio, args=(audio_queue, stop_event, noise_threshold))
        recording_thread.start()
    elif event == 'Stop Recording':
        # Signal the recording thread to stop
        stop_event.set()
        if recording_thread is not None:
            recording_thread.join()
        # Process remaining active chunks in the queue
        transcriptions = []
        while not audio_queue.empty():
            active_chunk = audio_queue.get()
            audio_bytes = save_wav_in_memory(active_chunk, fs)
            # Save the audio to a temporary WAV file
            with open("temp_audio.wav", "wb") as f:
                f.write(audio_bytes)
            # Transcribe the audio using OpenAI's Whisper ASR API
            file = open("temp_audio.wav", "rb")
            transcription = openai.Audio.transcribe("whisper-1", file)
            transcriptions.append(transcription['choices'][0]['text'])
        # Update GUI with transcribed text (separated by newlines)
        window['transcription'].update('\n'.join(transcriptions))

# Close GUI window
window.close()


Exception in thread Thread-8 (record_audio):
Traceback (most recent call last):
  File "/usr/lib/python3.10/threading.py", line 1016, in _bootstrap_inner
    self.run()
  File "/usr/lib/python3.10/threading.py", line 953, in run
    self._target(*self._args, **self._kwargs)
  File "/tmp/ipykernel_489175/2010648215.py", line 53, in record_audio
  File "<__array_function__ internals>", line 200, in concatenate
ValueError: all the input array dimensions except for the concatenation axis must match exactly, but along dimension 1, the array at index 0 has size 0 and the array at index 1 has size 2
Exception in thread Thread-9 (record_audio):
Traceback (most recent call last):
  File "/usr/lib/python3.10/threading.py", line 1016, in _bootstrap_inner
    self.run()
  File "/usr/lib/python3.10/threading.py", line 953, in run
    self._target(*self._args, **self._kwargs)
  File "/tmp/ipykernel_489175/2010648215.py", line 53, in record_audio
  File "<__array_function__ internals>", line 200, in 

In [6]:
import sounddevice as sd
import numpy as np
import threading
import queue
import openai
import keyring
import io
import wave
from datetime import datetime

# Define audio recording parameters
fs = 44100  # Sample rate
recording_duration = 5  # Duration of recording for noise baseline in seconds
chunk_duration = 0.5  # Duration of each chunk in seconds
chunk_samples = int(fs * chunk_duration)  # Number of samples in each chunk
threshold_multiplier = 1.5  # Multiplier for noise threshold

# Define a function to save the numpy array as a WAV file in memory
def save_wav_in_memory(audio_data):
    byte_io = io.BytesIO()
    with wave.open(byte_io, 'wb') as wav_file:
        wav_file.setnchannels(1)
        wav_file.setsampwidth(2)
        wav_file.setframerate(fs)
        wav_file.writeframes(audio_data.astype(np.int16).tobytes())
    byte_io.seek(0)
    return byte_io.read()

# Calculate root-mean-square (RMS) energy of a chunk
def calculate_rms(chunk):
    return np.sqrt(np.mean(np.square(chunk)))

# Simple Voice Activity Detection (VAD)
def is_voice_active(chunk, threshold):
    return calculate_rms(chunk) > threshold

# Define a function for recording audio and transcribing chunks
def record_and_transcribe(stop_event, threshold):
    buffer = np.array([])
    active_chunk = np.array([])
    with sd.InputStream(samplerate=fs) as stream:
        while not stop_event.is_set():
            chunk, _ = stream.read(chunk_samples)  # Read chunk from the stream
            chunk = chunk.flatten()  # Flatten the chunk to 1 dimension
            if is_voice_active(chunk, threshold):
                # If active voice detected, append chunk to active_chunk
                active_chunk = np.concatenate((active_chunk, chunk))
            elif len(active_chunk) > 0:
                # If pause detected, transcribe the active_chunk
                audio_bytes = save_wav_in_memory(active_chunk)
                # Save the audio to a temporary WAV file
                with open("temp_audio.wav", "wb") as f:
                    f.write(audio_bytes)
                # Transcribe the audio using OpenAI's Whisper ASR API
                file = open("temp_audio.wav", "rb")
                transcription = openai.Audio.transcribe("whisper-1", file)
                # Print transcribed text to console
                print(transcription['choices'][0]['text'])
                active_chunk = np.array([])
            # Append chunk to buffer for noise threshold calculation
            buffer = np.concatenate((buffer, chunk))
            if len(buffer) > fs * recording_duration:
                # Calculate threshold based on RMS energy of recorded buffer
                threshold = threshold_multiplier * calculate_rms(buffer)
                buffer = np.array([])

# OpenAI API key
openai.api_key = keyring.get_password("system", "openai_key")

# Create an event to signal recording thread to stop
stop_event = threading.Event()

# Set an initial noise threshold
noise_threshold = 0.01

# Start recording and transcribing on a separate thread
recording_thread = threading.Thread(target=record_and_transcribe, args=(stop_event, noise_threshold))
recording_thread.start()

# Prompt user to press Enter to stop recording
input("Recording started. Press Enter to stop recording...\n")

# Signal the recording thread to stop
stop_event.set()
recording_thread.join()


print("Recording and transcription complete.")


Exception in thread Thread-9 (record_and_transcribe):
Traceback (most recent call last):
  File "/usr/lib/python3.10/threading.py", line 1016, in _bootstrap_inner
    self.run()
  File "/usr/lib/python3.10/threading.py", line 953, in run
Expression 'ret' failed in 'src/hostapi/alsa/pa_linux_alsa.c', line: 1736
Expression 'AlsaOpen( &alsaApi->baseHostApiRep, params, streamDir, &self->pcm )' failed in 'src/hostapi/alsa/pa_linux_alsa.c', line: 1904
Expression 'PaAlsaStreamComponent_Initialize( &self->capture, alsaApi, inParams, StreamDirection_In, NULL != callback )' failed in 'src/hostapi/alsa/pa_linux_alsa.c', line: 2171
Expression 'PaAlsaStream_Initialize( stream, alsaHostApi, inputParameters, outputParameters, sampleRate, framesPerBuffer, callback, streamFlags, userData )' failed in 'src/hostapi/alsa/pa_linux_alsa.c', line: 2839
    self._target(*self._args, **self._kwargs)
  File "/tmp/ipykernel_497790/376463701.py", line 41, in record_and_transcribe
  File "/home/srudloff/.local/lib

Recording started. Press Enter to stop recording...
 


Recording and transcription complete.
