In [5]:
import os
import threading
import time
import wave

import keyboard
import pyaudio
import whisper

class AudioRecorder:
    """Push-to-talk microphone recorder that transcribes each clip with Whisper.

    Audio is captured from the default input device on a background thread
    while the spacebar is held, written to a WAV file when it is released,
    and the file is then passed to the loaded Whisper model.
    """

    def __init__(self, save_dir="./recordings", filename="output.wav", model_name="small.en"):
        """Set up PyAudio, the output location, and the Whisper model.

        Args:
            save_dir: Directory the WAV file is written to; created if missing.
            filename: Name of the WAV file inside ``save_dir``. The same file
                is overwritten by every recording.
            model_name: Name passed to ``whisper.load_model``.
        """
        # Audio recording parameters
        self.FORMAT = pyaudio.paInt16  # 16-bit resolution
        self.CHANNELS = 1               # Mono channel
        self.RATE = 44100               # 44.1kHz sampling rate
        self.CHUNK = 1024               # 1024 samples per frame

        self.SAVE_DIR = save_dir        # Desired directory path
        self.FILENAME = filename        # Output file name
        self.OUTPUT_FILENAME = os.path.join(self.SAVE_DIR, self.FILENAME)

        # exist_ok avoids the check-then-create race of os.path.exists + makedirs.
        os.makedirs(self.SAVE_DIR, exist_ok=True)

        # Initialize pyaudio
        self.audio = pyaudio.PyAudio()
        self.recording = False
        self.frames = []
        self.stream = None
        # Handle to the background reader so it can be joined before the
        # stream is closed.
        self._recording_thread = None

        # Initialize Whisper model
        self.model = whisper.load_model(model_name)

    def start_recording(self):
        """Open the input stream and start capturing on a background thread.

        Does nothing if a recording is already in progress, so a second call
        cannot leak an open stream.
        """
        if self.recording:
            return

        self.frames = []  # Reset frames for a new recording

        # Open the stream BEFORE raising the flag: if the device cannot be
        # opened the exception propagates and `recording` stays False, so the
        # key loop never tries to stop a stream that does not exist.
        self.stream = self.audio.open(format=self.FORMAT, channels=self.CHANNELS, rate=self.RATE,
                                      input=True, frames_per_buffer=self.CHUNK)
        self.recording = True

        print("Recording started...")

        # Daemon thread: a crash in the main thread must not leave the
        # interpreter hanging on the reader.
        self._recording_thread = threading.Thread(target=self.record, daemon=True)
        self._recording_thread.start()

    def record(self):
        """Reader loop: append CHUNK-sized buffers to ``frames`` until stopped."""
        while self.recording:
            # Dropping overflowed input is preferable to an exception that
            # would silently kill this thread and truncate the recording.
            data = self.stream.read(self.CHUNK, exception_on_overflow=False)
            self.frames.append(data)

    def _stop_capture(self):
        """Stop the reader thread, then stop and close the input stream."""
        self.recording = False

        # Wait for the reader to leave stream.read() before touching the
        # stream; closing it mid-read loses the final chunk or raises in the
        # reader thread.
        thread, self._recording_thread = self._recording_thread, None
        if thread is not None:
            thread.join()

        stream, self.stream = self.stream, None
        if stream is not None:
            stream.stop_stream()
            stream.close()

    def stop_recording(self):
        """Stop capturing, write the WAV file, and transcribe it.

        Does nothing (beyond clearing the flag) when no stream is open.
        """
        if self.stream is None:
            self.recording = False
            return

        self._stop_capture()

        # Save the recorded audio to a file
        with wave.open(self.OUTPUT_FILENAME, 'wb') as wf:
            wf.setnchannels(self.CHANNELS)
            wf.setsampwidth(self.audio.get_sample_size(self.FORMAT))
            wf.setframerate(self.RATE)
            wf.writeframes(b''.join(self.frames))

        print(f"Recording saved to {self.OUTPUT_FILENAME}")
        self.transcribe_audio()

    def transcribe_audio(self):
        """Run the Whisper model on the saved WAV file and print the text."""
        result = self.model.transcribe(self.OUTPUT_FILENAME)
        print(result["text"])

    def listen_for_key(self, poll_interval=0.01):
        """Run the push-to-talk loop until Esc is pressed.

        Holding the spacebar records; releasing it stops, saves and
        transcribes. Esc stops any recording in progress and exits the loop.

        Args:
            poll_interval: Seconds to sleep between keyboard polls, so the
                loop does not spin a CPU core at 100%.
        """
        print("Press and hold the spacebar to record.")
        print("Release the spacebar to stop recording.")
        print("Press Esc to terminate the program.")

        while True:
            # When spacebar is pressed, start recording
            if keyboard.is_pressed('space'):
                if not self.recording:
                    self.start_recording()

            # When spacebar is released, stop recording
            elif self.recording:
                self.stop_recording()

            # If the Esc key is pressed, terminate the program
            if keyboard.is_pressed('esc'):
                if self.recording:
                    self.stop_recording()
                print("Terminating program...")
                break

            time.sleep(poll_interval)

    def cleanup(self):
        """Release audio resources.

        Any capture still in progress is stopped (without saving) before
        PyAudio is terminated, so the reader thread never reads from a
        torn-down backend.
        """
        self._stop_capture()
        self.audio.terminate()

if __name__ == "__main__":
    # Constructing the recorder initializes PyAudio and loads the Whisper model.
    recorder = AudioRecorder()
    try:
        # Blocks in the key-polling loop until Esc is pressed.
        recorder.listen_for_key()
    finally:
        # Always terminate PyAudio, even if the key loop raised.
        recorder.cleanup()


Press and hold the spacebar to record.
Release the spacebar to stop recording.
Press Esc to terminate the program.
Recording started...
Recording saved to ./recordings\output.wav
 Testing, testing, one, two, three.
Terminating program...


In [1]:
import torch

# Report whether PyTorch can see a CUDA device; the device name is only
# queried when one is available.
print(
    f"GPU is enabled. {torch.cuda.get_device_name(0)} is available."
    if torch.cuda.is_available()
    else "GPU is not enabled. Running on CPU."
)

GPU is enabled. NVIDIA GeForce RTX 4080 Laptop GPU is available.
