In [13]:
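# NOTE: tkinter ships with Python and cannot be installed with pip; Whisper also needs the ffmpeg command-line tool on PATH.
#%pip install flask openai-whisper pyaudio numpy soundfile ffmpeg-python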
#%pip install setuptools-rust
import whisper
import numpy as np
import tempfile
import soundfile as sf
import pyaudio
import tkinter as tk
from tkinter import messagebox

Collecting ffmpeg-python
  Obtaining dependency information for ffmpeg-python from https://files.pythonhosted.org/packages/d7/0c/56be52741f75bad4dc6555991fabd2e07b432d333da82c11ad701123888a/ffmpeg_python-0.2.0-py3-none-any.whl.metadata
  Downloading ffmpeg_python-0.2.0-py3-none-any.whl.metadata (1.7 kB)
Downloading ffmpeg_python-0.2.0-py3-none-any.whl (25 kB)
Installing collected packages: ffmpeg-python
Successfully installed ffmpeg-python-0.2.0
Note: you may need to restart the kernel to use updated packages.


In [6]:
# Load the Whisper checkpoint used for transcription.
# Accepted checkpoint names include "tiny", "base", "small", "medium", or "large".
model = whisper.load_model(name="small")

In [7]:
# Microphone capture settings
RATE = 16000               # sample rate in Hz; Whisper works best at 16kHz
CHANNELS = 1               # single input channel
FORMAT = pyaudio.paInt16   # sample format handed to PyAudio
CHUNK = 1024               # buffer size, in frames

# PyAudio handle plus the recorder state shared by the GUI callbacks
audio = pyaudio.PyAudio()
frames, recording, stream = [], False, None

def start_recording():
    """Start recording audio from the microphone.

    Opens a new input stream, resets the captured ``frames`` and kicks off
    the ``record_audio`` polling loop.

    Pressing "Start" while a capture is already running is ignored, so no
    second stream is opened and no second polling loop is scheduled.
    If the input device cannot be opened, the error is shown to the user and
    the recorder state is left untouched (``recording`` stays False).
    """
    global stream, frames, recording

    if recording:
        # Already capturing: opening another stream would leak the first one.
        return

    try:
        new_stream = audio.open(format=FORMAT, channels=CHANNELS,
                                rate=RATE, input=True,
                                frames_per_buffer=CHUNK)
    except OSError as exc:
        # Report the failure instead of leaving the UI stuck in a fake
        # "recording" state with no usable stream.
        status_label.config(text="Could not open microphone", fg="red")
        messagebox.showerror("Microphone error", str(exc))
        return

    # Only commit the new state once the stream is known to be open.
    stream = new_stream
    frames = []
    recording = True
    status_label.config(text="Recording... Speak now!", fg="red")
    record_audio()

def record_audio():
    """Collect buffered microphone audio and reschedule itself.

    Runs on the Tk event loop while ``recording`` is True. Each call appends
    whatever frames the stream already has buffered to ``frames`` as an
    int16 array, then schedules the next poll in 10 ms. When ``recording``
    is False the function returns without touching the stream, which ends
    the polling loop.
    """
    if not recording:
        return

    # Ask only for frames that are already available so read() returns
    # immediately; a fixed-size blocking read would stall the GUI until a
    # full chunk has been captured.
    available = stream.get_read_available()
    if available > 0:
        # An input overflow should drop samples, not kill the polling loop.
        data = stream.read(available, exception_on_overflow=False)
        frames.append(np.frombuffer(data, dtype=np.int16))

    root.after(10, record_audio)  # Schedule next poll

def stop_recording():
    """Stop recording, save the captured audio to a WAV file and transcribe it.

    Does nothing (beyond updating the status label) when no recording is in
    progress or when no audio was captured, instead of failing on a missing
    stream or an empty frame list. The temporary WAV file is removed once
    ``transcribe_audio`` has finished with it, whether or not it succeeded.
    """
    global recording, stream
    import os  # local import: only needed for temp-file cleanup

    if not recording or stream is None:
        # "Stop" pressed without an active capture.
        recording = False
        status_label.config(text="Press 'Start' to record", fg="black")
        return

    recording = False
    stream.stop_stream()
    stream.close()
    stream = None  # a closed stream must not be reused or closed again

    if not frames:
        # np.concatenate() raises on an empty list; nothing to transcribe.
        status_label.config(text="No audio captured", fg="red")
        return

    # Reserve a temp path, closing the handle right away so it is not leaked;
    # soundfile reopens the path itself.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as temp_wav:
        wav_path = temp_wav.name
    sf.write(wav_path, np.concatenate(frames), RATE, format='WAV')

    def _transcribe_then_cleanup():
        """Run the transcription, then delete the temporary WAV file."""
        try:
            transcribe_audio(wav_path)
        finally:
            try:
                os.remove(wav_path)
            except OSError:
                pass  # best-effort cleanup; file may already be gone

    status_label.config(text="Transcribing...", fg="blue")
    # Delay slightly so Tk can repaint the status label before the
    # transcription call runs on the event loop.
    root.after(100, _transcribe_then_cleanup)

def transcribe_audio(audio_file):
    """Transcribe the recorded audio using Whisper and show the result.

    Args:
        audio_file: Path of the audio file passed to ``model.transcribe``.

    Returns:
        The transcribed text with surrounding whitespace removed. The same
        text is written to ``transcription_text``.

    Raises:
        Whatever ``model.transcribe`` raises. The status label is switched
        to "Transcription failed" first so the UI does not stay stuck on
        "Transcribing...".
    """
    try:
        result = model.transcribe(audio_file)
    except Exception:
        status_label.config(text="Transcription failed", fg="red")
        raise

    text = result["text"].strip()
    transcription_text.set(text)
    status_label.config(text="Transcription Complete!", fg="green")
    return text


In [8]:
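# Legacy console loop, superseded by the Tk GUI below. Kept for reference only:
# record_audio() no longer returns an audio file, so this would not run as written.
# if __name__ == "__main__":
#     while True:
#         audio_file = record_audio()
#         print("Transcribing...")
#         transcript = transcribe_audio(audio_file)
#         print(f"Transcription: {transcript}\n")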

In [12]:
# GUI Setup
UI_FONT = ("Arial", 12)

root = tk.Tk()
root.title("Voice Transcriber with Whisper")
root.geometry("400x300")

# Widgets are packed top to bottom: status line, the two control buttons,
# then the transcription output. fill="x" stretches each across the window.
status_label = tk.Label(root, text="Press 'Start' to record", font=UI_FONT)
status_label.pack(pady=10, fill="x")

start_button = tk.Button(root, text="Start Recording", command=start_recording,
                         bg="green", fg="white", font=UI_FONT)
start_button.pack(pady=10, fill="x")

stop_button = tk.Button(root, text="Stop & Transcribe", command=stop_recording,
                        bg="red", fg="white", font=UI_FONT)
stop_button.pack(pady=10, fill="x")

# StringVar backing the label that displays the transcription result.
transcription_text = tk.StringVar()
transcription_label = tk.Label(root, textvariable=transcription_text,
                               wraplength=350, font=UI_FONT, fg="black")
transcription_label.pack(pady=20, fill="x")


def on_close():
    """Release the microphone if a capture is running, then close the window.

    Without this, closing the window mid-recording leaves the input stream
    open for as long as the kernel keeps running.
    """
    global recording, stream
    was_recording, recording = recording, False
    try:
        if was_recording and stream is not None:
            stream.stop_stream()
            stream.close()
            stream = None
    finally:
        # Always close the window, even if releasing the stream failed.
        root.destroy()


root.protocol("WM_DELETE_WINDOW", on_close)
root.mainloop()

