# In [1]:
import os
import json
import whisper
import tkinter as tk
from tkinter import filedialog, messagebox, ttk
import threading
import logging

# Route log records (INFO and above) to "transcription.log"; the relative
# filename means the file is created in the current working directory.
logging.basicConfig(
    filename="transcription.log",
    level=logging.INFO,
    format="%(asctime)s - %(levelname)s - %(message)s",
)

# Whisper model size selected when the application starts
MODEL_SIZE = "tiny"

class TranscriptionApp:
    """Tkinter window that transcribes media files with a Whisper model.

    The user picks a single media file or a folder (searched recursively),
    an output folder, a model size and whether to translate to English.
    Each processed file produces a ``.txt`` and a ``.json`` file in the
    output folder.

    Threading model: transcription runs in a daemon worker thread so the
    window stays responsive. The worker never reads Tk variables or opens
    dialogs itself; values are snapshotted on the main thread before the
    worker starts, and every UI update is scheduled with ``root.after``.
    """

    # File extensions treated as media. Kept as a tuple so it can be passed
    # directly to ``str.endswith`` and reused for the file dialog filter.
    MEDIA_EXTENSIONS = (".mp3", ".wav", ".mp4", ".avi", ".mkv", ".flac", ".m4a")

    def __init__(self, root):
        """Build the window on *root* and load the default Whisper model.

        Args:
            root: The ``tk.Tk`` (or toplevel) window that hosts the widgets.
        """
        self.root = root
        self.root.title("Media File Transcriber")
        self.root.geometry("600x450")

        # Variables bound to widgets
        self.input_path = tk.StringVar()
        self.output_folder = tk.StringVar()
        self.progress = tk.DoubleVar()
        self.status = tk.StringVar(value="Ready")
        self.model_size = tk.StringVar(value=MODEL_SIZE)
        self.translate = tk.BooleanVar(value=False)

        # Background worker of the current run (None while idle)
        self._worker = None

        # GUI Elements
        self.create_widgets()

        # Load Whisper model
        self.model = None
        self.load_model()

    def create_widgets(self):
        """Create and arrange GUI elements."""
        main_frame = ttk.Frame(self.root, padding="10")
        main_frame.pack(fill=tk.BOTH, expand=True)

        # Input selection: both browse buttons sit next to the input entry
        # (rows 0-1) so they are not mistaken for the output/model controls.
        ttk.Label(main_frame, text="Input (Folder or File):").grid(row=0, column=0, padx=10, pady=5, sticky="w")
        ttk.Entry(main_frame, textvariable=self.input_path, width=50).grid(row=0, column=1, padx=10, pady=5)
        ttk.Button(main_frame, text="Browse Folder", command=self.browse_input_folder).grid(row=0, column=2, padx=5, pady=5, sticky="we")
        ttk.Button(main_frame, text="Browse File", command=self.browse_input_file).grid(row=1, column=2, padx=5, pady=5, sticky="we")

        # Output folder: label, entry and its browse button share one row
        ttk.Label(main_frame, text="Output Folder:").grid(row=2, column=0, padx=10, pady=5, sticky="w")
        ttk.Entry(main_frame, textvariable=self.output_folder, width=50).grid(row=2, column=1, padx=10, pady=5)
        ttk.Button(main_frame, text="Browse", command=self.browse_output_folder).grid(row=2, column=2, padx=5, pady=5, sticky="we")

        # Model size selection. Read-only so only the listed sizes can be
        # chosen; the initial value comes from ``self.model_size``.
        ttk.Label(main_frame, text="Model Size:").grid(row=3, column=0, padx=10, pady=5, sticky="w")
        model_options = ["tiny", "base", "small", "medium", "large"]
        model_menu = ttk.Combobox(main_frame, textvariable=self.model_size, values=model_options, state="readonly")
        model_menu.grid(row=3, column=1, padx=10, pady=5)
        model_menu.bind("<<ComboboxSelected>>", lambda event: self.load_model())

        # Translate option
        ttk.Checkbutton(main_frame, text="Translate to English", variable=self.translate).grid(row=4, column=0, columnspan=2, padx=10, pady=5, sticky="w")

        # Progress bar
        self.progress_bar = ttk.Progressbar(main_frame, variable=self.progress, maximum=100)
        self.progress_bar.grid(row=5, column=0, columnspan=3, padx=10, pady=10, sticky="we")

        # Status label
        ttk.Label(main_frame, textvariable=self.status).grid(row=6, column=0, columnspan=3, padx=10, pady=5)

        # Action buttons
        ttk.Button(main_frame, text="Start Transcription", command=self.start_transcription_thread).grid(row=7, column=0, columnspan=3, padx=10, pady=5)
        ttk.Button(main_frame, text="Clear", command=self.clear_fields).grid(row=8, column=0, columnspan=3, padx=10, pady=5)

    def browse_input_folder(self):
        """Open a dialog to select an input folder."""
        folder = filedialog.askdirectory()
        if folder:
            self.input_path.set(folder)

    def browse_input_file(self):
        """Open a dialog to select a single media file."""
        # Tk expects the patterns as a tuple (or a space-separated string);
        # a semicolon-separated string is a Windows-only convention.
        media_patterns = tuple(f"*{ext}" for ext in self.MEDIA_EXTENSIONS)
        file_path = filedialog.askopenfilename(
            filetypes=[("Media Files", media_patterns), ("All Files", "*.*")]
        )
        if file_path:
            self.input_path.set(file_path)

    def browse_output_folder(self):
        """Open a dialog to select the output folder."""
        folder = filedialog.askdirectory()
        if folder:
            self.output_folder.set(folder)

    def load_model(self):
        """Load the Whisper model based on the selected size.

        Runs synchronously on the calling thread, so the window does not
        process events until ``whisper.load_model`` returns. On failure the
        error is logged and shown, and ``self.model`` keeps its previous value.
        """
        self.status.set("Loading Whisper model...")
        self.root.update_idletasks()
        try:
            self.model = whisper.load_model(self.model_size.get())
            self.status.set("Model loaded successfully.")
        except Exception as e:
            logging.error(f"Failed to load Whisper model: {e}")
            messagebox.showerror("Error", f"Failed to load Whisper model: {e}")
            self.status.set("Failed to load model.")

    def is_media_file(self, filename):
        """Return True if *filename* ends with a known media extension.

        The comparison is case-insensitive.
        """
        return filename.lower().endswith(self.MEDIA_EXTENSIONS)

    def transcribe_media_file(self, file_path, translate=None):
        """Transcribe a media file using Whisper.

        Args:
            file_path: Path of the media file to transcribe.
            translate: True to run Whisper's "translate" task, False for
                "transcribe". When None the checkbox variable is read, which
                should only be done from the main thread.

        Returns:
            The ``"text"`` entry of the Whisper result, or None if
            transcription failed (the error is logged and an error dialog is
            scheduled on the UI thread).
        """
        if translate is None:
            translate = self.translate.get()
        try:
            result = self.model.transcribe(file_path, task="translate" if translate else "transcribe")
            return result["text"]
        except Exception as e:
            logging.error(f"Failed to transcribe {file_path}: {e}")
            self._run_on_ui(messagebox.showerror, "Error", f"Failed to transcribe {file_path}: {e}")
            return None

    def save_transcription(self, file_path, transcription, output_folder=None):
        """Save the transcription as a text and JSON file.

        Both files are named after the base name of *file_path* (extension
        removed), so inputs sharing a base name overwrite each other's output.

        Args:
            file_path: Path of the source media file.
            transcription: Transcribed text to store.
            output_folder: Destination folder, created if missing. When None
                the output-folder variable is read, which should only be done
                from the main thread.

        Raises:
            OSError: If the folder cannot be created or a file cannot be
                written.
        """
        if output_folder is None:
            output_folder = self.output_folder.get()
        base_name = os.path.splitext(os.path.basename(file_path))[0]
        os.makedirs(output_folder, exist_ok=True)

        output_txt_path = os.path.join(output_folder, f"{base_name}.txt")
        with open(output_txt_path, "w", encoding="utf-8") as txt_file:
            txt_file.write(transcription)

        output_json_path = os.path.join(output_folder, f"{base_name}.json")
        with open(output_json_path, "w", encoding="utf-8") as json_file:
            # The file is UTF-8, so keep non-ASCII text readable instead of
            # escaping every character as \uXXXX.
            json.dump(
                {"file_path": file_path, "transcription": transcription},
                json_file,
                indent=4,
                ensure_ascii=False,
            )

    def start_transcription_thread(self):
        """Start the transcription process in a separate thread.

        Called on the main thread (button command). The widget values are
        read here and handed to the worker so the worker never touches Tk
        variables. A second run is refused while one is still in progress.
        """
        worker = getattr(self, "_worker", None)
        if worker is not None and worker.is_alive():
            messagebox.showinfo("Info", "A transcription is already running.")
            return

        if self.model is None:
            messagebox.showerror("Error", "No Whisper model is loaded.")
            return

        self._worker = threading.Thread(
            target=self.start_transcription,
            args=(self.input_path.get(), self.output_folder.get(), self.translate.get()),
            daemon=True,
        )
        self._worker.start()

    def start_transcription(self, input_path=None, output_folder=None, translate=None):
        """Run the transcription process for one file or a whole folder.

        Intended to run in the worker thread with all three arguments
        supplied. Any argument left as None is read from the corresponding
        Tk variable, which should only happen when called on the main thread.

        Args:
            input_path: Media file, or folder searched recursively.
            output_folder: Folder receiving the ``.txt``/``.json`` results.
            translate: Whether to translate to English instead of transcribing.
        """
        if input_path is None:
            input_path = self.input_path.get()
        if output_folder is None:
            output_folder = self.output_folder.get()
        if translate is None:
            translate = self.translate.get()

        if not input_path or not output_folder:
            self._run_on_ui(messagebox.showwarning, "Warning", "Please select an input file/folder and output folder.")
            return

        media_files = self._collect_media_files(input_path)
        if not media_files:
            self._run_on_ui(messagebox.showinfo, "Info", "No media files found.")
            return

        total_files = len(media_files)
        failed = 0
        self._run_on_ui(self.progress.set, 0)

        for done, file_path in enumerate(media_files, start=1):
            self._run_on_ui(self.status.set, f"Processing {os.path.basename(file_path)} ({done}/{total_files})...")

            transcription = self.transcribe_media_file(file_path, translate=translate)
            if transcription is None:
                failed += 1
            else:
                # An empty transcript (e.g. silent audio) is still saved so
                # every processed input has a matching output.
                try:
                    self.save_transcription(file_path, transcription, output_folder=output_folder)
                except OSError as e:
                    failed += 1
                    logging.error(f"Failed to save transcription for {file_path}: {e}")
                    self._run_on_ui(messagebox.showerror, "Error", f"Failed to save transcription for {file_path}: {e}")

            # Advance the bar only after the file has actually been handled.
            self._run_on_ui(self.progress.set, done / total_files * 100)

        if failed:
            summary = f"Transcription finished: {failed} of {total_files} file(s) failed."
            self._run_on_ui(self.status.set, summary)
            self._run_on_ui(messagebox.showwarning, "Warning", summary)
        else:
            self._run_on_ui(self.status.set, "Transcription complete!")
            self._run_on_ui(messagebox.showinfo, "Info", "Transcription completed successfully.")

    def clear_fields(self):
        """Clear the input and output fields."""
        self.input_path.set("")
        self.output_folder.set("")
        self.status.set("Ready")
        self.progress.set(0)

    def _collect_media_files(self, input_path):
        """Return the media files to process for *input_path*.

        A regular file is returned as-is (its extension is not checked); any
        other path is walked recursively and filtered with ``is_media_file``.
        The result is sorted so the processing order is deterministic.
        """
        if os.path.isfile(input_path):
            return [input_path]
        return sorted(
            os.path.join(folder, name)
            for folder, _, names in os.walk(input_path)
            for name in names
            if self.is_media_file(name)
        )

    def _run_on_ui(self, func, *args):
        """Schedule ``func(*args)`` on the Tk event loop via ``root.after``."""
        self.root.after(0, func, *args)

if __name__ == "__main__":
    # Script entry point: build the main window, attach the application to
    # it, then hand control to the Tk event loop until the window is closed.
    root = tk.Tk()
    app = TranscriptionApp(root)
    app.root.mainloop()


# Output: 100%|███████████████████████████████████████| 139M/139M [00:25<00:00, 5.78MiB/s]
