In [None]:
# Attach Google Drive to the Colab runtime so later cells can reach its folders
from google.colab import drive

mount_point = '/content/drive'
drive.mount(mount_point)

In [None]:
import os

# Define input folder (videos) and check available files
video_folder = "/content/drive/My Drive/videos"
# Backward-compatible alias: this path was previously stored under the
# misleading name `audio_folder` even though it points at the videos folder.
audio_folder = video_folder

# os.listdir returns entries in arbitrary order; sort so the listing is
# stable from one run to the next.
files = sorted(os.listdir(video_folder))

print("Found files:", files)

In [None]:
import os
import subprocess

# Extract the audio track of every video in `input_folder` into an .m4a file
# (192 kbit/s AAC) in `output_folder`, reporting any conversion that fails.

# Define input and output folders
input_folder = "/content/drive/My Drive/videos"
output_folder = "/content/drive/My Drive/audios"

# Video extensions to convert (matched case-insensitively)
video_extensions = (".mp4", ".mov", ".avi")

# Create output folder if it doesn't exist
os.makedirs(output_folder, exist_ok=True)

converted_count = 0
failed_files = []

# sorted() gives a deterministic processing order (os.listdir order is arbitrary)
for file in sorted(os.listdir(input_folder)):
    if not file.lower().endswith(video_extensions):
        continue

    input_path = os.path.join(input_folder, file)
    output_path = os.path.join(output_folder, os.path.splitext(file)[0] + ".m4a")

    # Global options (-y: overwrite existing output, -nostdin: never wait for
    # keyboard input) go before the input file; -vn drops the video stream.
    cmd = [
        "ffmpeg", "-y", "-nostdin",
        "-i", input_path,
        "-vn", "-acodec", "aac", "-b:a", "192k",
        output_path,
    ]
    # Keep stderr (ffmpeg writes its diagnostics there) so failures can be explained
    completed = subprocess.run(cmd, stdout=subprocess.DEVNULL, stderr=subprocess.PIPE)

    if completed.returncode != 0:
        failed_files.append(file)
        error_lines = completed.stderr.decode("utf-8", errors="replace").strip().splitlines()
        reason = error_lines[-1] if error_lines else "no error output"
        print(f"❌ Failed to convert {file} (exit code {completed.returncode}): {reason}")
        continue

    converted_count += 1

# Only claim success when every conversion actually succeeded
if failed_files:
    print(f"⚠️ Converted {converted_count} file(s); {len(failed_files)} failed: {failed_files}")
else:
    print("✅ Conversion completed! Audio files saved in:", output_folder)

In [None]:
import torch

# Ask PyTorch whether a CUDA device is visible and, if so, look up its name
gpu_available = torch.cuda.is_available()

if gpu_available:
    gpu_name = torch.cuda.get_device_name(0)
else:
    gpu_name = "No GPU detected"

# Report the result
print(f"GPU available: {gpu_available}")
print(f"GPU Name: {gpu_name}")

In [None]:
# Install Whisper and FFmpeg
!pip install -q git+https://github.com/openai/whisper.git
!sudo apt update && sudo apt install ffmpeg


In [None]:
import whisper

# Load the Whisper 'turbo' model once for use by the transcription step.
print("Loading Whisper model...")
# No explicit device is passed: load_model then picks CUDA when PyTorch can see
# a GPU and falls back to the CPU otherwise, instead of failing on a runtime
# without a GPU as the hard-coded device="cuda" did.
model = whisper.load_model("turbo")  # Using the 'turbo' model for better performance
print("✅ Whisper 'turbo' model loaded successfully!")
print(f"Model device: {model.device}")

In [None]:
import os
import whisper

# Transcribe every .m4a file in `input_folder` with Whisper and write one
# UTF-8 .txt transcript per audio file into `output_folder`.

# Define paths in Google Drive
input_folder = "/content/drive/My Drive/audios"  # Folder with .m4a audio files
output_folder = "/content/drive/My Drive/transcripts"  # Folder to save transcripts

# Create the output folder if it doesn't exist
os.makedirs(output_folder, exist_ok=True)

# Reuse the model loaded by the previous cell when it is still in the kernel;
# loading it a second time only costs time and memory. Load it here only when
# this cell is run on its own (load_model selects CUDA if available, else CPU).
if "model" not in globals():
    model = whisper.load_model("turbo")

# Process all .m4a files in the input folder, in a deterministic order
for file_name in sorted(os.listdir(input_folder)):
    # Split off the real extension instead of str.replace(), which would also
    # rewrite ".m4a" occurring in the middle of a name; compare the extension
    # case-insensitively, as the conversion cell does for video extensions.
    stem, extension = os.path.splitext(file_name)
    if extension.lower() != ".m4a":
        continue

    input_path = os.path.join(input_folder, file_name)
    output_path = os.path.join(output_folder, stem + ".txt")

    # Report the device the model is actually on rather than assuming a GPU
    print(f"Transcribing: {file_name} on {model.device}...")

    # Transcribe the audio
    result = model.transcribe(input_path, language="en")  # Default language set to English (can be changed)

    # Save the transcript
    with open(output_path, "w", encoding="utf-8") as f:
        f.write(result["text"])

    print(f"Transcription saved at: {output_path}")

print("✅ Transcription completed! All files have been processed.")