In [1]:
from transformers import WhisperProcessor, WhisperForConditionalGeneration

import torch


In [2]:
# Select the first CUDA GPU when PyTorch can see one; otherwise stay on the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")
print(f"Device: {device}")


Device: cuda:0


In [None]:
import whisper
import torch
import time

def transcribe_audio(audio_file, model_size="medium", use_gpu=True):
    """
    Transcribe an audio file with an openai-whisper model and time each phase.

    Args:
        audio_file (str): Path to the audio file.
        model_size (str): Model name passed to ``whisper.load_model``
            ("tiny", "base", "small", "medium", "large-v2").
        use_gpu (bool): Use CUDA when ``torch.cuda.is_available()`` reports a
            device; otherwise (or when False) the CPU is used.

    Returns:
        dict: Always contains "text", "load_time", "transcribe_time" and
        "total_time" (durations in seconds). When loading or transcription
        fails, "text" is "" and an "error" key holds the exception message.
    """

    # perf_counter is monotonic, so the measured durations cannot be skewed
    # by wall-clock adjustments the way time.time() differences can.
    start_time = time.perf_counter()

    # 1. Determine Device (GPU or CPU)
    if use_gpu and torch.cuda.is_available():
        device = "cuda"
        print("Using CUDA (GPU)")
    else:
        device = "cpu"
        if use_gpu:
            print("CUDA not available, using CPU instead.")
        else:
            print("Using CPU.")

    # 2. Load the Model
    try:
        model = whisper.load_model(model_size, device=device)
        print(f"Model '{model_size}' loaded successfully on {device}.")
    except Exception as e:
        # Best-effort API: report the failure in the result instead of raising.
        print(f"Error loading model: {e}")
        elapsed = time.perf_counter() - start_time
        # Same keys as every other return path so callers can index directly.
        return {
            "text": "",
            "error": str(e),
            "load_time": elapsed,
            "transcribe_time": 0,
            "total_time": elapsed,
        }

    # Includes device selection as well as the actual model load.
    load_time = time.perf_counter() - start_time

    # 3. Transcribe the Audio
    try:
        transcribe_start_time = time.perf_counter()
        result = model.transcribe(audio_file)
        transcribe_time = time.perf_counter() - transcribe_start_time
        print(f"Transcription completed in {transcribe_time:.2f} seconds.")
        return {
            "text": result["text"],
            "load_time": load_time,
            "transcribe_time": transcribe_time,
            "total_time": time.perf_counter() - start_time,
        }
    except Exception as e:
        print(f"Error during transcription: {e}")
        return {
            "text": "",
            "error": str(e),
            "load_time": load_time,
            "transcribe_time": 0,
            "total_time": time.perf_counter() - start_time,
        }


def _print_report(label, result):
    """Print the text, timings and any error from one transcribe_audio result.

    Args:
        label (str): Heading shown between the ``---`` markers.
        result (dict): Dictionary returned by ``transcribe_audio``; missing
            timing keys are printed as 0 and a missing "text" as "Error".
    """
    print(f"\n--- {label} ---")
    print(f"Transcription: {result.get('text', 'Error')}")
    print(f"Load Time: {result.get('load_time', 0):.2f} seconds")
    print(f"Transcribe Time: {result.get('transcribe_time', 0):.2f} seconds")
    print(f"Total Time: {result.get('total_time', 0):.2f} seconds")
    if "error" in result:
        print(f"Error: {result['error']}")


if __name__ == "__main__":
    audio_file = "test.wav"  # Replace with your audio file
    # Example Usage: each run finishes before its report is printed, so the
    # progress messages from transcribe_audio appear above the matching header.

    # 1. Medium model with GPU
    result_medium_gpu = transcribe_audio(audio_file, model_size="medium", use_gpu=True)
    _print_report("Medium Model with GPU", result_medium_gpu)

    # 2. Medium model with CPU (for comparison)
    result_medium_cpu = transcribe_audio(audio_file, model_size="medium", use_gpu=False)
    _print_report("Medium Model with CPU", result_medium_cpu)

    # 3. Small model with GPU (if medium has issues)
    result_small_gpu = transcribe_audio(audio_file, model_size="small", use_gpu=True)
    _print_report("Small Model with GPU", result_small_gpu)

In [2]:
# Scratch check: an empty dict is falsy, so this evaluates to False.
bool({})

False

In [None]:
from numba import cuda

# Release GPU memory by tearing down the CUDA context numba holds on device 0.
# NOTE(review): cuda.close() closes the context rather than just freeing cached
# blocks — presumably any PyTorch models/tensors already on the GPU in this
# kernel become unusable afterwards; confirm before running this mid-session.
cuda.select_device(0)
cuda.close()
print("Cleared GPU memory.")

In [4]:
# Return PyTorch's unused cached GPU blocks to the driver; memory still held by
# live tensors or models is not released by this call.
torch.cuda.empty_cache()
print("Cleared GPU memory.")
# IPython shell escape: show per-process GPU memory as reported by the driver.
# NOTE(review): the recorded output shows NVML failing with a driver/library
# version mismatch, so this check did not actually report memory usage.
!nvidia-smi

Cleared GPU memory.
Failed to initialize NVML: Driver/library version mismatch
NVML library version: 535.183


In [None]:
# Import necessary libraries
from transformers import WhisperProcessor, WhisperForConditionalGeneration

# Define local cache path relative to the current user's home directory instead
# of hard-coding one user name (resolves to the same string for the original author).
from pathlib import Path

import torch

local_model_path = str(
    Path.home() / ".cache" / "huggingface" / "hub" / "models--openai--whisper-large-v2"
)
# NOTE(review): this is the hub cache's repository folder; presumably the
# loadable files (config.json, weights) sit under snapshots/<revision>/ —
# confirm that from_pretrained accepts this directory as-is.

# Fall back to the CPU when no CUDA device is visible rather than crashing in .to().
whisper_device = "cuda" if torch.cuda.is_available() else "cpu"

# Load processor and model from the local path
print("Loading processor and model from local cache...")
processor = WhisperProcessor.from_pretrained(local_model_path)
model = WhisperForConditionalGeneration.from_pretrained(local_model_path).to(whisper_device)

# Report the device that was actually used.
if whisper_device == "cuda":
    print("Model loaded successfully on GPU.")
else:
    print("Model loaded successfully on CPU.")

# Test the model with an example input
def test_model(audio_path):
    """
    Transcribe one audio file with the loaded Whisper model and print the text.

    Uses the notebook-level ``processor`` and ``model`` objects, so the cell
    that loads them must have run first.

    Args:
        audio_path (str): Path to the audio file.
    """
    # Local import so this cell does not depend on the earlier openai-whisper cell.
    import whisper

    print(f"Testing model on: {audio_path}")
    # The processor expects a waveform, not a file path: decode the file to the
    # mono float32 array at Whisper's sample rate before extracting features.
    waveform = whisper.load_audio(audio_path)
    features = processor(
        waveform,
        sampling_rate=whisper.audio.SAMPLE_RATE,
        return_tensors="pt",
    ).input_features
    # Follow the model's own device instead of assuming CUDA.
    outputs = model.generate(features.to(model.device))
    # Drop control tokens such as <|startoftranscript|> from the printed text.
    transcription = processor.decode(outputs[0], skip_special_tokens=True)
    print("Transcription:", transcription)

# Example usage
# Replace with an actual audio file path
test_audio_path = "./test.wav"
# Uncomment the line below to test
# test_model(test_audio_path)


In [None]:
from transformers import M2M100ForConditionalGeneration, M2M100Tokenizer
import sentencepiece
# Checkpoint to load from the Hugging Face hub.
model_name = "facebook/m2m100_418M"  # Smaller version (418M parameters) for testing
# Larger 1.2B checkpoint, kept commented out as an alternative:
#model = M2M100ForConditionalGeneration.from_pretrained("facebook/m2m100_1.2B")
#tokenizer = M2M100Tokenizer.from_pretrained("facebook/m2m100_1.2B")
# Download and load the tokenizer and model.
# Note: this rebinds the notebook-level name `model`, which the Whisper cell
# above also assigns.
print("Downloading M2M-100 model and tokenizer...")
tokenizer = M2M100Tokenizer.from_pretrained(model_name)
model = M2M100ForConditionalGeneration.from_pretrained(model_name)
print("M2M-100 model and tokenizer downloaded successfully!")


  from .autonotebook import tqdm as notebook_tqdm


Downloading M2M-100 model and tokenizer...
M2M-100 model and tokenizer downloaded successfully!


: 

In [1]:
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
import torch

# Define the model name
model_name = "facebook/m2m100_1.2B"  # Larger model with 1.2B parameters

# Set device and dtype: half precision on GPU, full precision on CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"
dtype = torch.float16 if device == "cuda" else torch.float32

# Load the tokenizer
print(f"Loading tokenizer for {model_name}...")
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Load the weights directly in the target dtype, then move them to the device.
# This avoids first materialising a full float32 copy and converting it with
# .half(), which doubles peak host memory for the 1.2B checkpoint.
print(f"Loading model {model_name} with dtype={dtype} on device={device}...")
model = AutoModelForSeq2SeqLM.from_pretrained(model_name, torch_dtype=dtype).to(device)

print("Model and tokenizer loaded successfully!")



  from .autonotebook import tqdm as notebook_tqdm


Loading tokenizer for facebook/m2m100_1.2B...
Loading model facebook/m2m100_1.2B with dtype=torch.float16 on device=cuda...
Model and tokenizer loaded successfully!


In [1]:
# Scratch value: start with None to see how it compares against False below.
isCorked = None


In [2]:
# With isCorked set to None this is True: None is falsy but not equal to False.
isCorked != False

True

In [None]:
# Show the CUDA version of the installed PyTorch build.
# NOTE(review): presumably used to diagnose the driver/library version mismatch
# that nvidia-smi reported earlier in this notebook — confirm.
torch.version.cuda