## Whisper Demonstration Notebook using OpenAI's Whisper Implementation with GPU Support

Step 1: Import Required Libraries

In [None]:
import whisper
import torch
import librosa

Step 2: Check for GPU Availability and Load the Whisper Model on GPU if Available

In [None]:
# Use the GPU only when CUDA is actually usable; otherwise fall back to the CPU
# so the notebook still runs on machines without a GPU.
device = "cuda" if torch.cuda.is_available() else "cpu"
# Load the checkpoint directly onto the selected device.
# NOTE(review): the Whisper README states that "turbo" is not trained for the
# translation task; if the translation demo returns untranslated text, switch
# to a multilingual checkpoint such as "medium" or "large" -- confirm.
model = whisper.load_model("turbo", device=device)

Step 3: Define Function to Load and Transcribe Audio

In [None]:
def transcribe_audio(file_path):
    """
    Transcribe the audio file at ``file_path`` with the notebook's Whisper model.

    The file is decoded by librosa at a 16 kHz sample rate, wrapped in a torch
    tensor placed on the notebook-level ``device``, and passed to
    ``model.transcribe``. The ``'text'`` entry of the result is returned.
    """
    # librosa returns (samples, sample_rate); the rate is fixed by ``sr`` so it is discarded.
    samples, _ = librosa.load(file_path, sr=16000)

    # Put the waveform on the same device as the model before transcribing.
    waveform = torch.tensor(samples).to(device)

    return model.transcribe(waveform)['text']

Step 4: Define Function to Translate Audio (Whisper Translates Speech into English)

In [None]:
def translate_audio(file_path, target_language="de"):
    """
    Load the audio with librosa, convert to 16kHz, and translate it to English
    using Whisper.

    Whisper's ``task="translate"`` always produces English text. Its
    ``language`` option names the language *spoken in the audio*, not the
    output language, so ``target_language`` is deliberately not forwarded as
    ``language``; the source language is left to Whisper's auto-detection.

    Args:
        file_path: Path of the audio file to translate.
        target_language: Kept for backward compatibility. Only English
            ("en" / "english") can be produced; any other value triggers a
            ``UserWarning`` and the result is still English.

    Returns:
        The ``'text'`` entry of the result returned by ``model.transcribe``.
    """
    import warnings

    if target_language is not None and str(target_language).lower() not in ("en", "english"):
        warnings.warn(
            f"Whisper can only translate into English; ignoring "
            f"target_language={target_language!r}.",
            stacklevel=2,
        )

    # Load audio file and resample to 16kHz (the returned sample rate is not needed)
    audio, _ = librosa.load(file_path, sr=16000)

    # Convert audio to a tensor on the same device as the model
    audio_tensor = torch.tensor(audio).to(device)

    # Translate by setting the task to 'translate'; the spoken language is auto-detected
    result = model.transcribe(audio_tensor, task="translate")
    return result['text']


First demo: Transcription

In [None]:
# Relative path of the recording used for the transcription demo.
file_path = "Recording.wav"
# Transcription
try:
    transcription = transcribe_audio(file_path)
    print("Transcription:", transcription)
except Exception as e:
    # Print the error instead of raising so the remaining notebook cells can still be run.
    print("Error during transcription:", e)

Second demo: Transcribing and translating from Chinese

In [None]:
# Relative path of the Chinese recording used for the translation demo.
file_path = "Recording_zh.wav"
# Translation
try:
    target_language = "en"  # NOTE: Whisper's translate task only outputs English (see the Whisper README)
    # First show the transcription of the recording, then its translation.
    transcription = transcribe_audio(file_path)
    print("Transcription:", transcription)
    translation = translate_audio(file_path, target_language)
    print(f"Translation ({target_language}):", translation)
except Exception as e:
    # Covers failures in either the transcription or the translation call above.
    print("Error during translation:", e)