In [None]:
# running trial 2
import whisper
import os

# Load the Whisper "medium" checkpoint once at module level.
# transcribe_hindi_audio (defined below) reads this global `model` directly.
# Swap in a Hindi-specific checkpoint here if one is available.
model = whisper.load_model("medium")  # Replace with Hindi model if applicable

def get_audio_paths(base_path):
  """
  Recursively collects audio file paths within a directory.

  A file counts as audio when its name ends in ".wav", ".flac" or ".mp3";
  the comparison ignores case, so "clip.WAV" is included as well.

  Args:
      base_path: The base path of the dataset hierarchy.

  Returns:
      A sorted list of paths to all audio files found. Each path is the
      directory reported by os.walk joined with the file name, so the
      paths are absolute only when base_path itself is absolute. The list
      is empty when base_path does not exist or holds no audio files.
  """
  audio_extensions = (".wav", ".flac", ".mp3")
  audio_paths = []
  for root, _, files in os.walk(base_path):
    for filename in files:
      # str.endswith accepts a tuple, so one call covers every extension.
      if filename.lower().endswith(audio_extensions):
        audio_paths.append(os.path.join(root, filename))  # Construct full path
  # os.walk yields entries in arbitrary order; sort so repeated runs agree.
  return sorted(audio_paths)

# Example usage: collect every audio file found under the dataset root.
base_path = "/content/drive/MyDrive/kathbath/hindi"  # Replace with your base path
audio_files = get_audio_paths(base_path)

def transcribe_hindi_audio(audio_files):
  """
  Transcribes multiple Hindi audio files using Whisper.

  Uses the module-level `model` and asks it to decode with language="hi".
  A file that fails to transcribe is reported on stdout and represented by
  an empty string, so the result always lines up index-for-index with
  audio_files.

  Args:
      audio_files: A list of absolute paths to audio files.

  Returns:
      A list of transcription texts (one for each audio file, in the same
      order); "" marks a file whose transcription raised an error.
  """
  transcriptions = []
  for audio_file in audio_files:
    try:
      result = model.transcribe(audio_file, language="hi")
      transcriptions.append(result["text"])
    except Exception as e:  # Best effort: one bad file must not stop the run
      print(f"Error transcribing {audio_file}: {e}")
      # Keep a placeholder so later entries stay aligned with their files.
      transcriptions.append("")
  return transcriptions

# Example usage: transcribe every collected file and print each result,
# numbering the transcriptions from 1 in the order they were returned.
transcriptions = transcribe_hindi_audio(audio_files)
for i, text in enumerate(transcriptions):
  print(f"Transcription for audio file {i+1}: {text}")


In [None]:
# Calculating WER using asr_evaluation.
# NOTE: the test file has not been uploaded yet.

import whisper
#from asr_evaluation import word_errors
import asr_evaluation

import os
# Load the Whisper "small" checkpoint; the example usage at the bottom of this
# cell passes it explicitly to transcribe_audio_files.
# Swap in a Hindi-specific checkpoint here if one is available.
model = whisper.load_model("small")  # Replace with Hindi model if applicable

def get_audio_paths(base_path):
  """
  Recursively collects audio file paths within a directory.

  A file counts as audio when its name ends in ".wav", ".flac" or ".mp3";
  the comparison ignores case, so "clip.WAV" is included as well.

  Args:
      base_path: The base path of the dataset hierarchy.

  Returns:
      A sorted list of paths to all audio files found. Each path is the
      directory reported by os.walk joined with the file name, so the
      paths are absolute only when base_path itself is absolute. The list
      is empty when base_path does not exist or holds no audio files.
  """
  audio_extensions = (".wav", ".flac", ".mp3")
  audio_paths = []
  for root, _, files in os.walk(base_path):
    for filename in files:
      # str.endswith accepts a tuple, so one call covers every extension.
      if filename.lower().endswith(audio_extensions):
        audio_paths.append(os.path.join(root, filename))  # Construct full path
  # os.walk yields entries in arbitrary order; sort so repeated runs agree.
  return sorted(audio_paths)

def transcribe_audio_files(audio_files, model):
  """
  Runs Whisper over each audio file and gathers the recognised text.

  Files are processed one after another in the order given, always with
  language="hi". Any exception raised by the model propagates to the caller.

  Args:
      audio_files: A list of paths to audio files.
      model: The loaded Whisper model (anything exposing `transcribe`).

  Returns:
      A list with the "text" field of each transcription result, one entry
      per audio file and in the same order.
  """
  def _hypothesis_for(path):
    # Only the plain text of the result is kept; other fields are discarded.
    return model.transcribe(path, language="hi")["text"]

  return [_hypothesis_for(path) for path in audio_files]


def load_reference_texts(path):
    """
    Loads the reference transcripts stored as .txt files under a directory.

    The directory tree is walked recursively. Every file whose name ends in
    ".txt" is read as UTF-8 and contributes one entry: its full contents with
    leading and trailing whitespace removed.

    Args:
        path: Root directory that holds the reference .txt files.

    Returns:
        A list of transcript strings, one per .txt file, ordered by the
        sorted file paths. Empty when no .txt file is found.
    """
    text_paths = []
    for root, _, files in os.walk(path):
        for filename in files:
            if filename.endswith(".txt"):
                text_paths.append(os.path.join(root, filename))  # Construct full path

    references = []
    # Sort because os.walk order is arbitrary and callers pair by position.
    for text_path in sorted(text_paths):
        # Explicit UTF-8 so Devanagari text decodes the same on every platform.
        with open(text_path, encoding="utf-8") as handle:
            references.append(handle.read().strip())
    return references



def _word_edit_distance(reference_words, hypothesis_words):
  """Returns the word-level Levenshtein distance between two token lists."""
  # Row-by-row dynamic programme; `previous` is the row for the prior
  # reference word, so only two rows are held at any time.
  previous = list(range(len(hypothesis_words) + 1))
  for i, ref_word in enumerate(reference_words, start=1):
    current = [i]
    for j, hyp_word in enumerate(hypothesis_words, start=1):
      substitution = previous[j - 1] + (ref_word != hyp_word)
      deletion = previous[j] + 1
      insertion = current[j - 1] + 1
      current.append(min(substitution, deletion, insertion))
    previous = current
  return previous[-1]


def calculate_wer(references, hypotheses):
  """
  Calculates WER for a list of references and hypotheses.

  Each reference is paired with the hypothesis at the same position. Both
  strings are split on whitespace (no case or punctuation normalisation),
  and the pair's WER is the word-level edit distance divided by the number
  of reference words. A reference with no words is treated as having one
  word to avoid dividing by zero.

  Args:
      references: A list of reference transcript texts.
      hypotheses: A list of corresponding hypotheses texts.

  Returns:
      The average WER across all references and hypotheses, as a float.

  Raises:
      ValueError: If the two lists differ in length or are empty.
  """
  references = list(references)
  hypotheses = list(hypotheses)
  if len(references) != len(hypotheses):
    # zip() would silently drop the surplus and score misaligned pairs.
    raise ValueError(
        f"Got {len(references)} references but {len(hypotheses)} hypotheses"
    )
  if not references:
    raise ValueError("Cannot compute WER without any reference/hypothesis pairs")

  total_wer = 0.0
  for reference, hypothesis in zip(references, hypotheses):
    reference_words = reference.split()
    hypothesis_words = hypothesis.split()
    errors = _word_edit_distance(reference_words, hypothesis_words)
    total_wer += errors / max(len(reference_words), 1)
  # Average over the number of pairs.
  return total_wer / len(references)

# Example usage: gather the references and the audio files, transcribe the
# audio, then report the WER.
# NOTE(review): calculate_wer pairs references and hypotheses by position, so
# both lists must be in matching order — confirm the dataset layout ensures it.
reference_texts = load_reference_texts("/content/drive/MyDrive/GV_Eval_3h/text")  # Replace with your function to load references
new_audio_files = get_audio_paths("/content/drive/MyDrive/GV_Eval_3h")  # Replace with your function to get audio paths
hypotheses = transcribe_audio_files(new_audio_files, model)
wer = calculate_wer(reference_texts, hypotheses)
print("Word Error Rate (WER):", wer)

