In [None]:
!pip install whisper
!pip install jiwer
!pip install torch



In [None]:
import os
import json
import whisper
import torch
from jiwer import wer, cer, mer, compute_measures
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
import time


In [None]:
!pip install --upgrade --force-reinstall openai-whisper

Collecting openai-whisper
  Using cached openai_whisper-20240930-py3-none-any.whl
Collecting numba (from openai-whisper)
  Using cached numba-0.60.0-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.whl.metadata (2.7 kB)
Collecting numpy (from openai-whisper)
  Using cached numpy-2.1.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (60 kB)
Collecting torch (from openai-whisper)
  Using cached torch-2.5.0-cp310-cp310-manylinux1_x86_64.whl.metadata (28 kB)
Collecting tqdm (from openai-whisper)
  Using cached tqdm-4.66.5-py3-none-any.whl.metadata (57 kB)
Collecting more-itertools (from openai-whisper)
  Using cached more_itertools-10.5.0-py3-none-any.whl.metadata (36 kB)
Collecting tiktoken (from openai-whisper)
  Using cached tiktoken-0.8.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.6 kB)
Collecting triton>=2.0.0 (from openai-whisper)
  Using cached triton-3.1.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (1.3 

In [None]:
import whisper

# Load the "medium" Whisper checkpoint. The resulting `model` object is what
# the evaluation loop later calls `.transcribe()` on.
model = whisper.load_model("medium")

100%|█████████████████████████████████████| 1.42G/1.42G [00:15<00:00, 98.2MiB/s]
  checkpoint = torch.load(fp, map_location=device)


In [None]:
def compute_metrics(predicted_text, reference_text):
    """Score a predicted transcript against its reference using jiwer.

    Args:
        predicted_text: Transcript produced by the model.
        reference_text: Ground-truth transcript to compare against.

    Returns:
        dict with the keys "WER", "CER", "MER", "Deletion Rate" and
        "Insertion Rate".
    """
    # jiwer's functions take the reference first and the hypothesis second.
    wer_score = wer(reference_text, predicted_text)  # Word Error Rate
    cer_score = cer(reference_text, predicted_text)  # Character Error Rate
    mer_score = mer(reference_text, predicted_text)  # Match Error Rate

    # Raw edit-operation counts used for the deletion / insertion ratios.
    measures = compute_measures(reference_text, predicted_text)
    substitutions = measures['substitutions']

    # NOTE(review): these ratios are relative to the substitution count, not
    # to the reference length -- confirm that is the intended definition.
    # Without any substitutions the ratio is undefined; report 0 instead of
    # raising ZeroDivisionError.
    if substitutions > 0:
        deletion_rate = measures['deletions'] / substitutions
        insertion_rate = measures['insertions'] / substitutions
    else:
        deletion_rate = 0
        insertion_rate = 0

    # Return all metrics
    return {
        "WER": wer_score,
        "CER": cer_score,
        "MER": mer_score,
        "Deletion Rate": deletion_rate,
        "Insertion Rate": insertion_rate
    }

In [None]:
import os
import json
import whisper
from jiwer import wer, cer, mer, compute_measures
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
import time

In [None]:
import os
import time
import whisper  # Ensure you have the whisper model imported
from sklearn.metrics import precision_score, recall_score, f1_score

# Input locations: the audio to evaluate and the reference transcripts to score against
recordings_folder, transcripts_folder = "recordings", "transcribed"
# Collects one metrics dict per recording handled by the evaluation loop
metrics_results = list()

# Score one predicted transcript against its reference
def compute_metrics(predicted_text, reference_text):
    """Return error rates and word-overlap scores for a single transcript pair.

    Args:
        predicted_text: Transcript produced by the model.
        reference_text: Ground-truth transcript.

    Returns:
        dict with the keys "WER", "CER", "MER", "Accuracy", "Precision",
        "Recall", "F1 Score", "Deletion Rate" and "Insertion Rate".
    """

    def ratio(numerator, denominator):
        # Every ratio in this function falls back to 0 on an empty denominator.
        return numerator / denominator if denominator > 0 else 0

    # jiwer takes (reference, hypothesis) in that order.
    error_rates = {
        "WER": wer(reference_text, predicted_text),
        "CER": cer(reference_text, predicted_text),
        "MER": mer(reference_text, predicted_text),
    }

    # Overlap is measured on the sets of distinct whitespace-separated tokens,
    # so repeated words and word order do not influence these scores.
    ref_vocab = set(reference_text.split())
    hyp_vocab = set(predicted_text.split())
    tp = len(ref_vocab & hyp_vocab)  # tokens present in both
    fp = len(hyp_vocab - ref_vocab)  # tokens only in the prediction
    fn = len(ref_vocab - hyp_vocab)  # tokens only in the reference

    accuracy = ratio(tp, tp + fp + fn)
    precision = ratio(tp, tp + fp)
    recall = ratio(tp, tp + fn)
    f1 = ratio(2 * precision * recall, precision + recall)

    # Deletions / insertions expressed relative to the substitution count.
    counts = compute_measures(reference_text, predicted_text)
    substitution_count = counts['substitutions']
    if substitution_count > 0:
        deletion_rate = counts['deletions'] / substitution_count
        insertion_rate = counts['insertions'] / substitution_count
    else:
        deletion_rate = insertion_rate = 0

    return {
        **error_rates,
        "Accuracy": accuracy,
        "Precision": precision,
        "Recall": recall,
        "F1 Score": f1,
        "Deletion Rate": deletion_rate,
        "Insertion Rate": insertion_rate,
    }

# Evaluate every WAV recording against its reference transcript.
# Sorted so the processing order (and any log output) is reproducible;
# os.listdir returns entries in arbitrary order.
for filename in sorted(os.listdir(recordings_folder)):
    if not filename.endswith(".wav"):
        continue
    wav_path = os.path.join(recordings_folder, filename)

    try:
        # Swap only the extension: str.replace would also rewrite any other
        # ".wav" occurring inside the file name.
        transcript_name = os.path.splitext(filename)[0] + ".txt"
        transcript_path = os.path.join(transcripts_folder, transcript_name)
        # Explicit encoding so reading does not depend on the platform default.
        with open(transcript_path, "r", encoding="utf-8") as f:
            reference_text = f.read().strip()

        # Clip length in seconds. Measured before the timer starts so that
        # decoding the file here is not counted as transcription time.
        duration = whisper.audio.load_audio(wav_path).shape[-1] / whisper.audio.SAMPLE_RATE
        if duration <= 0:
            # RTF is undefined for an empty clip; skip instead of dividing by zero.
            print(f"Error processing {filename}: audio has zero duration")
            continue

        # Time only the transcription call (perf_counter is monotonic).
        start_time = time.perf_counter()
        result = model.transcribe(wav_path)
        elapsed = time.perf_counter() - start_time
        predicted_text = result["text"]

        # Real-Time Factor: processing time divided by audio duration.
        rtf = elapsed / duration

        # Compute metrics
        metrics = compute_metrics(predicted_text, reference_text)
        metrics["RTF"] = rtf  # Add Real-Time Factor
        metrics["Filename"] = filename

        metrics_results.append(metrics)

    except (FileNotFoundError, RuntimeError, ValueError) as e:
        # ValueError is included so a single bad transcript (undecodable bytes,
        # or text the scoring functions reject) is reported and skipped like
        # the other per-file failures instead of aborting the whole run.
        print(f"Error processing {filename}: {e}")

# Average each numeric metric over all processed recordings and report it
if not metrics_results:
    print("No valid recordings were processed.")
else:
    # Metrics to aggregate, in the order they are printed.
    metric_names = (
        "WER", "CER", "MER",
        "Accuracy", "Precision", "Recall", "F1 Score",
        "Deletion Rate", "Insertion Rate",
        "RTF",
    )
    sample_count = len(metrics_results)
    average_metrics = {
        name: sum(entry[name] for entry in metrics_results) / sample_count
        for name in metric_names
    }

    # One line per metric, rounded to two decimals.
    print("Average Metrics:")
    for name in metric_names:
        print(f"  {name}: {average_metrics[name]:.2f}")


Average Metrics:
  WER: 0.28
  CER: 0.18
  MER: 0.27
  Accuracy: 0.68
  Precision: 0.83
  Recall: 0.79
  F1 Score: 0.81
  Deletion Rate: 0.10
  Insertion Rate: 0.04
  RTF: 0.14


In [None]:
import os
import whisper

# Load the Whisper model
model = whisper.load_model("medium")  # You can choose a different model size like 'tiny', 'small', 'medium', or 'large'

# Where the audio is read from and where the generated transcripts are written
recordings_folder = "recordings"
transcripts_folder = "transcripts"

# Create the transcripts folder if it doesn't exist
os.makedirs(transcripts_folder, exist_ok=True)

# Transcribe each WAV file; sorted so the run order is reproducible
# (os.listdir returns entries in arbitrary order).
for filename in sorted(os.listdir(recordings_folder)):
    if not filename.endswith(".wav"):
        continue
    wav_path = os.path.join(recordings_folder, filename)

    # Transcribe the audio file
    result = model.transcribe(wav_path)
    predicted_text = result["text"]

    # Swap only the extension: str.replace would also rewrite any other
    # ".wav" occurring inside the file name.
    transcript_name = os.path.splitext(filename)[0] + ".txt"
    transcript_path = os.path.join(transcripts_folder, transcript_name)
    # Explicit UTF-8 so non-ASCII text is written the same way on every platform.
    with open(transcript_path, "w", encoding="utf-8") as f:
        f.write(predicted_text)

    print(f"Transcribed {filename} and saved to {transcript_path}")


  checkpoint = torch.load(fp, map_location=device)


Transcribed record_out (2).wav and saved to transcripts/record_out (2).txt
Transcribed record_out (3).wav and saved to transcripts/record_out (3).txt
Transcribed record_out (1).wav and saved to transcripts/record_out (1).txt
Transcribed record_out.wav and saved to transcripts/record_out.txt


In [None]:
import os
import whisper

# Load the Whisper model
model = whisper.load_model("medium")  # You can choose a different model size like 'tiny', 'small', 'medium', or 'large'

# Where the audio is read from and where the generated transcripts are written
recordings_folder = "recordings"
transcripts_folder = "transcripts"

# Create the transcripts folder if it doesn't exist
os.makedirs(transcripts_folder, exist_ok=True)

# Transcribe each WAV file; sorted so the printed transcripts appear in a
# reproducible order (os.listdir returns entries in arbitrary order).
for filename in sorted(os.listdir(recordings_folder)):
    if not filename.endswith(".wav"):
        continue
    wav_path = os.path.join(recordings_folder, filename)

    # Transcribe the audio file
    result = model.transcribe(wav_path)
    predicted_text = result["text"]

    # Swap only the extension: str.replace would also rewrite any other
    # ".wav" occurring inside the file name.
    transcript_name = os.path.splitext(filename)[0] + ".txt"
    transcript_path = os.path.join(transcripts_folder, transcript_name)
    # Explicit UTF-8 so non-ASCII text is written the same way on every platform.
    with open(transcript_path, "w", encoding="utf-8") as f:
        f.write(predicted_text)

    # Print the transcript to the output
    print(f"Transcribed {filename}:")
    print(predicted_text)
    print("\n" + "-" * 40 + "\n")  # Separator for readability


  checkpoint = torch.load(fp, map_location=device)


Transcribed record_out (2).wav:
 Good afternoon, what seems to be the problem? I'm experiencing a lot of fatigue and headaches. How long have you been feeling this way? It's been about 2 weeks now. Let's run some tests to see what might be causing it.

----------------------------------------

Transcribed record_out (3).wav:
 Hi there, how can I help you today? I think I might have allergies. I've been sneezing a lot. Have you been exposed to any allergens recently? Yes, I was cleaning out my attic and there was a lot of dust. Let's discuss some allergy treatments.

----------------------------------------

Transcribed record_out (1).wav:
 Hello, what brings you in today? I've had this persistent cough for the last week. Have you noticed any other symptoms? Yes, I've also had a bit of a sore throat. Let's take a look and see what's going on.

----------------------------------------

Transcribed record_out.wav:
 Good morning, how are you feeling today? I've been feeling really anxious 