In [1]:
!pip install whisper
!pip install jiwer
!pip install torch



In [2]:
import os
import json
import whisper
import torch
from jiwer import wer, cer, mer, compute_measures
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
import time


In [3]:
!pip install --upgrade --force-reinstall openai-whisper

Collecting openai-whisper
  Downloading openai-whisper-20240930.tar.gz (800 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/800.5 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m [32m798.7/800.5 kB[0m [31m25.5 MB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m800.5/800.5 kB[0m [31m18.4 MB/s[0m eta [36m0:00:00[0m
[?25h  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Collecting numba (from openai-whisper)
  Downloading numba-0.60.0-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.whl.metadata (2.7 kB)
Collecting numpy (from openai-whisper)
  Downloading numpy-2.1.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (60 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m60.9/60.9 kB[0m [31m4.6

In [3]:
import whisper

model = whisper.load_model("medium")

100%|█████████████████████████████████████| 1.42G/1.42G [00:21<00:00, 72.0MiB/s]
  checkpoint = torch.load(fp, map_location=device)


In [4]:
def compute_metrics(predicted_text, reference_text):
    # Word Error Rate
    wer_score = wer(reference_text, predicted_text)

    # Character Error Rate
    cer_score = cer(reference_text, predicted_text)

    # Match Error Rate
    mer_score = mer(reference_text, predicted_text)

    # Additional metrics (deletion, insertion)
    measures = compute_measures(reference_text, predicted_text)
    deletion_rate = measures['deletions'] / measures['substitutions']
    insertion_rate = measures['insertions'] / measures['substitutions']

    # Return all metrics
    return {
        "WER": wer_score,
        "CER": cer_score,
        "MER": mer_score,
        "Deletion Rate": deletion_rate,
        "Insertion Rate": insertion_rate
    }

In [5]:
import os
import json
import whisper
from jiwer import wer, cer, mer, compute_measures
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
import time

In [6]:
import os
import time
import whisper  # Ensure you have the whisper model imported
from sklearn.metrics import precision_score, recall_score, f1_score

# Define the paths for audio recordings and transcripts
recordings_folder = "recordings"
transcripts_folder = "transcribed"
metrics_results = []

# Function to compute evaluation metrics
def compute_metrics(predicted_text, reference_text):
    # Word Error Rate
    wer_score = wer(reference_text, predicted_text)

    # Character Error Rate
    cer_score = cer(reference_text, predicted_text)

    # Match Error Rate
    mer_score = mer(reference_text, predicted_text)

    # Calculate true positives, false positives, and false negatives
    reference_words = reference_text.split()
    predicted_words = predicted_text.split()

    tp = len(set(reference_words) & set(predicted_words))  # True Positives
    fp = len(set(predicted_words) - set(reference_words))  # False Positives
    fn = len(set(reference_words) - set(predicted_words))  # False Negatives

    # Calculate metrics
    accuracy = (tp) / (tp + fp + fn) if (tp + fp + fn) > 0 else 0
    precision = (tp) / (tp + fp) if (tp + fp) > 0 else 0
    recall = (tp) / (tp + fn) if (tp + fn) > 0 else 0
    f1 = (2 * precision * recall / (precision + recall)) if (precision + recall) > 0 else 0

    # Additional metrics (deletion, insertion)
    measures = compute_measures(reference_text, predicted_text)
    deletion_rate = measures['deletions'] / measures['substitutions'] if measures['substitutions'] > 0 else 0
    insertion_rate = measures['insertions'] / measures['substitutions'] if measures['substitutions'] > 0 else 0

    # Return all metrics
    return {
        "WER": wer_score,
        "CER": cer_score,
        "MER": mer_score,
        "Accuracy": accuracy,
        "Precision": precision,
        "Recall": recall,
        "F1 Score": f1,
        "Deletion Rate": deletion_rate,
        "Insertion Rate": insertion_rate
    }

# Loop through each WAV file
for filename in os.listdir(recordings_folder):
    if filename.endswith(".wav"):
        wav_path = os.path.join(recordings_folder, filename)

        try:
            # Load corresponding text transcript
            transcript_path = os.path.join(transcripts_folder, filename.replace(".wav", ".txt"))  # Change to .txt
            with open(transcript_path, "r") as f:
                reference_text = f.read().strip()  # Read the text file content

            # Start timer for Real-Time Factor
            start_time = time.time()

            # Transcribe the audio file
            result = model.transcribe(wav_path)
            predicted_text = result["text"]

            # Calculate Real-Time Factor (RTF)
            duration = whisper.audio.load_audio(wav_path).shape[-1] / whisper.audio.SAMPLE_RATE
            rtf = (time.time() - start_time) / duration

            # Compute metrics
            metrics = compute_metrics(predicted_text, reference_text)
            metrics["RTF"] = rtf  # Add Real-Time Factor
            metrics["Filename"] = filename

            metrics_results.append(metrics)

        except (FileNotFoundError, RuntimeError) as e:
            print(f"Error processing {filename}: {e}")

# Calculate averages
if metrics_results:  # Ensure there are results to average
    average_metrics = {
        "WER": sum(result["WER"] for result in metrics_results) / len(metrics_results),
        "CER": sum(result["CER"] for result in metrics_results) / len(metrics_results),
        "MER": sum(result["MER"] for result in metrics_results) / len(metrics_results),
        "Accuracy": sum(result["Accuracy"] for result in metrics_results) / len(metrics_results),
        "Precision": sum(result["Precision"] for result in metrics_results) / len(metrics_results),
        "Recall": sum(result["Recall"] for result in metrics_results) / len(metrics_results),
        "F1 Score": sum(result["F1 Score"] for result in metrics_results) / len(metrics_results),
        "Deletion Rate": sum(result["Deletion Rate"] for result in metrics_results) / len(metrics_results),
        "Insertion Rate": sum(result["Insertion Rate"] for result in metrics_results) / len(metrics_results),
        "RTF": sum(result["RTF"] for result in metrics_results) / len(metrics_results),
    }

    # Print out average results
    print("Average Metrics:")
    print(f"  WER: {average_metrics['WER']:.2f}")
    print(f"  CER: {average_metrics['CER']:.2f}")
    print(f"  MER: {average_metrics['MER']:.2f}")
    print(f"  Accuracy: {average_metrics['Accuracy']:.2f}")
    print(f"  Precision: {average_metrics['Precision']:.2f}")
    print(f"  Recall: {average_metrics['Recall']:.2f}")
    print(f"  F1 Score: {average_metrics['F1 Score']:.2f}")
    print(f"  Deletion Rate: {average_metrics['Deletion Rate']:.2f}")
    print(f"  Insertion Rate: {average_metrics['Insertion Rate']:.2f}")
    print(f"  RTF: {average_metrics['RTF']:.2f}")
else:
    print("No valid recordings were processed.")


Average Metrics:
  WER: 0.27
  CER: 0.16
  MER: 0.27
  Accuracy: 0.58
  Precision: 0.75
  Recall: 0.71
  F1 Score: 0.73
  Deletion Rate: 0.15
  Insertion Rate: 0.08
  RTF: 0.14


In [7]:
import os
import whisper

# Load the Whisper model
model = whisper.load_model("medium")  # You can choose a different model size like 'tiny', 'small', 'medium', or 'large'

# Define the path to your audio recordings
recordings_folder = "recordings"
transcripts_folder = "transcripts"

# Create the transcripts folder if it doesn't exist
os.makedirs(transcripts_folder, exist_ok=True)

# Loop through each WAV file in the recordings folder
for filename in os.listdir(recordings_folder):
    if filename.endswith(".wav"):
        wav_path = os.path.join(recordings_folder, filename)

        # Transcribe the audio file
        result = model.transcribe(wav_path)
        predicted_text = result["text"]

        # Save the transcript to a text file
        transcript_path = os.path.join(transcripts_folder, filename.replace(".wav", ".txt"))
        with open(transcript_path, "w") as f:
            f.write(predicted_text)

        print(f"Transcribed {filename} and saved to {transcript_path}")


  checkpoint = torch.load(fp, map_location=device)


Transcribed S10.wav and saved to transcripts/S10.txt
Transcribed S21.wav and saved to transcripts/S21.txt
Transcribed S3.wav and saved to transcripts/S3.txt
Transcribed S6.wav and saved to transcripts/S6.txt
Transcribed S13.wav and saved to transcripts/S13.txt
Transcribed S20.wav and saved to transcripts/S20.txt
Transcribed S4.wav and saved to transcripts/S4.txt
Transcribed S17.wav and saved to transcripts/S17.txt
Transcribed S16.wav and saved to transcripts/S16.txt
Transcribed S1.wav and saved to transcripts/S1.txt
Transcribed S8.wav and saved to transcripts/S8.txt
Transcribed S15.wav and saved to transcripts/S15.txt
Transcribed S9.wav and saved to transcripts/S9.txt
Transcribed S2.wav and saved to transcripts/S2.txt
Transcribed S14.wav and saved to transcripts/S14.txt
Transcribed S12.wav and saved to transcripts/S12.txt
Transcribed S5.wav and saved to transcripts/S5.txt
Transcribed S11.wav and saved to transcripts/S11.txt
Transcribed S18.wav and saved to transcripts/S18.txt
Transcrib

In [8]:
import os
import whisper

# Load the Whisper model
model = whisper.load_model("medium")  # You can choose a different model size like 'tiny', 'small', 'medium', or 'large'

# Define the path to your audio recordings
recordings_folder = "recordings"
transcripts_folder = "transcripts"

# Create the transcripts folder if it doesn't exist
os.makedirs(transcripts_folder, exist_ok=True)

# Loop through each WAV file in the recordings folder
for filename in os.listdir(recordings_folder):
    if filename.endswith(".wav"):
        wav_path = os.path.join(recordings_folder, filename)

        # Transcribe the audio file
        result = model.transcribe(wav_path)
        predicted_text = result["text"]

        # Save the transcript to a text file
        transcript_path = os.path.join(transcripts_folder, filename.replace(".wav", ".txt"))
        with open(transcript_path, "w") as f:
            f.write(predicted_text)

        # Print the transcript to the output
        print(f"Transcribed {filename}:")
        print(predicted_text)
        print("\n" + "-" * 40 + "\n")  # Separator for readability


  checkpoint = torch.load(fp, map_location=device)


Transcribed S10.wav:
 Hi, have you been managing your asthma lately? Hi doctor, it's been a bit challenging. I have had a couple of attacks, especially when I am outside in the cold. I see cold air can be trigger for many people. Have you been using your inhaler regularly? I use it when I feel busy, but I am not consistent about using it daily. It's crucial to have a preventive strategy in place. Let's review your asthma action plan and see if we need to adjust your medication. That sounds good. I want to be more proactive about it.

----------------------------------------

Transcribed S21.wav:
 Hello, how is your exercise and routine being going? Hi doctor, I've been trying to stick to a routine but it's been hard to stay motivated. Motivation can be a challenge. What type of activities do you enjoy? I like biking but I don't always have time. Have you considered shorter, more frequent workouts? Even 15-20 minutes a day can make a difference. That sounds doable. I'll give it a try.



 Implement Noise Reduction on Audio Files