In [35]:
import os

# Later cells import the `src` package and read `data/test/...` through
# relative paths, so the working directory has to be the project root.
# The root can be overridden with the NURSE_SUMMARY_ROOT environment variable;
# the original author's local checkout stays as the fallback so existing
# behavior is unchanged when the variable is not set.
PROJECT_ROOT = os.environ.get(
    'NURSE_SUMMARY_ROOT',
    '/Users/anujshah/Downloads/nurse-summary-automation/',
)
os.chdir(PROJECT_ROOT)

In [36]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from src.audio_processing.transcriber import load_model, transcribe_audio

In [37]:
# Whisper checkpoint size passed to the project's `load_model` helper.
# NOTE(review): the recorded output shows a ~2.88G download on first load,
# so this cell can be slow on a fresh machine.
WHISPER_MODEL_SIZE = "large"
model = load_model(WHISPER_MODEL_SIZE)

Loading Whisper model: large...


100%|████████████████████████████████████| 2.88G/2.88G [1:59:29<00:00, 431kiB/s]
  checkpoint = torch.load(fp, map_location=device)


Model loaded successfully.


In [38]:
def compute_similarity(text1, text2):
    """
    Compute the TF-IDF cosine similarity between two texts.

    A ``TfidfVectorizer`` with default settings is fitted on just these two
    documents, and the cosine similarity between the two resulting rows is
    returned.

    Args:
        text1 (str): The first text string.
        text2 (str): The second text string.

    Returns:
        float: Cosine similarity score between text1 and text2. ``0.0`` is
            returned when either text produces no tokens under the
            vectorizer's analyzer (e.g. an empty transcription).
    """
    vectorizer = TfidfVectorizer()

    # If neither text contains a token, fit_transform raises
    # "ValueError: empty vocabulary", which would abort a whole evaluation
    # run over one silent or empty recording. A text without tokens maps to
    # an all-zero vector, whose cosine similarity with anything is 0.0, so
    # return that directly. This is also the value already produced when
    # only one of the two texts is empty.
    analyzer = vectorizer.build_analyzer()
    if not analyzer(text1) or not analyzer(text2):
        return 0.0

    # Rows 0 and 1 of the matrix are the TF-IDF vectors of text1 and text2.
    tfidf_matrix = vectorizer.fit_transform([text1, text2])

    # cosine_similarity returns the full 2x2 pairwise matrix; entry [0, 1]
    # is the similarity between the first and the second text.
    return cosine_similarity(tfidf_matrix.toarray())[0, 1]

In [39]:
def evaluate_transcriptions(audio_dir, transcription_dir, model,
                            audio_ext='.mp3', expected_suffix='_0.txt'):
    """
    Evaluate the transcriptions of all audio files in a directory.

    Each audio file ``<name><audio_ext>`` in ``audio_dir`` is transcribed and
    compared against the reference text stored in
    ``<transcription_dir>/<name><expected_suffix>``.

    Args:
        audio_dir (str): Directory containing the audio files.
        transcription_dir (str): Directory containing the expected transcription text files.
        model: Loaded Whisper model, passed through to ``transcribe_audio``.
        audio_ext (str): File-name suffix selecting which files in
            ``audio_dir`` are evaluated. Defaults to ``'.mp3'``.
        expected_suffix (str): Suffix appended to the audio file's base name
            to build the reference transcription file name. Defaults to
            ``'_0.txt'``.

    Returns:
        dict: Dictionary with audio file names and their corresponding similarity scores.

    Raises:
        FileNotFoundError: If any audio file has no matching reference
            transcription. Raised before any audio is transcribed.
    """
    audio_files = sorted(f for f in os.listdir(audio_dir) if f.endswith(audio_ext))

    # Resolve every reference transcript up front so a missing file is
    # reported immediately instead of after a costly transcription pass.
    expected_paths = {}
    for audio_file in audio_files:
        expected_file = os.path.splitext(audio_file)[0] + expected_suffix
        expected_paths[audio_file] = os.path.join(transcription_dir, expected_file)

    missing = [path for path in expected_paths.values() if not os.path.isfile(path)]
    if missing:
        raise FileNotFoundError(
            "Missing expected transcription file(s): " + ", ".join(missing)
        )

    scores = {}
    for audio_file in audio_files:
        print(f"Processing {audio_file}...")

        # Transcribe the audio file
        audio_path = os.path.join(audio_dir, audio_file)
        transcribed_text = transcribe_audio(audio_path, model)

        # Load the expected transcription. The encoding is explicit so the
        # result does not depend on the platform's default locale.
        with open(expected_paths[audio_file], 'r', encoding='utf-8') as file:
            expected_text = file.read()

        # Compute similarity between transcribed text and expected text
        similarity_score = compute_similarity(transcribed_text, expected_text)
        scores[audio_file] = similarity_score
        print(f"Similarity score for {audio_file}: {similarity_score:.2f}\n")

    return scores

In [40]:
# Test fixtures, given relative to the current working directory:
# the recordings to transcribe and their reference transcripts.
audio_dir = 'data/test/audio'
transcription_dir = 'data/test/transcriptions'

# Transcribe every recording and score it against its reference text.
# The result maps audio file name -> similarity score.
similarity_scores = evaluate_transcriptions(
    audio_dir=audio_dir,
    transcription_dir=transcription_dir,
    model=model,
)

Processing Mom 4 - 1.mp3...
Transcribing audio file: data/test/audio/Mom 4 - 1.mp3...




Transcription completed.
Similarity score for Mom 4 - 1.mp3: 0.97

Processing Neeraj 5.mp3...
Transcribing audio file: data/test/audio/Neeraj 5.mp3...




Transcription completed.
Similarity score for Neeraj 5.mp3: 0.96

Processing Rohan Note 1.mp3...
Transcribing audio file: data/test/audio/Rohan Note 1.mp3...




Transcription completed.
Similarity score for Rohan Note 1.mp3: 0.97

Processing Tanay 3.mp3...
Transcribing audio file: data/test/audio/Tanay 3.mp3...




Transcription completed.
Similarity score for Tanay 3.mp3: 0.98

Processing Woman note 2.mp3...
Transcribing audio file: data/test/audio/Woman note 2.mp3...




Transcription completed.
Similarity score for Woman note 2.mp3: 0.97



In [46]:
import pandas as pd

# Convert the scores dictionary to a DataFrame (one row per audio file)
scores_df = pd.DataFrame(list(similarity_scores.items()), columns=['Audio File', 'Similarity Score'])

# Sort by similarity score, best-matching transcription first
scores_df = scores_df.sort_values(by='Similarity Score', ascending=False)

# Report the mean score across all evaluated files.
# The inner key uses single quotes: reusing the f-string's own quote
# character inside the replacement field is a SyntaxError before Python 3.12.
print(f"Average similarity score is {scores_df['Similarity Score'].mean()}")

Average similarity score is 0.9723360397795615
