In [15]:
%pip install openai-whisper

Note: you may need to restart the kernel to use updated packages.


In [2]:
import whisper
import torch
import os

In [3]:
# Whisper was tried on Apple silicon but does not yet play nicely with the
# Metal Performance Shaders backend, so only CUDA is considered as an accelerator.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
print(f"Using device: {device}")

Using device: cuda


In [4]:
# "small" is used instead of "base" for better accuracy; "medium" or "large"
# can be tried for better accuracy still, at the cost of slower speed.
model = whisper.load_model(name="small", device=device)

Transcribe audio using Whisper

In [5]:
AUDIO_PATH = "data/sample_audio4.mp3"

# Transcribe only when the file is present. The transcription output is kept in
# `result`, and its "text" entry holds the transcript that later cells consume.
if os.path.exists(AUDIO_PATH):
    print("Processing audio file...")
    result = model.transcribe(AUDIO_PATH)
    print("Transcript:\n")
    print(result["text"])
else:
    # Build the hint from AUDIO_PATH so it always names the path that was checked.
    # NOTE: `result` is not assigned on this branch, so later cells that read it
    # will fail on a fresh kernel (or silently reuse a stale value).
    print(f"❌ Audio file not found. Make sure it's at `{AUDIO_PATH}`.")


Processing audio file...
Transcript:

 Hospital arrangements were good, the beds were clean, the food was healthy and good, but the nurses were not on time and the medicines were not properly administered.


Auto-summarise the transcript using facebook/bart-large-cnn

In [6]:
from transformers import pipeline

# Summarization pipeline backed by the facebook/bart-large-cnn checkpoint
summarizer = pipeline(
    task="summarization",
    model="facebook/bart-large-cnn",
)

def summarize_text(text, max_length=150, min_length=30):
    """
    Summarizes the input text with the notebook-level `summarizer` pipeline.

    Args:
        text (str): The text to summarize.
        max_length (int): Maximum summary length forwarded to the pipeline
            (default is 150). Lower it for short inputs to avoid the pipeline's
            "max_length is set to ..., but your input_length is only ..." warning.
        min_length (int): Minimum summary length forwarded to the pipeline
            (default is 30).

    Returns:
        str or None: The generated summary, or None if the input is empty
                     or an error occurs.
    """
    # Nothing to summarize: skip the model call rather than relying on the
    # except path (or getting a summary invented from empty input).
    if text is None or (isinstance(text, str) and not text.strip()):
        print("Error summarizing text: input text is empty")
        return None

    try:
        # do_sample=False disables sampling during generation.
        outputs = summarizer(
            text,
            max_length=max_length,
            min_length=min_length,
            do_sample=False,
        )
        return outputs[0]["summary_text"]
    except Exception as e:
        # Best-effort: report the problem and let the caller handle a None summary.
        print(f"Error summarizing text: {e}")
        return None


# Feed the Whisper transcript to the summarizer; `summary` is None on failure.
patient_feedback_text = result["text"]
summary = summarize_text(text=patient_feedback_text)

if summary:
    print(f"Summary: {summary}")


  from .autonotebook import tqdm as notebook_tqdm
Device set to use cuda:0
Your max_length is set to 150, but your input_length is only 35. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=17)


Summary: Hospital arrangements were good, the beds were clean, the food was healthy and good, but the nurses were not on time and the medicines were not properly administered.


Implementing Concern Tagging using Zero-Shot Classification

In [7]:
from transformers import pipeline

# Zero-shot classification pipeline backed by the facebook/bart-large-mnli checkpoint
classifier = pipeline(
    task="zero-shot-classification",
    model="facebook/bart-large-mnli",
)

# Candidate labels that patient feedback is scored against
concern_categories = [
    "Staff",
    "Billing",
    "Food and Amenities",
    "Cleanliness",
    "Post Discharge Care",
    "Communication",
    "Efficiency",
    "Comfort and Privacy",
    "Digital Experience",
]

def tag_concerns_top_n(text, categories, top_n=3, multi_label=False):
    """
    Tags the input text with the top N most likely concern categories.

    Args:
        text (str): The patient feedback text.
        categories (list): A list of potential concern categories.
        top_n (int): The number of top categories to return (default is 3).
        multi_label (bool): Forwarded to the zero-shot pipeline. When True, each
            category is scored independently, which suits feedback that raises
            several concerns at once; the default (False) keeps the pipeline's
            standard behaviour where scores are normalized across categories.

    Returns:
        list or None: A list of tuples, where each tuple contains (category, score),
                     or None if the input is empty, nothing was scored,
                     or an error occurs.
    """
    # Nothing to classify (e.g. an upstream summary of None): skip the model call.
    if not text:
        print("Error tagging concern: input text is empty")
        return None

    try:
        # Named `prediction` so it does not shadow the notebook-level Whisper `result`.
        prediction = classifier(
            text,
            candidate_labels=categories,
            multi_label=multi_label,
        )
        if prediction and prediction['labels'] and prediction['scores']:
            # The first top_n label/score pairs are taken as the top categories.
            labels = prediction['labels'][:top_n]
            scores = prediction['scores'][:top_n]
            return list(zip(labels, scores))
        return None
    except Exception as e:
        # Best-effort: report the problem and let the caller handle a None result.
        print(f"Error tagging concern: {e}")
        return None

# The summary produced earlier is the text that gets tagged
feedback_text = summary

# Ask for the three highest-scoring concern categories
top_concerns = tag_concerns_top_n(feedback_text, concern_categories, top_n=3)

if not top_concerns:
    print("Could not determine concern tags.")
else:
    print("Top Concern Tags:")
    for tag, score in top_concerns:
        print(f"- {tag}: {score:.2f}")

Device set to use cuda:0


Top Concern Tags:
- Food and Amenities: 0.34
- Cleanliness: 0.18
- Staff: 0.11


Sentiment Analysis using nlptown/bert-base-multilingual-uncased-sentiment

In [8]:
from transformers import pipeline

# Sentiment pipeline backed by the nlptown/bert-base-multilingual-uncased-sentiment checkpoint
sentiment_analyzer = pipeline(
    task="sentiment-analysis",
    model="nlptown/bert-base-multilingual-uncased-sentiment",
)

def analyze_sentiment_simplified(text):
    """
    Runs the notebook-level `sentiment_analyzer` on the text and collapses its
    star-rating label into a coarse sentiment.

    Args:
        text (str): The patient feedback text.

    Returns:
        tuple: (sentiment, score), where sentiment is 'positive' for 4-5 stars,
               'negative' for 1-2 stars and 'neutral' for anything else, and
               score is the score reported for the predicted label.
               (None, None) is returned if an error occurs.
    """
    positive_labels = ('4 stars', '5 stars')
    negative_labels = ('1 star', '2 stars')

    try:
        # Only the first prediction returned by the analyzer is used.
        prediction = sentiment_analyzer(text)[0]
        stars = prediction['label']
        confidence = prediction['score']

        # Any label outside the two groups (e.g. '3 stars') counts as neutral.
        simplified = (
            'positive' if stars in positive_labels
            else 'negative' if stars in negative_labels
            else 'neutral'
        )
        return simplified, confidence
    except Exception as e:
        print(f"Error analyzing sentiment: {e}")
        return None, None

# The summary produced earlier is the text whose sentiment is scored
feedback_text = summary
sentiment_label, sentiment_score = analyze_sentiment_simplified(text=feedback_text)

# A missing label means the analysis failed (the helper returns (None, None)).
if not sentiment_label:
    print("Could not determine sentiment.")
else:
    print(f"Sentiment: {sentiment_label} (Score: {sentiment_score:.2f})")

Device set to use cuda:0


Sentiment: neutral (Score: 0.43)


Processed Feedback

In [13]:
# Bundles the outputs of the earlier cells (`result`, `summary`, `top_concerns`,
# `sentiment_label`, `sentiment_score`) into a single record.

processed_feedback = {
    "transcription": result.get("text"),  # None when the key is absent
    "summary": summary,
    "concern_tags": [],
    "sentiment": sentiment_label,
    "sentiment_score": sentiment_score,
}

# Only a list of (tag, score) pairs is expanded; anything else (e.g. None after a
# failed tagging step) leaves "concern_tags" empty.
if isinstance(top_concerns, list):
    for tag, score in top_concerns:
        processed_feedback["concern_tags"].append(dict(tag=tag, score=score))

print(processed_feedback)

{'transcription': ' Hospital arrangements were good, the beds were clean, the food was healthy and good, but the nurses were not on time and the medicines were not properly administered.', 'summary': 'Hospital arrangements were good, the beds were clean, the food was healthy and good, but the nurses were not on time and the medicines were not properly administered.', 'concern_tags': [{'tag': 'Food and Amenities', 'score': 0.34385946393013}, {'tag': 'Cleanliness', 'score': 0.1792309433221817}, {'tag': 'Staff', 'score': 0.11391789466142654}], 'sentiment': 'neutral', 'sentiment_score': 0.4261808395385742}
