## 1. Medical NLP Summarization

In [29]:
import warnings
warnings.filterwarnings('ignore')

Medical Details Extraction with pre-trained model "dslim/bert-base-NER"

In [35]:
import json
from transformers import AutoTokenizer, AutoModelForTokenClassification, pipeline

# Load general NER model for names
tokenizer = AutoTokenizer.from_pretrained("dslim/bert-base-NER")
model = AutoModelForTokenClassification.from_pretrained("dslim/bert-base-NER")
nlp = pipeline("ner", model=model, tokenizer=tokenizer)

# Load biomedical NER model for medical entities
bio_pipe = pipeline("ner", model="d4data/biomedical-ner-all", aggregation_strategy="simple")

def extract_entity_words(ner_results, entity_code='B-PER'):
    """Extracts words for specified entity types, default to 'B-PER' for persons."""
    return [result['word'] for result in ner_results if result['entity'] == entity_code]

def merge_subwords(tokens):
    """Merges broken subword tokens properly."""
    merged_tokens = []
    current_token = ""
    for token in tokens:
        if token.startswith('##'):
            current_token += token[2:]
        else:
            if current_token:
                merged_tokens.append(current_token)
            current_token = token
    if current_token:
        merged_tokens.append(current_token)
    return merged_tokens

def extract_relevant_entities(ner_results):
    """Extracts medical entities like symptoms and treatments."""
    extracted_entities = {
        "Biological_structure": set(),
        "Symptoms": [],
        "Treatments": []
    }
    temp_symptoms = set()
    temp_treatments = []

    for entry in ner_results:
        if entry['entity_group'] == 'Biological_structure':
            extracted_entities["Biological_structure"].add(entry['word'])
        elif entry['entity_group'] == 'Sign_symptom':
            temp_symptoms.add(entry['word'])
        elif entry['entity_group'] == 'Therapeutic_procedure':
            temp_treatments.append(entry['word'])

    extracted_entities["Biological_structure"] = list(extracted_entities["Biological_structure"])
    extracted_entities["Symptoms"] = list(merge_subwords(temp_symptoms))
    extracted_entities["Treatments"] = merge_subwords(temp_treatments)
    return extracted_entities


# Example text
text = """
Physician: Good morning, Ms. Jones. How are you feeling today?
    Patient: Good morning, doctor. I'm doing better, but I still have some discomfort now and then.
    Physician: I understand you were in a car accident last September. Can you walk me through what happened?
    Patient: Yes, it was on September 1st, around 12:30 in the afternoon. I was driving from Cheadle Hulme to Manchester when I had to stop in traffic. Out of nowhere, another car hit me from behind, which pushed my car into the one in front.
    Physician: That sounds like a strong impact. Were you wearing your seatbelt?
    Patient: Yes, I always do.
    Physician: What did you feel immediately after the accident?
    Patient: At first, I was just shocked. But then I realized I had hit my head on the steering wheel, and I could feel pain in my neck and back almost right away.
    Physician: Did you seek medical attention at that time?
    Patient: Yes, I went to Moss Bank Accident and Emergency. They checked me over and said it was a whiplash injury, but they didn't do any X-rays. They just gave me some advice and sent me home.
    Physician: How did things progress after that?
    Patient: The first four weeks were rough. My neck and back pain were really bad—I had trouble sleeping and had to take painkillers regularly. It started improving after that, but I had to go through ten sessions of physiotherapy to help with the stiffness and discomfort.
    Physician: That makes sense. Are you still experiencing pain now?
    Patient: It's not constant, but I do get occasional backaches. It's nothing like before, though.
    Physician: That's good to hear. Have you noticed any other effects, like anxiety while driving or difficulty concentrating?
    Patient: No, nothing like that. I don't feel nervous driving, and I haven't had any emotional issues from the accident.
    Physician: And how has this impacted your daily life? Work, hobbies, anything like that?
    Patient: I had to take a week off work, but after that, I was back to my usual routine. It hasn't really stopped me from doing anything.
    Physician: That's encouraging. Let's go ahead and do a physical examination to check your mobility and any lingering pain.
    [Physical Examination Conducted]
    Physician: Everything looks good. Your neck and back have a full range of movement, and there's no tenderness or signs of lasting damage. Your muscles and spine seem to be in good condition.
    Patient: That's a relief!
    Physician: Yes, your recovery so far has been quite positive. Given your progress, I'd expect you to make a full recovery within six months of the accident. There are no signs of long-term damage or degeneration.
    Patient: That's great to hear. So, I don't need to worry about this affecting me in the future?
    Physician: That's right. I don't foresee any long-term impact on your work or daily life. If anything changes or you experience worsening symptoms, you can always come back for a follow-up. But at this point, you're on track for a full recovery.
    Patient: Thank you, doctor. I appreciate it.
    Physician: You're very welcome, Ms. Jones. Take care, and don't hesitate to reach out if you need anything.+-
"""

# Extract names using the general NER model
ner_results = nlp(text)
patient_names = extract_entity_words(ner_results)

# Extract medical information using the biomedical NER model
bio_ner_results = bio_pipe(text)
filtered_results = extract_relevant_entities(bio_ner_results)

final_results = {"Patient_Name": patient_names[0] if patient_names else "Unknown"}
final_results.update(filtered_results)  # Merge filtered_results directly into final_results

# Convert dictionary to JSON string for display or storage
json_output = json.dumps(final_results, indent=2)
print(json_output)

Some weights of the model checkpoint at dslim/bert-base-NER were not used when initializing BertForTokenClassification: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight']
- This IS expected if you are initializing BertForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForTokenClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Device set to use cpu
Device set to use cpu


{
  "Patient_Name": "Jones",
  "Biological_structure": [
    "neck",
    "back"
  ],
  "Symptoms": [
    "discomfort",
    "anxiety",
    "pain",
    "stiffache"
  ],
  "Treatments": [
    "painkill",
    "physiotherapy"
  ]
}


### Summarization with pre-trained model on Hugging Face "facebook/bart-large-cnn"


In [36]:
# Load summarization pipeline
summarizer = pipeline("summarization", model="facebook/bart-large-cnn")

def summarize_text(text):
    summary = summarizer(text, max_length=130, min_length=30, do_sample=False)
    return summary[0]['summary_text']

# Example conversation
conversation = """
Good morning, Ms. Jones. How are you feeling today?
Good morning, doctor. I’m doing better, but I still have some discomfort now and then.
I understand you were in a car accident last September. Can you walk me through what happened?
Yes, it was on September 1st, around 12:30 in the afternoon. I was driving from Cheadle Hulme to Manchester when I had to stop in traffic. Out of nowhere, another car hit me from behind, which pushed my car into the one in front.
That sounds like a strong impact. Were you wearing your seatbelt?
Yes, I always do.
What did you feel immediately after the accident?
At first, I was just shocked. But then I realized I had hit my head on the steering wheel, and I could feel pain in my neck and back almost right away.
"""

# Extract entities and summarize
summary = summarize_text(conversation)

print("Summary:", summary)


Device set to use cpu


Summary: Ms. Jones was driving from Cheadle Hulme to Manchester when she had to stop in traffic. Out of nowhere, another car hit her from behind, which pushed her car into the one in front.


## For more efficient and seamless workflow, it could be done with LLM as well, for example, here we use "gemini-2.0-flash" model.

In [11]:
import google.generativeai as genai
import json
import os

class MedicalNLPPipeline:
    def __init__(self):

        genai.configure(api_key="GEMINI_API_KEY")
        self.model = genai.GenerativeModel('gemini-2.0-flash')

    def extract_medical_entities(self, transcript):
        """
        Extract medical entities such as symptoms, diagnosis, treatment, and prognosis
        using the Gemini API.
        """
        prompt = f"""
        Extract medical entities from the following physician-patient conversation.

        Conversation:
        {transcript}

        Extract the following entities:
        1. Patient_Name: The name of the patient
        2. Symptoms: List of symptoms mentioned
        3. Diagnosis: Any diagnosis made
        4. Treatment: List of treatments mentioned
        5. Current_Status: Current state of the patient
        6. Prognosis: Expected outcome or future predictions

        Return the output as a JSON object with these fields.
        """

        response = self.model.generate_content(prompt)

        # Extract JSON from the response
        try:
            # First try to parse directly if the response is clean JSON
            result = json.loads(response.text)
        except json.JSONDecodeError:
            # If that fails, try to extract JSON from the text
            text = response.text
            json_start = text.find('{')
            json_end = text.rfind('}') + 1
            if json_start >= 0 and json_end > json_start:
                json_str = text[json_start:json_end]
                try:
                    result = json.loads(json_str)
                except json.JSONDecodeError:
                    result = {"error": "Could not parse JSON from response"}
            else:
                result = {"error": "No JSON found in response"}

        return result

    def generate_medical_summary(self, transcript):

        prompt = f"""
        Create a concise medical summary from the following physician-patient conversation.
        Focus on key medical information, diagnoses, treatments, and outcomes.

        Conversation:
        {transcript}

        Format the summary as a clinical note with clear sections.
        """

        response = self.model.generate_content(prompt)
        return response.text

    def process_transcript(self, transcript):
        """
        Process the entire transcript and return a comprehensive analysis.
        """
        entities = self.extract_medical_entities(transcript)
        summary = self.generate_medical_summary(transcript)

        return {
            "medical_entities": entities,
            "medical_summary": summary
        }


# Example usage
if __name__ == "__main__":
    # Sample transcript from the assignment
    transcript = """
    Physician: Good morning, Ms. Jones. How are you feeling today?
    Patient: Good morning, doctor. I'm doing better, but I still have some discomfort now and then.
    Physician: I understand you were in a car accident last September. Can you walk me through what happened?
    Patient: Yes, it was on September 1st, around 12:30 in the afternoon. I was driving from Cheadle Hulme to Manchester when I had to stop in traffic. Out of nowhere, another car hit me from behind, which pushed my car into the one in front.
    Physician: That sounds like a strong impact. Were you wearing your seatbelt?
    Patient: Yes, I always do.
    Physician: What did you feel immediately after the accident?
    Patient: At first, I was just shocked. But then I realized I had hit my head on the steering wheel, and I could feel pain in my neck and back almost right away.
    Physician: Did you seek medical attention at that time?
    Patient: Yes, I went to Moss Bank Accident and Emergency. They checked me over and said it was a whiplash injury, but they didn't do any X-rays. They just gave me some advice and sent me home.
    Physician: How did things progress after that?
    Patient: The first four weeks were rough. My neck and back pain were really bad—I had trouble sleeping and had to take painkillers regularly. It started improving after that, but I had to go through ten sessions of physiotherapy to help with the stiffness and discomfort.
    Physician: That makes sense. Are you still experiencing pain now?
    Patient: It's not constant, but I do get occasional backaches. It's nothing like before, though.
    Physician: That's good to hear. Have you noticed any other effects, like anxiety while driving or difficulty concentrating?
    Patient: No, nothing like that. I don't feel nervous driving, and I haven't had any emotional issues from the accident.
    Physician: And how has this impacted your daily life? Work, hobbies, anything like that?
    Patient: I had to take a week off work, but after that, I was back to my usual routine. It hasn't really stopped me from doing anything.
    Physician: That's encouraging. Let's go ahead and do a physical examination to check your mobility and any lingering pain.
    [Physical Examination Conducted]
    Physician: Everything looks good. Your neck and back have a full range of movement, and there's no tenderness or signs of lasting damage. Your muscles and spine seem to be in good condition.
    Patient: That's a relief!
    Physician: Yes, your recovery so far has been quite positive. Given your progress, I'd expect you to make a full recovery within six months of the accident. There are no signs of long-term damage or degeneration.
    Patient: That's great to hear. So, I don't need to worry about this affecting me in the future?
    Physician: That's right. I don't foresee any long-term impact on your work or daily life. If anything changes or you experience worsening symptoms, you can always come back for a follow-up. But at this point, you're on track for a full recovery.
    Patient: Thank you, doctor. I appreciate it.
    Physician: You're very welcome, Ms. Jones. Take care, and don't hesitate to reach out if you need anything.
    """

    pipeline = MedicalNLPPipeline()
    results = pipeline.process_transcript(transcript)

    # Print the results
    print("MEDICAL ENTITIES:")
    print(json.dumps(results["medical_entities"], indent=2))

    print("\nMEDICAL SUMMARY:")
    print(results["medical_summary"])


MEDICAL ENTITIES:
{
  "Patient_Name": "Ms. Jones",
  "Symptoms": [
    "discomfort",
    "hit head on steering wheel",
    "neck pain",
    "back pain",
    "trouble sleeping",
    "stiffness",
    "backaches"
  ],
  "Diagnosis": [
    "whiplash injury"
  ],
  "Treatment": [
    "painkillers",
    "physiotherapy (ten sessions)",
    "advice"
  ],
  "Current_Status": "Occasional backaches, but not constant. Neck and back have full range of movement, no tenderness or signs of lasting damage. Muscles and spine seem to be in good condition.",
  "Prognosis": "Expect to make a full recovery within six months of the accident. No signs of long-term damage or degeneration. No long-term impact on work or daily life is foreseen."
}

MEDICAL SUMMARY:
## Clinical Note

**Patient:** Ms. Jones
**Date:** Current Date (Assumed)

**Chief Complaint:** Occasional backaches.

**History of Present Illness:** Ms. Jones presents for follow-up after being involved in a motor vehicle accident on September 1st. 

## Sentiment & Intent Analysis with VADER (Valence Aware Dictionary and sEntiment Reasoner), a lexicon and rule-based sentiment analysis tool

In [32]:
!pip install vaderSentiment



In [33]:
import json
import torch
from transformers import pipeline
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer

# Initialize VADER for sentiment analysis
vader_analyzer = SentimentIntensityAnalyzer()

# Initialize Hugging Face pipeline for intent classification
intent_pipeline = pipeline("zero-shot-classification", model="facebook/bart-large-mnli")

# Function to classify sentiment using VADER
def classify_sentiment_vader(text):
    scores = vader_analyzer.polarity_scores(text)
    if scores['compound'] >= 0.05:
        return "Reassured"
    elif scores['compound'] <= -0.05:
        return "Anxious"
    else:
        return "Neutral"

# Function to classify intent using a zero-shot model
def classify_intent(text):
    possible_intents = ["Seeking reassurance", "Reporting symptoms", "Expressing concern"]
    result = intent_pipeline(text, possible_intents)
    return result["labels"][0]  # Highest confidence intent

# Sample text
text = "I'm a bit worried about my back pain, but I hope it gets better soon."

# Generate output in JSON format
output = {
    "Sentiment": classify_sentiment_vader(text),
    "Intent": classify_intent(text)
}

print(json.dumps(output,indent=2))



Device set to use cpu


{
  "Sentiment": "Reassured",
  "Intent": "Expressing concern"
}
