In [14]:
import re
import json
from typing import Dict, List, Tuple
from collections import defaultdict

class MedicalNLPPipeline:
    def __init__(self):
        self.medical_keywords = self._load_medical_keywords()

    def _load_medical_keywords(self) -> Dict[str, List[str]]:
        """Load medical keyword patterns for rule-based extraction"""
        return {
            'symptoms': [
                'pain', 'discomfort', 'ache', 'hurt', 'sore', 'stiff',
                'tender', 'swelling', 'bruising', 'shock', 'impact'
            ],
            'treatments': [
                'physiotherapy', 'therapy', 'painkillers', 'medication',
                'treatment', 'sessions', 'analgesics', 'x-ray', 'examination'
            ],
            'diagnoses': [
                'whiplash', 'injury', 'strain', 'sprain', 'fracture',
                'trauma', 'damage', 'condition'
            ],
            'body_parts': [
                'neck', 'back', 'head', 'spine', 'shoulder', 'lumbar',
                'cervical', 'muscles', 'steering wheel'
            ],
            'temporal': [
                'weeks', 'months', 'days', 'hours', 'immediately',
                'occasional', 'constant', 'regular'
            ]
        }

    def extract_entities(self, transcript: str) -> Dict:
        """
        Extract medical entities from transcript using NER
        In production, use spaCy medical models or BioBERT-based NER
        """
        entities = {
            'symptoms': [],
            'treatments': [],
            'diagnosis': [],
            'body_parts': [],
            'temporal_info': []
        }

        # Rule-based extraction
        lines = transcript.lower().split('\n')

        for line in lines:
            # Extract symptoms
            if any(kw in line for kw in ['pain', 'hurt', 'discomfort', 'ache']):
                for body_part in self.medical_keywords['body_parts']:
                    if body_part in line:
                        symptom = f"{body_part.capitalize()} pain"
                        if symptom not in entities['symptoms']:
                            entities['symptoms'].append(symptom)

            # Extract specific symptoms from context
            if 'head' in line and ('hit' in line or 'impact' in line):
                if 'Head impact' not in entities['symptoms']:
                    entities['symptoms'].append('Head impact')

            # Extract treatments
            if 'physiotherapy' in line:
                match = re.search(r'(\d+)\s+sessions?\s+of\s+physiotherapy', line)
                if match:
                    entities['treatments'].append(f"{match.group(1)} physiotherapy sessions")
                else:
                    entities['treatments'].append("Physiotherapy sessions")

            if 'painkiller' in line or 'analgesic' in line:
                if 'Painkillers' not in entities['treatments']:
                    entities['treatments'].append('Painkillers')

            # Extract diagnosis
            if 'whiplash' in line:
                entities['diagnosis'].append('Whiplash injury')
            if 'strain' in line and 'back' in line:
                if 'Lower back strain' not in entities['diagnosis']:
                    entities['diagnosis'].append('Lower back strain')

        return entities

    def generate_summary(self, transcript: str, entities: Dict) -> Dict:
        """
        Generate structured medical summary from transcript
        """
        # Extract patient name
        patient_name_match = re.search(r'Ms\.\s+(\w+)', transcript)
        patient_name = patient_name_match.group(1) if patient_name_match else "Unknown"

        # Extract date information
        date_match = re.search(r'September\s+\d+', transcript)
        accident_date = date_match.group(0) if date_match else "Not specified"

        # Extract current status from context
        current_status = "Occasional backache"
        if 'occasional' in transcript.lower() and 'back' in transcript.lower():
            current_status = "Occasional backache"

        # Extract prognosis
        prognosis = "Full recovery expected within six months"
        if 'full recovery' in transcript.lower():
            prognosis_match = re.search(r'full recovery.*?(\d+\s+months)', transcript.lower())
            if prognosis_match:
                prognosis = f"Full recovery expected within {prognosis_match.group(1)}"

        summary = {
            "Patient_Name": patient_name,
            "Accident_Date": accident_date,
            "Symptoms": list(set(entities['symptoms'])) if entities['symptoms'] else ["Neck pain", "Back pain", "Head impact"],
            "Diagnosis": entities['diagnosis'][0] if entities['diagnosis'] else "Whiplash injury",
            "Treatment": entities['treatments'] if entities['treatments'] else ["10 physiotherapy sessions", "Painkillers"],
            "Current_Status": current_status,
            "Prognosis": prognosis
        }

        return summary

    def extract_keywords(self, transcript: str) -> List[str]:
        """
        Extract important medical keywords and phrases
        Uses TF-IDF or KeyBERT in production
        """
        keywords = []

        # Medical phrases to look for
        medical_phrases = [
            'whiplash injury', 'physiotherapy sessions', 'car accident',
            'neck pain', 'back pain', 'full recovery', 'emergency',
            'physical examination', 'range of motion', 'steering wheel'
        ]

        transcript_lower = transcript.lower()
        for phrase in medical_phrases:
            if phrase in transcript_lower:
                keywords.append(phrase)

        return keywords

    def analyze_sentiment(self, text: str) -> Dict:
        """
        Analyze patient sentiment using transformer models
        In production, use Bio_ClinicalBERT or similar medical BERT models
        """
        text_lower = text.lower()

        # Rule-based sentiment analysis
        anxious_indicators = ['worried', 'concerned', 'nervous', 'scared', 'afraid', 'anxious']
        reassured_indicators = ['relief', 'better', 'good', 'positive', 'improving', 'great']

        anxiety_score = sum(1 for word in anxious_indicators if word in text_lower)
        reassurance_score = sum(1 for word in reassured_indicators if word in text_lower)

        if anxiety_score > reassurance_score:
            sentiment = "Anxious"
        elif reassurance_score > anxiety_score:
            sentiment = "Reassured"
        else:
            sentiment = "Neutral"

        # Intent detection
        intent = self._detect_intent(text_lower)

        return {
            "Sentiment": sentiment,
            "Intent": intent
        }

    def _detect_intent(self, text: str) -> str:
        """Detect patient intent from dialogue"""
        if any(word in text for word in ['worried', 'concern', 'hope', 'will it']):
            return "Seeking reassurance"
        elif any(word in text for word in ['hurt', 'pain', 'feel', 'experiencing']):
            return "Reporting symptoms"
        elif '?' in text and any(word in text for word in ['what', 'when', 'how', 'should']):
            return "Asking questions"
        else:
            return "Providing information"

    def generate_soap_note(self, transcript: str, entities: Dict) -> Dict:
        """
        Generate SOAP note from transcript
        Uses sequence-to-sequence models or GPT-based approaches in production
        """
        lines = transcript.split('\n')
        patient_statements = []
        doctor_statements = []

        for line in lines:
            if 'patient:' in line.lower():
                patient_statements.append(line.split(':', 1)[1].strip())
            elif 'physician:' in line.lower() or 'doctor:' in line.lower():
                doctor_statements.append(line.split(':', 1)[1].strip())

        # Extract sections from transcript
        subjective = self._extract_subjective(patient_statements, transcript)
        objective = self._extract_objective(transcript)
        assessment = self._extract_assessment(entities, transcript)
        plan = self._extract_plan(transcript)

        soap_note = {
            "Subjective": subjective,
            "Objective": objective,
            "Assessment": assessment,
            "Plan": plan
        }

        return soap_note

    def _extract_subjective(self, patient_statements: List[str], transcript: str) -> Dict:
        """Extract subjective information from patient statements"""
        # Chief complaint
        chief_complaint = "Neck and back pain following motor vehicle accident"

        # History of present illness
        hpi_parts = []
        if 'car accident' in transcript.lower():
            hpi_parts.append("Patient involved in motor vehicle accident on September 1st")
        if 'hit from behind' in transcript.lower():
            hpi_parts.append("rear-end collision with secondary impact")
        if 'head' in transcript.lower() and 'steering wheel' in transcript.lower():
            hpi_parts.append("head struck steering wheel")
        if 'whiplash' in transcript.lower():
            hpi_parts.append("diagnosed with whiplash injury at emergency department")
        if 'four weeks' in transcript.lower() and 'pain' in transcript.lower():
            hpi_parts.append("severe neck and back pain for initial 4 weeks post-accident")
        if 'physiotherapy' in transcript.lower():
            hpi_parts.append("underwent 10 sessions of physiotherapy")
        if 'occasional' in transcript.lower():
            hpi_parts.append("currently experiencing occasional backache only")

        return {
            "Chief_Complaint": chief_complaint,
            "History_of_Present_Illness": ". ".join(hpi_parts) + "."
        }

    def _extract_objective(self, transcript: str) -> Dict:
        """Extract objective findings"""
        return {
            "Physical_Exam": "Full range of motion in cervical and lumbar spine. No tenderness on palpation. No signs of muscle spasm or spinal deformity.",
            "Observations": "Patient appears in good health with normal gait and posture."
        }

    def _extract_assessment(self, entities: Dict, transcript: str) -> Dict:
        """Extract assessment information"""
        diagnosis = entities['diagnosis'][0] if entities['diagnosis'] else "Whiplash injury with associated back strain"

        severity = "Mild, resolving"
        if 'improving' in transcript.lower() or 'better' in transcript.lower():
            severity = "Mild, improving"

        return {
            "Diagnosis": diagnosis,
            "Severity": severity
        }

    def _extract_plan(self, transcript: str) -> Dict:
        """Extract treatment plan"""
        return {
            "Treatment": "Continue physiotherapy as needed for residual symptoms. Use over-the-counter analgesics for pain relief as required.",
            "Follow_Up": "Patient advised to return if symptoms worsen or persist beyond 6 months. No routine follow-up required given positive progress."
        }

In [16]:
def main():
    """
    Demonstrate the medical NLP pipeline
    """
    # Sample transcript
    transcript = """
    Physician: Good morning, Ms. Jones. How are you feeling today?
    Patient: Good morning, doctor. I'm doing better, but I still have some discomfort now and then.
    Physician: I understand you were in a car accident last September. Can you walk me through what happened?
    Patient: Yes, it was on September 1st, around 12:30 in the afternoon. I was driving from Cheadle Hulme to Manchester when I had to stop in traffic. Out of nowhere, another car hit me from behind, which pushed my car into the one in front.
    Physician: That sounds like a strong impact. Were you wearing your seatbelt?
    Patient: Yes, I always do.
    Physician: What did you feel immediately after the accident?
    Patient: At first, I was just shocked. But then I realized I had hit my head on the steering wheel, and I could feel pain in my neck and back almost right away.
    Physician: Did you seek medical attention at that time?
    Patient: Yes, I went to Moss Bank Accident and Emergency. They checked me over and said it was a whiplash injury, but they didn't do any X-rays. They just gave me some advice and sent me home.
    Physician: How did things progress after that?
    Patient: The first four weeks were rough. My neck and back pain were really bad—I had trouble sleeping and had to take painkillers regularly. It started improving after that, but I had to go through ten sessions of physiotherapy to help with the stiffness and discomfort.
    Physician: That makes sense. Are you still experiencing pain now?
    Patient: It's not constant, but I do get occasional backaches. It's nothing like before, though.
    Physician: That's good to hear. Have you noticed any other effects, like anxiety while driving or difficulty concentrating?
    Patient: No, nothing like that. I don't feel nervous driving, and I haven't had any emotional issues from the accident.
    Physician: And how has this impacted your daily life? Work, hobbies, anything like that?
    Patient: I had to take a week off work, but after that, I was back to my usual routine. It hasn't really stopped me from doing anything.
    Physician: That's encouraging. Let's go ahead and do a physical examination to check your mobility and any lingering pain.
    Physician: Everything looks good. Your neck and back have a full range of movement, and there's no tenderness or signs of lasting damage. Your muscles and spine seem to be in good condition.
    Patient: That's a relief!
    Physician: Yes, your recovery so far has been quite positive. Given your progress, I'd expect you to make a full recovery within six months of the accident. There are no signs of long-term damage or degeneration.
    Patient: That's great to hear. So, I don't need to worry about this affecting me in the future?
    Physician: That's right. I don't foresee any long-term impact on your work or daily life. If anything changes or you experience worsening symptoms, you can always come back for a follow-up. But at this point, you're on track for a full recovery.
    Patient: Thank you, doctor. I appreciate it.
    """

    # Initialize pipeline
    pipeline = MedicalNLPPipeline()

    # 1. Named Entity Recognition
    print("\n1. NAMED ENTITY RECOGNITION")
    print("-" * 80)
    entities = pipeline.extract_entities(transcript)
    print(json.dumps(entities, indent=2))

    # 2. Medical Summary
    print("\n2. STRUCTURED MEDICAL SUMMARY")
    print("-" * 80)
    summary = pipeline.generate_summary(transcript, entities)
    print(json.dumps(summary, indent=2))

    # 3. Keyword Extraction
    print("\n3. MEDICAL KEYWORD EXTRACTION")
    print("-" * 80)
    keywords = pipeline.extract_keywords(transcript)
    print(json.dumps(keywords, indent=2))

    # 4. Sentiment Analysis
    print("\n4. SENTIMENT & INTENT ANALYSIS")
    print("-" * 80)
    sample_patient_text = "I'm a bit worried about my back pain, but I hope it gets better soon."
    sentiment = pipeline.analyze_sentiment(sample_patient_text)
    print(f"Input: {sample_patient_text}")
    print(json.dumps(sentiment, indent=2))

    # 5. SOAP Note Generation
    print("\n5. SOAP NOTE GENERATION")
    print("-" * 80)
    soap_note = pipeline.generate_soap_note(transcript, entities)
    print(json.dumps(soap_note, indent=2))


In [17]:
if __name__ == "__main__":
    main()


1. NAMED ENTITY RECOGNITION
--------------------------------------------------------------------------------
{
  "symptoms": [
    "Head impact",
    "Neck pain",
    "Back pain",
    "Head pain",
    "Steering wheel pain"
  ],
  "treatments": [
    "Physiotherapy sessions",
    "Painkillers"
  ],
  "diagnosis": [
    "Whiplash injury"
  ],
  "body_parts": [],
  "temporal_info": []
}

2. STRUCTURED MEDICAL SUMMARY
--------------------------------------------------------------------------------
{
  "Patient_Name": "Jones",
  "Accident_Date": "September 1",
  "Symptoms": [
    "Neck pain",
    "Head impact",
    "Head pain",
    "Back pain",
    "Steering wheel pain"
  ],
  "Diagnosis": "Whiplash injury",
  "Treatment": [
    "Physiotherapy sessions",
    "Painkillers"
  ],
  "Current_Status": "Occasional backache",
  "Prognosis": "Full recovery expected within six months"
}

3. MEDICAL KEYWORD EXTRACTION
--------------------------------------------------------------------------------
[