In [1]:
!pip install pymupdf
!pip install gradio
!pip install groq --upgrade
import json
import logging
import os
import re
import traceback
import uuid
from dataclasses import dataclass
from datetime import datetime
from typing import Any, Dict, List

import fitz  # for PDF parsing
import gradio as gr
import numpy as np
from groq import Groq

# Credentials must never live in the notebook source: read the key from the
# environment instead (e.g. set GROQ_API_KEY in the runtime before running).
# SECURITY: the key that used to be committed on this line has been exposed and
# should be treated as compromised -- revoke/rotate it in the Groq console.
# NOTE(review): when this is None the Groq client presumably falls back to the
# same environment variable and raises a clear error if it is missing -- confirm
# against the SDK documentation.
GROQ_API_KEY = os.environ.get("GROQ_API_KEY")

# Chat model used by every agent; override with the GROQ_MODEL environment variable.
# NOTE(review): this is a "preview" model id -- confirm it is still served.
MODEL = os.environ.get("GROQ_MODEL", 'llama3-groq-70b-8192-tool-use-preview')

Collecting pymupdf
  Downloading PyMuPDF-1.24.13-cp39-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl.metadata (3.4 kB)
Downloading PyMuPDF-1.24.13-cp39-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl (19.8 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m19.8/19.8 MB[0m [31m36.1 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: pymupdf
Successfully installed pymupdf-1.24.13
Collecting gradio
  Downloading gradio-5.5.0-py3-none-any.whl.metadata (16 kB)
Collecting aiofiles<24.0,>=22.0 (from gradio)
  Downloading aiofiles-23.2.1-py3-none-any.whl.metadata (9.7 kB)
Collecting fastapi<1.0,>=0.115.2 (from gradio)
  Downloading fastapi-0.115.4-py3-none-any.whl.metadata (27 kB)
Collecting ffmpy (from gradio)
  Downloading ffmpy-0.4.0-py3-none-any.whl.metadata (2.9 kB)
Collecting gradio-client==1.4.2 (from gradio)
  Downloading gradio_client-1.4.2-py3-none-any.whl.metadata (7.1 kB)
Collecting huggingface-hub>=0.25.1 (from gradio)
  Downloading hugging

In [2]:
@dataclass
class MedicalReport:
    """Plain record describing one medical report handed to the agents.

    Declared as a dataclass so the constructor (same positional/keyword
    parameters as before), a readable ``repr`` and value-based equality are
    generated instead of hand-written.

    Attributes:
        patient_id: Identifier of the patient the report belongs to.
        report_text: Raw text of the report.
        report_date: Timestamp associated with the report.
        report_type: Free-form label for the kind of report.
    """

    patient_id: str
    report_text: str
    report_date: datetime
    report_type: str

class BaseAgent:
    """Common base for the pipeline agents: stores a name and declares ``process``."""

    def __init__(self, name: str):
        """Remember the agent's name.

        Args:
            name: Identifier of the agent; ReportAnalysisSystem dispatches on
                this string, so subclasses pass a fixed value.
        """
        self.name = name

    def process(self, report: "MedicalReport", *args: Any, **kwargs: Any) -> Dict:
        """Run this agent on ``report``; subclasses must override.

        Extra positional/keyword arguments are accepted because some agents in
        this notebook take a second input (DiagnosisAgent receives the entity
        dict, AlertAgent the diagnosis list).

        Args:
            report: The report being analysed.
            *args: Agent-specific additional inputs.
            **kwargs: Agent-specific additional inputs.

        Returns:
            A dict with the agent's results (defined by each subclass).

        Raises:
            NotImplementedError: Always, on the base class.
        """
        raise NotImplementedError(f"{type(self).__name__} must implement process()")

class TextPreprocessingAgent(BaseAgent):
    """Agent that produces a lower-cased, punctuation-free copy of the report text."""

    def __init__(self):
        super().__init__("TextPreprocessingAgent")

    def process(self, report: MedicalReport) -> Dict:
        """Normalize ``report.report_text``.

        Steps: lower-case the text, replace every character that is neither a
        word character nor whitespace with a space (so ``5.6`` becomes ``5 6``),
        then collapse all whitespace runs into single spaces.

        Args:
            report: Report whose ``report_text`` is normalized; it is not modified.

        Returns:
            ``{"processed_text": <normalized text>}``.
        """
        lowered = report.report_text.lower()
        # Punctuation and symbols become spaces rather than being deleted.
        without_symbols = re.sub(r'[^\w\s]', ' ', lowered)
        # split() with no argument also trims leading/trailing whitespace.
        tokens = without_symbols.split()
        return {"processed_text": ' '.join(tokens)}

In [3]:
import traceback

class EntityExtractionAgent(BaseAgent):
    """Agent that asks the Groq chat model for conditions, medications and tests."""

    # Categories requested in the system prompt; each is always present in the result.
    ENTITY_CATEGORIES = ("conditions", "medications", "tests")

    def __init__(self):
        super().__init__("EntityExtractionAgent")
        self.groq_client = Groq(api_key=GROQ_API_KEY)

    @staticmethod
    def _clean_entity_list(raw: Any) -> List[str]:
        """Turn one category value from the model into an ordered list of unique strings.

        The model is asked for lists of strings but is not guaranteed to comply,
        so this tolerates ``None``, a bare scalar/string, and non-string items
        (which are serialized to JSON text instead of crashing on hashing).
        First-seen order is preserved.
        """
        if raw is None:
            return []
        if not isinstance(raw, (list, tuple)):
            # A bare string must not be iterated character by character.
            raw = [raw]

        unique: List[str] = []
        seen = set()
        for item in raw:
            if item is None:
                continue
            if isinstance(item, str):
                label = item
            else:
                label = json.dumps(item, sort_keys=True, default=str)
            label = label.strip()
            if label and label not in seen:
                seen.add(label)
                unique.append(label)
        return unique

    def process(self, report: MedicalReport) -> Dict:
        """Extract medical entities from the raw report text.

        Sends ``report.report_text`` (the unprocessed text) to the chat model
        in JSON mode and normalizes the reply.

        Args:
            report: Report whose ``report_text`` is sent to the model.

        Returns:
            ``{"entities": {"conditions": [...], "medications": [...], "tests": [...]}}``.
            If the request or parsing fails, every list is empty and an extra
            ``"error"`` key holds the exception message, so a failure can be told
            apart from a report that simply contains no entities.
        """
        try:
            response = self.groq_client.chat.completions.create(
                model=MODEL,
                messages=[
                    {
                        "role": "system",
                        "content": """You are an expert medical entity extractor.
                        Extract medical entities from the given text.
                        Provide a structured JSON response with these categories:
                        - conditions: specific medical conditions
                        - medications: any medications mentioned
                        - tests: medical tests performed and preventive measures
                        Ensure each category is a clean list of unique entities."""
                    },
                    {
                        "role": "user",
                        "content": f"Extract medical entities from this text: {report.report_text}"
                    }
                ],
                response_format={"type": "json_object"},
                max_tokens=4096,
                temperature=0.5
            )

            # Parse the response
            entities_response = json.loads(response.choices[0].message.content)
            if not isinstance(entities_response, dict):
                raise ValueError("model response is not a JSON object")

            # Clean and validate the entities
            cleaned_entities = {
                category: self._clean_entity_list(entities_response.get(category))
                for category in self.ENTITY_CATEGORIES
            }

            return {"entities": cleaned_entities}

        except Exception as e:
            # Best effort: the pipeline keeps going with empty entities, but the
            # failure is reported to the caller as well as printed.
            print(f"Error in entity extraction: {e}")
            return {
                "entities": {category: [] for category in self.ENTITY_CATEGORIES},
                "error": str(e)
            }

class DiagnosisAgent(BaseAgent):
    """Agent that asks the Groq chat model for potential diagnoses given extracted entities."""

    def __init__(self):
        super().__init__("DiagnosisAgent")
        self.groq_client = Groq(api_key=GROQ_API_KEY)

    def process(self, report: MedicalReport, entities: Dict) -> Dict:
        """Generate diagnosis lines from the extracted entities.

        Args:
            report: The report being analysed (not read by this agent).
            entities: Mapping of category name to a list of entity values, as
                produced by the entity-extraction step.

        Returns:
            ``{"diagnoses": [...]}`` where the list holds the non-empty lines of
            the model's answer, minus lines that start with ``"Based on"``.
            A single placeholder entry is returned when there are no entities
            or when the request fails.
        """
        try:
            # Flatten and filter entities; coerce to text so join() cannot fail
            # on non-string values coming back from the model.
            all_entities = []
            for items in (entities or {}).values():
                if isinstance(items, str):
                    # A bare string must not be iterated character by character.
                    items = [items]
                for item in items or []:
                    label = str(item).strip() if item else ""
                    if label:
                        all_entities.append(label)

            # If no entities, return default response
            if not all_entities:
                return {"diagnoses": ["No significant medical entities detected"]}

            response = self.groq_client.chat.completions.create(
                model=MODEL,
                messages=[
                    {
                        "role": "system",
                        "content": """You are an expert medical diagnostician.
                        Generate a concise, structured list of potential diagnoses
                        based on the medical entities.
                        For each diagnosis:
                        - Provide a clear, brief explanation
                        - Highlight key diagnostic considerations and preventive measures
                        """
                    },
                    {
                        "role": "user",
                        "content": f"Analyze these medical entities and provide potential diagnoses: {', '.join(all_entities)}"
                    }
                ],
                max_tokens=4096,
                temperature=0.5
            )

            # Clean and structure diagnoses: one entry per non-empty line.
            diagnoses_text = (response.choices[0].message.content or "").strip()
            diagnoses = []
            for line in diagnoses_text.split('\n'):
                stripped = line.strip()
                # Test the stripped line so an indented "Based on ..." preamble
                # is filtered out as well.
                if stripped and not stripped.startswith('Based on'):
                    diagnoses.append(stripped)

            return {"diagnoses": diagnoses}

        except Exception as e:
            # Best effort: report the failure and hand back a placeholder entry.
            print(f"Error in diagnosis generation: {e}")
            return {"diagnoses": ["Diagnosis generation failed"]}



In [4]:
import logging
# Configure root logging at INFO level and create a logger named after this module.
# NOTE(review): no code visible in this notebook calls `logger`; the agents
# report their errors with print() instead.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

class AlertAgent(BaseAgent):
    """Agent that turns critical keywords and diagnosis lines into alert messages."""

    def __init__(self):
        super().__init__("AlertAgent")
        # Phrases looked up (as plain substrings) in the lower-cased report text.
        self.critical_terms = [
            "urgent", "critical", "emergency",
            "immediate attention", "severe",
            "life-threatening", "high risk"
        ]

    def process(self, report: MedicalReport, diagnoses: List[str]) -> Dict:
        """Build the alert list for a report.

        Args:
            report: Report whose ``report_text`` is scanned for critical terms.
            diagnoses: Diagnosis lines; every non-empty one becomes an alert.

        Returns:
            ``{"alerts": [...]}``: keyword alerts first (in ``critical_terms``
            order), then one alert per diagnosis. A single "no immediate
            critical findings" entry is used when nothing else was produced,
            and a single failure entry if alert building raises.
        """
        haystack = report.report_text.lower()

        try:
            alerts = []

            # Keyword hits: substring match, so a term also matches inside longer words.
            for term in self.critical_terms:
                if term in haystack:
                    alerts.append(f"CRITICAL ALERT: Urgent term '{term}' detected in report")

            # One alert per non-empty diagnosis line.
            for diagnosis in diagnoses:
                if diagnosis:
                    alerts.append(f"MEDICAL ALERT: {diagnosis}")

            # Always return at least one entry.
            if len(alerts) == 0:
                alerts.append("MEDICAL REVIEW: No immediate critical findings")

            return {"alerts": alerts}

        except Exception as e:
            print(f"Error in alert generation: {e}")
            return {"alerts": ["Alert generation failed"]}

class ReportAnalysisSystem:
    """Runs a report through the agents in order and bundles their outputs."""

    def __init__(self):
        # Order matters: diagnosis needs the extraction output, alerts need the diagnoses.
        self.agents = [
            TextPreprocessingAgent(),
            EntityExtractionAgent(),
            DiagnosisAgent(),
            AlertAgent()
        ]

    def analyze_report(self, report_text: str) -> Dict:
        """Analyse ``report_text`` with every agent in ``self.agents``.

        Agents are dispatched on their ``name``; names this method does not
        know are skipped, and the diagnosis step is skipped when no
        entity-extraction result is available.

        Args:
            report_text: Raw text of the report.

        Returns:
            On success, a dict with ``patient_id`` (a freshly generated UUID
            string), ``report_date`` (ISO-8601 string of the current time),
            ``report_type`` and ``analysis_results`` (agent name -> that
            agent's output). If anything raises, a dict with ``error``,
            ``details`` and ``traceback`` instead.
        """
        try:
            report = MedicalReport(
                patient_id=str(uuid.uuid4()),  # no real patient id is available here
                report_text=report_text,
                report_date=datetime.now(),
                report_type="PDF Report"
            )

            stage_outputs = {}
            extraction = None      # output of the entity-extraction stage, once it has run
            diagnosis_lines = []   # handed to the alert stage

            for agent in self.agents:
                stage = agent.name

                if stage in ("TextPreprocessingAgent", "EntityExtractionAgent"):
                    outcome = agent.process(report)
                    if stage == "EntityExtractionAgent":
                        extraction = outcome
                elif stage == "DiagnosisAgent":
                    if not extraction:
                        # Nothing to diagnose from; leave this stage out of the results.
                        continue
                    outcome = agent.process(report, extraction.get('entities', {}))
                    diagnosis_lines = outcome.get('diagnoses', [])
                elif stage == "AlertAgent":
                    outcome = agent.process(report, diagnosis_lines)
                else:
                    continue

                stage_outputs[stage] = outcome

            return {
                "patient_id": report.patient_id,
                "report_date": report.report_date.isoformat(),
                "report_type": report.report_type,
                "analysis_results": stage_outputs
            }

        except Exception as e:
            return {
                "error": "Analysis failed",
                "details": str(e),
                "traceback": traceback.format_exc()
            }

def extract_text_from_pdf(pdf_file):
    """Return the text of every page of a PDF, or ``""`` if none could be read.

    Args:
        pdf_file: Whatever ``fitz.open`` accepts for the document to read.

    Returns:
        The page texts concatenated, each non-empty page followed by a newline.
        An empty string is returned when fewer than 10 non-whitespace-trimmed
        characters were extracted, or when opening/reading the file raises
        (the error and its traceback are printed in that case).
    """
    try:
        with fitz.open(pdf_file) as document:
            page_texts = [page.get_text() for page in document]

        # Pages without text contribute nothing, not even a newline.
        extracted = "".join(chunk + "\n" for chunk in page_texts if chunk)

        # Treat near-empty results as a failed extraction.
        if len(extracted.strip()) < 10:
            print("WARNING: Extracted PDF text is too short")
            return ""

        return extracted
    except Exception as e:
        print(f"Error extracting PDF text: {e}")
        traceback.print_exc()
        return ""

def analyze_medical_report(pdf_file):
    """Gradio callback: extract text from an uploaded PDF and analyse it.

    Args:
        pdf_file: The uploaded file as passed in by the interface; a falsy
            value means nothing was uploaded.

    Returns:
        A ``(results, message)`` pair. ``results`` is the analysis dict, or an
        error dict with ``error``/``details`` (plus ``traceback`` for
        unexpected exceptions). ``message`` is the ``error`` value of the
        analysis dict or ``'Analysis completed successfully'``; for the error
        dicts built here it is their string form.
    """
    def _failure(error, details, with_traceback=False):
        # Builds the (dict, text) pair used for every failure reported by this function.
        payload = {"error": error, "details": details}
        if with_traceback:
            payload["traceback"] = traceback.format_exc()
        return payload, str(payload)

    try:
        # Nothing uploaded
        if not pdf_file:
            return _failure("No PDF file provided", "Please upload a valid PDF file")

        report_text = extract_text_from_pdf(pdf_file)

        # Extraction yields "" for unreadable or near-empty documents
        if not report_text:
            return _failure("Could not extract text from PDF", "The PDF may be empty or unreadable")

        # A fresh pipeline is built for every request
        results = ReportAnalysisSystem().analyze_report(report_text)

        return results, results.get('error', 'Analysis completed successfully')

    except Exception as e:
        return _failure("Comprehensive analysis error", str(e), with_traceback=True)

In [None]:
def main():
    """Build the Gradio UI around ``analyze_medical_report`` and launch it.

    The interface takes one uploaded file and shows the analysis dict as JSON
    plus a status/error text box. ``launch(debug=True)`` is used; the captured
    output of this cell notes that in Colab this keeps the cell running and
    that sharing (a public URL) is switched on automatically unless
    ``share=False`` is passed.
    """
    upload_input = gr.File(label="Upload PDF Medical Report")
    result_outputs = [
        gr.JSON(label="Analysis Results"),
        gr.Textbox(label="Error/Status Messages")
    ]

    app = gr.Interface(
        fn=analyze_medical_report,
        inputs=upload_input,
        outputs=result_outputs,
        title="Medical Report Analysis System",
        description="Upload a PDF medical report for comprehensive analysis"
    )
    app.launch(debug=True)


if __name__ == "__main__":
    main()

Running Gradio in a Colab notebook requires sharing enabled. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. This cell will run indefinitely so that you can see errors and logs. To turn off, set debug=False in launch().
* Running on public URL: https://854078236c349d1868.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)
