PDF Text extraction

In [2]:
import fitz

def extract_text_from_pdf(pdf_path):
    """Return the concatenated plain text of every page in a PDF.

    Args:
        pdf_path: Path of the PDF file, passed straight to ``fitz.open``.

    Returns:
        The text of all pages joined in page order, with leading and
        trailing whitespace stripped.
    """
    # The context manager closes the document even when a page fails to
    # extract; with a bare close() call an exception would skip the cleanup.
    with fitz.open(pdf_path) as document:
        # join() builds the result once instead of re-copying a growing
        # string for every page.
        return "".join(page.get_text() for page in document).strip()

# Extract the sample report, then show only its first 500 characters.
pdf_text = extract_text_from_pdf("report.pdf")
preview = pdf_text[:500]
print(f"Extracted text (first 500 chars): {preview}")


Extracted text (first 500 chars): BP:190,
Blood Sugar:34,
Heart Rate:90


Qdrant code (NOT USED)

In [4]:
from qdrant_client import models, QdrantClient
from sentence_transformers import SentenceTransformer

# Sentence-embedding model plus a Qdrant client.
encoder = SentenceTransformer("all-MiniLM-L6-v2")
client = QdrantClient(":memory:")  # in-memory only; swap in cluster host info for persistent storage

collection_name = "medical_reports"

# The collection's vector size is taken from the encoder's embedding
# dimension, and vectors are compared with cosine distance.
vector_params = models.VectorParams(
    size=encoder.get_sentence_embedding_dimension(),
    distance=models.Distance.COSINE,
)
client.create_collection(
    collection_name=collection_name,
    vectors_config=vector_params,
)

def chunk_text(text, chunk_size=500):
    """Split ``text`` into consecutive pieces of at most ``chunk_size`` characters.

    Args:
        text: The string to split.
        chunk_size: Maximum length of each piece; must be a positive integer.

    Returns:
        A list of substrings in original order. Every piece has exactly
        ``chunk_size`` characters except possibly the last one; an empty
        ``text`` yields an empty list.

    Raises:
        ValueError: If ``chunk_size`` is less than 1.
    """
    # A negative step makes range() empty, which would silently drop all of
    # the text; zero fails inside range() with an unrelated-looking message.
    # Reject both up front with a clear error.
    if chunk_size < 1:
        raise ValueError(f"chunk_size must be a positive integer, got {chunk_size!r}")
    return [text[start:start + chunk_size] for start in range(0, len(text), chunk_size)]

# Break the extracted report text into fixed-size pieces.
chunks = chunk_text(pdf_text)

# One point per chunk: the id is the chunk's position in the list, the vector
# is its embedding, and the chunk text itself is stored in the payload.
points = [
    models.PointStruct(
        id=position,
        vector=encoder.encode(piece).tolist(),
        payload={"chunk": piece},
    )
    for position, piece in enumerate(chunks)
]
client.upload_points(collection_name=collection_name, points=points)

print(f"Stored {len(chunks)} chunks in Qdrant.")


def query_relevant_chunks(query_text, top_k=5):
    """Embed ``query_text`` and return the chunk text of the points the query yields.

    Args:
        query_text: Text to encode and use as the query vector.
        top_k: Value passed as the result ``limit`` of the collection query.

    Returns:
        A list with the ``"chunk"`` payload value of every returned point,
        in the order the client returned them.
    """
    embedding = encoder.encode(query_text).tolist()
    result = client.query_points(
        collection_name=collection_name,
        query=embedding,
        limit=top_k,
    )
    texts = []
    for point in result.points:
        texts.append(point.payload["chunk"])
    return texts

# Run a summarization-style query and preview each chunk that comes back.
query = "summarize the medical findings"
relevant_chunks = query_relevant_chunks(query)

print("Relevant chunks for summarization:")
for chunk in relevant_chunks:
    head = chunk[:200]  # only the first 200 characters are shown
    print(head, "...")





  from .autonotebook import tqdm as notebook_tqdm


KeyboardInterrupt: 

In [3]:
from openai import OpenAI
from settings import *
import os

# Generation settings used by the request below.
generation_model_name: str  # annotation only; no value is assigned here
llm_name: str = "Meta-Llama"
temperature: float = 0.9
top_p: float = 0.9
max_tokens: int = 2048
stream: bool = True

# Short display name -> model identifier sent in the request.
monster_ai_model_name = {
    "Google-Gemma": "google/gemma-2-9b-it",
    "Mistral": "mistralai/Mistral-7B-Instruct-v0.2",
    "Microsoft-Phi": "microsoft/Phi-3-mini-4k-instruct",
    "Meta-Llama": "meta-llama/Meta-Llama-3.1-8B-Instruct",
}

# OpenAI client pointed at the Monster API base URL.
monster_client = OpenAI(
    base_url="https://llm.monsterapi.ai/v1/",
    api_key=str(MONSTER_API_KEY),
)

# Conversation: a system instruction followed by the user request, which
# embeds the full extracted report text.
system_turn = {
    "role": "system",
    "content": "You are an AI assistant skilled in summarizing complex medical reports.",
}
user_turn = {
    "role": "user",
    "content": f"Summarize the following medical report in simple terms. Focus on the key findings, diagnoses, and recommendations. Report text:\n\n{pdf_text}",
}
message = [system_turn, user_turn]

# Request the summary from the selected model.
response = monster_client.chat.completions.create(
    model=monster_ai_model_name[llm_name],
    messages=message,
    temperature=temperature,
    top_p=top_p,
    max_tokens=max_tokens,
    stream=stream,
)

# Stitch the streamed fragments together, skipping chunks whose delta
# carries no content.
fragments = []
for chunk in response:
    piece = chunk.choices[0].delta.content
    if piece is not None:
        fragments.append(piece)
generated_text = "".join(fragments)

print("Generated Summary:")
print(generated_text)



Generated Summary:
Based on the medical report, here's a summary of the key findings, diagnoses, and recommendations in simple terms:

**Key Findings:**

* Blood Pressure (BP): 190, which is higher than the normal range (typically below 120/80).
* Blood Sugar: 34, which is lower than the normal range (typically between 70-110).
* Heart Rate: 90, which is higher than the normal range (typically between 60-100).

**Diagnoses:**

The report does not explicitly mention a diagnosis, but based on the abnormal values, it can be inferred that the patient may be experiencing:

* Hypertension (high blood pressure)
* Hypoglycemia (low blood sugar)
* Tachycardia (fast heart rate)

**Recommendations:**

The report does not provide specific recommendations, but it's likely that the patient will need to undergo further testing and evaluation to determine the underlying cause of these abnormal values. This may include:

* Blood pressure management through lifestyle changes (diet, exercise, stress redu

In [12]:
import fitz
from openai import OpenAI
from settings import *
import gradio as gr

# PDF Text Extraction Function
def extract_text_from_pdf(pdf_path):
    """Return the concatenated plain text of every page in a PDF.

    Args:
        pdf_path: Path of the PDF file, passed straight to ``fitz.open``.

    Returns:
        The text of all pages joined in page order, with leading and
        trailing whitespace stripped.
    """
    # The context manager closes the document even when a page fails to
    # extract; with a bare close() call an exception would skip the cleanup.
    with fitz.open(pdf_path) as document:
        # join() builds the result once instead of re-copying a growing
        # string for every page.
        return "".join(page.get_text() for page in document).strip()

# Monster API Client Configuration
# Generation settings read by the summarization request.
generation_model_name: str  # annotation only; no value is assigned here
llm_name: str = "Meta-Llama"
temperature: float = 0.9
top_p: float = 0.9
max_tokens: int = 2048
stream: bool = False  # Set to False for simplicity in Gradio

# Short display name -> model identifier sent in the request.
monster_ai_model_name = {
    "Google-Gemma": "google/gemma-2-9b-it",
    "Mistral": "mistralai/Mistral-7B-Instruct-v0.2",
    "Microsoft-Phi": "microsoft/Phi-3-mini-4k-instruct",
    "Meta-Llama": "meta-llama/Meta-Llama-3.1-8B-Instruct",
}

# OpenAI client pointed at the Monster API base URL.
monster_client = OpenAI(
    base_url="https://llm.monsterapi.ai/v1/",
    api_key=str(MONSTER_API_KEY),
)

# Summarization Function
def summarize_pdf(file):
    """Summarize an uploaded medical-report PDF with the configured model.

    Args:
        file: The upload handed over by the Gradio ``File`` input. Either an
            object exposing the path as ``.name`` or a plain path string is
            accepted; ``None`` means nothing was uploaded.

    Returns:
        The generated summary, or a human-readable message when no file was
        supplied, no text could be extracted, or an exception occurred.
        Exceptions are caught and reported as text rather than raised.
    """
    # Nothing uploaded: explain instead of surfacing an AttributeError message.
    if file is None:
        return "Please upload a PDF file to summarize."

    try:
        # Accept both upload objects (path in ``.name``) and plain path strings.
        pdf_path = getattr(file, "name", file)
        pdf_text = extract_text_from_pdf(pdf_path)

        # With no extracted text there is nothing to summarize, so skip the
        # API call rather than sending an empty report to the model.
        if not pdf_text:
            return "No extractable text was found in the uploaded PDF."

        # Create context for the AI summarization
        message = [
            {"role": "system", "content": "You are an AI assistant skilled in summarizing complex medical reports."},
            {"role": "user", "content": f"Summarize the following medical report in simple terms. Focus on the key findings, diagnoses, and recommendations. Report text:\n\n{pdf_text}"}
        ]

        # Generate summary using Monster API; streaming is disabled so the
        # whole completion arrives as one response object.
        response = monster_client.chat.completions.create(
            model=monster_ai_model_name[llm_name],
            messages=message,
            temperature=temperature,
            top_p=top_p,
            max_tokens=max_tokens,
            stream=False
        )

        # Extract the summary from the response
        return response.choices[0].message.content

    except Exception as e:
        # Report failures as text so the UI textbox shows what went wrong.
        return f"Error occurred: {e}"


# Gradio Interface
# The file upload is passed to summarize_pdf and its return value is shown
# in the textbox.
pdf_input = gr.File(label="Upload your Medical Report (PDF)")
summary_output = gr.Textbox(label="Generated Summary")
interface = gr.Interface(
    fn=summarize_pdf,
    inputs=pdf_input,
    outputs=summary_output,
    title="Medical Report Summarizer",
    description="Upload a medical report in PDF format, and this tool will generate a summary focusing on key findings, diagnoses, and recommendations.",
)

# Launch the Interface
interface.launch()

# Direct API call whose raw response object is printed below.
# NOTE(review): `message` is not assigned at the top level of this cell (only
# as a local inside summarize_pdf), so this call relies on a value left in
# the kernel by an earlier cell — confirm whether it is still needed.
request_options = dict(
    model=monster_ai_model_name[llm_name],
    messages=message,
    temperature=temperature,
    top_p=top_p,
    max_tokens=max_tokens,
    stream=stream,
)
response = monster_client.chat.completions.create(**request_options)

print("API Response:", response)


Running on local URL:  http://127.0.0.1:7862

To create a public link, set `share=True` in `launch()`.


IMPORTANT: You are using gradio version 4.16.0, however version 4.44.1 is available, please upgrade.
--------
API Response: ChatCompletion(id='chat-cb44d4cf61f2418c9f7697f536c6814e', choices=[Choice(finish_reason='stop', index=0, logprobs=None, message=ChatCompletionMessage(content="Based on the provided medical report, here's a summary of the key findings, diagnoses, and recommendations in simple terms:\n\n**Key Findings:**\n\n- Blood pressure (BP) is high, at 190, which can be a sign of hypertension.\n- Blood sugar (34) is within the normal range, indicating that diabetes is not a concern at this time.\n- Heart rate (90) is slightly elevated, but this can be normal for some people.\n\n**Diagnosis:**\n\nBased on the report, there are no clear diagnoses mentioned, but the high blood pressure (hypertension) is a notable finding that requires further investigation.\n\n**Recommendations:**\n\n- The patient's high blood pressure needs to be monitored and managed to prevent potential health