In [18]:
! pip install llama-cloud-services llama-index-core llama-index-readers-file python-dotenv
! pip install groq




In [19]:
import os

# The LlamaParse key must never be committed with the notebook. Read it from the
# LLAMA_CLOUD_API_KEY environment variable instead (export it in the shell that
# launches Jupyter).
# SECURITY: the key that used to be hard-coded here has been exposed and should be revoked.
LLAMA_PARSER_API_KEY = os.environ.get("LLAMA_CLOUD_API_KEY")
if not LLAMA_PARSER_API_KEY:
    print("LLAMA_CLOUD_API_KEY is not set; LlamaParse requests will be rejected until it is.")


In [25]:
import nest_asyncio
nest_asyncio.apply()  # applied before the parsing libraries are imported, as in the original cell order

from llama_cloud_services import LlamaParse
from llama_index.core import SimpleDirectoryReader

# Location of the blood report to parse (stands in for a user-uploaded PDF).
# NOTE(review): this is a machine-specific absolute path; it will not resolve on another machine.
report_pdf_path = '/Users/taimourabdulkarim/Documents/Personal Github Repositories/HealthSync-AI/backend/data/blood_reports/MR. AAHID ZAKI.pdf'

# LlamaParse client; "markdown" and "text" are the available result types.
parser = LlamaParse(api_key=LLAMA_PARSER_API_KEY, result_type="markdown")

# Route every ".pdf" input through the LlamaParse client.
file_extractor = {".pdf": parser}

pdf_reader = SimpleDirectoryReader(
    input_files=[report_pdf_path],
    file_extractor=file_extractor,
)
documents = pdf_reader.load_data()


Started parsing the file under job_id 8699a0a1-a394-4936-86d7-432d5d55d903


In [None]:
import re

# Fail with a clear message instead of an IndexError when parsing produced nothing.
if not documents:
    raise ValueError("No documents were parsed; check the input PDF path and the LlamaParse API key.")

# The parser may return more than one Document (e.g. one per page), so use the text of
# all of them rather than only the first. For a single document this equals documents[0].text.
patient_report = "\n\n".join(doc.text for doc in documents)

# Extract age and gender from the report text.
# The age value must start with a digit and may only continue with digits, the unit
# letters Y/M/W/D, spaces and tabs. Newlines are excluded on purpose, so the match
# cannot run into the next line and swallow its leading letter (e.g. the "D" of "Date").
age_match = re.search(r"Age:\s*(\d[\dYMWD \t]*)", patient_report)
gender_match = re.search(r"Gender:\s*(Male|Female)", patient_report)

# Fall back to "Unknown" when a field is not present in the report.
patient_age = age_match.group(1).strip() if age_match else "Unknown"
patient_gender = gender_match.group(1).strip() if gender_match else "Unknown"


In [None]:
import os
from pprint import pprint
from groq import Groq
from dotenv import load_dotenv
from IPython.display import display_markdown

# Load GROQ_API_KEY from a local .env file if one exists; variables that are already
# exported in the environment are left as they are.
load_dotenv()

# Do NOT assign the key in the notebook: the previous `os.environ["GROQ_API_KEY"] = ""`
# replaced any real key with an empty string, so every request failed authentication.
if not os.environ.get("GROQ_API_KEY"):
    raise RuntimeError(
        "GROQ_API_KEY is not set. Export it in the environment or add it to a .env file."
    )

# With no explicit api_key argument the client takes the key from the environment.
client = Groq()


In [None]:
# Step 1: Build the conversation that asks the model for structured JSON
# (patient_info + haematology_results) extracted from the parsed report.

# System turn: restricts the model to JSON-only output and allows critique-driven revisions.
parser_system_prompt = (
    "You are an expert data parser tasked with generating high-quality, structured JSON output. "
    "Your task is to parse the provided patient report data and output *only* valid JSON (enclosed in ```json ... ```) "
    "with no additional text or explanations. If the user provides critique, respond with a revised version."
)

# User turn: the output schema, followed by the raw report text. The patient's age and
# gender are interpolated so the model picks the matching reference ranges.
parser_user_prompt = f"""
Please parse the provided patient report data and format the output in a structured JSON format. The JSON must be enclosed in ```json ... ``` and include only the JSON object, with no additional text. The JSON should have the following sections with exact key names:

1. **patient_info**: Include:
   - **age**: The patient's age (e.g., "3 Y 0 M 0 D" or "Unknown").
   - **gender**: The patient's gender (e.g., "Male", "Female", or "Unknown").
2. **haematology_results**: Include all test results from the HAEMATOLOGY section, with each test containing:
   - **test**: The name of the test (e.g., "WBC Count").
   - **patient_value**: The patient's test result (e.g., "12580").
   - **unit**: The unit of measurement (e.g., "μL").
   - **reference_value**: The reference range specific to the patient's age and gender (e.g., for a {patient_age} {patient_gender}, use ranges like "Child 2 Mon- 6 Yrs: 5,000-15,000" where applicable).
   - **remark**: Indicate whether the patient value is "Normal", "Low", or "High" based on the reference_value. For ranges with multiple groups (e.g., adults, children), use the range appropriate for the patient’s age ({patient_age}) and gender ({patient_gender}). If the reference range is descriptive (e.g., "< 1-2 %"), infer the remark based on reasonable interpretation.

Exclude empty entries like "Complete Blood Picture" and any non-test data (e.g., metadata, notes, consultants). Ensure the JSON is well-structured, with appropriate nesting and consistent formatting. Here is the patient report data to process:

{patient_report}
"""

generation_chat_history = [
    {"role": "system", "content": parser_system_prompt},
    {"role": "user", "content": parser_user_prompt},
]


In [30]:
# Call the Groq API to generate the JSON output.

# Make the cell safe to re-run: drop assistant turns left behind by an earlier run,
# so the request never ends with a stale reply and the history does not grow by one
# assistant message per execution. On a first run this loop does nothing.
while generation_chat_history and generation_chat_history[-1]["role"] == "assistant":
    generation_chat_history.pop()

completion = client.chat.completions.create(
    messages=generation_chat_history,
    model="llama3-70b-8192",
)
# Text of the first returned choice.
response = completion.choices[0].message.content

# Record the reply so a follow-up critique turn can refer to it.
generation_chat_history.append(
    {
        "role": "assistant",
        "content": response,
    }
)


In [None]:
import json

# Pull the JSON payload out of the model reply. The fence tag is optional and
# case-insensitive, so "```json", "```JSON" and a bare "```" fence are all accepted.
json_match = re.search(r'```(?:json)?\s*(.*?)\s*```', response, re.DOTALL | re.IGNORECASE)
if json_match:
    json_str = json_match.group(1)
else:
    json_str = response  # Fallback to the raw response if no code block was found

# Try to parse the payload as JSON
try:
    json_output = json.loads(json_str)
    # Pretty print the JSON output
    print("Structured Haematology Results:")
    print(json.dumps(json_output, indent=2))
    # Save the JSON output to a file. The encoding is explicit so the result does not
    # depend on the platform's default locale.
    with open("patient_report.json", "w", encoding="utf-8") as f:
        json.dump(json_output, f, indent=2)
except json.JSONDecodeError as e:
    # If the response is not valid JSON, display it for debugging
    print(f"Error: Response is not valid JSON. Error: {str(e)}")
    print("Raw response:")
    display_markdown(response, raw=True)
    # Save the raw response for inspection. It may contain non-ASCII characters
    # (e.g. "μ"), which a non-UTF-8 default encoding cannot always write.
    with open("raw_response.txt", "w", encoding="utf-8") as f:
        f.write(response)
    json_output = {}  # Empty output marks the parse as failed for later cells


Structured Haematology Results:
{
  "patient_info": {
    "age": "3 Y 0 M 0 D",
    "gender": "Male"
  },
  "haematology_results": [
    {
      "test": "WBC Count",
      "patient_value": "12580",
      "unit": "\u03bcL",
      "reference_value": "Child 2 Mon- 6 Yrs: 5,000-15,000",
      "remark": "Normal"
    },
    {
      "test": "RBC Count",
      "patient_value": "4.34",
      "unit": "10^6/\u03bcL",
      "reference_value": "M: 4.5 - 5.5",
      "remark": "Normal"
    },
    {
      "test": "Haemoglobin",
      "patient_value": "11.3",
      "unit": "g/dl",
      "reference_value": "M: 13-17",
      "remark": "Low"
    },
    {
      "test": "HCT",
      "patient_value": "33.4",
      "unit": "%",
      "reference_value": "Male: 40-54",
      "remark": "Low"
    },
    {
      "test": "MCH",
      "patient_value": "26.1",
      "unit": "fl",
      "reference_value": "27-32",
      "remark": "Low"
    },
    {
      "test": "MCV",
      "patient_value": "77.0",
      "unit": "pg"

In [35]:
# Step 2: Build the conversation that asks the model for a short, friendly,
# plain-language reading of the structured CBC results.
# The history is only created when Step 1 produced a non-empty JSON object.
if json_output:
    # System turn: sets the persona and forbids JSON/code-block output.
    interpreter_system_prompt = (
        "You are a friendly medical AI assistant who explains blood test results in very simple, kind words for people who don’t know medical terms. "
        "Your task is to look at the blood test results for a patient of any age, find any problems, and explain them in a short, easy way, like talking to a family member. "
        "Use examples like 'Blood is like a team that carries oxygen.' "
        "Output *only* plain text with no JSON or code blocks, keeping it short and clear."
    )

    # User turn: input schema, explanation guidelines, safety rules and the results JSON.
    # NOTE(review): the "Patient Information" section states fixed symptoms (tired, pale)
    # that do not come from the parsed report; confirm this is intended for every patient.
    interpreter_user_prompt = f"""
Please analyze the provided blood test results for a patient with age "{patient_age}" and gender "{patient_gender}" and give a short, simple explanation in plain text that anyone can understand. Do not use JSON or code blocks; output only the text explanation. Follow these guidelines:

### Input Data
The blood test results are in a JSON object with:
- **patient_info**: Contains:
  - **age**: The patient's age (e.g., "3 Y 0 M 0 D" or "Unknown").
  - **gender**: The patient's gender (e.g., "Male", "Female", or "Unknown").
- **haematology_results**: A list of tests, each with:
  - **test**: Name of the test (e.g., "Haemoglobin").
  - **patient_value**: The patient’s result (e.g., "11.3").
  - **unit**: How it’s measured (e.g., "g/dl").
  - **reference_value**: The normal range for the patient’s age and gender (e.g., "11-14").
  - **remark**: Shows if the result is "Normal", "Low", or "High".

Tests include:
- Haemoglobin: How much oxygen the blood carries.
- MCV: Size of red blood cells.
- RBC Count: Number of red blood cells.
- HCT: How much of the blood is red blood cells.
- RDW: If red blood cells are different sizes.
- WBC Count: Number of white blood cells that fight germs.
- Neutrophils, Lymphocytes, Monocytes, Eosinophils, Basophils: Types of white blood cells.
- Platelet Count: Cells that help stop bleeding.

### Patient Information
The patient is {patient_age} old and {patient_gender}. They feel tired and look pale. No known sickness or bleeding.

### How to Explain
1. **Find Problems**: Look at the `remark` field ("Low" or "High") to see what’s not normal. Check `patient_value` against `reference_value`.
2. **Keep It Short and Simple**:
   - Say what’s wrong in 2-3 sentences (e.g., “Your red blood cells are too small.”).
   - Use examples: “Red blood cells are like trucks carrying oxygen to your body.”
   - Avoid medical words like “anemia” or “leukocytosis.”
3. **Explain Why**:
   - Say why it might be happening in 1-2 sentences, tailored for the patient’s age (e.g., for a child, “Maybe not enough iron from food”; for an adult, “Maybe stress or not enough iron”).
   - Mention age-appropriate causes (e.g., iron deficiency for kids, infections for all ages).
4. **What to Do**:
   - Suggest 1-2 easy steps (e.g., “Get a test to check iron.”).
   - Say why: “This will help make the blood stronger.”
   - Adjust for age (e.g., “See a kids’ doctor” for children, “See your doctor” for adults).
5. **When to Worry**:
   - Say when to see a doctor quickly in 1 sentence (e.g., “If very tired, go to the doctor now.”).

### Output
Write a short plain text explanation (no JSON or code blocks) with:
- What’s wrong: 2-3 sentences about the problems.
- Why it might be happening: 1-2 sentences about possible reasons.
- What to do: 1-2 sentences with easy steps.
- When to worry: 1 sentence about urgent signs.

### Safety
- Use the normal ranges from the JSON for the patient’s age ({patient_age}) and gender ({patient_gender}).
- Warn about serious problems (e.g., Haemoglobin <7 g/dL, Platelets <20 × 10⁹/L, WBC >20 × 10⁹/L) by saying “See a doctor right away.”
- Don’t say it’s definitely one problem; suggest a doctor check.

### Blood Test Results (JSON)
{json.dumps(json_output, indent=2)}

Analyze the results and give a short, simple explanation in plain text.
"""

    interpretation_chat_history = [
        {"role": "system", "content": interpreter_system_prompt},
        {"role": "user", "content": interpreter_user_prompt},
    ]


In [None]:
# Ask the model for the plain-language interpretation, then print and save it.
# The interpretation prompt is only built when Step 1 produced structured results, so
# skip this step when json_output is empty. Otherwise the cell would raise NameError,
# or silently reuse a chat history left over from an earlier run.
if not json_output:
    print("Skipping interpretation: no structured results are available from Step 1.")
else:
    # Make the cell safe to re-run: drop assistant turns left behind by an earlier run
    # so the request does not end with a stale reply. On a first run this does nothing.
    while interpretation_chat_history and interpretation_chat_history[-1]["role"] == "assistant":
        interpretation_chat_history.pop()

    interpretation_completion = client.chat.completions.create(
        messages=interpretation_chat_history,
        model="llama3-70b-8192",
    )
    # Text of the first returned choice.
    interpretation_response = interpretation_completion.choices[0].message.content

    # Append the interpretation response to chat history
    interpretation_chat_history.append(
        {
            "role": "assistant",
            "content": interpretation_response,
        }
    )

    # Print and save the plain text interpretation
    print("\nSimple Blood Test Explanation:")
    print(interpretation_response)
    # Save the plain text to a file. The encoding is explicit because the reply may
    # contain non-ASCII characters that a platform default encoding cannot write.
    with open("cbc_interpretation.txt", "w", encoding="utf-8") as f:
        f.write(interpretation_response)



Simple Blood Test Explanation:
Here's what's going on with your blood:

Your red blood cells are carrying less oxygen than they should. This might be making you feel tired and look pale. It's like having a team of trucks that can't carry as much oxygen as they need to.

This might be happening because you're not getting enough iron from your food. Iron helps your blood cells carry oxygen better.

You should get a test to check your iron levels. This will help make your blood stronger. If you're feeling very tired, go to the doctor now.

Remember to see a kids' doctor to check on this further.
