In [3]:
%pip install fitz pytesseract   -q

Note: you may need to restart the kernel to use updated packages.




In [4]:
# NOTE(review): nothing in the visible notebook imports `frontend`. Presumably it was
# installed to satisfy an import-time dependency of the PyPI `fitz` package — confirm
# whether this install is still required.
%pip install frontend -q

Note: you may need to restart the kernel to use updated packages.




In [1]:
# app.py

import os
import requests
import json
import re
from datetime import datetime
from PIL import Image
import pytesseract
import fitz  # PyMuPDF for PDF handling
import io
import streamlit as st


# =========================
# OCR + Analysis Processor
# =========================
class MedicalOCRProcessor:
    """Render PDFs to page images and request an AI analysis of OCR'd report text.

    The class does not run OCR itself; callers pass the images returned by
    `pdf_to_images` to an OCR engine and hand the resulting text to
    `analyze_medical_text`. Failures are reported through `st.error` and
    signalled to the caller with an empty list / `None` rather than raised.
    """

    # Maximum number of characters of report text forwarded to the model.
    MAX_CHARS = 12000
    # Seconds to wait for the chat-completions endpoint before giving up.
    REQUEST_TIMEOUT = 60
    # Resolution used when rasterising PDF pages.
    RENDER_DPI = 300

    def __init__(self, api_key):
        """Store the API key and the endpoint/model used for analysis requests.

        Args:
            api_key: Bearer token sent in the Authorization header.
        """
        # API settings
        self.api_url = "https://openrouter.ai/api/v1/chat/completions"
        self.model = "openai/gpt-oss-20b:free"
        self.api_key = api_key

    def clean_text(self, text):
        """Replace each run of ASCII control characters with a single space.

        The character class covers 0x00-0x1F, so newlines and tabs are
        collapsed as well. `None` is mapped to an empty string.
        """
        if text is None:
            return ""
        return re.sub(r'[\x00-\x1F]+', ' ', text)

    def pdf_to_images(self, pdf_bytes):
        """Convert uploaded PDF to images

        Args:
            pdf_bytes: Raw bytes of a PDF file.

        Returns:
            A list with one RGB `PIL.Image` per page, or an empty list if the
            document could not be opened or rendered (the error is shown via
            `st.error`).
        """
        images = []
        try:
            # PDF user space is 72 units per inch; scale up to the target DPI.
            zoom = self.RENDER_DPI / 72
            mat = fitz.Matrix(zoom, zoom)
            # The context manager closes the document even if a page fails to
            # render part-way through.
            with fitz.open(stream=pdf_bytes, filetype="pdf") as pdf_document:
                for page in pdf_document:
                    pix = page.get_pixmap(matrix=mat)
                    img = Image.open(io.BytesIO(pix.tobytes("ppm")))
                    if img.mode != 'RGB':
                        img = img.convert('RGB')
                    images.append(img)
            return images
        except Exception as e:
            st.error(f"❌ Error converting PDF to images: {e}")
            return []

    @staticmethod
    def _extract_content(result):
        """Return the first choice's message content from a decoded response.

        Any missing or malformed level (non-dict payload, absent/empty
        `choices`, missing `message`, non-string `content`) yields "".
        """
        if not isinstance(result, dict):
            return ""
        choices = result.get('choices')
        if not isinstance(choices, list) or not choices:
            return ""
        first = choices[0]
        if not isinstance(first, dict):
            return ""
        message = first.get('message')
        if not isinstance(message, dict):
            return ""
        content = message.get('content')
        return content if isinstance(content, str) else ""

    def analyze_medical_text(self, combined_text):
        """Send text to OpenRouter API for medical analysis

        Args:
            combined_text: Report text to analyse. Text longer than
                `MAX_CHARS` is cut and suffixed with a truncation marker.

        Returns:
            The model's analysis as a non-empty string, or `None` when the key
            or text is missing, the request fails, the API answers with a
            non-200 status, or the response carries no content. Every `None`
            path reports the reason via `st.error`.
        """
        if not self.api_key:
            st.error("❌ API key not provided.")
            return None

        # Guard before len()/slicing so None or whitespace-only input cannot
        # raise or trigger a pointless API call.
        if not combined_text or not combined_text.strip():
            st.error("❌ No text to analyze.")
            return None

        if len(combined_text) > self.MAX_CHARS:
            combined_text = combined_text[:self.MAX_CHARS] + "... [text truncated]"

        prompt = f"""
ACT AS AN EXPERIENCED MEDICAL DOCTOR. Analyze the following medical reports:

{combined_text}

Provide structured medical analysis including:
1. Patient Summary
2. Clinical Findings
3. Diagnostic Impression
4. Critical Concerns
5. Treatment Recommendations
6. Lifestyle & Preventive Advice
7. Health Optimization Plan
8. Patient Education
"""

        payload = {
            "model": self.model,
            "messages": [
                {"role": "system", "content": "You are an experienced medical doctor providing diagnostic analysis."},
                {"role": "user", "content": prompt}
            ],
            "temperature": 0.3,
        }

        try:
            # `json=` serialises the payload and sets the JSON Content-Type.
            response = requests.post(
                url=self.api_url,
                headers={"Authorization": f"Bearer {self.api_key}"},
                json=payload,
                timeout=self.REQUEST_TIMEOUT,
            )

            if response.status_code != 200:
                st.error(f"❌ API Error: {response.status_code} - {response.text}")
                return None

            analysis_text = self._extract_content(response.json())

        except Exception as e:
            # UI boundary: surface any network/decoding failure instead of crashing.
            st.error(f"❌ Error calling API: {e}")
            return None

        if not analysis_text:
            st.error("❌ API returned no analysis content.")
            return None
        return analysis_text


# =========================
# Streamlit App UI
# =========================
# Page chrome and user inputs.
st.set_page_config(page_title="Medical OCR & Analysis", layout="wide")
st.title("🩺 Medical OCR & AI Analysis")

st.sidebar.header("⚙️ Settings")
api_key = st.sidebar.text_input("Enter OpenRouter API Key", type="password")

uploaded_files = st.file_uploader(
    "📂 Upload Medical Images or PDFs",
    type=["png", "jpg", "jpeg", "tiff", "bmp", "pdf"],
    accept_multiple_files=True
)

if uploaded_files and api_key:
    processor = MedicalOCRProcessor(api_key)
    ocr_sections = []

    for file in uploaded_files:
        file_ext = os.path.splitext(file.name)[1].lower()
        st.write(f"📄 Processing: {file.name}")
        # getvalue() returns the whole upload regardless of the current stream
        # position, so repeated script runs always see the full file.
        file_bytes = file.getvalue()

        try:
            if file_ext == ".pdf":
                pdf_images = processor.pdf_to_images(file_bytes)
                pdf_text = "".join(
                    f"\n--- Page {i} ---\n{pytesseract.image_to_string(img)}\n"
                    for i, img in enumerate(pdf_images, 1)
                )
                ocr_sections.append(f"\n--- PDF: {file.name} ---\n{pdf_text}\n")
            else:
                with Image.open(io.BytesIO(file_bytes)) as img:
                    ocr_text = pytesseract.image_to_string(img)
                ocr_sections.append(f"\n--- Image: {file.name} ---\n{ocr_text}\n")
        except Exception as e:
            # One unreadable file (or a missing Tesseract binary) should not
            # take down the whole page; report it and continue with the rest.
            st.error(f"❌ Error processing {file.name}: {e}")

    all_ocr_text = "".join(ocr_sections)

    if all_ocr_text.strip():
        st.subheader("📜 Extracted OCR Text")
        st.text_area("OCR Output", all_ocr_text, height=300)

        if st.button("🔍 Run Medical Analysis"):
            with st.spinner("Analyzing medical reports..."):
                analysis = processor.analyze_medical_text(all_ocr_text)
            if analysis:
                # Persist the result: st.button is True only for the run
                # triggered by the click, and pressing a download button
                # starts a new run in which it is False again.
                st.session_state["analysis_result"] = {
                    "source_text": all_ocr_text,
                    "analysis": analysis,
                    "timestamp": datetime.now().strftime("%Y%m%d_%H%M%S"),
                }
            elif "analysis_result" in st.session_state:
                # Do not keep showing an older result after a failed re-run.
                del st.session_state["analysis_result"]

        saved = st.session_state.get("analysis_result")
        # Only show a stored analysis if it belongs to the text currently displayed.
        if saved and saved["source_text"] == all_ocr_text:
            st.subheader("🧾 AI Medical Analysis")
            st.write(saved["analysis"])

            # Save results
            timestamp = saved["timestamp"]
            json_data = {
                "analysis": saved["analysis"],
                "timestamp": timestamp
            }

            st.download_button(
                label="⬇️ Download Analysis (JSON)",
                data=json.dumps(json_data, indent=2),
                file_name=f"medical_analysis_{timestamp}.json",
                mime="application/json"
            )

            st.download_button(
                label="⬇️ Download Analysis (TXT)",
                data=saved["analysis"],
                file_name=f"medical_analysis_{timestamp}.txt",
                mime="text/plain"
            )



2025-09-15 22:49:28.500 
  command:

    streamlit run c:\ProgramData\anaconda3\Lib\site-packages\ipykernel_launcher.py [ARGUMENTS]
2025-09-15 22:49:28.508 Session state does not function when running a script without `streamlit run`


In [2]:
from PIL import Image
import pytesseract

# Smoke test: confirm pytesseract can reach the Tesseract executable and read an image.
pytesseract.pytesseract.tesseract_cmd = r"C:\Program Files\Tesseract-OCR\tesseract.exe"

# Open the sample inside a context manager so the file handle is released
# as soon as OCR has finished.
with Image.open("download.png") as img:  # replace with an actual image
    text = pytesseract.image_to_string(img)
print(text)


It was the best of
times, it was the worst
of times, it was the age
of wisdom, it was the
age of foolishness...



In [3]:
import pytesseract

# Tell pytesseract exactly where the Tesseract executable lives on Windows.
TESSERACT_EXE = r"C:\Program Files\Tesseract-OCR\tesseract.exe"
pytesseract.pytesseract.tesseract_cmd = TESSERACT_EXE


In [4]:
import os

# Alternative (32-bit) install location. Only switch to it when the executable
# is actually there, so a path configured earlier is not replaced by a
# location that does not exist on this machine.
_TESSERACT_X86 = 'C:\\Program Files (x86)\\Tesseract-OCR\\tesseract.exe'
if os.path.isfile(_TESSERACT_X86):
    pytesseract.pytesseract.tesseract_cmd = _TESSERACT_X86
