In [None]:
import os

import cv2 #type: ignore
import torch #type: ignore
import easyocr #type: ignore
import numpy as np #type: ignore
from PIL import Image #type: ignore
import google.generativeai as genai #type: ignore
import torchvision.transforms as transforms #type: ignore
from torchvision.models.detection import fasterrcnn_resnet50_fpn #type: ignore

In [None]:
# Load the pre-trained Faster R-CNN detector used to propose regions for OCR.
# NOTE(review): the stock torchvision checkpoint is a general object detector;
# confirm that it (or a fine-tuned replacement) localizes text well enough on
# prescription images before trusting the downstream results.
model = fasterrcnn_resnet50_fpn(pretrained=True)
model.eval()  # switch to inference mode

# Initialize EasyOCR reader
reader = easyocr.Reader(['en'])  # 'en' for English, add more languages if needed

# Read the Google AI API key from the environment instead of hard-coding it,
# so the secret never ends up in the saved notebook or in version control.
_google_api_key = os.environ.get("GOOGLE_API_KEY")
if not _google_api_key:
    raise RuntimeError(
        "Set the GOOGLE_API_KEY environment variable before running this notebook."
    )
genai.configure(api_key=_google_api_key)

# Initialize the Gemini model that turns the raw OCR text into structured details
gemini = genai.GenerativeModel('gemini-1.5-flash')

In [None]:
# Preprocessing pipeline applied to an image before detection.
# It has a single step: conversion to a tensor.
transform = transforms.Compose([transforms.ToTensor()])

def detect_text_regions(image, threshold=0.5):
    """Run the detector on an image and return its confident bounding boxes.

    Args:
        image: Input image that the module-level ``transform`` can convert to
            a tensor (the pipeline passes a PIL Image).
        threshold: Minimum detection score a box must reach to be kept.
            Defaults to 0.5, the value that used to be hard-coded here.

    Returns:
        NumPy array with the boxes whose score is at least ``threshold``.
        Callers unpack each row as ``x1, y1, x2, y2``.
    """
    # Add a leading batch dimension: the model is called on a batch of one.
    img_tensor = transform(image).unsqueeze(0)

    # Inference only, so gradient tracking is unnecessary.
    with torch.no_grad():
        prediction = model(img_tensor)

    # Only one image was submitted, so only the first result is relevant.
    detections = prediction[0]
    boxes = detections['boxes'].cpu().numpy()
    scores = detections['scores'].cpu().numpy()

    # Boolean-mask the boxes by score.
    return boxes[scores >= threshold]

def perform_ocr(image, boxes):
    """OCR each boxed region of an image and return one string per box.

    Args:
        image: RGB PIL Image the boxes refer to.
        boxes: Iterable of ``(x1, y1, x2, y2)`` boxes in pixel coordinates.

    Returns:
        List of strings, one per entry in ``boxes`` and in the same order.
        A box that covers no pixels after clamping yields an empty string.
    """
    # EasyOCR is given an OpenCV-style (BGR) array.
    img_cv = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2BGR)
    img_h, img_w = img_cv.shape[:2]

    text_results = []

    for box in boxes:
        x1, y1, x2, y2 = map(int, box)

        # Clamp to the image bounds: a negative coordinate would otherwise be
        # interpreted by slicing as an offset from the far edge.
        x1, x2 = max(0, x1), min(img_w, x2)
        y1, y2 = max(0, y1), min(img_h, y2)

        # Float-to-int truncation (or clamping) can leave a zero-area crop,
        # which holds nothing to read; keep the result aligned with `boxes`.
        if x2 <= x1 or y2 <= y1:
            text_results.append('')
            continue

        roi = img_cv[y1:y2, x1:x2]

        # Each EasyOCR result entry carries the recognized text at index 1.
        result = reader.readtext(roi)
        text = ' '.join(entry[1] for entry in result)
        text_results.append(text.strip())

    return text_results

def process_prescription_image(image_path):
    """Load a prescription image and return the text found in its regions.

    Args:
        image_path: Path to the image file to process.

    Returns:
        List of strings as returned by ``perform_ocr``, one per region
        reported by ``detect_text_regions``.
    """
    # Open inside a context manager so the file handle is released promptly,
    # and normalize to 3-channel RGB: the later steps (tensor conversion and
    # the RGB->BGR swap) assume RGB, which grayscale, palette or RGBA files
    # would not satisfy. `convert` returns an independent in-memory copy.
    with Image.open(image_path) as img:
        image = img.convert("RGB")

    # Detect candidate regions, then OCR each of them.
    text_boxes = detect_text_regions(image)
    ocr_results = perform_ocr(image, text_boxes)

    return ocr_results

In [None]:
# Example usage: run the full pipeline on a sample prescription image.
image_path = "ty.jpg"
results = process_prescription_image(image_path)

# One line per detected region; reused below as the input to Gemini.
extracted_text = "\n".join(results)

# Show the OCR output itself, not just the heading.
print("Extracted text from the prescription:")
print(extracted_text)

In [None]:
# Instruction sent to Gemini; the OCR output is appended directly after it.
PROMPT = "Give me basic details of the patient, name, age, sex, date, time, and prescription details of the report from the following extracted text of the medical prescription: \n\n"

# Refuse to query the model when OCR produced nothing: with no source text,
# any "patient details" in the answer could only be invented.
if not extracted_text.strip():
    raise ValueError("No text was extracted from the image; nothing to send to Gemini.")

response = gemini.generate_content(PROMPT + extracted_text)
print(response.text)