In [9]:
import cv2
import pytesseract
import numpy as np
import pandas as pd
import re, os, sys

# Location of the Tesseract executable used by pytesseract.
# Set the TESSERACT_CMD environment variable to override it on another machine;
# otherwise the Windows install path below is used.
pytesseract.pytesseract.tesseract_cmd = os.environ.get(
    "TESSERACT_CMD",
    r'C:\Program Files\Tesseract-OCR\tesseract.exe',
)

In [4]:
# Raw string: backslashes are already literal, so they must NOT be doubled
# (doubling them puts "\\" into the path and into every path derived from it).
image_path = r"C:\Users\VIRAJ M\Desktop\sample_prescription.png"
img = cv2.imread(image_path)

# cv2.imread reports failure by returning None instead of raising, so fail
# loudly here with a message that distinguishes the two common causes.
if img is None:
    if not os.path.isfile(image_path):
        raise FileNotFoundError(f"Image not found: {image_path}")
    raise ValueError(f"Image could not be decoded: {image_path}")

In [5]:

# Preprocessing: grayscale -> 5x5 Gaussian blur -> inverse adaptive threshold.
# THRESH_BINARY_INV makes dark pixels (ink) white (255) on a black background.
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
blur = cv2.GaussianBlur(gray, ksize=(5, 5), sigmaX=0)
th = cv2.adaptiveThreshold(
    blur,
    maxValue=255,
    adaptiveMethod=cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
    thresholdType=cv2.THRESH_BINARY_INV,
    blockSize=11,
    C=2,
)


In [6]:
# Fixed pixel regions of interest, indexed as [rows, columns] on the
# thresholded image.
# NOTE(review): coordinates are hard-coded — presumably tuned to one form
# layout/resolution; confirm they match the input image.
h, w = th.shape

header_rows = slice(40, 110)  # patient and date share the same row band
patient_roi = th[header_rows, 40:440]
date_roi = th[header_rows, 460:700]
table_roi = th[140:600, 40:700]

In [7]:
def ocr_image(binary_img, psm=6):
    """Run Tesseract on a binary image crop and return the text on one line.

    The crop is inverted with ``cv2.bitwise_not`` before OCR; in this notebook
    the crops come from an inverse-thresholded image (white ink on black), so
    the inversion hands Tesseract dark text on a light background.

    Args:
        binary_img: Image array to read. ``None`` or a zero-size array (for
            example a slice that fell outside the image) yields ``""``.
        psm: Tesseract page segmentation mode passed as ``--psm``.

    Returns:
        The recognised text with every run of newline, carriage return, tab,
        form feed or vertical tab collapsed to a single space, and with
        leading/trailing whitespace stripped.
    """
    # An out-of-bounds ROI slice produces an empty array; there is nothing to
    # read, so skip the OCR call instead of letting it fail.
    if binary_img is None or getattr(binary_img, "size", 0) == 0:
        return ""

    inv = cv2.bitwise_not(binary_img)
    config = f'--oem 3 --psm {psm}'
    raw = pytesseract.image_to_string(inv, config=config)
    return re.sub(r'[\n\r\t\f\v]+', ' ', raw).strip()

In [10]:
# Read both header fields as single text lines (psm 7), patient first.
patient_name, date_text = (
    ocr_image(roi, psm=7) for roi in (patient_roi, date_roi)
)

In [11]:
# Show the two header fields read by OCR, one per line.
for _label, _value in (("Patient:", patient_name), ("Date   :", date_text)):
    print(_label, _value)

Patient: 
Date   : 


In [12]:

# Invert the table crop and collect the outer contours of its white regions.
# NOTE(review): table_roi comes from an inverse threshold (ink already white),
# so after this inversion the white regions are background/cell interiors,
# not ink — confirm this polarity is the intended one for row detection.
table_inv = cv2.bitwise_not(table_roi)
contours, _ = cv2.findContours(
    image=table_inv,
    mode=cv2.RETR_EXTERNAL,
    method=cv2.CHAIN_APPROX_SIMPLE,
)


In [14]:

# Collect (top_y, [medicine, dosage, frequency]) for every contour box that is
# large enough; top_y is kept so the rows can be ordered top-to-bottom later.
row_entries = []

for cnt in contours:
    x, y, wc, hc = cv2.boundingRect(cnt)

    # Only boxes at least 20 px tall and 50 px wide are treated as rows.
    if hc >= 20 and wc >= 50:
        row_img = table_roi[y:y + hc, x:x + wc]

        # Split the row into three equal-width columns; the last column
        # absorbs any remainder pixels from the integer division.
        col_w = wc // 3
        col_bounds = ((0, col_w), (col_w, 2 * col_w), (2 * col_w, wc))

        med_text, dose_text, freq_text = [
            ocr_image(row_img[:, start:stop], psm=7)
            for start, stop in col_bounds
        ]

        row_entries.append((y, [med_text, dose_text, freq_text]))

In [15]:
# Order rows top-to-bottom by their y coordinate, then drop the coordinate.
row_entries.sort(key=lambda entry: entry[0])
rows_sorted = [cells for _y, cells in row_entries]

df = pd.DataFrame(data=rows_sorted, columns=["Medicine", "Dosage", "Frequency"])
print(df)

# Write the table next to the source image: <image stem>_output.csv
csv_name = f"{os.path.splitext(image_path)[0]}_output.csv"
df.to_csv(csv_name, index=False)
print(f"Saved to: {csv_name}")

                    Medicine                      Dosage         Frequency
0  OR: (Ful name, address, &  phone number) (it under. 1  2, give. 390)” =
1                 John: Doe.                      WZ USN                  
2                       - ce                                              
3               US25.-Nevert           torqotreri... -(t       10:1 78)...
4                                                                         
5                                                     23                  
6                                              forgottén        (00 178) |
7                                                                         
Saved to: C:\\Users\\VIRAJ M\\Desktop\\sample_prescription_output.csv
