In [1]:
from langchain_openai import ChatOpenAI
from langchain_core.output_parsers import StrOutputParser
from langchain_core.messages import HumanMessage

In [2]:
import os
from dotenv import load_dotenv
# Pull settings from a local .env file (if one exists) into the process environment.
load_dotenv()

# Fail fast with an actionable message when the key is absent. Re-assigning
# os.environ[...] = os.getenv(...) is a no-op when the key exists and raises an
# opaque "str expected, not NoneType" TypeError when it does not.
if not os.getenv("OPENAI_API_KEY"):
    raise RuntimeError(
        "OPENAI_API_KEY is not set. Export it in your shell or add it to a "
        ".env file before running this notebook."
    )
# os.environ["LANGCHAIN_API_KEY"] = os.getenv("LANGCHAIN_API_KEY")
# os.environ["LANGCHAIN_TRACING_V2"] = "true"
# os.environ["LANGCHAIN_ENDPOINT"] = os.getenv("LANGCHAIN_ENDPOINT")
# os.environ["LANGCHAIN_PROJECT"] = os.getenv("LANGCHAIN_PROJECT")


In [3]:
import cv2

def process_image(path, min_dimension=400):
    """Rescale an image so its shorter side is ``min_dimension`` px and save a copy.

    The aspect ratio is preserved: the image is scaled (up or down) by the
    factor that brings its shorter side to ``min_dimension``. The result is
    written to ``f"{path}_processed.jpg"``, i.e. the original path with the
    suffix ``_processed.jpg`` appended.

    Args:
        path: Location of the source image (``str`` or ``pathlib.Path``).
        min_dimension: Target length, in pixels, of the shorter side.

    Returns:
        The path of the written file, as a string.

    Raises:
        OSError: If the image cannot be read, or the processed copy cannot
            be written.
    """
    # cv2.imread signals failure by returning None instead of raising, so the
    # result has to be checked before touching .shape.
    img = cv2.imread(str(path))
    if img is None:
        raise OSError(f"Could not read image (missing file or unsupported format): {path}")

    height, width = img.shape[:2]

    # The larger of the two ratios is the one belonging to the shorter side.
    scale_factor = max(min_dimension / width, min_dimension / height)

    # round() instead of int(): truncation could turn a value that floating-point
    # error left just below the target (e.g. 399.999...) into one pixel too few.
    new_width = round(width * scale_factor)
    new_height = round(height * scale_factor)

    resized_image = cv2.resize(img, (new_width, new_height))

    # cv2.imwrite returns False on failure rather than raising.
    output_path = f'{path}_processed.jpg'
    if not cv2.imwrite(output_path, resized_image):
        raise OSError(f"Could not write processed image: {output_path}")

    return output_path


In [6]:
import base64
from pathlib import Path

# Chat model client and string parser for the prescription-extraction request.
llm = ChatOpenAI(model="gpt-4o-mini")
output_parser = StrOutputParser()

# Resize the source image; process_image writes "<path>_processed.jpg".
image_path = "my_presc4.jpeg"
working_dir = Path.cwd()
process_image(working_dir / image_path)
image_file = working_dir / f"{image_path}_processed.jpg"

# Encode the processed image as base64 so it can be embedded in a data URL.
base64_image_data = base64.b64encode(image_file.read_bytes()).decode("utf-8")

# Instruction text: shows the model the target JSON layout and asks for JSON only.
prompt_text = """The image that you will receive now is an image of a medical prescription. 
This is an  example of a JSON format that we are using to store prescription data.
"patient": {
           "name": "John Doe",
           "age": 45,
           "gender": "male",
           "prescribedBy": "Dr. Sarah Smith"
         },
         "prescription": [
           {
             "medicine": "Paracetamol",
             "dosage": "500mg",
             "timing": ["after lunch"]
           },
           {
             "medicine": "Amoxicillin",
             "dosage": "250mg",
             "timing": ["before lunch", "after lunch"]
           },
           {
             "medicine": "Vitamin D3",
             "dosage": "1000IU",
             "colour": "yellow",
             "timing": ["with breakfast"]
           },
           {
             "medicine": "Metformin",
             "dosage": "850mg",
             "timing": ["after breakfast", "after dinner"]
           }
         ]
        Please fill these values from the image I give you, and return only the JSON string. dont write anything else. 
        """

# A single human message carrying two parts: the instructions and the image.
text_part = {"type": "text", "text": prompt_text}
image_part = {
    "type": "image_url",
    "image_url": {"url": f"data:image/jpeg;base64,{base64_image_data}"},
}
message = HumanMessage(content=[text_part, image_part])

# Send the request and print the model's reply text.
response = llm.invoke([message])
print(response.content)



```json
{
  "patient": {
    "name": "Shubham Shinde",
    "age": 20,
    "gender": "male",
    "prescribedBy": ""
  },
  "prescription": [
    {
      "medicine": "Paracetamol",
      "dosage": "1 tablet",
      "timing": ["before lunch"]
    },
    {
      "medicine": "Isotretinoin",
      "dosage": "1 tablet",
      "timing": ["after dinner"]
    }
  ]
}
```
