In [114]:
from langchain_google_genai import ChatGoogleGenerativeAI
from dotenv import load_dotenv
from langchain_core.prompts import ChatPromptTemplate, FewShotChatMessagePromptTemplate
import google.generativeai as genai


In [115]:
load_dotenv()

True

In [116]:
llm = ChatGoogleGenerativeAI(
    model= 'gemini-2.5-flash',
    temperature = 0

)

In [117]:
import base64

img_path = 'C:/Users/DELL/OneDrive/Desktop/FA.jpg'



In [118]:

def read_image_base64(img_path):
    with open (img_path,'rb') as img_data:
        img_base64 = base64.b64encode(img_data.read()).decode("utf-8")

    return img_base64



In [119]:
from langchain_core.output_parsers import PydanticOutputParser
from pydantic import BaseModel,Field
from typing import List

class InvoiceItem(BaseModel):
    description: str = Field(..., description="item's description")
    taxes: float = Field(..., description="item's taxes")


class InvoiceData(BaseModel):
    buyer_name: str = Field(..., description="buyer's name in the invoice")
    invoice_date: str = Field(..., description="invoice date in the invoice")
    items: List [InvoiceItem] = Field(..., description="items' data in the invoice")

parser = PydanticOutputParser(pydantic_object=InvoiceData)

In [120]:
examples = [
    {
        "invoice_img": read_image_base64(img_path),
        "Invoice_data": InvoiceData(
                            buyer_name="Deco Addict",
                            invoice_date="04-04-2015",
                            items= [
                                InvoiceItem(
                                    description='Restraunt',
                                    taxes= 0.2
                                ),
                                InvoiceItem(
                                    description='Architect sur feuilles de temps',
                                    taxes= 0.2
                                )
                            ]
        ).model_dump_json()
    }
]

In [121]:
few_shot_example_prompt = ChatPromptTemplate.from_messages([
    (
        "user",
        [
            {
                "type": "image",
                "source_type": "base64",
                "data": "{invoice_img}",
                "mime_type": "image/jpeg",
            }
        ],
    ),
    ("assistant", "{Invoice_data}")
])

few_shot_prompt = FewShotChatMessagePromptTemplate(
    example_prompt= few_shot_example_prompt,
    examples=examples
)


if __name__ == "__main__":

    for msg in few_shot_prompt.invoke({}).to_messages():
        msg.pretty_print()


[{'type': 'image', 'source_type': 'base64', 'data': '/9j/4AAQSkZJRgABAQAAAQABAAD/2wBDAAgGBgcGBQgHBwcJCQgKDBQNDAsLDBkSEw8UHRofHh0aHBwgJC4nICIsIxwcKDcpLDAxNDQ0Hyc5PTgyPC4zNDL/2wBDAQgJCQwLDBgNDRgyIRwhMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjL/wAARCAkjBnUDASIAAhEBAxEB/8QAHwAAAQUBAQEBAQEAAAAAAAAAAAECAwQFBgcICQoL/8QAtRAAAgEDAwIEAwUFBAQAAAF9AQIDAAQRBRIhMUEGE1FhByJxFDKBkaEII0KxwRVS0fAkM2JyggkKFhcYGRolJicoKSo0NTY3ODk6Q0RFRkdISUpTVFVWV1hZWmNkZWZnaGlqc3R1dnd4eXqDhIWGh4iJipKTlJWWl5iZmqKjpKWmp6ipqrKztLW2t7i5usLDxMXGx8jJytLT1NXW19jZ2uHi4+Tl5ufo6erx8vP09fb3+Pn6/8QAHwEAAwEBAQEBAQEBAQAAAAAAAAECAwQFBgcICQoL/8QAtREAAgECBAQDBAcFBAQAAQJ3AAECAxEEBSExBhJBUQdhcRMiMoEIFEKRobHBCSMzUvAVYnLRChYkNOEl8RcYGRomJygpKjU2Nzg5OkNERUZHSElKU1RVVldYWVpjZGVmZ2hpanN0dXZ3eHl6goOEhYaHiImKkpOUlZaXmJmaoqOkpaanqKmqsrO0tba3uLm6wsPExcbHyMnK0tPU1dbX2Nna4uPk5ebn6Onq8vP09fb3+Pn6/9oADAMBAAIRAxEAPwD3+iiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACi

In [122]:
msgs = ChatPromptTemplate.from_messages([
    ('system', '''You are an OCR assistant. 
     Extract data from the invoice image strictly following this schema:\n{output_schema}'''),
     few_shot_prompt,
    {
        "role":"user",
        "content":[
            {
                "type":"image",
                "source_type":"base64",
                'data':"{image_data}",
                "mime_type":"image/jpeg",
            },
        ],
    },

])

In [123]:
pipeline = msgs | llm | parser

In [124]:
def get_invoice_data(img_path0):
    output = pipeline.invoke({
        "image_data": read_image_base64(img_path0),
        "output_schema": parser.get_format_instructions()
    })
    return output
