In [83]:
import dspy
import os
from openai import OpenAI
import base64
from PIL import Image
from io import BytesIO
import requests
from pdf2image import convert_from_path


api_key=os.environ.get("OPENAI_API_KEY")
client = OpenAI(api_key=api_key)

# Set up the LM.
turbo = dspy.OpenAI(model='gpt-4o')
dspy.settings.configure(lm=turbo)


In [85]:
#convert pdf to base64 image for GPT input

def encode_image_from_pil(image):
    buffer = BytesIO()  # Create an in-memory buffer
    image.save(buffer, format="JPEG", quality=30)  # Save the image to the buffer in JPEG format
    buffer.seek(0)  # Reset buffer position to the start
    return base64.b64encode(buffer.read()).decode('utf-8')  # Encode to base64 and return

pdf_path = 'Data/187716-1 Klantorder.pdf'
tegel_table = convert_from_path(pdf_path, first_page=3, last_page=3)
tegel_table = tegel_table[0]

vloertekening = egel_table = convert_from_path(pdf_path, first_page=6, last_page=6)
vloertekening = vloertekening[0]

base64_image = encode_image_from_pil(tegel_table)
print(base64_image)

/9j/4AAQSkZJRgABAQAAAQABAAD/2wBDABsSFBcUERsXFhceHBsgKEIrKCUlKFE6PTBCYFVlZF9VXVtqeJmBanGQc1tdhbWGkJ6jq62rZ4C8ybqmx5moq6T/2wBDARweHigjKE4rK06kbl1upKSkpKSkpKSkpKSkpKSkpKSkpKSkpKSkpKSkpKSkpKSkpKSkpKSkpKSkpKSkpKSkpKT/wAARCAkjBnYDASIAAhEBAxEB/8QAHwAAAQUBAQEBAQEAAAAAAAAAAAECAwQFBgcICQoL/8QAtRAAAgEDAwIEAwUFBAQAAAF9AQIDAAQRBRIhMUEGE1FhByJxFDKBkaEII0KxwRVS0fAkM2JyggkKFhcYGRolJicoKSo0NTY3ODk6Q0RFRkdISUpTVFVWV1hZWmNkZWZnaGlqc3R1dnd4eXqDhIWGh4iJipKTlJWWl5iZmqKjpKWmp6ipqrKztLW2t7i5usLDxMXGx8jJytLT1NXW19jZ2uHi4+Tl5ufo6erx8vP09fb3+Pn6/8QAHwEAAwEBAQEBAQEBAQAAAAAAAAECAwQFBgcICQoL/8QAtREAAgECBAQDBAcFBAQAAQJ3AAECAxEEBSExBhJBUQdhcRMiMoEIFEKRobHBCSMzUvAVYnLRChYkNOEl8RcYGRomJygpKjU2Nzg5OkNERUZHSElKU1RVVldYWVpjZGVmZ2hpanN0dXZ3eHl6goOEhYaHiImKkpOUlZaXmJmaoqOkpaanqKmqsrO0tba3uLm6wsPExcbHyMnK0tPU1dbX2Nna4uPk5ebn6Onq8vP09fb3+Pn6/9oADAMBAAIRAxEAPwDpqKSigBaSiigBaKSigBaKSigBaKSigBaKSloAKKKKACiikoAWikooAWikooAWikooAWikooAWikooAWikooAWikooAWikooAKKKKAFopKKAFopKKAFopKKAFopKKAFopKKAFopKKAFopKKAFpKKKAFopKKAFpKKKAClpKKAFo

In [109]:
#call gpt to parse image
def get_csv_tabe(contract_base64):
  headers = {
    "Content-Type": "application/json",
    "Authorization": f"Bearer {api_key}"
  }

  payload = {
    "model": "gpt-4o-mini",
    "messages": [
      {
        "role": "user",
        "content": [
          {
            "type": "text",
            "text": "parse this image and give me the table weith each row in the table as a newline and each column seperated with a ;. Only respond with the table"
          },
          {
            "type": "image_url",
            "image_url": {
              "url": f"data:image/jpeg;base64,{contract_base64}"
            }
          }
        ]
      }
    ],
    "max_tokens": 300
  }

  response = requests.post("https://api.openai.com/v1/chat/completions", headers=headers, json=payload)


  text = response.json()['choices'][0]['message']['content']
  return text


In [80]:
#convert to structured output

from io import StringIO
import pandas as pd
text = text.lstrip('```\n')
csv_data = StringIO(text)

df = pd.read_csv(csv_data, delimiter=';')
df


Unnamed: 0,aantal,artikelcode,kleur,omschrijving,bedrag
0,1 st,VTVW,,"Begane grond , Vloertegels","€ 10.066,50"
1,1 st,VTB,,De vloertegels buxy 706 invoegen,
2,1 st,VTPL,,Geen infiltratiewerken opnemen t.b.v. aanluiti...,
3,1 st,VTKEU,,De vloertegels ook onder de keuken doorleggen,
4,1 st,VTBOMR,,De matramondaring in hal rondom afwerken met e...,
5,61.80 m²,1442553,White,Vloertegels 250 x 1500 x 11 mm PorCST 250X1500...,
6,46.8 m²,TVB099,,"Het aanbrengen van vloertegels hal, keuken of ...",
7,1 st,TVB100,,Toeslag aanbrengen L-profiel t.b.v. afwerking ...,
8,2 st,PLR,,Roestvrijstaal L-profiel kleur rvs,
9,Subtotal,"Begane grond , Vloertegels","€ 10.066,50",,


In [101]:
class GenerateQuestions(dspy.Signature):
    """Based on the 'omschrijving' column, come up with questions that need to be validated"""

    order_lines = dspy.InputField(desc="Table with 'omschrijving' column")
    questions = dspy.OutputField(desc="list of questions")


In [102]:
class AnswerQuestions(dspy.Signature):
    """Validate if the questions can be answered based on the contract"""

    questions = dspy.InputField(desc="questions")
    answers = dspy.OutputField(desc="answers")


In [127]:
class Tile_order_checker_agent(dspy.Module):
    def __init__(self):
         super().__init__()

         self.generate_questions = dspy.ChainOfThought(GenerateQuestions)
        #  self.answer_questions = dspy.ChainOfThought(AnswerQuestions)

    def forward(self, contract):
         order_lines = get_csv_tabe(contract)
         questions = self.generate_questions(order_lines=order_lines)
        #  answers = self.answer_questions(questions)

         return questions





In [128]:
uncompiles_tile_order_checker_agent = Tile_order_checker_agent()
pred = uncompiles_tile_order_checker_agent(base64_image)

print(f"Predicted Answer: {pred}")

Predicted Answer: Prediction(
    rationale="produce the questions. We need to analyze the 'omschrijving' column to identify any potential issues or ambiguities that require validation. The 'omschrijving' column contains descriptions of order lines, which may include product details, instructions, or specifications. To ensure accuracy and clarity, we should consider the following steps:\n\n1. Identify any incomplete or unclear descriptions that may need further clarification.\n2. Check for consistency in the use of terms and measurements.\n3. Look for any missing information that is critical for understanding the order line.\n4. Verify that the descriptions align with the expected format or standards.",
    questions="1. Are there any descriptions in the 'omschrijving' column that are incomplete or unclear, requiring further clarification?\n2. Is there consistency in the use of"
)


In [81]:
#create signature
class BasicQA(dspy.Signature):
    """check of het type vloertegel is benoemd in de tekst"""

    question = dspy.InputField()
    answer = dspy.OutputField(desc="ja of nee antwoord")


#create example input from parsed table
qa_pair = dspy.Example(question=text, answer="Ja")

generate_answer = dspy.Predict(BasicQA) #define predictor
pred = generate_answer(question=qa_pair.question) #test prediction

print(f"Question: {qa_pair.question}")
print(f"Predicted Answer: {pred.answer}")

Question: aantal;artikelcode;kleur;omschrijving;bedrag
1 st;VTVW;;Begane grond , Vloertegels;€ 10.066,50
1 st;VTB;;De vloertegels buxy 706 invoegen;
1 st;VTPL;;Geen infiltratiewerken opnemen t.b.v. aanluitings vloertegels op de wand.;
1 st;VTKEU;;De vloertegels ook onder de keuken doorleggen;
1 st;VTBOMR;;De matramondaring in hal rondom afwerken met een L-profiel kleur RVS;
61.80 m²;1442553;White;Vloertegels 250 x 1500 x 11 mm PorCST 250X1500 Tanza.White 100297209 11Mm Mat Ret;
46.8 m²;TVB099;;Het aanbrengen van vloertegels hal, keuken of gedeelte tegels van 30x180cm. (exclusief tegels) 1 lijn (butter-floating method) per m2;
1 st;TVB100;;Toeslag aanbrengen L-profiel t.b.v. afwerking rand per stuk excl. profiel;
2 st;PLR;;Roestvrijstaal L-profiel kleur rvs;
Subtotal ;Begane grond , Vloertegels ;€ 10.066,50
Tota
Predicted Answer: ja
