In [1]:
import dspy

In [2]:
!pip install --upgrade dspy




[notice] A new release of pip is available: 25.1.1 -> 25.2
[notice] To update, run: python.exe -m pip install --upgrade pip


In [2]:
import os
from dotenv import load_dotenv

# Load environment variables from .env
# (the `True` in this cell's recorded output is load_dotenv()'s return value).
# NOTE(review): presumably this supplies the API key that dspy.LM needs later;
# no visible cell reads os.environ directly — confirm.
load_dotenv()

True

In [3]:
import base64


In [29]:
from pydantic import BaseModel
# Structured result used as the type of the `confidence` output field on the
# three Get*Signature classes; each agent returns one instance of it.
# Documented with `#` comments rather than a class docstring so the model's
# generated schema is left untouched.
class AnswerBlock(BaseModel):
    # The extracted value as text (recorded runs show '10', '30', 'Hex').
    answer: str
    # Model-reported confidence. Recorded runs show values such as 0.94;
    # presumably a 0-1 scale, but no range is enforced here.
    confidence: float
    # Free-text explanation of how the answer was derived from the OCR text.
    reasoning: str

In [30]:
from dspy import Signature, InputField, OutputField

# DSPy signature for the diameter task. The class docstring and every `desc`
# string below are runtime text (DSPy uses them when building the prompt), so
# they are code, not documentation: change them only deliberately.
class GetDiameterSignature(Signature):
    """Determine bolt diameter from extracted image text."""

    # Input: OCR text; GetDiameterAgent.forward passes at most 8000 characters.
    ocr_text: str = InputField(desc="Text extracted from the image")
    # Output: plain-string diameter. GetDiameterAgent.forward does not return it.
    diameter: str = OutputField(desc="Diameter of the bolt in mm")
    # Output: despite the field name, this is a full AnswerBlock
    # (answer + confidence + reasoning). It is the only field that
    # GetDiameterAgent.forward returns, under the key "diameter".
    confidence: AnswerBlock =OutputField(desc="Answer")
    # Disabled: a separate `reasoning: str` output field. AnswerBlock.reasoning
    # already carries the explanation.


In [31]:
import dspy
from dspy import Predict
from PIL import Image
import base64
import pytesseract
import io

class GetDiameterAgent(dspy.Module):
    """DSPy module that OCRs a base64-encoded image and asks the LM for the bolt diameter.

    Args:
        max_ocr_chars: Maximum number of OCR characters passed to the LM.
            Defaults to 8000, the limit that used to be hard-coded in
            ``forward``. ``None`` disables truncation.
    """

    def __init__(self, max_ocr_chars: int = 8000):
        super().__init__()
        self.max_ocr_chars = max_ocr_chars
        self.reason = Predict(GetDiameterSignature)

    def extract_text(self, base64_image: str) -> str:
        """Decode a base64-encoded image and return the text pytesseract reads from it.

        Args:
            base64_image: Base64 text of the encoded image file.

        Returns:
            The OCR output of ``pytesseract.image_to_string``.
        """
        image_data = base64.b64decode(base64_image)
        # Open the image as a context manager so Pillow releases it as soon as
        # OCR has finished instead of whenever the object is garbage-collected.
        with Image.open(io.BytesIO(image_data)) as image:
            return pytesseract.image_to_string(image)

    def forward(self, state: dict) -> dict:
        """Run OCR on ``state["image"]`` and query the LM for the diameter.

        Args:
            state: Mapping whose ``"image"`` entry is the base64-encoded image.
                Other keys are ignored.

        Returns:
            ``{"diameter": AnswerBlock}``. The value is the signature's
            structured ``confidence`` output; the plain ``diameter`` string the
            signature also produces is not returned.

        Raises:
            KeyError: If ``state`` has no ``"image"`` entry.
        """
        # Truncate so an unusually long OCR result cannot blow up the prompt.
        ocr_text = self.extract_text(state["image"])[: self.max_ocr_chars]
        response = self.reason(ocr_text=ocr_text)

        return {
            "diameter": response.confidence
        }


In [32]:
# DSPy signature for the length task. The class docstring and every `desc`
# string below are runtime text (DSPy uses them when building the prompt), so
# they are code, not documentation: change them only deliberately.
class GetLengthSignature(Signature):
    """Determine bolt length from extracted image text."""

    # Input: OCR text; GetLengthAgent.forward passes at most 8000 characters.
    ocr_text: str = InputField(desc="Text extracted from the image")
    # Output: plain-string length. GetLengthAgent.forward does not return it.
    length: str = OutputField(desc="Length of the bolt in mm")
    # Output: despite the field name, this is a full AnswerBlock
    # (answer + confidence + reasoning). It is the only field that
    # GetLengthAgent.forward returns, under the key "length".
    confidence: AnswerBlock =OutputField(desc="Answer")
    # Disabled: a separate `reasoning: str` output field explaining how the
    # length was determined. AnswerBlock.reasoning already carries it.

In [33]:
class GetLengthAgent(dspy.Module):
    """DSPy module that OCRs a base64-encoded image and asks the LM for the bolt length.

    Args:
        max_ocr_chars: Maximum number of OCR characters passed to the LM.
            Defaults to 8000, the limit that used to be hard-coded in
            ``forward``. ``None`` disables truncation.
    """

    def __init__(self, max_ocr_chars: int = 8000):
        super().__init__()
        self.max_ocr_chars = max_ocr_chars
        self.reason = Predict(GetLengthSignature)

    def extract_text(self, base64_image: str) -> str:
        """Decode a base64-encoded image and return the text pytesseract reads from it.

        Args:
            base64_image: Base64 text of the encoded image file.

        Returns:
            The OCR output of ``pytesseract.image_to_string``.
        """
        image_data = base64.b64decode(base64_image)
        # Open the image as a context manager so Pillow releases it as soon as
        # OCR has finished instead of whenever the object is garbage-collected.
        with Image.open(io.BytesIO(image_data)) as image:
            return pytesseract.image_to_string(image)

    def forward(self, state: dict) -> dict:
        """Run OCR on ``state["image"]`` and query the LM for the length.

        Args:
            state: Mapping whose ``"image"`` entry is the base64-encoded image.
                Other keys are ignored.

        Returns:
            ``{"length": AnswerBlock}``. The value is the signature's
            structured ``confidence`` output; the plain ``length`` string the
            signature also produces is not returned.

        Raises:
            KeyError: If ``state`` has no ``"image"`` entry.
        """
        # Truncate so an unusually long OCR result cannot blow up the prompt.
        ocr_text = self.extract_text(state["image"])[: self.max_ocr_chars]
        response = self.reason(ocr_text=ocr_text)

        return {
            "length": response.confidence
        }

In [34]:
# DSPy signature for the head-type task. The class docstring and every `desc`
# string below are runtime text (DSPy uses them when building the prompt), so
# they are code, not documentation: change them only deliberately.
class GetHeadTypeSignature(Signature):
    """Determine bolt head type from extracted image text. """

    # Input: OCR text; GetHeadTypeAgent.forward passes at most 8000 characters.
    ocr_text: str = InputField(desc="Text extracted from the image")
    # Output: plain-string head type. GetHeadTypeAgent.forward does not return it.
    head_type: str = OutputField(desc="Head type of the bolt")
    # Output: despite the field name, this is a full AnswerBlock
    # (answer + confidence + reasoning). It is the only field that
    # GetHeadTypeAgent.forward returns, under the key "head_type".
    confidence: AnswerBlock =OutputField(desc="Answer")
    # Disabled: a separate `reasoning: str` output field explaining how the
    # head type was determined. AnswerBlock.reasoning already carries it.

In [35]:
class GetHeadTypeAgent(dspy.Module):
    """DSPy module that OCRs a base64-encoded image and asks the LM for the bolt head type.

    Args:
        max_ocr_chars: Maximum number of OCR characters passed to the LM.
            Defaults to 8000, the limit that used to be hard-coded in
            ``forward``. ``None`` disables truncation.
    """

    def __init__(self, max_ocr_chars: int = 8000):
        super().__init__()
        self.max_ocr_chars = max_ocr_chars
        self.reason = Predict(GetHeadTypeSignature)

    def extract_text(self, base64_image: str) -> str:
        """Decode a base64-encoded image and return the text pytesseract reads from it.

        Args:
            base64_image: Base64 text of the encoded image file.

        Returns:
            The OCR output of ``pytesseract.image_to_string``.
        """
        image_data = base64.b64decode(base64_image)
        # Open the image as a context manager so Pillow releases it as soon as
        # OCR has finished instead of whenever the object is garbage-collected.
        with Image.open(io.BytesIO(image_data)) as image:
            return pytesseract.image_to_string(image)

    def forward(self, state: dict) -> dict:
        """Run OCR on ``state["image"]`` and query the LM for the head type.

        Args:
            state: Mapping whose ``"image"`` entry is the base64-encoded image.
                Other keys are ignored.

        Returns:
            ``{"head_type": AnswerBlock}``. The value is the signature's
            structured ``confidence`` output; the plain ``head_type`` string
            the signature also produces is not returned.

        Raises:
            KeyError: If ``state`` has no ``"image"`` entry.
        """
        # Truncate so an unusually long OCR result cannot blow up the prompt.
        ocr_text = self.extract_text(state["image"])[: self.max_ocr_chars]
        response = self.reason(ocr_text=ocr_text)

        return {
            "head_type": response.confidence
        }

In [36]:
def main(image_path: str = "page1_img1.png"):
    """Run the diameter, length and head-type agents on one image and print each result.

    Args:
        image_path: Path of the image file to analyse. Defaults to
            "page1_img1.png", the file this notebook was originally run on.

    Raises:
        OSError: If ``image_path`` cannot be opened for reading.
    """
    # Configure the LLM (adjust model if needed)
    # NOTE(review): max_tokens=None together with an explicit
    # max_completion_tokens looks like a workaround for reasoning-model token
    # parameters; whether dspy.LM still needs (or accepts) both depends on the
    # installed DSPy version — confirm before upgrading.
    dspy.settings.configure(
        lm=dspy.LM(model="gpt-5",max_tokens=None,
    max_completion_tokens=20_000,temperature=1)
    )

    # Load image and encode to base64
    with open(image_path, "rb") as f:
        image_base64 = base64.b64encode(f.read()).decode("utf-8")

    # Construct state. The agents defined in this notebook only read "image";
    # "history" is kept so the state shape stays as before.
    state = {
        "image": image_base64,
        "history": []
    }

    # Invoke each module through __call__ (agent(state)) rather than
    # agent.forward(state): DSPy discourages calling forward() directly because
    # it bypasses the module's call-time machinery.
    agents = (GetDiameterAgent(), GetLengthAgent(), GetHeadTypeAgent())
    results = [agent(state) for agent in agents]

    # Print results only after every agent has finished, in diameter, length,
    # head-type order.
    for result in results:
        print(result)

# Script-style entry point. The recorded output under this cell shows that the
# guard is true when the cell is executed in the notebook, so main() runs here.
if __name__ == "__main__":
    main()

{'diameter': AnswerBlock(answer='10', confidence=0.94, reasoning="The OCR text includes 'MIOX30' and 'MIO-6g', which are common OCR misreads of 'M10x30' and 'M10-6g'. 'M10' denotes a 10 mm nominal thread diameter.")}
{'length': AnswerBlock(answer='30', confidence=0.95, reasoning='The drawing text includes "SCREW & WSHR M10x30 HEX" (OCR shows as "MIOX30"). In standard fastener notation, M10×30 specifies a 10 mm thread diameter and 30 mm length. Therefore, the bolt length is 30 mm.')}
{'head_type': AnswerBlock(answer='Hex', confidence=0.9, reasoning="The drawing text includes 'SCREW & WSHR M10X30 HEX ...' and specifies '(15 A/F NOMINAL)', which is an across-flats dimension indicative of a hexagon head.")}
