In [9]:
!pip install -qU \
    python-dotenv \
    langchain \
    langchain-community \
    openai \
    langchain-openai      

In [2]:
from dotenv import load_dotenv

# Load variables from a .env file into the process environment.
# NOTE(review): presumably this is what supplies OPENAI_API_KEY, which is read
# with os.getenv further down — confirm the .env file defines it.
load_dotenv()

True

In [3]:
import fitz  # PyMuPDF
import os

In [4]:
from pydantic import BaseModel, Field
from typing import Optional, Literal

class AnswerBlock(BaseModel):
    """One extracted value together with the model's self-assessment.

    The three fields mirror the JSON object requested by the prompts in this
    notebook: ``answer``, ``confidence`` and ``reasoning``.
    """

    # The extracted value, kept as free text.
    answer: str
    # Self-reported confidence. The prompts ask for a float between 0 and 1,
    # but no bounds are enforced on this field.
    confidence: float
    # Explanation of how the answer was derived.
    reasoning: str


class BoltFeatures(BaseModel):
    """Structured description of a bolt.

    ``head_type`` and ``drive_type`` are required; every other field is
    optional and defaults to ``None``.
    """

    part_name: Optional[str] = None
    title: Optional[str] = None
    # Declared Optional so that an explicit None/null validates, matching the
    # None default. A bare ``AnswerBlock = None`` keeps the default but rejects
    # an explicit null on pydantic v2.
    diameter: Optional[AnswerBlock] = None
    length: Optional[str] = None
    thread_pitch: Optional[str] = None
    # Field() without a default leaves the field required.
    head_type: AnswerBlock = Field()
    # Restricted to the listed drive names; any other value fails validation.
    drive_type: Literal["phillips", "hex", "slotted", "torx", "allen", "pozidriv", "square", "spanner"] = Field()
    material: Optional[str] = None
    coating: Optional[str] = None
    is_threaded_full: Optional[bool] = None


In [5]:
from langchain.output_parsers import PydanticOutputParser
# Output parser bound to the BoltFeatures schema.
# NOTE(review): none of the graph nodes visible in this notebook reference
# `parser` — confirm whether it is still needed.
parser = PydanticOutputParser(pydantic_object=BoltFeatures)

In [6]:
%%capture --no-stderr
# Install/upgrade the LangGraph-related packages; the cell's stdout is captured
# while stderr is still shown (--no-stderr).
%pip install -U langgraph langchain_community langchain_openai langchain_experimental langchain-chroma pypdf sentence-transformers


In [7]:
import openai
import os
import base64
from openai import OpenAI
from typing import Literal
from typing_extensions import TypedDict
from langgraph.graph import MessagesState, START, END
from langgraph.types import Command
from langchain_openai import ChatOpenAI

In [8]:
# Raw OpenAI SDK client; the key is read from the environment (os.getenv
# returns None when OPENAI_API_KEY is not set).
# NOTE(review): the graph nodes in this notebook call `llm` (ChatOpenAI), not
# this client — confirm whether it is still needed.
client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))

In [9]:
import base64

def load_png_as_base64(path: str) -> str:
    """Read the file at ``path`` and return its bytes as base64 text.

    Args:
        path: Location of the file to read; it is opened in binary mode.

    Returns:
        The file's content, base64-encoded and decoded to a UTF-8 ``str``.
    """
    with open(path, "rb") as handle:
        raw_bytes = handle.read()
    return base64.b64encode(raw_bytes).decode("utf-8")


In [10]:
from langchain_openai import ChatOpenAI

# Use GPT-4o or gpt-4-vision-preview for image support
# Chat model shared by the three extraction nodes (get_diameter, get_head_type,
# get_length); temperature is pinned to 0.0.
llm = ChatOpenAI(model="gpt-4o", temperature=0.0)


In [11]:
from langchain_core.messages import BaseMessage, HumanMessage
from typing_extensions import TypedDict
from typing import Annotated, List
import operator

class BoltState(TypedDict):
    """State dictionary passed between the nodes of the bolt-extraction graph."""

    # Message history. The operator.add annotation marks list concatenation as
    # the way updates to this key are combined.
    messages: Annotated[list[BaseMessage], operator.add]
    # Base64-encoded image; the nodes embed it in a "data:image/png;base64," URL.
    image: str
    # Each of the next three keys is written by the node of the same name
    # (get_diameter, get_head_type, get_length) with the stripped model reply.
    diameter: str
    head_type: str
    length: str

def add_message(state: BoltState, message: BaseMessage):
    """Wrap ``message`` as the ``messages`` update a node should return.

    ``BoltState.messages`` is annotated with ``operator.add``, so the value a
    node returns for that key is concatenated onto the history already held in
    the state. Returning the existing history plus the new message would store
    the old messages twice whenever the history is non-empty, so only the new
    message is returned.

    Args:
        state: Current graph state. Unused; kept so existing call sites
            (``add_message(state, response)``) keep working.
        message: The message to append to the history.

    Returns:
        A one-element list containing ``message``.
    """
    return [message]


In [12]:
def get_diameter(state: BoltState):
    """Ask the vision model for the bolt's nominal diameter.

    Sends a single human message (instruction text plus the state's image as a
    base64 PNG data URL) to ``llm``.

    Args:
        state: Graph state; ``state["image"]`` holds the base64 image payload.

    Returns:
        A partial state update: the model reply under ``messages`` and its
        stripped text content under ``diameter``.
    """
    # The JSON template asks for a diameter (it previously said "<length in mm>",
    # copied from the length prompt), and sentences are separated by spaces.
    prompt = (
        "From the image below, determine the **diameter** of the bolt (e.g., M10 = 10mm nominal). "
        "Explain how it was derived from the image, such as notation like 'M10', 'Ø', or dimension labels. "
        "Return your answer in this JSON format:\n\n"
        '{\n'
        '  "answer": "<diameter in mm>",\n'
        '  "confidence": <float between 0 and 1>,\n'
        '  "reasoning": "<how you arrived at the answer>"\n'
        '}\n\n'
        "Rate your confidence based on clarity of the dimension, presence of labels, and whether your interpretation might be ambiguous."
    )
    message = HumanMessage(content=[
        {"type": "text", "text": prompt},
        {"type": "image_url", "image_url": {"url": "data:image/png;base64," + state["image"]}}
    ])
    response = llm.invoke([message])
    return {
        "messages": add_message(state, response),
        "diameter": response.content.strip()
    }

def get_head_type(state: BoltState):
    """Ask the vision model for the bolt's head type.

    Sends a single human message (instruction text plus the state's image as a
    base64 PNG data URL) to ``llm``.

    Args:
        state: Graph state; ``state["image"]`` holds the base64 image payload.

    Returns:
        A partial state update: the model reply under ``messages`` and its
        stripped text content under ``head_type``.
    """
    # The JSON template now carries a "<head type>" placeholder (it was an empty
    # string), and the two sentences before it are separated by a space.
    prompt = (
        "From the technical drawing, identify the **head type** of the bolt (e.g., hex, socket, pan, etc.). "
        "Explain how it was derived from the image. "
        "Return your answer in this JSON format:\n\n"
        '{\n'
        '  "answer": "<head type>",\n'
        '  "confidence": <float between 0 and 1>,\n'
        '  "reasoning": "<how you arrived at the answer>"\n'
        '}\n\n'
        "Rate your confidence based on clarity of the dimension, presence of labels, and whether your interpretation might be ambiguous."
    )
    message = HumanMessage(content=[
        {"type": "text", "text": prompt},
        {"type": "image_url", "image_url": {"url": "data:image/png;base64," + state["image"]}}
    ])
    response = llm.invoke([message])
    return {
        "messages": add_message(state, response),
        "head_type": response.content.strip()
    }

def get_length(state: BoltState):
    """Ask the vision model for the bolt's length.

    Sends a single human message (instruction text plus the state's image as a
    base64 PNG data URL) to ``llm``.

    Args:
        state: Graph state; ``state["image"]`` holds the base64 image payload.

    Returns:
        A partial state update: the result of ``add_message`` under
        ``messages`` and the stripped reply text under ``length``.
    """
    instructions = (
        "From the image below, extract the **length of the bolt**.\n\n"
        "Return your answer in this JSON format:\n\n"
        '{\n'
        '  "answer": "<length in mm>",\n'
        '  "confidence": <float between 0 and 1>,\n'
        '  "reasoning": "<how you arrived at the answer>"\n'
        '}\n\n'
        "Rate your confidence based on clarity of the dimension, presence of labels, and whether your interpretation might be ambiguous."
    )
    data_url = "data:image/png;base64," + state["image"]
    text_part = {"type": "text", "text": instructions}
    image_part = {"type": "image_url", "image_url": {"url": data_url}}

    reply = llm.invoke([HumanMessage(content=[text_part, image_part])])

    update = {"messages": add_message(state, reply)}
    update["length"] = reply.content.strip()
    return update


In [13]:
from langgraph.graph import StateGraph, END
# Disable cells 20->23 to use this route
""" builder = StateGraph(BoltState)

builder.set_entry_point("get_diameter")
builder.add_node("get_diameter", get_diameter)
builder.add_node("get_head_type", get_head_type)
builder.add_node("get_length", get_length)

# Chain agents
builder.add_edge("get_diameter", "get_head_type")
builder.add_edge("get_head_type", "get_length")
builder.add_edge("get_length", END)

graph = builder.compile()
"""

' builder = StateGraph(BoltState)\n\nbuilder.set_entry_point("get_diameter")\nbuilder.add_node("get_diameter", get_diameter)\nbuilder.add_node("get_head_type", get_head_type)\nbuilder.add_node("get_length", get_length)\n\n# Chain agents\nbuilder.add_edge("get_diameter", "get_head_type")\nbuilder.add_edge("get_head_type", "get_length")\nbuilder.add_edge("get_length", END)\n\ngraph = builder.compile()\n'

In [14]:
# Disable cell 15 to use this route
def router_node(state: BoltState):
    """Entry node that does no work of its own.

    Every state key except ``messages`` is passed through unchanged.
    ``messages`` is combined with ``operator.add``, so echoing the current
    history back as an update would append it to itself.

    Args:
        state: Current graph state.

    Returns:
        A copy of ``state`` without the ``messages`` key.
    """
    return {key: value for key, value in state.items() if key != "messages"}


In [15]:
def routing_logic(state: BoltState):
    """Name the nodes to run after the router: always all three extractors.

    Args:
        state: Current graph state; not inspected.

    Returns:
        A new list with the three extraction node names.
    """
    targets = ("get_diameter", "get_head_type", "get_length")
    return list(targets)

In [16]:
def join_results(state: BoltState) -> BoltState:
    """Fan-in node reached after the extraction nodes.

    Prints a progress line and passes through every state key except
    ``messages``. That key is combined with ``operator.add``, so returning the
    accumulated history as an update would append it to itself and duplicate
    every message.

    Args:
        state: Graph state holding the extraction results.

    Returns:
        A copy of ``state`` without the ``messages`` key.
    """
    print("Merging final state")
    return {key: value for key, value in state.items() if key != "messages"}


In [17]:
from langgraph.graph import StateGraph, END

def build_graph():
    """Assemble and compile the fan-out / fan-in extraction graph.

    Layout: ``router`` (entry point) -> targets chosen by ``routing_logic``
    -> ``join`` -> ``END``.

    Returns:
        The object returned by ``StateGraph.compile()``.
    """
    workflow = StateGraph(BoltState)

    # Extraction nodes, in registration order.
    extractors = {
        "get_diameter": get_diameter,
        "get_head_type": get_head_type,
        "get_length": get_length,
    }

    workflow.add_node("router", router_node)
    workflow.set_entry_point("router")
    workflow.add_conditional_edges("router", routing_logic)

    for node_name, node_fn in extractors.items():
        workflow.add_node(node_name, node_fn)
    workflow.add_node("join", join_results)

    # Every extractor feeds the join node, which ends the run.
    for node_name in extractors:
        workflow.add_edge(node_name, "join")
    workflow.add_edge("join", END)

    return workflow.compile()


In [27]:
from IPython.display import Image, display

try:
    # Build the graph in this cell so rendering does not depend on a `graph`
    # variable left over from another cell or an earlier kernel session.
    graph = build_graph()
    display(Image(graph.get_graph().draw_mermaid_png()))
except Exception as exc:
    # Rendering is best-effort, but report why it failed instead of hiding it.
    print(f"Could not render the graph diagram: {exc!r}")

In [19]:
"""if __name__ == "__main__":
    #Convert your PNG to base64
    image_b64 = load_png_as_base64("102.jpg")  # your image path here

    # Initial state
    input_state = {
        "messages": [],
        "image": image_b64,
        "diameter": "",
        "head_type": "",
        "length": ""
    }

    result = graph.invoke(input_state)

    print("\n Diameter:", result["diameter"])
    print("Head Type:", result["head_type"])
    print("Length:", result["length"])
"""

'if __name__ == "__main__":\n    #Convert your PNG to base64\n    image_b64 = load_png_as_base64("102.jpg")  # your image path here\n\n    # Initial state\n    input_state = {\n        "messages": [],\n        "image": image_b64,\n        "diameter": "",\n        "head_type": "",\n        "length": ""\n    }\n\n    result = graph.invoke(input_state)\n\n    print("\n Diameter:", result["diameter"])\n    print("Head Type:", result["head_type"])\n    print("Length:", result["length"])\n'

In [26]:
def main(image_path: str = "102.jpg"):
    """Run the extraction graph on one drawing and print the three results.

    Args:
        image_path: Path of the image file to analyse. Defaults to
            ``"102.jpg"``, the file this notebook was run against.
    """
    # NOTE(review): the nodes label the payload as image/png while the default
    # file has a .jpg extension — confirm the model accepts the mismatch.
    image_b64 = load_png_as_base64(image_path)
    graph = build_graph()
    # Initial state: no messages yet and empty slots for the nodes to fill.
    input_state = {
        "messages": [],
        "image": image_b64,
        "diameter": "",
        "head_type": "",
        "length": ""
    }
    final_state = graph.invoke(input_state)
    print("Final state:", final_state.get("head_type"))
    print("Final state:", final_state.get("diameter"))
    print("Final state:", final_state.get("length"))


if __name__ == "__main__":
    main()

Merging final state
Final state: ```json
{
  "answer": "socket",
  "confidence": 0.9,
  "reasoning": "The technical drawing shows a bolt with a recessed hexagonal shape on the head, which is characteristic of a socket head. The presence of a hexagonal recess is a clear indicator of a socket head type. The drawing includes dimensions and angles that are typical for socket head bolts, and the clarity of the hexagonal shape in the top view supports this identification."
}
```
Final state: ```json
{
  "answer": "12",
  "confidence": 0.9,
  "reasoning": "The diameter of the bolt is indicated by the notation 'ØD' in the technical drawing. The dimension for 'ØD' is labeled as 12, which suggests a nominal diameter of 12mm. The clarity of the dimension and the standard notation used contribute to a high confidence level."
}
```
Final state: ```json
{
  "answer": "The length is not explicitly labeled in the image.",
  "confidence": 0.5,
  "reasoning": "The image contains various dimensions and l