# Example Output

```python
{
    "title": string,
    "paragraphs": string,
    "media": {
        "count": 2
    }
}
```

# Import Packages

In [1]:
import getpass
import os

# SECURITY: never store API keys in the notebook itself. Any key that was
# previously committed in this cell must be treated as leaked and revoked.
# The key is taken from the environment when it is already set; otherwise it is
# requested interactively (without echo) and kept only for this kernel session.
if not os.environ.get("OPENAI_API_KEY"):
    os.environ["OPENAI_API_KEY"] = getpass.getpass("OpenAI API key: ")

In [2]:
import base64
from typing import Literal, List
from pydantic import BaseModel, Field

In [12]:
from langchain_openai import ChatOpenAI

from langchain_core.messages import HumanMessage, SystemMessage, AIMessage
from langchain_core.prompts import (
    PromptTemplate,
    ChatPromptTemplate,
    HumanMessagePromptTemplate,
    MessagesPlaceholder,
)

In [7]:
from langgraph.checkpoint.memory import MemorySaver
from langgraph.graph import END, START, StateGraph, MessagesState

# Body: Load the Slide Image and Define the Graph Nodes

In [17]:
# Slide to analyse; the relative path is resolved against the current working directory.
image_path = "sample_slides/Beige Scrapbook Art and History Museum Presentation pg10.png"

# Read the slide's raw bytes and keep them as base64 text in `image_data`.
with open(image_path, mode="rb") as file:
    image_data = str(base64.b64encode(file.read()), encoding="utf-8")

In [6]:
# `model=` selects which OpenAI model is called. `name=` only labels the
# runnable and would leave the library's default model in place.
model = ChatOpenAI(model="gpt-4o")

In [9]:
def should_continue(state: MessagesState):
    """Choose the next graph step from the most recent message.

    Args:
        state: Graph state; only ``state["messages"]`` is read.

    Returns:
        The string ``"tools"`` when the last message has a truthy
        ``tool_calls`` attribute, otherwise ``END``.
    """
    latest = state["messages"][-1]
    # No tool call requested: stop here and reply to the user.
    if not latest.tool_calls:
        return END
    # The LLM asked for a tool, so route to the "tools" node.
    return "tools"

In [16]:
# Define the function that calls the model
def call_model(state: MessagesState):
    """Send the slide-extraction prompt, the slide image and the chat history to the model.

    The prompt consists of the system instructions, one user message holding
    the base64-encoded slide (module-level ``image_data``) and the messages
    already stored in ``state["messages"]``.

    Args:
        state: Graph state; only ``state["messages"]`` is read.

    Returns:
        A dict ``{"messages": [response]}`` where ``response`` is whatever
        ``model`` returns for the rendered prompt.
    """
    import mimetypes  # local import keeps this cell runnable on its own

    # Declare the MIME type that matches the actual file (the sample slide is
    # a PNG); fall back to PNG when the extension is unknown.
    mime_type = mimetypes.guess_type(image_path)[0] or "image/png"

    slide_extraction_prompt = ChatPromptTemplate.from_messages(
        [
            SystemMessage(
                """You are a helpful assistant. Your task is to generate structured outputs based on the provided schema
    Ensure the output matches the schema and validate it if necessary.

    These are element type that you have to extract:
    + Title: The primary, largest text at the top of the slide. There is only one title per slide.
    + Header: The secondary, large text positioned below the title. It is the second largest text on the slide.
    + Sub-header: The tertiary, medium-sized text located below the header. It is the third largest text on the slide.
    + Paragraph: Smaller text that can appear under the title, header, or sub-header. It provides detailed information or content.
    + Media (Image): Visual elements such as images, which can be placed anywhere on the slide.
    """
            ),
            # A literal message (not a template tuple) so the large base64
            # payload is passed through untouched instead of being parsed
            # for template variables.
            HumanMessage(
                content=[
                    {
                        "type": "image_url",
                        "image_url": {"url": f"data:{mime_type};base64,{image_data}"},
                    }
                ]
            ),
            MessagesPlaceholder(variable_name="messages"),
        ]
    )
    # Pipe the prompt into the model. Previously the prompt was built but never
    # used and the model was handed a raw dict, so neither the instructions nor
    # the image reached it.
    chain = slide_extraction_prompt | model
    response = chain.invoke({"messages": state["messages"]})
    # We return a list, because this will get added to the existing list
    return {"messages": [response]}