In [15]:
from openai import OpenAI
from openai.types.beta.threads.message_create_params import (
    Attachment,
    AttachmentToolFileSearch,
)

# Ask an OpenAI assistant (with the file_search tool) to turn a PDF into
# JSON text chunks, then print the assistant's reply.
# NOTE(review): this is an absolute, machine-specific path; consider deriving
# it from a configurable data directory so the notebook runs elsewhere.
filename = "/home/tyson/echo/resources/example_dataset/robotics/saycan.pdf"
prompt = "Generate a json file of text chunks I can use for a RAG application"

# With no arguments the client takes its credentials from the environment
# rather than from anything hardcoded in this notebook.
client = OpenAI()

# Create assistant with the file_search tool enabled.
pdf_assistant = client.beta.assistants.create(
    model="gpt-4o",
    description="An assistant to generate text chunks from a document.",
    tools=[{"type": "file_search"}],
    name="PDF assistant",
)

# Create thread
thread = client.beta.threads.create()

# Upload the PDF. The context manager closes the local file handle as soon as
# the upload call returns (the original left the handle open).
with open(filename, "rb") as pdf_file:
    file = client.files.create(file=pdf_file, purpose="assistants")

# Add the user message to the thread, attaching the uploaded file so the
# file_search tool can read it.
client.beta.threads.messages.create(
    thread_id=thread.id,
    role="user",
    attachments=[
        Attachment(
            file_id=file.id, tools=[AttachmentToolFileSearch(type="file_search")]
        )
    ],
    content=prompt,
)

# Run thread and block until the run reaches a terminal state.
run = client.beta.threads.runs.create_and_poll(
    thread_id=thread.id, assistant_id=pdf_assistant.id, timeout=1000
)

if run.status != "completed":
    # Single formatted message: passing two positional args to the exception
    # would render as a tuple repr instead of readable text.
    raise RuntimeError(f"Run failed: {run.status}")

messages_cursor = client.beta.threads.messages.list(thread_id=thread.id)
messages = list(messages_cursor)

# Output text
# NOTE(review): index 0 is assumed to be the assistant's reply, i.e. the list
# is returned newest-first by default; confirm against the API's `order`
# parameter. Also assumes the first content part is a text part.
res_txt = messages[0].content[0].text.value
print(res_txt)

Here is a sample of text chunks extracted from the provided document, formatted as a JSON file for use in a RAG (Retrieval-Augmented Generation) application:

```json
{
  "text_chunks": [
    {
      "chunk_id": 1,
      "text": "Large language models can encode a wealth of semantic knowledge about the world. Such knowledge could be extremely useful to robots aiming to act upon high-level, temporally extended instructions expressed in natural language. However, a significant weakness of language models is that they lack real-world experience, which makes it difficult to leverage them for decision making within a given embodiment.",
      "source": "saycan.pdf",
      "citation": "【4:2†source】"
    },
    {
      "chunk_id": 2,
      "text": "The robot can act as the language model’s 'hands and eyes,' while the language model supplies high-level semantic knowledge about the task. We show how low-level skills can be combined with large language models so that the language model provides 

In [14]:
# No-op cell: `pass` does nothing when executed.
# NOTE(review): this cell's execution count (14) is lower than the preceding
# cell's (15), so the notebook was not run top-to-bottom; consider deleting
# this cell or re-running with a fresh kernel.
pass