# Part 12

# Using File Search

Setup code shared by the entire notebook.

In [1]:
# Make sure we have all the packages we need
%pip install -r requirements.txt

Note: you may need to restart the kernel to use updated packages.


In [1]:
# Import necessary libraries
from openai import OpenAI  # Used for interacting with OpenAI's API
from typing_extensions import override  # Used for overriding methods in subclasses
from openai import AssistantEventHandler  # Used for handling events related to OpenAI assistants

# Additional libraries for time and date manipulation
import time
import pytz
import datetime


In [2]:
# Create the OpenAI client object that the cells below use for their API calls.
# No api_key argument is passed here, so this assumes the OPENAI_API_KEY
# environment variable has already been set.
client = OpenAI() 

## Creating an Assistant with File Search enabled

Our first step is to create an Assistant that can search files regardless of where the vector store resides (on the Assistant or on a Thread).

In [3]:
# Register a new assistant whose only tool is file search.
assistant = client.beta.assistants.create(
    name="File Search Demo Assistant - Stories",  # Human-readable label for the assistant.
    model="gpt-4o",  # Model that backs the assistant.
    tools=[{"type": "file_search"}],  # Turns on the file search capability.
    # Prompt that keeps the assistant focused on the stories in its files.
    instructions=""" 
        You are a helpful assistant that answers questions about the stories in your files. The stories are from a variety of authors. 
        You will answer questions from the user about the stories. All you will do is answer questions about the stories in the files and provide related information.
        If the user asks you a question that is not related to the stories in the files, you should let them know that you can only answer questions about the stories.
    """,
    # String tags describing what this assistant can be used for.
    metadata={
        "can_be_used_for_file_search": "True",
        "can_hold_vector_store": "True",
    },
    temperature=1,  # Response variability.
    top_p=1,  # Nucleus sampling.
)

# Show the full assistant object, a blank spacer, then its name and metadata,
# each on its own line.
print(assistant, "\n\n", assistant.name, assistant.metadata, sep="\n")

Assistant(id='asst_DJnsWlXrYOT2YlW43BBvht7x', created_at=1717414470, description=None, instructions=' \n        You are a helpful assistant that answers questions about the stories in your files. The stories are from a variety of authors. \n        You will answer questions from the user about the stories. All you will do is answer questions about the stories in the files and provide related information.\n        If the user asks you a question that is not related to the stories in the files, you should let them know that you can only answer questions about the stories.\n    ', metadata={'can_be_used_for_file_search': 'True', 'can_hold_vector_store': 'True'}, model='gpt-4o', name='File Search Demo Assistant - Stories', object='assistant', tools=[FileSearchTool(type='file_search')], response_format='auto', temperature=1.0, tool_resources=ToolResources(code_interpreter=None, file_search=ToolResourcesFileSearch(vector_store_ids=[])), top_p=1.0)



File Search Demo Assistant - Stories
{'ca

## Creating a Vector Store

Now we will create our vector store to hold our files and add files at the same time.

In [8]:
from contextlib import ExitStack

# Make a new, named vector store that will receive the story files.
vector_store = client.beta.vector_stores.create(name="Great Fiction Stories")

# Local documents that will be uploaded into the vector store.
file_paths = ["./artifacts/I Am Legend.pdf", "./artifacts/The Veldt.pdf"]

# ExitStack owns every file handle opened below and closes them all when the
# block exits, including when an error is raised part-way through.
with ExitStack() as stack:
    # Open each path for binary reading and register the handle with the stack
    # as soon as it is opened.
    file_streams = list(
        map(stack.enter_context, (open(path, "rb") for path in file_paths))
    )

    # The helper uploads the files, adds them to the vector store, and polls
    # the file batch until it reaches a final status.
    file_batch = client.beta.vector_stores.file_batches.upload_and_poll(
        vector_store_id=vector_store.id,
        files=file_streams,
    )

    # Report the vector store's name and id, followed by the batch status and
    # its per-state file counts, one value per line.
    print(
        vector_store.name,
        vector_store.id,
        file_batch.status,
        file_batch.file_counts,
        sep="\n",
    )


completed
FileCounts(cancelled=0, completed=2, failed=0, in_progress=0, total=2)


## Attaching the Vector Store to the Assistant

We have an Assistant that has File Search enabled, and we have a Vector Store with files in it. It's time to join the two up.

In [11]:
try:
    # Point the assistant's file_search tool at the vector store so that file
    # search has something to search.
    file_search_resources = {
        "file_search": {
            "vector_store_ids": [vector_store.id],
        },
    }
    assistant = client.beta.assistants.update(
        assistant_id=assistant.id,
        tool_resources=file_search_resources,
    )

    # List the tools enabled on the updated assistant.
    print("Assistant Tools:")
    for enabled_tool in assistant.tools:
        print(f" - {enabled_tool}")

    # List each tool resource entry; the attached vector store id should show
    # up under file_search.
    print("\nAssistant Tool Resources:")
    for resource_name, resource_value in assistant.tool_resources:
        print(f" - {resource_name}: {resource_value}")

except Exception as update_error:
    # Report the failure instead of stopping the notebook.
    print(f"An error occurred while updating the assistant: {update_error}")


Assistant Tools:
 - FileSearchTool(type='file_search')

Assistant Tool Resources:
 - code_interpreter: None
 - file_search: ToolResourcesFileSearch(vector_store_ids=['vs_xR6pzmCtXQz7iUBrDavNg0Kc'])


## Creating an Assistant and Vector Store at the Same Time

If we already have file IDs, we can pass them in when creating an Assistant to get the Assistant and the Vector Store at the same time.

In [23]:

# Create an assistant and a new vector store in a single API call.
# On failure the error is printed and nothing else is reported; on success the
# assistant object, its name, and the ids of its vector stores are printed.
try:
    assistant = client.beta.assistants.create(
        model="gpt-4o",  # Specify the model to be used.
        instructions=(
            "You are a helpful assistant that answers questions about the stories in your files. "
            "The stories are from a variety of authors. "
            "You will answer questions from the user about the stories. All you will do is answer questions about the stories in the files and provide related information. "
            "If the user asks you a question that is not related to the stories in the files, you should let them know that you can only answer questions about the stories."
        ),
        name="Quick Assistant and Vector Store at Once",  # Give the assistant a name.
        tools=[{"type": "file_search"}],  # Add the file search capability to the assistant.
        # Create a vector store and attach it to the assistant in one step.
        tool_resources={
            "file_search": {
                "vector_stores": [
                    {
                        # NOTE(review): hard-coded file ids, presumably from files
                        # uploaded earlier; replace with your own ids.
                        "file_ids": [
                            "file-UY7uzH3SMK0ALbwQeRk5OE0i",
                            "file-ZN71rhzhlvLQWiU5ZWEf3WP0"
                        ],
                        "metadata": {}
                    }
                ]
            }
        },
        metadata={  # Add metadata about the assistant's capabilities.
            "can_be_used_for_file_search": "True",
            "can_hold_vector_store": "True",
        },
        temperature=1,  # Set the temperature for response variability.
        top_p=1,  # Set the top_p for nucleus sampling.
    )
except Exception as e:
    print(f"An error occurred while creating the assistant: {e}")
else:
    # Print the details of the created assistant to check its properties.
    print(assistant)  # Print the full assistant object.
    print("\n\n")
    print(assistant.name)  # Print the name of the assistant.
    # Kept inside the success branch: if creation failed, `assistant` would be
    # undefined or still refer to an assistant from an earlier cell, so
    # printing its vector store ids would raise or be misleading.
    print(assistant.tool_resources.file_search.vector_store_ids)


Assistant(id='asst_xDHaF6YQ0fRL1v3Pi33KxYUk', created_at=1717454362, description=None, instructions='You are a helpful assistant that answers questions about the stories in your files. The stories are from a variety of authors. You will answer questions from the user about the stories. All you will do is answer questions about the stories in the files and provide related information. If the user asks you a question that is not related to the stories in the files, you should let them know that you can only answer questions about the stories.', metadata={'can_be_used_for_file_search': 'True', 'can_hold_vector_store': 'True'}, model='gpt-4o', name='Quick Assistant and Vector Store at Once', object='assistant', tools=[FileSearchTool(type='file_search')], response_format='auto', temperature=1.0, tool_resources=ToolResources(code_interpreter=None, file_search=ToolResourcesFileSearch(vector_store_ids=['vs_dEMiqQ0z81eIU47WGK2HdZmP'])), top_p=1.0)



Quick Assistant and Vector Store at Once
['v