In [3]:
import os
import requests
from io import BytesIO
import openai
from dotenv import load_dotenv

In [2]:
load_dotenv()  # Load environment variables from .env file

# Ensure the OPENAI_API_KEY is set. An empty value (e.g. a bare
# `OPENAI_API_KEY=` line in .env) is rejected as well as a missing one, so the
# notebook stops here with a clear message instead of failing on a later call.
openai.api_key = os.getenv("OPENAI_API_KEY")
if not openai.api_key:
    raise ValueError("OPENAI_API_KEY environment variable not set")

# Shared client used by the cells below.
client = openai.OpenAI()

# Responses API - File search
File search is a tool available in the Responses API. It enables models to retrieve information from a knowledge base of previously uploaded files through semantic and keyword search. By creating vector stores and uploading files to them, you can augment the models' inherent knowledge by giving them access to these knowledge bases (vector stores).

This is a hosted tool managed by OpenAI, meaning you don't have to implement any code on your end to handle its execution. When the model decides to use it, it will automatically call the tool, retrieve information from your files, and return an output.

## Step 1: Create a vector store and upload a file
Prior to using file search with the Responses API, we need to have set up a knowledge base in a vector store and uploaded files to it.

**Upload the file** to the File API:

In [None]:
import requests
from io import BytesIO
from openai import OpenAI

# Rebinds `client` for this cell; a client was already created in the setup cell above.
client = OpenAI()

def create_file(client, file_path, timeout=30):
    """Upload a file to the Files API and return the new file's ID.

    Args:
        client: Object exposing ``files.create(file=..., purpose=...)``; in this
            notebook, an ``OpenAI`` client.
        file_path: Either an ``http://`` / ``https://`` URL to download first,
            or a path to a local file.
        timeout: Seconds passed to ``requests.get`` when ``file_path`` is a URL.
            Ignored for local paths.

    Returns:
        The ``id`` attribute of the object returned by ``client.files.create``.
        The ID is also printed.

    Raises:
        requests.HTTPError: If the download answers with a 4xx/5xx status.
        OSError: If a local ``file_path`` cannot be opened.
    """
    if file_path.startswith(("http://", "https://")):
        # Local import keeps this cell runnable on its own.
        from urllib.parse import urlsplit

        # Download the file content from the URL. Without a timeout the call
        # can block forever on an unresponsive server.
        response = requests.get(file_path, timeout=timeout)
        # Stop here on an error status; otherwise the body of an error page
        # would be uploaded as if it were the requested document.
        response.raise_for_status()

        # Derive the name from the URL *path* only, so a query string or
        # fragment ("report.pdf?download=1") does not end up in the file name.
        file_name = urlsplit(file_path).path.split("/")[-1]
        file_tuple = (file_name, BytesIO(response.content))

        # Create the file using the OpenAI client
        result = client.files.create(file=file_tuple, purpose="assistants")
    else:
        # Handle local file path; the handle is closed once the upload returns.
        with open(file_path, "rb") as file_content:
            result = client.files.create(file=file_content, purpose="assistants")
    print(result.id)
    return result.id

# Swap in your own local path or URL here
file_id = create_file(
    client,
    "https://cdn.openai.com/API/docs/deep_research_blog.pdf",
)
file_id

deep_research_blog.pdf
file-CsaxfmQL1ciHvcQH5Vu1v8


'file-CsaxfmQL1ciHvcQH5Vu1v8'

**Create a vector store**:

In [None]:
# Create the (initially empty) store; its id is needed when attaching files and when searching.
vector_store = client.vector_stores.create(name="knowledge_base")
print(vector_store.id)

vs_68482bede36481918034b42d74e508e5


**Add the file** to the vector store:

In [None]:
# Attach the uploaded file to the vector store created above.
result = client.vector_stores.files.create_and_poll(
    file_id=file_id,
    vector_store_id=vector_store.id,
)
print(result)

VectorStoreFile(id='file-CsaxfmQL1ciHvcQH5Vu1v8', created_at=1749560326, last_error=None, object='vector_store.file', status='in_progress', usage_bytes=0, vector_store_id='vs_68482bede36481918034b42d74e508e5', attributes={}, chunking_strategy=StaticFileChunkingStrategyObject(static=StaticFileChunkingStrategy(chunk_overlap_tokens=400, max_chunk_size_tokens=800), type='static'))


## Step 2: Call the model with the file search tool
Once our knowledge base is set up, we can include the file_search tool in the list of tools available to the model, along with the list of vector stores in which to search.

In [None]:
client = openai.OpenAI()

# Tool entry that points file_search at the vector store built in Step 1.
file_search_tool = {
    "type": "file_search",
    "vector_store_ids": [vector_store.id],
}

response = client.responses.create(
    model="gpt-4o-mini",
    input="What is deep research by OpenAI?",
    tools=[file_search_tool],
)
print(response.output_text)

Response(id='resp_68482dcad934819a9bc12477855b7ad706e7d7e5b249e519', created_at=1749560778.0, error=None, incomplete_details=None, instructions=None, metadata={}, model='gpt-4o-mini-2024-07-18', object='response', output=[ResponseFileSearchToolCall(id='fs_68482dd8fd58819a937b001f19ca155b06e7d7e5b249e519', queries=['What is deep research by OpenAI?'], status='completed', type='file_search_call', results=None), ResponseOutputMessage(id='msg_68482dea2dcc819a94a791d099fcaae406e7d7e5b249e519', content=[ResponseOutputText(annotations=[AnnotationFileCitation(file_id='file-CsaxfmQL1ciHvcQH5Vu1v8', index=1518, type='file_citation', filename='deep_research_blog.pdf'), AnnotationFileCitation(file_id='file-CsaxfmQL1ciHvcQH5Vu1v8', index=1518, type='file_citation', filename='deep_research_blog.pdf'), AnnotationFileCitation(file_id='file-CsaxfmQL1ciHvcQH5Vu1v8', index=1850, type='file_citation', filename='deep_research_blog.pdf'), AnnotationFileCitation(file_id='file-CsaxfmQL1ciHvcQH5Vu1v8', index=1