# Create a RAG with Responses API

* [OpenAI Responses API](https://platform.openai.com/docs/api-reference/responses)
* [Azure OpenAI Responses API](https://learn.microsoft.com/en-us/azure/ai-foundry/openai/how-to/responses?tabs=python-key)
* [Vertex AI RAG Engine overview](https://cloud.google.com/vertex-ai/generative-ai/docs/rag-engine/rag-overview)

In [None]:
# Enable IPython's autoreload extension so that edits to imported modules
# are picked up without restarting the kernel (mode 2 = reload all modules
# before each cell execution).
%load_ext autoreload
%autoreload 2

In [15]:
import os
from pathlib import Path
from dotenv import load_dotenv
from openai import OpenAI
from src import utils, conf

# Params

In [None]:
# Project settings read from the YAML configuration file.
settings = conf.load(file="settings.yaml")

# Notebook-level knobs.
LLM = "gpt-4.1-mini-2025-04-14"  # 1M token context, better than gpt-4o in structured output
RETRIEVE_K = 3  # intended number of chunks to retrieve per query
INDEX = "space"  # name given to the vector store when it is created

# Environment Variables

In [17]:
# Populate the process environment from a local .env file (python-dotenv)
# before reading any secret, so the key never has to be hard-coded here.
load_dotenv()

# Indexing os.environ (rather than .get) fails fast with a KeyError when the
# variable is missing.
OPENAI_API_KEY = os.environ["OPENAI_API_KEY"]

# Clients

In [18]:
# Single OpenAI client shared by every cell below (files, vector stores, responses).
client_openai = OpenAI(api_key=OPENAI_API_KEY)

# Upload Data

In [19]:
# Collect every PDF found directly under the raw-data folder.
pdf_files = list(utils.path_data_raw.glob("*.pdf"))
print(f"{pdf_files=}")

pdf_files=[WindowsPath('c:/Users/manua/Documents/repos/dslabs/dslab-rag-e2e/data/raw/3I_ATLAS, NAUKAS.pdf'), WindowsPath('c:/Users/manua/Documents/repos/dslabs/dslab-rag-e2e/data/raw/Divulgacion Planetaria Althera.pdf'), WindowsPath('c:/Users/manua/Documents/repos/dslabs/dslab-rag-e2e/data/raw/OMUAMUA. NAUKAS.pdf')]


In [None]:
# Upload the source PDF to OpenAI file storage unless it is already there.
file_path = utils.path_data_raw / "Divulgacion Planetaria Althera.pdf"
id_file = 'file-F5KwBtf7a6juA5LDgoQ1Nf'  # add file id once created from cell output print

# Ids of the files currently stored in the account.
lst_oai_files = [x.id for x in client_openai.files.list()]

if id_file not in lst_oai_files:
    # The context manager closes the file handle as soon as the upload
    # finishes (the previous bare open() leaked it for the kernel's lifetime).
    with open(file_path, 'rb') as pdf_handle:
        response_file = client_openai.files.create(
            file=pdf_handle,
            purpose="assistants",
            expires_after={
                "anchor": "created_at",
                "seconds": 2592000   # 30d
            })
    # Point id_file at the file that was just uploaded so the following cells
    # do not keep using the stale hard-coded id.
    id_file = response_file.id
    print(f"Added file: {response_file.id=}")

# Index

In [None]:
# Create the vector store that indexes the uploaded file unless it already exists.
id_vdb = "vs_68bd8ba217fc8191813ca8ee1a846fc3"   # add vector store id once created from cell output print

# Ids of the vector stores currently available in the account.
lst_oai_vdb = [x.id for x in client_openai.vector_stores.list()]

if id_vdb not in lst_oai_vdb:
    # `response_file` only exists when the upload cell actually uploaded a file
    # in this kernel session; when the file was already present, fall back to
    # the known `id_file` instead of failing with a NameError.
    try:
        file_id_to_index = response_file.id
    except NameError:
        file_id_to_index = id_file

    response_attach = client_openai.vector_stores.create(
        name=INDEX,
        file_ids=[file_id_to_index],
        expires_after={
            "anchor": "last_active_at",
            "days": 30
            }
    )
    # Point id_vdb at the store that was just created so the following cells
    # do not query the stale hard-coded id.
    id_vdb = response_attach.id
    print(f"Created VDB: {response_attach.id=}")

    # No chunking strategy is passed, so the service default applies:
    # max_chunk_size_tokens of 800 and chunk_overlap_tokens of 400.

In [69]:
# Attach the file to the vector store unless it is already indexed there.
vector_store_files = client_openai.vector_stores.files.list(
  vector_store_id=id_vdb
)

# Iterate the page object itself (not `.data`, which holds only the first
# page) so the SDK auto-paginates and every indexed file is considered.
lst_indexed_files = [x.id for x in vector_store_files]
if id_file not in lst_indexed_files:
    client_openai.vector_stores.files.create(
        vector_store_id=id_vdb,
        file_id=id_file
    )
    print(f"Indexed file: {id_file=} in VDB: {id_vdb=}")


# Query

In [52]:
# Retrieval only: semantic search over the vector store, without any LLM generation.
query = "¿A qué distancia de la Tierra está el sistema Althéra?"
res_search = client_openai.vector_stores.search(
    vector_store_id=id_vdb,
    query=query,
    max_num_results=RETRIEVE_K  # top-k comes from the params cell instead of a magic number
)

In [53]:
# One summary line per retrieved chunk: text length, source file and similarity score.
for hit in res_search.data:
    chunk_length = len(hit.content[0].text)
    print(f"{chunk_length} of character of content from {hit.filename} with a relevant score of {hit.score}")

2550 of character of content from Divulgacion Planetaria Althera.pdf with a relevant score of 0.9877970374838512
2292 of character of content from Divulgacion Planetaria Althera.pdf with a relevant score of 0.8637451964347038
2602 of character of content from Divulgacion Planetaria Althera.pdf with a relevant score of 0.815508833450933


# RAG

In [55]:
# Full RAG round trip: the model issues a file_search tool call against the
# vector store and then answers using the retrieved chunks.
query = "¿A qué distancia de la Tierra está el sistema Althéra?"

res_rag = client_openai.responses.create(
    input=query,
    model=LLM,
    tools=[{
        "type": "file_search",
        "vector_store_ids": [id_vdb],
    }]
)

# Locate the assistant message by type instead of by position: the message is
# only at index 1 when a file_search_call item precedes it, which is not
# guaranteed (the model may answer without searching).
message = next(item for item in res_rag.output if item.type == "message")
answer = message.content[0]

# Annotations attached to the answer text (kept under this name because a
# later cell inspects `annotations[0]`).
annotations = answer.annotations

# Files actually cited in the answer. These are citations, not the full list
# of retrieved chunks, so only file_citation annotations are considered.
retrieved_files = {
    annotation.filename
    for annotation in annotations
    if annotation.type == "file_citation"
}

print(f'Files used: {retrieved_files}')
print('Response:')
print(answer.text)

Files used: {'Divulgacion Planetaria Althera.pdf'}
Response:
El sistema binario Althéra (HD 4579 AB) está situado en la constelación de Orión, a una distancia de 42,7 años luz de la Tierra. Esta proximidad relativa permite realizar observaciones detalladas del sistema y sus planetas circumbinarios con telescopios avanzados actuales y futuros.

Esta información viene especificada claramente en la fuente que me proporcionaste, indicando la distancia exacta desde la Tierra al sistema Althéra como 42,7 años luz.


In [60]:
# Inspect the first annotation attached to the answer text.
annotations[0]

AnnotationFileCitation(file_id='file-F5KwBtf7a6juA5LDgoQ1Nf', filename='Divulgacion Planetaria Althera.pdf', index=451, type='file_citation')

In [65]:
# Total token count reported in the usage block of the RAG response.
res_rag.usage.total_tokens

16270

In [None]:
# First output item; in the recorded run this is the file-search tool call,
# which lists the queries the model issued against the vector store.
res_rag.output[0]  # ResponseFileSearchToolCall

ResponseFileSearchToolCall(id='fs_68bda5772d588193ace214430854f1b20bb18c5991161cdc', queries=['distancia del sistema Althéra a la Tierra', 'Althéra'], status='completed', type='file_search_call', results=None)

In [None]:
# Second output item; in the recorded run this is the assistant message
# holding the answer text and its file-citation annotations.
res_rag.output[1]  # ResponseOutputMessage

ResponseOutputMessage(id='msg_68bda5798140819392edd6355bd1ec420bb18c5991161cdc', content=[ResponseOutputText(annotations=[AnnotationFileCitation(file_id='file-F5KwBtf7a6juA5LDgoQ1Nf', filename='Divulgacion Planetaria Althera.pdf', index=451, type='file_citation')], text='El sistema binario Althéra (HD 4579 AB) está situado en la constelación de Orión, a una distancia de 42,7 años luz de la Tierra. Esta proximidad relativa permite realizar observaciones detalladas del sistema y sus planetas circumbinarios con telescopios avanzados actuales y futuros.\n\nEsta información viene especificada claramente en la fuente que me proporcionaste, indicando la distancia exacta desde la Tierra al sistema Althéra como 42,7 años luz.', type='output_text', logprobs=[])], role='assistant', status='completed', type='message')