In [None]:
!pip install openai chromadb PyPDF2

Collecting chromadb
  Downloading chromadb-1.1.1-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (7.2 kB)
Collecting PyPDF2
  Downloading pypdf2-3.0.1-py3-none-any.whl.metadata (6.8 kB)
Collecting pybase64>=1.4.1 (from chromadb)
  Downloading pybase64-1.4.2-cp312-cp312-manylinux1_x86_64.manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_5_x86_64.whl.metadata (8.7 kB)
Collecting posthog<6.0.0,>=2.4.0 (from chromadb)
  Downloading posthog-5.4.0-py3-none-any.whl.metadata (5.7 kB)
Collecting onnxruntime>=1.14.1 (from chromadb)
  Downloading onnxruntime-1.23.1-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl.metadata (5.0 kB)
Collecting opentelemetry-exporter-otlp-proto-grpc>=1.2.0 (from chromadb)
  Downloading opentelemetry_exporter_otlp_proto_grpc-1.37.0-py3-none-any.whl.metadata (2.4 kB)
Collecting pypika>=0.48.9 (from chromadb)
  Downloading PyPika-0.48.9.tar.gz (67 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m67.3/67.3 kB[0m [31

In [None]:
from openai import OpenAI
import chromadb
from PyPDF2 import PdfReader



In [15]:
from google.colab import userdata
# Read the OpenAI key from Colab's userdata store instead of hard-coding it in
# the notebook; strip() drops any stray whitespace around the stored value.
openai_api_key = userdata.get('OPENAI_API_KEY').strip()
# Shared API client used by the embedding and chat calls in later cells.
client = OpenAI(api_key=openai_api_key)



In [12]:
# Step 1: Extract text
def extract_text_from_pdf(path):
    """Return the text of every page of the PDF at ``path`` as one string.

    Each page is run through ``extract_text()`` and the per-page results are
    joined, in page order, with a single space between pages.
    """
    pdf = PdfReader(path)
    page_texts = []
    for pdf_page in pdf.pages:
        page_texts.append(pdf_page.extract_text())
    return " ".join(page_texts)

# Load the source document. "PRD.pdf" is a relative path, so the file must be
# present in the notebook's working directory before this cell runs.
text = extract_text_from_pdf("PRD.pdf")



In [13]:
# Step 2: Chunk text
def chunk_text(text, chunk_size=500, overlap=0):
    """Split ``text`` into chunks of at most ``chunk_size`` whitespace-separated words.

    Args:
        text: The string to split. Runs of whitespace (including newlines) are
            collapsed, because words are re-joined with single spaces.
        chunk_size: Maximum number of words per chunk. Must be positive.
        overlap: Number of trailing words from one chunk that are repeated at
            the start of the next. Must satisfy ``0 <= overlap < chunk_size``.
            The default of 0 produces disjoint chunks.

    Returns:
        A list of chunk strings in document order; empty if ``text`` contains
        no words.

    Raises:
        ValueError: If ``chunk_size`` is not positive, or ``overlap`` is
            negative or not smaller than ``chunk_size``.
    """
    # Without this check a zero size fails inside range() with an unrelated
    # message, and a negative size silently yields no chunks at all.
    if chunk_size <= 0:
        raise ValueError(f"chunk_size must be a positive integer, got {chunk_size!r}")
    if not 0 <= overlap < chunk_size:
        raise ValueError(
            f"overlap must satisfy 0 <= overlap < chunk_size, got {overlap!r}"
        )

    words = text.split()
    step = chunk_size - overlap
    chunks = []
    for start in range(0, len(words), step):
        chunks.append(" ".join(words[start:start + chunk_size]))
        # Once a chunk reaches the final word, any further window would only
        # repeat words already covered (this can only happen when overlap > 0).
        if start + chunk_size >= len(words):
            break
    return chunks

# Split the extracted text using chunk_text's default chunk size (500 words).
chunks = chunk_text(text)



In [16]:
# Step 3: Create embeddings
chroma_client = chromadb.Client()
collection = chroma_client.get_or_create_collection(name="docs")

# The embeddings endpoint accepts a list of inputs, so chunks are sent in
# batches: one request per batch instead of one request per chunk.
EMBED_BATCH_SIZE = 100

for start in range(0, len(chunks), EMBED_BATCH_SIZE):
    batch = chunks[start:start + EMBED_BATCH_SIZE]
    response = client.embeddings.create(
        model="text-embedding-3-small",
        input=batch,
    )
    # Order by the response's `index` field so each vector lines up with the
    # chunk at the same position in `batch`.
    vectors = [
        item.embedding
        for item in sorted(response.data, key=lambda item: item.index)
    ]
    # upsert rather than add: because the collection is fetched with
    # get_or_create, re-running this cell in the same session would otherwise
    # not replace records whose ids ("0", "1", ...) already exist.
    collection.upsert(
        documents=batch,
        embeddings=vectors,
        ids=[str(start + offset) for offset in range(len(batch))],
    )

In [17]:
# Step 4: Query + Retrieval
def query_doc(
    question,
    n_results=3,
    chat_model="gpt-4o-mini",
    embedding_model="text-embedding-3-small",
):
    """Answer ``question`` from the chunks most similar to it in ``collection``.

    The question is embedded, the closest stored chunks are retrieved, and the
    chat model is asked to answer using those chunks as context.

    Args:
        question: The natural-language question to answer.
        n_results: How many chunks to retrieve and pass as context.
        chat_model: Chat-completion model that writes the answer.
        embedding_model: Model used to embed the question. It has to be the
            same model the stored chunks were embedded with, otherwise the
            similarity search compares vectors from different spaces.

    Returns:
        The text content of the model's first choice.

    Raises:
        ValueError: If ``question`` is empty or only whitespace.
    """
    # Fail fast locally instead of spending API calls on a blank question.
    if not question or not question.strip():
        raise ValueError("question must be a non-empty string")

    q_embed = client.embeddings.create(
        model=embedding_model, input=question
    ).data[0].embedding

    results = collection.query(query_embeddings=[q_embed], n_results=n_results)
    # results["documents"] holds one list per query embedding; only one was sent.
    context = " ".join(results["documents"][0])

    # Step 5: Ask GPT
    response = client.chat.completions.create(
        model=chat_model,
        messages=[
            {"role": "system", "content": "Use the provided context to answer factually."},
            {"role": "user", "content": f"Context: {context}\n\nQuestion: {question}"}
        ]
    )
    return response.choices[0].message.content

# Smoke test: run the full retrieve-then-answer pipeline on one sample question.
print(query_doc("What is the key takeaway from this paper?"))

The key takeaway from the paper is the implementation of a robust evaluation and cost optimization strategy for enhancing the performance of a large language model (LLM) search system. The strategy includes carefully testing changes on a small portion of traffic, utilizing multiple models (like GPT-4 and GPT-3.5) for different query complexities, and employing techniques such as response caching, prompt optimization, and offline batch evaluation to significantly reduce operating costs while maintaining high quality and reliability in responses. This ensures that the system continuously improves based on defined metrics and user outcomes, ultimately providing value to stakeholders while managing financial resources effectively.
