# Agentic AI Workshop — Part 2: Embeddings & Cosine Similarity (No Vector DB yet)

This notebook demonstrates:
- Creating embeddings for a small set of texts
- Computing cosine similarity between a query and each source document
- Explaining why embeddings matter for retrieval


# %%capture
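# If any of these packages are missing, uncomment the next line to install them
# (%pip installs into the environment of the running kernel):
# %pip install --upgrade openai numpy pandas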

In [None]:
from openai import OpenAI
import numpy as np
import pandas as pd

# Module-level API client used by get_embeddings. No API key is passed here,
# so credentials presumably come from the environment (e.g. OPENAI_API_KEY)
# -- TODO confirm for your setup.
client = OpenAI()

def get_embeddings(texts, model="text-embedding-3-small"):
    """Embed ``texts`` with the embeddings endpoint of the module-level client.

    Args:
        texts: Input forwarded unchanged as the ``input`` argument of
            ``client.embeddings.create``.
        model: Name of the embedding model to request.

    Returns:
        np.ndarray: float32 array of shape (N, D), one row per item in the
        response's ``data`` list.
    """
    response = client.embeddings.create(model=model, input=texts)
    # Collect the raw vectors first, then convert once to a float32 matrix.
    vectors = []
    for item in response.data:
        vectors.append(item.embedding)
    return np.array(vectors, dtype=np.float32)

def cosine_sim(a, b):
    """Return the cosine similarity of two 1-D vectors as a Python float.

    Args:
        a: First vector -- a NumPy array or any plain sequence of numbers
            (list, tuple, ...).
        b: Second vector, same length as ``a``.

    Returns:
        float: Dot product of the two vectors after each has been scaled to
        (approximately) unit length. If either vector is all zeros the
        result is 0.0 instead of NaN.
    """
    # Coerce up front so plain lists/tuples work too: dividing a list by a
    # scalar raises TypeError. ndarray inputs pass through unchanged.
    a = np.asarray(a)
    b = np.asarray(b)
    # The small epsilon keeps the division finite when a norm is zero.
    a = a / (np.linalg.norm(a) + 1e-8)
    b = b / (np.linalg.norm(b) + 1e-8)
    return float(np.dot(a, b))

In [None]:
# Tiny in-memory corpus: each string is one candidate "source" to retrieve.
docs = [
    "Ready Tensor’s platform helps publish AI projects and evaluate models.",
    "RAG retrieves context from a knowledge base to ground LLM answers.",
    "Vector databases store embeddings and support fast similarity search.",
    "Prompt engineering shapes model behavior with clear instructions and constraints.",
    "Multi‑agent systems coordinate several specialized LLMs to solve complex tasks.",
]

# The question we want to match against the corpus.
query = "How do I ground a model’s answer in external knowledge?"

# Embed all documents in one call; the query goes in as a one-item batch,
# so take row 0 to get its single vector.
doc_embeddings = get_embeddings(docs)
query_embedding = get_embeddings([query])[0]

# Score every document against the query.
scores = [
    cosine_sim(query_embedding, doc_vector)
    for doc_vector in doc_embeddings
]

# Tabulate the results, best match first.
df = (
    pd.DataFrame(
        {
            "doc_id": range(len(docs)),
            "document": docs,
            "cosine_similarity": scores,
        }
    )
    .sort_values(by="cosine_similarity", ascending=False)
)

df