In [None]:
# 📦 Install required packages
# `%pip` (rather than `!pip`) installs into the environment of the running kernel.
# NOTE(review): `openai` is not imported by any cell visible in this notebook — confirm it is still needed.
# NOTE(review): versions are unpinned; consider pinning them so reruns are reproducible.
%pip install chromadb openai

# 🧠 Embeddings in Action with ChromaDB
This notebook introduces the concept of embeddings, how they are generated, and how to use them in ChromaDB for storing and querying documents.

## 1️⃣ What is an Embedding?
An embedding is a fixed-length vector of numbers that represents the meaning of a piece of data (e.g., text, image).
Embeddings are useful for comparing the similarity between different data items in vector space.

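## 2️⃣ Generate Raw Embedding (Dummy Example)

The vector below is random noise used only to show the *shape* of an embedding; it does not encode the meaning of the sentence. Real embeddings come from a trained model, as shown in the next section.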

In [None]:
import numpy as np

# Length of the placeholder vector (same size the original example used).
EMBEDDING_DIM = 1536
# Fixed seed so the printed values are identical on every Restart & Run All.
SEED = 42

sentence = "This is a test sentence."

# NOTE: this is random noise standing in for an embedding of `sentence`;
# it only illustrates the data structure (a fixed-length float vector),
# not a model-generated representation.
rng = np.random.default_rng(SEED)
embedding = rng.random(EMBEDDING_DIM)  # uniform floats in [0, 1)
print(f"First 5 values of the embedding: {embedding[:5]}")

## 3️⃣ Using Default Embedding Function in ChromaDB

In [None]:
import chromadb
from chromadb.utils import embedding_functions

# Create a client and a collection that embeds documents with Chroma's
# default embedding function. `get_or_create_collection` makes this cell
# safe to re-run: an existing "demo_collection" is reused.
client = chromadb.Client()
default_ef = embedding_functions.DefaultEmbeddingFunction()
collection = client.get_or_create_collection("demo_collection", embedding_function=default_ef)

documents = [
    {"id": "doc1", "text": "Hello, world!"},
    {"id": "doc2", "text": "How are you today?"},
    {"id": "doc3", "text": "Goodbye, see you later!"},
]

# Upsert everything in one batched call: `ids` and `documents` are parallel
# lists of equal length, so the embedding function runs once over the whole
# batch instead of once per document. Upsert (not add) keeps re-runs idempotent.
collection.upsert(
    ids=[doc["id"] for doc in documents],
    documents=[doc["text"] for doc in documents],
)

## 4️⃣ Perform Similarity Search

In [None]:
# Ask the collection for the two stored documents closest to the query text.
query = "How are you today?"
results = collection.query(query_texts=[query], n_results=2)

# One query was sent, so the hits for it live at index 0 of each result field.
hit_docs = results["documents"][0]
hit_ids = results["ids"][0]
hit_distances = results["distances"][0]

# Walk the parallel result lists together and print a 1-based ranking.
for rank, (text, hit_id, distance) in enumerate(zip(hit_docs, hit_ids, hit_distances), start=1):
    print(f"{rank}. {text} (ID: {hit_id}, Distance: {distance})")

## 5️⃣ Manually Embedding and Adding to Collection

In [None]:
# Store documents together with embeddings we supply ourselves, so Chroma
# does not compute them. The 3-dimensional vectors are toy values.
# NOTE: this rebinds `collection` to "manual_embeds"; re-running the
# similarity-search cell after this one would query this collection instead
# of "demo_collection".
collection = client.get_or_create_collection("manual_embeds")

# Upsert instead of add so that re-running this cell in the same kernel
# overwrites id1/id2 rather than attempting to insert duplicate IDs.
collection.upsert(
    ids=["id1", "id2"],
    documents=["Manual doc one", "Manual doc two"],
    embeddings=[[0.1, 0.2, 0.3], [0.4, 0.5, 0.6]],
)
print(collection.get(ids=["id1", "id2"]))