In [1]:
import numpy as np

def cosine_similarity(a, b):
    """Return the cosine similarity of two vectors.

    The result is the dot product of ``a`` and ``b`` divided by the product
    of their Euclidean norms.

    Args:
        a: Array-like of numbers.
        b: Array-like of numbers, compatible with ``a`` for ``np.dot``.

    Returns:
        The similarity as a NumPy scalar.

    Raises:
        ValueError: If ``a`` or ``b`` has zero norm. The similarity is
            undefined there; without this check the division yields ``nan``
            and only a RuntimeWarning.
    """
    a = np.asarray(a)
    b = np.asarray(b)
    norm_a = np.linalg.norm(a)
    norm_b = np.linalg.norm(b)
    if norm_a == 0 or norm_b == 0:
        raise ValueError("cosine similarity is undefined for a zero-norm vector")
    return np.dot(a, b) / (norm_a * norm_b)

# Two sample 3-d vectors; later cells reuse them.
v1, v2 = [0.2, 0.4, 0.6], [0.1, 0.3, 0.5]

# Similarity of the raw (un-normalized) vectors.
print(cosine_similarity(v1, v2))

0.9938586931957764


In [2]:
def normalize(v):
    """Scale ``v`` to unit Euclidean norm.

    Args:
        v: Array-like of numbers.

    Returns:
        A NumPy array equal to ``v`` divided by its Euclidean norm.

    Raises:
        ValueError: If ``v`` has zero norm. It has no direction to preserve;
            without this check the division returns an array of ``nan``.
    """
    v = np.asarray(v)
    norm = np.linalg.norm(v)
    if norm == 0:
        raise ValueError("cannot normalize a zero-norm vector")
    return v / norm

# Unit-length copies of the sample vectors.
v1_n, v2_n = normalize(v1), normalize(v2)

# For unit vectors the plain dot product is the cosine similarity.
print(v1_n.dot(v2_n))

0.9938586931957764


In [None]:
# Uncomment to install ChromaDB into the running kernel's environment:
# %pip install chromadb

In [4]:
import chromadb
from chromadb.config import Settings

# Use PersistentClient so the collection is actually written to ./chroma_db.
# chromadb.Client(Settings(persist_directory=...)) builds an in-memory client
# unless is_persistent=True is also set, so the directory was silently ignored.
client = chromadb.PersistentClient(
    path="./chroma_db",
    settings=Settings(anonymized_telemetry=False),
)

In [7]:
# Fetch the "docs" collection, creating it on first use. The metadata asks
# for the cosine space on the collection's HNSW index.
collection = client.get_or_create_collection(name="docs", metadata={"hnsw:space": "cosine"})

In [8]:
# Seed the collection with three toy records. The four lists are parallel:
# position i in each of them describes the same record.
collection.add(
    ids=["doc1", "doc2", "doc3"],
    embeddings=[[0.1, 0.2, 0.3], [0.2, 0.1, 0.4], [0.9, 0.8, 0.7]],
    metadatas=[
        {"topic": "ai", "year": 2024},
        {"topic": "ai", "year": 2023},
        {"topic": "animals", "year": 2022},
    ],
    documents=[
        "Deep learning is powerful",
        "Transformers are amazing",
        "Cats are cute animals",
    ],
)

In [9]:
# Ask for the two stored records closest to a single query embedding.
results = collection.query(n_results=2, query_embeddings=[[0.15, 0.15, 0.35]])
print(results)

{'ids': [['doc2', 'doc1']], 'embeddings': None, 'documents': [['Transformers are amazing', 'Deep learning is powerful']], 'uris': None, 'included': ['metadatas', 'documents', 'distances'], 'data': None, 'metadatas': [[{'year': 2023, 'topic': 'ai'}, {'year': 2024, 'topic': 'ai'}]], 'distances': [[0.013596415519714355, 0.020465314388275146]]}


In [10]:
# Same query embedding, now with a metadata filter on topic == "ai".
results = collection.query(
    n_results=2,
    where={"topic": "ai"},
    query_embeddings=[[0.15, 0.15, 0.35]],
)
print(results)


{'ids': [['doc2', 'doc1']], 'embeddings': None, 'documents': [['Transformers are amazing', 'Deep learning is powerful']], 'uris': None, 'included': ['metadatas', 'documents', 'distances'], 'data': None, 'metadatas': [[{'topic': 'ai', 'year': 2023}, {'year': 2024, 'topic': 'ai'}]], 'distances': [[0.013596415519714355, 0.020465314388275146]]}


In [11]:
# Replace doc3's metadata; only ids and metadatas are supplied here.
collection.update(metadatas=[{"topic": "pets", "year": 2024}], ids=["doc3"])

In [13]:
# Remove the record with id "doc1" from the collection.
collection.delete(ids=["doc1"])

In [22]:
!pip install pymilvus milvus-lite

Collecting milvus-lite
  Downloading milvus_lite-2.5.1-py3-none-manylinux2014_x86_64.whl.metadata (10.0 kB)
Downloading milvus_lite-2.5.1-py3-none-manylinux2014_x86_64.whl (55.3 MB)
   ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 55.3/55.3 MB 37.9 MB/s eta 0:00:00
Installing collected packages: milvus-lite
Successfully installed milvus-lite-2.5.1


In [24]:
from pymilvus import MilvusClient

# NOTE(review): this rebinds `client`, which the earlier cells bound to the
# Chroma client. Re-running those Chroma cells after this one would call
# Chroma methods on the Milvus client; consider a distinct name.
client = MilvusClient("milvus.db")

# "milvus.db" is a file on disk, so its contents outlive the kernel. Create
# the collection only when it is missing, and upsert instead of insert so
# that re-running this cell does not accumulate duplicate rows for ids 1 and 2.
if not client.has_collection(collection_name="docs"):
    client.create_collection(
        collection_name="docs",
        dimension=3,
        metric_type="COSINE",  # explicit; matches the cosine space used for the Chroma collection
    )

client.upsert(
    collection_name="docs",
    data=[
        {"id": 1, "vector": [0.1, 0.2, 0.3]},
        {"id": 2, "vector": [0.2, 0.1, 0.4]},
    ],
)

# Top-2 nearest rows for one query vector. With the cosine metric the
# reported "distance" is a similarity, so larger means closer.
results = client.search(
    collection_name="docs",
    data=[[0.15, 0.15, 0.35]],
    limit=2,
)

print(results)

data: [[{'id': 2, 'distance': 0.9864036440849304, 'entity': {}}, {'id': 1, 'distance': 0.9795348048210144, 'entity': {}}]]
