# Qdrant tutorial

* Written in Rust
* xAI
* [Benchmark](https://qdrant.tech/benchmarks/)

![HNSW](https://qdrant.tech/docs/gettingstarted/vector-search.png)

In [1]:
# Enable IPython's autoreload extension in mode 2, so imported modules are
# reloaded automatically before each cell is executed.
%load_ext autoreload
%autoreload 2

In [2]:
import uuid
from qdrant_client import models, QdrantClient


  from .autonotebook import tqdm as notebook_tqdm


In [3]:
# Local in-memory Qdrant instance (nothing is persisted).
# To talk to a running server instead: QdrantClient("http://localhost:6333")
client_qdrant = QdrantClient(location=":memory:")


In [None]:
# client_qdrant_remote = QdrantClient("https://db4e5b82-0897-4033-8d9c-13cd30dea1b0.europe-west3-0.gcp.cloud.qdrant.io")

  client_qdrant_remote = QdrantClient("https://db4e5b82-0897-4033-8d9c-13cd30dea1b0.europe-west3-0.gcp.cloud.qdrant.io")


# Manage collections

In [6]:
# List the collections known to the client; a fresh in-memory instance has none.
client_qdrant.get_collections()

CollectionsResponse(collections=[])

In [7]:
# Create the "documents" collection only if it is missing, so this cell can be
# re-run without raising on an already existing collection.
if not client_qdrant.collection_exists("documents"):
    client_qdrant.create_collection(
        collection_name="documents",
        # Typed configuration instead of a raw dict: 3-dimensional vectors
        # compared with the cosine metric. A typo in a field name or metric
        # now fails here rather than relying on implicit dict coercion.
        vectors_config=models.VectorParams(
            size=3,
            distance=models.Distance.COSINE,
        ),
    )
else:
    print("Already created!")

# client_qdrant.delete_collection("documents")

# Index

```python
{
    "id": 129,
    "vector": [0.1, 0.2, 0.3],
    "payload": {"color": "red"},
}
```

In [10]:
# Generate a random (version 4) UUID and show its string form.
str(uuid.uuid4())

'ef129140-c2ed-4c96-a516-7ca7c02879ea'

In [11]:
# Three sample points. Each one gets a random UUID as id, a "color" payload
# and a 3-component vector.
list_vects = [
    models.PointStruct(
        id=str(uuid.uuid4()),  # random UUID
        payload={"color": color},
        vector=vector,
    )
    for color, vector in (
        ("red", [0.2, 0.1, 0.7]),
        ("blue", [0.5, 0.4, 0.1]),
        ("green", [0.3, 0.3, 0.4]),
    )
]
  

In [12]:
# Upsert (insert or update) the sample points into the "documents" collection.
# `points` takes a list[models.PointStruct].
client_qdrant.upsert(collection_name="documents", points=list_vects)

UpdateResult(operation_id=0, status=<UpdateStatus.COMPLETED: 'completed'>)

# Distance

In [13]:
import numpy as np

# Example vector
v = [0.3, 0.4, 0.5]

# Euclidean (L2) length of v, i.e. sqrt(0.3**2 + 0.4**2 + 0.5**2)
norm = np.linalg.norm(v, ord=2)
norm

0.7071067811865476

In [14]:
u = [0.2, 0.3, 0.5]

# Cosine of the angle between v and u: their dot product divided by the
# product of their L2 norms. Despite the variable name, this value is the
# cosine *similarity* (1.0 means same direction), not a distance.
dot_vu = np.dot(v, u)
norms_product = np.linalg.norm(v) * np.linalg.norm(u)
dist_cosine = dot_vu / norms_product
dist_cosine

0.9864876556434156

In [15]:
from  scipy.spatial import distance
# https://docs.scipy.org/doc/scipy/reference/generated/scipy.spatial.distance.cosine.html
# Note: distance.cosine returns the cosine *distance*, i.e. 1 - cosine similarity

Dot product: cheaper to compute than cosine similarity, but the two only give the same result when both vectors are normalized to unit length (norm 1).


In [17]:
# Each vector has one component per entry of `products`
# (presumably a quantity per product -- the notebook does not say).
products = ["eggs", "flour", "sugar", "vodka", "redbull"]
v1 = np.array([1, 1, 1, 0, 0])
v2 = 100 * v1  # same direction as v1, 100x larger: [100, 100, 100, 0, 0]
v3 = np.array([1, 0, 0, 1, 1])

# Euclidean distance is sensitive to magnitude: v2 is a scaled copy of v1,
# yet it ends up much farther from v1 than v3 does.
print(f"{distance.euclidean(v1, v2)=}")
print(f"{distance.euclidean(v1, v3)=}")

distance.euclidean(v1, v2)=171.47302994931886
distance.euclidean(v1, v3)=2.0


In [19]:
# Cosine distance ignores magnitude: v2 is a scaled copy of v1, so their
# distance is 0.0, while v1 and v3 point in different directions.
print(f"{distance.cosine(v1, v2)=}")
print(f"{distance.cosine(v1, v3)=}")

distance.cosine(v1, v2)=0.0
distance.cosine(v1, v3)=0.6666666666666667


In [20]:
# Both vectors have unit length, so the plain dot product and the cosine
# similarity give the same number.
u = [1, 0]  # 0 degrees
half_sqrt2 = np.sqrt(2) / 2
v = [half_sqrt2, half_sqrt2]  # 45 degrees

dist_dot_product = np.dot(v, u)
# distance.cosine is a distance, so subtract it from 1 to get the similarity.
dist_cosine = 1 - distance.cosine(v, u)

print(f"{dist_dot_product=}")
print(f"{dist_cosine=}")


dist_dot_product=0.7071067811865476
dist_cosine=0.7071067811865476


# Query

In [22]:
# Query vector with the same number of components (3) as the stored points.
query = [0.6, 0.2, 0.2]

# Ask the "documents" collection for at most two matches for the query vector.
results = client_qdrant.query_points(collection_name="documents", query=query, limit=2)

# Print the id and score of every returned point.
for hit in results.points:
    print(hit.id, " (score:", hit.score, ")")

3c180943-0b23-41df-9b76-104a151fd21c  (score: 0.9304842663481315 )
304599f5-eed7-4a0e-b623-de9bf32e7dec  (score: 0.8273403416227896 )


# Metadata Filtering

https://qdrant.tech/documentation/concepts/filtering/#should

In [23]:
# Payload condition: the "color" field must equal "blue".
is_blue = models.FieldCondition(key="color", match=models.MatchValue(value="blue"))

# Scroll through the points that satisfy the condition. The result is a tuple
# whose first element is the list of matching records.
client_qdrant.scroll(
    collection_name="documents",
    scroll_filter=models.Filter(must=[is_blue]),
)

([Record(id='3c180943-0b23-41df-9b76-104a151fd21c', payload={'color': 'blue'}, vector=None, shard_key=None, order_value=None)],
 None)

In [25]:
# Payload condition: the "color" field must be one of the listed values.
is_blue_or_green = models.FieldCondition(
    key="color",
    match=models.MatchAny(any=["blue", "green"]),
)

# Scroll through the points that satisfy the condition.
client_qdrant.scroll(
    collection_name="documents",
    scroll_filter=models.Filter(must=[is_blue_or_green]),
)

([Record(id='304599f5-eed7-4a0e-b623-de9bf32e7dec', payload={'color': 'green'}, vector=None, shard_key=None, order_value=None),
  Record(id='3c180943-0b23-41df-9b76-104a151fd21c', payload={'color': 'blue'}, vector=None, shard_key=None, order_value=None)],
 None)

In [26]:
# Payload condition: the "color" field must be anything except "red".
# `except` is a reserved word in Python, so the argument is passed through
# dict unpacking instead of as a regular keyword.
is_not_red = models.FieldCondition(
    key="color",
    match=models.MatchExcept(**{"except": ["red"]}),
)

# Scroll through the points that satisfy the condition.
client_qdrant.scroll(
    collection_name="documents",
    scroll_filter=models.Filter(must=[is_not_red]),
)

([Record(id='304599f5-eed7-4a0e-b623-de9bf32e7dec', payload={'color': 'green'}, vector=None, shard_key=None, order_value=None),
  Record(id='3c180943-0b23-41df-9b76-104a151fd21c', payload={'color': 'blue'}, vector=None, shard_key=None, order_value=None)],
 None)

References:
* [How Does Vector Search Work in Qdrant?](https://qdrant.tech/documentation/overview/vector-search/)
* [Build Your First Semantic Search Engine in 5 Minutes](https://qdrant.tech/documentation/beginner-tutorials/search-beginners/)
* [Question Answering as a Service with Cohere and Qdrant](https://qdrant.tech/articles/qa-with-cohere-and-qdrant/)