# Qdrant tutorial

* Written in Rust
* xAI
* [Benchmark](https://qdrant.tech/benchmarks/)

![HNSW](https://qdrant.tech/docs/gettingstarted/vector-search.png)

In [2]:
# Enable IPython's autoreload extension; mode 2 reloads imported modules before each cell is executed.
%load_ext autoreload
%autoreload 2

In [3]:
import uuid
from qdrant_client import models, QdrantClient


  from .autonotebook import tqdm as notebook_tqdm


In [4]:
# In-memory Qdrant instance running inside this Python process (no server required).
# To talk to a local server instead, use QdrantClient("http://localhost:6333").
client_qdrant = QdrantClient(location=":memory:")


In [27]:
import os

# Client for the managed Qdrant Cloud cluster.
# - QDRANT_URL overrides the endpoint; the original cluster URL stays as the default.
# - QDRANT_API_KEY supplies the credential from the environment so it is never stored
#   in the notebook; when it is unset, api_key is None and the call behaves as before.
client_qdrant_remote = QdrantClient(
    os.environ.get(
        "QDRANT_URL",
        "https://e0634f57-b3c9-4193-8355-cf7e48c8e247.europe-west3-0.gcp.cloud.qdrant.io",
    ),
    api_key=os.environ.get("QDRANT_API_KEY"),
)

  client_qdrant_remote = QdrantClient("https://e0634f57-b3c9-4193-8355-cf7e48c8e247.europe-west3-0.gcp.cloud.qdrant.io")


# Manage collections

In [5]:
# List the collections currently known to the client.
client_qdrant.get_collections()

CollectionsResponse(collections=[])

In [6]:
# Create the "documents" collection only if it is missing, so re-running this cell is safe.
if client_qdrant.collection_exists("documents"):
    print("Already created!")
else:
    client_qdrant.create_collection(
        collection_name="documents",
        # Typed config rather than a raw dict: a single unnamed vector per point,
        # 3 dimensions, compared with cosine similarity.
        vectors_config=models.VectorParams(size=3, distance=models.Distance.COSINE),
    )

# To start over, drop the collection first:
# client_qdrant.delete_collection("documents")

# Index

```python
{
    "id": 129,
    "vector": [0.1, 0.2, 0.3],
    "payload": {"color": "red"},
}
```

In [29]:
# A random (version 4) UUID rendered as a string; the points below use this form as their id.
str(uuid.uuid4())

'192c7be7-7f5b-4c34-a83d-c7db0da8666c'

In [None]:
# (payload color, 3-d vector) for each demo point.
_color_vectors = [
    ("red", [0.2, 0.1, 0.7]),
    ("blue", [0.5, 0.4, 0.1]),
    ("green", [0.3, 0.3, 0.4]),
]

# Every point gets a freshly generated random UUID as its id.
list_vects = [
    models.PointStruct(
        id=str(uuid.uuid4()),
        payload={"color": color},
        vector=vector,
    )
    for color, vector in _color_vectors
]
  

In [8]:
# Send the prepared points to the "documents" collection; the returned status is displayed.
client_qdrant.upsert(points=list_vects, collection_name="documents")

UpdateResult(operation_id=0, status=<UpdateStatus.COMPLETED: 'completed'>)

# Distance

In [9]:
import numpy as np

# Sample vector whose length we want to measure
v = [0.3, 0.4, 0.5]

# Euclidean (L2) norm: sqrt(0.3**2 + 0.4**2 + 0.5**2)
norm = np.linalg.norm(v, ord=2)
norm

0.7071067811865476

In [10]:
u = [0.2, 0.3, 0.5]

# Cosine similarity of v and u: their dot product divided by the product of their norms.
# (Despite the variable name, this value is a similarity, not a distance.)
norm_v, norm_u = np.linalg.norm(v), np.linalg.norm(u)
dist_cosine = np.dot(v, u) / (norm_v * norm_u)
dist_cosine

0.9864876556434156

In [11]:
from  scipy.spatial import distance
# https://docs.scipy.org/doc/scipy/reference/generated/scipy.spatial.distance.cosine.html
# Note: distance.cosine returns the cosine *distance*, i.e. 1 - cosine similarity

Dot product: less computation than cosine similarity, but the two only give the same result when the vectors are normalized to unit length (norm 1).


In [30]:
# Each position of a basket vector is the quantity of the product at the same index.
products = ["eggs", "flour", "sugar", "vodka", "redbull"]
v1 = np.array([1, 1, 1, 0, 0])  # eggs, flour, sugar
v2 = 100 * v1                   # the same basket with 100x the quantities
v3 = np.array([1, 0, 0, 1, 1])  # eggs, vodka, redbull

# Euclidean distance depends on magnitude as well as direction.
print(
    f"{distance.euclidean(v1, v2)=}",
    f"{distance.euclidean(v1, v3)=}",
    sep="\n",
)

distance.euclidean(v1, v2)=171.47302994931886
distance.euclidean(v1, v3)=2.0


In [31]:
# Same basket comparison, this time with cosine distance.
print(
    f"{distance.cosine(v1, v2)=}",
    f"{distance.cosine(v1, v3)=}",
    sep="\n",
)

distance.cosine(v1, v2)=0.0
distance.cosine(v1, v3)=0.6666666666666667


In [26]:
half_sqrt2 = np.sqrt(2) / 2

u = [1, 0]  # unit vector at 0 degrees
v = [half_sqrt2, half_sqrt2]  # unit vector at 45 degrees

# Both vectors have norm 1, so the dot product and the cosine similarity coincide.
dist_dot_product = np.dot(v, u)
dist_cosine = 1 - distance.cosine(v, u)  # scipy returns a distance; convert it back to a similarity
print(f"{dist_dot_product=}", f"{dist_cosine=}", sep="\n")


dist_dot_product=0.7071067811865476
dist_cosine=0.7071067811865476


# Query

In [15]:
query = [0.6, 0.2, 0.2]

# Ask the "documents" collection for at most 10 points matching the query vector.
results = client_qdrant.query_points(
    limit=10,
    query=query,
    collection_name="documents",
)

# One line per returned point: its id followed by its score.
for hit in results.points:
    print(hit.id, " (score:", hit.score, ")")

b14af5a8-df8a-4fc9-88fb-91a669025edd  (score: 0.9304842663481315 )
5df1402c-f110-45a2-b092-f440cc6c2e5e  (score: 0.8273403416227896 )
b00aadfd-47d4-47ef-ac08-d3553ff19f5e  (score: 0.5744269788079648 )


# Metadata Filtering

See the [Qdrant filtering documentation](https://qdrant.tech/documentation/concepts/filtering/#should) for the supported clauses and conditions.

In [16]:
# Condition: the payload field "color" must equal "blue".
color_is_blue = models.FieldCondition(
    key="color",
    match=models.MatchValue(value="blue"),
)

# Scroll through the points that satisfy the condition (payload filter only, no query vector).
client_qdrant.scroll(
    collection_name="documents",
    scroll_filter=models.Filter(must=[color_is_blue]),
)

([Record(id='b14af5a8-df8a-4fc9-88fb-91a669025edd', payload={'color': 'blue'}, vector=None, shard_key=None, order_value=None)],
 None)

In [17]:
# Condition: the payload field "color" must be one of the listed values.
color_is_blue_or_green = models.FieldCondition(
    key="color",
    match=models.MatchAny(any=["blue", "green"]),
)

# Scroll through the points that satisfy the condition.
client_qdrant.scroll(
    collection_name="documents",
    scroll_filter=models.Filter(must=[color_is_blue_or_green]),
)

([Record(id='5df1402c-f110-45a2-b092-f440cc6c2e5e', payload={'color': 'green'}, vector=None, shard_key=None, order_value=None),
  Record(id='b14af5a8-df8a-4fc9-88fb-91a669025edd', payload={'color': 'blue'}, vector=None, shard_key=None, order_value=None)],
 None)

In [18]:
# Condition: the payload field "color" may be anything except the listed values.
# `except` is a reserved word in Python, so the keyword is passed through dict unpacking.
color_is_not_red = models.FieldCondition(
    key="color",
    match=models.MatchExcept(**{"except": ["red"]}),
)

# Scroll through the points that satisfy the condition.
client_qdrant.scroll(
    collection_name="documents",
    scroll_filter=models.Filter(must=[color_is_not_red]),
)

([Record(id='5df1402c-f110-45a2-b092-f440cc6c2e5e', payload={'color': 'green'}, vector=None, shard_key=None, order_value=None),
  Record(id='b14af5a8-df8a-4fc9-88fb-91a669025edd', payload={'color': 'blue'}, vector=None, shard_key=None, order_value=None)],
 None)

References:
* [How Does Vector Search Work in Qdrant?](https://qdrant.tech/documentation/overview/vector-search/)
* [Build Your First Semantic Search Engine in 5 Minutes](https://qdrant.tech/documentation/beginner-tutorials/search-beginners/)
* [Question Answering as a Service with Cohere and Qdrant](https://qdrant.tech/articles/qa-with-cohere-and-qdrant/)