In [3]:
from qdrant_client import QdrantClient , models



In [4]:
client = QdrantClient(url="http://localhost:6333")

In [4]:
client.create_collection(
    collection_name="qdrant_basics",
    vectors_config = models.VectorParams(size=3,distance=models.Distance.COSINE)
)

True

In [34]:
client.create_collection(
    collection_name = "random",
    vectors_config=models.VectorParams(size=4,distance = models.Distance.COSINE)
)

True

In [5]:
client.upsert(
    collection_name="qdrant_basics",
    points=[
        models.PointStruct(
            id=1,
            payload={"color":"red"},
            vector=[0.1,0.2,0.3]
        ),
        models.PointStruct(
            id=2,
            payload={"color":"blue"},
            vector=[0.22,0.45,0.55]
        )

            ]
    
)

UpdateResult(operation_id=0, status=<UpdateStatus.COMPLETED: 'completed'>)

In [6]:
client.upsert(
    collection_name="qdrant_basics",
    points = [
        models.PointStruct(
            id=3,
            payload={"color":"red"},
            vector=[0.1,0.22,0.5]
        ),
        models.PointStruct(
            id=4,
            payload={"color":"red"},
            vector=[0.2,0.56,0.33]
        )

    ]
)

UpdateResult(operation_id=1, status=<UpdateStatus.COMPLETED: 'completed'>)

# Updating the Payloads

In [7]:
client.set_payload(
    collection_name="qdrant_basics",
    payload={"color":"green",
             "size":"medium"},
    points=[1,4]
)

UpdateResult(operation_id=2, status=<UpdateStatus.COMPLETED: 'completed'>)

In [8]:
client.set_payload(
    collection_name = "qdrant_basics",
    payload = {"color":"green","size":"medium"},

    points = models.Filter(
        must = [models.FieldCondition(
                key = "color",
                match = models.MatchValue(value = "red")

        ),
        models.FieldCondition(
            key="size",
            match = models.MatchValue(value="large")
        )
        ]
    )

)

UpdateResult(operation_id=3, status=<UpdateStatus.COMPLETED: 'completed'>)

In [9]:
#deleting the payloads

client.clear_payload(
    collection_name = "qdrant_basics",

    points_selector = [1]

)

UpdateResult(operation_id=4, status=<UpdateStatus.COMPLETED: 'completed'>)

In [10]:
#with filter

client.clear_payload(
    collection_name = "qdrant_basics",
    points_selector = models.Filter(
        must = [
            models.FieldCondition(
                key = "color",
                match = models.MatchValue(value = "red")
            ),
            models.FieldCondition(
                key = "size",
                match = models.MatchValue(value = "small")
            )
        ]
    )
)



UpdateResult(operation_id=5, status=<UpdateStatus.COMPLETED: 'completed'>)

In [11]:
#creating indexes according to the payload values

client.create_payload_index(
    collection_name = "qdrant_basics",
    field_name = "color",
    field_schema= "keyword" #this denotes that color is string type with categorical value
)

UpdateResult(operation_id=7, status=<UpdateStatus.COMPLETED: 'completed'>)

In [3]:
#Inserting ten datas with payloads size and color

client.upsert(
    collection_name = "qdrant_basics",
    points = [
        models.PointStruct(
            id=5,
            payload = {
                "color" : "red",
                "size" : "medium"
            },
            vector = [0.11,0.34,0.33]
        ),
        models.PointStruct(
            id=6,
            payload = {
                "color":"black",
                "size" : "small"
            },
            vector = [0.2,0.35,0.27]
        ),
        models.PointStruct(
            id=7,
            payload = {
                "color":"blue",
                "size" : "large"
            },
            vector = [0.21,0.32,0.29]
        ),
        models.PointStruct(
            id=8,
            payload = {
                "color":"green",
                "size" : "medium"
            },
            vector = [0.32,0.11,0.56]
        ),
        models.PointStruct(
            id=9,
            payload = {
                "color":"red",
                "size" : "small"
            },
            vector = [0.22,0.51,0.01]
        ),
        models.PointStruct(
            id=10,
            payload = {
                "color":"blue",
                "size" : "large"
            },
            vector = [0.2,0.35,0.27]
        ),
        models.PointStruct(
            id=11,
            payload = {
                "color":"red",
                "size" : "large"
            },
            vector = [0.11,0.63,0.92]
        ),
        models.PointStruct(
            id=12,
            payload = {
                "color":"green",
                "size" : "medium"
            },
            vector = [0.23,0.82,0.61]
        )
        
    ]
)

UpdateResult(operation_id=11, status=<UpdateStatus.COMPLETED: 'completed'>)

In [4]:
#setting payloads for points having no size payload

client.set_payload(
    collection_name = "qdrant_basics",
    payload = {"size":"small"},
    points = [2,3]
)

UpdateResult(operation_id=12, status=<UpdateStatus.COMPLETED: 'completed'>)

In [12]:
client.set_payload(
    collection_name= "qdrant_basics",
    payload = {
        "color" : "black",
        "size" : "large"
    },
    points = [1]
)

UpdateResult(operation_id=8, status=<UpdateStatus.COMPLETED: 'completed'>)

In [13]:
client.create_payload_index(
    collection_name="qdrant_basics",
    field_name = "size",
    field_schema = "keyword"
)

UpdateResult(operation_id=10, status=<UpdateStatus.COMPLETED: 'completed'>)

In [12]:
#Facet Counts is like the MySQL count aggregation with where and group by clause in vector database
client.facet(
    collection_name = "qdrant_basics",
    key = "color", #this is like the gorup by clause where the count is done grouped by color key(payload)
    facet_filter= models.Filter(must=[models.FieldCondition(
        key = "size",
        match = models.MatchValue(value="large")
    )])
    #this is like the where clause where the count is 
    #only done on the basis of the points where size = large
    #this is optional field 
)

FacetResponse(hits=[FacetValueHit(value='blue', count=2), FacetValueHit(value='black', count=1), FacetValueHit(value='red', count=1)])

In [5]:
client.facet(
    collection_name = "qdrant_basics",
    key = "color"
)

FacetResponse(hits=[FacetValueHit(value='red', count=4), FacetValueHit(value='blue', count=3), FacetValueHit(value='green', count=3), FacetValueHit(value='black', count=2)])

# Searching


In [13]:
client.query_points(
    collection_name = "qdrant_basics",
    query = [0.1,0.2,0.3]
)

QueryResponse(points=[ScoredPoint(id=1, version=8, score=0.9999998, payload={'color': 'black', 'size': 'large'}, vector=None, shard_key=None, order_value=None), ScoredPoint(id=2, version=12, score=0.9951674, payload={'color': 'blue', 'size': 'small'}, vector=None, shard_key=None, order_value=None), ScoredPoint(id=11, version=11, score=0.9851332, payload={'color': 'red', 'size': 'large'}, vector=None, shard_key=None, order_value=None), ScoredPoint(id=3, version=12, score=0.98176813, payload={'color': 'red', 'size': 'small'}, vector=None, shard_key=None, order_value=None), ScoredPoint(id=5, version=11, score=0.9780219, payload={'color': 'red', 'size': 'medium'}, vector=None, shard_key=None, order_value=None), ScoredPoint(id=7, version=11, score=0.9572706, payload={'color': 'blue', 'size': 'large'}, vector=None, shard_key=None, order_value=None), ScoredPoint(id=12, version=11, score=0.9439634, payload={'color': 'green', 'size': 'medium'}, vector=None, shard_key=None, order_value=None), Sc

In [14]:
#searching with Filtering, Limits and Params

client.query_points(
    collection_name = "qdrant_basics",
    query = [0.22,0.34,0.45],

    query_filter = models.Filter(
        must = [
            models.FieldCondition(
                key = "color",
                match = models.MatchValue(value = "red")
            )
        ]

    ),
    search_params = models.SearchParams(hnsw_ef = 128,exact = False), #here the search params are the custom parameters for search
    #hnsw = graph search algorithm and the ef paramater determines the number nearest neighbours to expand at once, 
    # exact determines whether or not to search for the point with the exact vector
    #indexed_only = determines whether or not to include the vectors that are indexed
    limit = 4 #how many points to return from the search

)

QueryResponse(points=[ScoredPoint(id=5, version=11, score=0.97903967, payload={'color': 'red', 'size': 'medium'}, vector=None, shard_key=None, order_value=None), ScoredPoint(id=11, version=11, score=0.9618025, payload={'color': 'red', 'size': 'large'}, vector=None, shard_key=None, order_value=None), ScoredPoint(id=3, version=12, score=0.9571761, payload={'color': 'red', 'size': 'small'}, vector=None, shard_key=None, order_value=None), ScoredPoint(id=9, version=11, score=0.67289865, payload={'color': 'red', 'size': 'small'}, vector=None, shard_key=None, order_value=None)])

In [24]:
#searching with more params

client.query_points(
    collection_name = "qdrant_basics",
    query = [0.21,0.32,0.43],
    with_vectors = True,
    with_payload = models.PayloadSelectorExclude(
        exclude = ["color"]
    )
)

QueryResponse(points=[ScoredPoint(id=2, version=12, score=0.99638414, payload={'size': 'small'}, vector=[0.29573527, 0.604913, 0.73933816], shard_key=None, order_value=None), ScoredPoint(id=1, version=8, score=0.99351275, payload={'size': 'large'}, vector=[0.2672612, 0.5345224, 0.8017837], shard_key=None, order_value=None), ScoredPoint(id=7, version=11, score=0.9810333, payload={'size': 'large'}, vector=[0.43731022, 0.6663775, 0.6039046], shard_key=None, order_value=None), ScoredPoint(id=5, version=11, score=0.97779953, payload={'size': 'medium'}, vector=[0.22614412, 0.69899094, 0.6784324], shard_key=None, order_value=None), ScoredPoint(id=10, version=11, score=0.9670415, payload={'size': 'large'}, vector=[0.41221783, 0.7213812, 0.55649406], shard_key=None, order_value=None), ScoredPoint(id=6, version=11, score=0.9670415, payload={'size': 'small'}, vector=[0.41221783, 0.7213812, 0.55649406], shard_key=None, order_value=None), ScoredPoint(id=11, version=11, score=0.96168864, payload={'s

In [42]:
#batch search 
# so we create different single requests and batch them together in a single request 
filter_ = models.Filter(
    must = [
        models.FieldCondition(
            key = "color",
            match = models.MatchValue(value = "red")
        )
    ]
)
search_params = models.SearchParams(hnsw_ef = 128, exact = False )
search_queries = [
    models.QueryRequest(query=[0.1,0.2,0.3],filter=filter_,limit = 3),
    models.QueryRequest(query = [0.11,0.22,0.33],filter = filter_, limit = 5, params = search_params)
]

client.query_batch_points(collection_name = "qdrant_basics",requests = search_queries)   

[QueryResponse(points=[ScoredPoint(id=11, version=11, score=0.9851332, payload=None, vector=None, shard_key=None, order_value=None), ScoredPoint(id=3, version=12, score=0.98176813, payload=None, vector=None, shard_key=None, order_value=None), ScoredPoint(id=5, version=11, score=0.9780219, payload=None, vector=None, shard_key=None, order_value=None)]),
 QueryResponse(points=[ScoredPoint(id=11, version=11, score=0.98513323, payload=None, vector=None, shard_key=None, order_value=None), ScoredPoint(id=3, version=12, score=0.98176825, payload=None, vector=None, shard_key=None, order_value=None), ScoredPoint(id=5, version=11, score=0.97802204, payload=None, vector=None, shard_key=None, order_value=None), ScoredPoint(id=9, version=11, score=0.6110008, payload=None, vector=None, shard_key=None, order_value=None)])]

In [None]:
#query by id
#using the id of a point which fetches the vector of that point and use it as query

client.query_points(
    collection_name = "qdrant_basics",
    query = 1 #point 1 is excluded
)

QueryResponse(points=[ScoredPoint(id=2, version=12, score=0.9951674, payload={'color': 'blue', 'size': 'small'}, vector=None, shard_key=None, order_value=None), ScoredPoint(id=11, version=11, score=0.9851332, payload={'color': 'red', 'size': 'large'}, vector=None, shard_key=None, order_value=None), ScoredPoint(id=3, version=12, score=0.98176813, payload={'color': 'red', 'size': 'small'}, vector=None, shard_key=None, order_value=None), ScoredPoint(id=5, version=11, score=0.9780219, payload={'color': 'red', 'size': 'medium'}, vector=None, shard_key=None, order_value=None), ScoredPoint(id=7, version=11, score=0.9572706, payload={'color': 'blue', 'size': 'large'}, vector=None, shard_key=None, order_value=None), ScoredPoint(id=12, version=11, score=0.9439634, payload={'color': 'green', 'size': 'medium'}, vector=None, shard_key=None, order_value=None), ScoredPoint(id=10, version=11, score=0.9419521, payload={'color': 'blue', 'size': 'large'}, vector=None, shard_key=None, order_value=None), S

In [None]:
#search with an offset. So if the offset is set to 100 the search result skips all the 100 search result and starts
#after the first 100 results. This is known as pagination

client.query_points(
    collection_name = "qdrant_basics",
    query = [0.11,0.22,0.23],
    offset = 2,
    with_vectors = True,
    with_payload = True,
    limit = 4
)

QueryResponse(points=[ScoredPoint(id=7, version=11, score=0.9906639, payload={'color': 'blue', 'size': 'large'}, vector=[0.43731022, 0.6663775, 0.6039046], shard_key=None, order_value=None), ScoredPoint(id=10, version=11, score=0.9860201, payload={'color': 'blue', 'size': 'large'}, vector=[0.41221783, 0.7213812, 0.55649406], shard_key=None, order_value=None), ScoredPoint(id=6, version=11, score=0.9860201, payload={'color': 'black', 'size': 'small'}, vector=[0.41221783, 0.7213812, 0.55649406], shard_key=None, order_value=None), ScoredPoint(id=1, version=8, score=0.9841269, payload={'color': 'black', 'size': 'large'}, vector=[0.2672612, 0.5345224, 0.8017837], shard_key=None, order_value=None)])

In [None]:
# Group Searching

client.query_points_groups(
    collection_name = "qdrant_basics",
    group_by = "color",
    limit = 4,
    query = [0.1,0.2,0.3],
    group_size = 2
)

GroupsResult(groups=[PointGroup(hits=[ScoredPoint(id=1, version=8, score=0.9999998, payload={'color': 'black', 'size': 'large'}, vector=None, shard_key=None, order_value=None), ScoredPoint(id=6, version=11, score=0.9419521, payload={'color': 'black', 'size': 'small'}, vector=None, shard_key=None, order_value=None)], id='black', lookup=None), PointGroup(hits=[ScoredPoint(id=2, version=12, score=0.9951674, payload={'color': 'blue', 'size': 'small'}, vector=None, shard_key=None, order_value=None), ScoredPoint(id=7, version=11, score=0.9572706, payload={'color': 'blue', 'size': 'large'}, vector=None, shard_key=None, order_value=None)], id='blue', lookup=None), PointGroup(hits=[ScoredPoint(id=11, version=11, score=0.9851332, payload={'color': 'red', 'size': 'large'}, vector=None, shard_key=None, order_value=None), ScoredPoint(id=3, version=12, score=0.98176813, payload={'color': 'red', 'size': 'small'}, vector=None, shard_key=None, order_value=None)], id='red', lookup=None), PointGroup(hits

In [51]:
#Random Sampling
#this is used to use random sample just to retrieve the points
client.query_points(
    collection_name = "qdrant_basics",
    query = models.SampleQuery(sample = models.Sample.RANDOM)
)

QueryResponse(points=[ScoredPoint(id=12, version=0, score=0.0, payload={'color': 'green', 'size': 'medium'}, vector=None, shard_key=None, order_value=None), ScoredPoint(id=3, version=0, score=0.0, payload={'color': 'red', 'size': 'small'}, vector=None, shard_key=None, order_value=None), ScoredPoint(id=5, version=0, score=0.0, payload={'color': 'red', 'size': 'medium'}, vector=None, shard_key=None, order_value=None), ScoredPoint(id=10, version=0, score=0.0, payload={'color': 'blue', 'size': 'large'}, vector=None, shard_key=None, order_value=None), ScoredPoint(id=11, version=0, score=0.0, payload={'color': 'red', 'size': 'large'}, vector=None, shard_key=None, order_value=None), ScoredPoint(id=2, version=0, score=0.0, payload={'color': 'blue', 'size': 'small'}, vector=None, shard_key=None, order_value=None), ScoredPoint(id=9, version=0, score=0.0, payload={'color': 'red', 'size': 'small'}, vector=None, shard_key=None, order_value=None), ScoredPoint(id=4, version=0, score=0.0, payload={'co

In [59]:
#Parametarized Indexing of the payloads
client.create_payload_index(
    collection_name = "qdrant_basics",
    field_name = "age",
    field_schema = models.IntegerIndexParams(
        type = models.IntegerIndexType.INTEGER,
        lookup = False,
        range = True
    )
)

UpdateResult(operation_id=19, status=<UpdateStatus.COMPLETED: 'completed'>)

In [64]:
client.create_payload_index(
    collection_name = "qdrant_basics",
    field_name = "color",
    field_schema = models.KeywordIndexParams(
        type = models.KeywordIndexType.KEYWORD,
        on_disk=True
    )
)

UpdateResult(operation_id=23, status=<UpdateStatus.COMPLETED: 'completed'>)

In [65]:
client.set_payload(
    collection_name = "qdrant_basics",
    payload = {"tenant_id":"A"},
    points = [1,2,3,4,5,6]
)

UpdateResult(operation_id=24, status=<UpdateStatus.COMPLETED: 'completed'>)

In [66]:
client.set_payload(
    collection_name = "qdrant_basics",
    payload = {"tenant_id":"B"},
    points = [7,8,9,10,11,12]
)

UpdateResult(operation_id=25, status=<UpdateStatus.COMPLETED: 'completed'>)

In [67]:
client.create_payload_index(
    collection_name = "qdrant_basics",
    field_name = "tenant_id",
    field_schema=models.KeywordIndexParams(
        type = models.KeywordIndexType.KEYWORD,
        is_tenant = True
    )
    
)

UpdateResult(operation_id=27, status=<UpdateStatus.COMPLETED: 'completed'>)

In [56]:
client.set_payload(
    collection_name = "qdrant_basics",
    payload = {
        "age" :12
    },
    points= [1,2,3,4]
)

UpdateResult(operation_id=15, status=<UpdateStatus.COMPLETED: 'completed'>)

In [57]:
client.set_payload(
    collection_name = "qdrant_basics",
    payload = {
        "age" :13
    },
    points= [5,6,7,8]
)

UpdateResult(operation_id=16, status=<UpdateStatus.COMPLETED: 'completed'>)

In [58]:
client.set_payload(
    collection_name = "qdrant_basics",
    payload = {
        "age" :14
    },
    points= [9,10,11,12]
)

UpdateResult(operation_id=17, status=<UpdateStatus.COMPLETED: 'completed'>)

In [61]:
client.query_points(
    collection_name="qdrant_basics",
    query=[0.1,0.2,0.3],
    query_filter=models.Filter(
        must=[models.FieldCondition(
            key="age", 
            range=models.Range(gte=10, lte=13)
        )]
    )
)

QueryResponse(points=[ScoredPoint(id=1, version=15, score=0.9999998, payload={'color': 'black', 'size': 'large', 'age': 12}, vector=None, shard_key=None, order_value=None), ScoredPoint(id=2, version=15, score=0.9951674, payload={'color': 'blue', 'size': 'small', 'age': 12}, vector=None, shard_key=None, order_value=None), ScoredPoint(id=3, version=15, score=0.98176813, payload={'color': 'red', 'size': 'small', 'age': 12}, vector=None, shard_key=None, order_value=None), ScoredPoint(id=5, version=16, score=0.9780219, payload={'color': 'red', 'size': 'medium', 'age': 13}, vector=None, shard_key=None, order_value=None), ScoredPoint(id=7, version=16, score=0.9572706, payload={'color': 'blue', 'size': 'large', 'age': 13}, vector=None, shard_key=None, order_value=None), ScoredPoint(id=6, version=16, score=0.9419521, payload={'color': 'black', 'size': 'small', 'age': 13}, vector=None, shard_key=None, order_value=None), ScoredPoint(id=4, version=15, score=0.9078039, payload={'color': 'green', 's

# Named Vectors Collection

In [15]:
#creation of multiple vectors for a point named vectors

client.create_collection(
    collection_name = "named_vectored_collection",
    vectors_config = {
        "image" : models.VectorParams(size = 3, distance=models.Distance.COSINE),
        "text" : models.VectorParams(size = 4, distance=models.Distance.COSINE)
    }
)

True

In [17]:
#inserting 10 points in the named Vectors

client.upsert(
    collection_name = "named_vectored_collection",
    points = [
        models.PointStruct(
            id=1,
            payload = {"color":"red", "size" : "XXL" },
            vector = {
                "image" :[0.1,0.2,0.3],
                "text" : [0.11,0.21,0.31,0.2]
            },

        )
    ]
)

UpdateResult(operation_id=0, status=<UpdateStatus.COMPLETED: 'completed'>)

In [18]:
client.upsert(
    collection_name = "named_vectored_collection",
    points = [
        models.PointStruct(
            id=2,
            vector = {
                "image" :[0.23,0.44,0.33],
                "text" : [0.11,0.2,0.23,0.71]
            }
        ),
        models.PointStruct(
            id=3,
            vector = {
                "image" :[0.21,0.34,0.83],
                "text" : [0.71,0.87,0.91,0.21]
            }
        ),
        models.PointStruct(
            id=4,
            vector = {
                "image" :[0.12,0.73,0.93],
                "text" : [0.01,0.02,0.03,0.11]
            }
        )
    ]
)

UpdateResult(operation_id=1, status=<UpdateStatus.COMPLETED: 'completed'>)

In [19]:
client.set_payload(
    collection_name = "named_vectored_collection",
    payload = {
        "color":"black",
        "size":"XL"
    },
    points = [2,3,4]
)

UpdateResult(operation_id=2, status=<UpdateStatus.COMPLETED: 'completed'>)

In [21]:
client.create_payload_index(
    collection_name = "named_vectored_collection",
    field_name = "color",
    field_schema = "keyword"
)


UpdateResult(operation_id=4, status=<UpdateStatus.COMPLETED: 'completed'>)

In [22]:
client.create_payload_index(
    collection_name = "named_vectored_collection",
    field_name = "size",
    field_schema = "keyword"
)

UpdateResult(operation_id=6, status=<UpdateStatus.COMPLETED: 'completed'>)

In [None]:
#searching in the named vector

client.query_points(
    collection_name = "named_vectored_collection",
    query = [0.2,0.1,0.3],
    using = "image" # this specifies which vector among the text and image to use for the search 
)

QueryResponse(points=[ScoredPoint(id=3, version=2, score=0.9429046, payload={'color': 'black', 'size': 'XL'}, vector=None, shard_key=None, order_value=None), ScoredPoint(id=1, version=0, score=0.9285713, payload={'color': 'red', 'size': 'XXL'}, vector=None, shard_key=None, order_value=None), ScoredPoint(id=2, version=2, score=0.8473035, payload={'color': 'black', 'size': 'XL'}, vector=None, shard_key=None, order_value=None), ScoredPoint(id=4, version=2, score=0.8456209, payload={'color': 'black', 'size': 'XL'}, vector=None, shard_key=None, order_value=None)])

# Snapshots

In [69]:
client.create_snapshot(
    collection_name = "qdrant_basics"
)

SnapshotDescription(name='qdrant_basics-8263441514127146-2025-09-24-10-37-23.snapshot', creation_time='2025-09-24T10:37:23', size=565760, checksum='36e7eca95ec09fac11ca465c7a7ab45b828e204dfe411ffe02f04f4796f3920a')

In [70]:
client.list_snapshots(
    collection_name = "qdrant_basics"
)

[SnapshotDescription(name='qdrant_basics-8263441514127146-2025-09-24-10-37-23.snapshot', creation_time='2025-09-24T10:37:23', size=565760, checksum='36e7eca95ec09fac11ca465c7a7ab45b828e204dfe411ffe02f04f4796f3920a')]

# RRF and Hybrid Queries


In [83]:
client.create_collection(
    collection_name="for_rrf",
    vectors_config={"dense":models.VectorParams(size=3, distance=models.Distance.COSINE)},
    sparse_vectors_config = {"sparse":models.SparseVectorParams()}
)

True

In [5]:
client.upsert(
    collection_name = "for_rrf",
    points= [
    models.PointStruct(
        id =2,
        payload = {"text":"Cats are lovely pets"},
        vector = {
            "dense":[0.11, 0.25, 0.37],
            "sparse":models.SparseVector(indices=[10, 42],values=[0.7, 0.3])
            
        },
        
    ),
    models.PointStruct(
        id =3,
        payload = {"text":"Dogs are loyal companion"},
        vector = {
            "dense":[0.15, 0.22, 0.39],
            "sparse":models.SparseVector(indices=[15, 42],values=[0.5, 0.4])
            
        },
        
    ),
    models.PointStruct(
        id =4,
        payload = {"text":"Cats and dogs often play together"},
        vector = {
            "dense":[0.14, 0.28, 0.36],
            "sparse":models.SparseVector(indices=[10, 15],values=[0.6, 0.5])
            
        },
        
    ),
    ]
)

UpdateResult(operation_id=1, status=<UpdateStatus.COMPLETED: 'completed'>)

In [6]:
client.query_points(
    collection_name = "for_rrf",
    prefetch = [models.Prefetch(
        query = [0.1,0.2,0.3],
        using = "dense"
    ),
    models.Prefetch(
        query = models.SparseVector(indices = [10,42],values=[0.61,0.43]),
        using = "sparse"
    ),

    ],
    query = models.FusionQuery(fusion = models.Fusion.RRF)
)

QueryResponse(points=[ScoredPoint(id=2, version=1, score=1.0, payload={'text': 'Cats are lovely pets'}, vector=None, shard_key=None, order_value=None), ScoredPoint(id=4, version=1, score=0.6666667, payload={'text': 'Cats and dogs often play together'}, vector=None, shard_key=None, order_value=None), ScoredPoint(id=3, version=1, score=0.5, payload={'text': 'Dogs are loyal companion'}, vector=None, shard_key=None, order_value=None)])

In [7]:
client.query_points(
    collection_name="qdrant_basics",
    prefetch=models.Prefetch(
        query=[1, 23, 45, 67],  # <------------- small byte vector
        using="mrl_byte",
        limit=1000,
    ),
    query=[0.01, 0.299, 0.45, 0.67],  # <-- full vector
    using="full",
    limit=10,
)

UnexpectedResponse: Unexpected Response: 400 (Bad Request)
Raw response content:
b'{"status":{"error":"Wrong input: Vector with name mrl_byte is not configured in this collection"},"time":0.003066627}'

# Qdrant Basic for a Json File

In [None]:
from qdrant_client import QdrantClient, models


In [29]:
import json
import re
import os

In [2]:
from sentence_transformers import SentenceTransformer

In [36]:
#variables for Qdrant Database creation
json_file_path = "accessories-fashion.json"
base_name = os.path.splitext(os.path.basename(json_file_path))[0]
chunk_size = 256
collection_name_json = "daraz_product_camera"
embedding_model_name = "all-MiniLM-L6-v2"

In [19]:
#chunking function

def chunk_text(text,chunk_size):
    start = 0
    tokens = re.findall(r'\w+|[{}[\]:,",]', text)
    chunks = []
    while start<=len(tokens):
        end = min(start+chunk_size,len(tokens))
        chunk = " ".join(tokens[start:end])
        chunks.append(chunk)
        start+=chunk_size

    return chunks

In [20]:
#loading the json file and dumping it into a string

with open(json_file_path,'r',encoding="utf-8") as f:
    data=json.load(f)

#dumping into a raw string

raw_text = json.dumps(data)

In [21]:
chunks = chunk_text(raw_text,chunk_size=256)

print(len(chunks))


128


In [22]:
#embedding and storing in a collection

encoder = SentenceTransformer("all-MiniLM-L6-v2")

To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development
Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


In [24]:
client = QdrantClient(url="http://localhost:6333")

In [25]:
encoder.get_sentence_embedding_dimension()

384

In [39]:
client.create_collection(
    collection_name = "daraz_items",
    vectors_config = models.VectorParams(size = encoder.get_sentence_embedding_dimension(), distance = models.Distance.COSINE)
)

True

In [40]:
payloads = {"file_name":base_name}

In [32]:
embeddings = encoder.encode(chunks,show_progress_bar = True, convert_to_numpy = True)

Batches: 100%|██████████| 4/4 [00:04<00:00,  1.14s/it]


In [33]:
embeddings

array([[-0.02483561,  0.14702807,  0.02601837, ..., -0.03605575,
         0.01890442,  0.04215562],
       [-0.01898901,  0.08296324,  0.01006372, ...,  0.03021486,
        -0.01531859,  0.05498528],
       [-0.01357948,  0.07507695, -0.01538894, ...,  0.01180666,
        -0.03455568,  0.05093301],
       ...,
       [-0.03333918,  0.12599342,  0.00798083, ...,  0.00685655,
         0.02804666,  0.0272413 ],
       [-0.01251123,  0.04299986, -0.01217078, ...,  0.00742458,
        -0.05773082,  0.06933547],
       [ 0.04940993,  0.12144326,  0.05377852, ..., -0.05159558,
        -0.01308045,  0.01573179]], shape=(128, 384), dtype=float32)

# Storing in the database

In [41]:
client.upsert(
    collection_name = "daraz_items",
    points = [
        models.PointStruct(
            id = idx, vector = embeddings[idx], payload = payloads)
            for idx in range(len(chunks))
    ]
)

UpdateResult(operation_id=0, status=<UpdateStatus.COMPLETED: 'completed'>)

In [38]:
client.delete_collection(
    collection_name = "daraz_items"
)



True

In [42]:
client.create_payload_index(
    collection_name = "daraz_items",
    field_name = "file_name",
    field_schema = models.KeywordIndexParams(type = models.KeywordIndexType.KEYWORD)
)

UpdateResult(operation_id=2, status=<UpdateStatus.COMPLETED: 'completed'>)

In [43]:
hits = client.query_points(
    collection_name = "daraz_items",
    query = encoder.encode("Suandesha QILENS 300mm CCTV camera lens 1/3 Image Format Long Viewing Distance M12 Mount Horizontal View Angle 1.15D Focus").tolist(),
    limit = 3
).points

for hit in hits:
    print(hit.payload,"score",hit.score)

{'file_name': 'accessories-fashion'} score 0.50625324
{'file_name': 'accessories-fashion'} score 0.4925301
{'file_name': 'accessories-fashion'} score 0.45574063
