In [1]:
%pip install -qU langchain-qdrant

Note: you may need to restart the kernel to use updated packages.


In [2]:
%pip install -qU langchain-ollama

Note: you may need to restart the kernel to use updated packages.


# Initialization

In [3]:
from langchain_ollama import OllamaEmbeddings

# Embedding model shared by the vector stores created in the cells below.
# NOTE(review): those collections are created with size=4096, which presumably
# matches this model's embedding dimension — confirm if the model is changed.
embeddings = OllamaEmbeddings(model="llama3")

### in-memory

For testing and quick experiments: the data lives only in memory, so it is removed when the script/notebook ends.

In [4]:
from langchain_qdrant import QdrantVectorStore
from qdrant_client import QdrantClient
from qdrant_client.http.models import Distance, VectorParams

# The special ":memory:" location runs Qdrant inside this process with no persistence.
client = QdrantClient(location=":memory:")

# The collection must exist before the vector store can be attached to it.
client.create_collection(
    vectors_config=VectorParams(distance=Distance.COSINE, size=4096),
    collection_name="demo_collection",
)

# Bind the LangChain wrapper to the freshly created collection.
vector_store = QdrantVectorStore(
    embedding=embeddings,
    collection_name="demo_collection",
    client=client,
)

### on-disk storage

Local mode that does not need a Qdrant server; vectors are stored on disk, so they persist between runs.

In [7]:
# Local mode: the collection is stored under this directory and survives between runs.
client = QdrantClient(path="/tmp/langchain_qdrant_disk")

# Because the data persists, the collection may already exist from a previous run.
# Create it only when it is missing so this cell can be re-run safely.
if not client.collection_exists("demo_collection"):
    client.create_collection(
        collection_name="demo_collection",
        vectors_config=VectorParams(size=4096, distance=Distance.COSINE),
    )

# Attach the LangChain vector store to the (new or pre-existing) on-disk collection.
vector_store = QdrantVectorStore(
    client=client,
    collection_name="demo_collection",
    embedding=embeddings,
)

# Using an existing collection

In [8]:
# Attach to a collection that already exists instead of creating a new one.
# NOTE(review): this needs a Qdrant server reachable at the URL below and,
# presumably, an existing "my_documents" collection; without a running server
# the call fails with a connection error.
qdrant = QdrantVectorStore.from_existing_collection(
    embedding=embeddings,
    collection_name="my_documents",
    url="http://localhost:6333",
)

ResponseHandlingException: [WinError 10061] Impossibile stabilire la connessione. Rifiuto persistente del computer di destinazione

# Manage vector store
Once the vector store has been created, you can interact with it by adding and deleting items.

### Add items

In [9]:
from uuid import uuid4

from langchain_core.documents import Document

# Sample corpus: ten short documents, each tagged with the kind of source it came from.
document_1 = Document(
    metadata={"source": "tweet"},
    page_content="I had chocolate chip pancakes and scrambled eggs for breakfast this morning.",
)

document_2 = Document(
    metadata={"source": "news"},
    page_content="The weather forecast for tomorrow is cloudy and overcast, with a high of 62 degrees Fahrenheit.",
)

document_3 = Document(
    metadata={"source": "tweet"},
    page_content="Building an exciting new project with LangChain - come check it out!",
)

document_4 = Document(
    metadata={"source": "news"},
    page_content="Robbers broke into the city bank and stole $1 million in cash.",
)

document_5 = Document(
    metadata={"source": "tweet"},
    page_content="Wow! That was an amazing movie. I can't wait to see it again.",
)

document_6 = Document(
    metadata={"source": "website"},
    page_content="Is the new iPhone worth the price? Read this review to find out.",
)

document_7 = Document(
    metadata={"source": "website"},
    page_content="The top 10 soccer players in the world right now.",
)

document_8 = Document(
    metadata={"source": "tweet"},
    page_content="LangGraph is the best framework for building stateful, agentic applications!",
)

document_9 = Document(
    metadata={"source": "news"},
    page_content="The stock market is down 500 points today due to fears of a recession.",
)

document_10 = Document(
    metadata={"source": "tweet"},
    page_content="I have a bad feeling I am going to get deleted :(",
)

# Keep the documents in a fixed order so that uuids[i] identifies documents[i].
documents = [
    document_1, document_2, document_3, document_4, document_5,
    document_6, document_7, document_8, document_9, document_10,
]

# One random UUID string per document.
uuids = [str(uuid4()) for _ in documents]

# Last expression of the cell: the ids passed to the store are shown as output.
vector_store.add_documents(documents, ids=uuids)

['2503db59-4df6-4584-b312-dccbfd35676e',
 '7651a686-4ffe-4efb-9c2a-b643d248c415',
 'e2ed02a6-8dd4-459b-b1ae-5f0b0e1462b8',
 '2365c3e5-3d13-4aa4-b500-8ba0410ea7cb',
 'a072c401-b9d8-446a-b738-724b1403a339',
 'f907a7c4-63a2-4158-9658-1c0a1da7af67',
 '2cfe6a7e-f49f-4970-a124-66c98f339666',
 '74394f9e-94f8-4914-9502-3829cdcaf663',
 '1821a577-45b4-4e90-9823-562bb559b137',
 'a5e85bf8-1ae6-4bd2-87d3-7d3ce06bef29']

### Delete items

In [10]:
# Remove the first three documents from the store by their ids
vector_store.delete(ids=uuids[:3])

True

# Query vector store

### Query directly
Performs a similarity search: the query will be encoded into vector embeddings and used to find similar documents in a Qdrant collection.

In [11]:
# Fetch the two stored documents closest to the query text.
results = vector_store.similarity_search(
    query="LangChain provides abstractions to make working with LLMs easy",
    k=2,
)

# Print each hit as a bullet with its text followed by its metadata.
for match in results:
    print(f"* {match.page_content} [{match.metadata}]")

* LangGraph is the best framework for building stateful, agentic applications! [{'source': 'tweet', '_id': '74394f9e-94f8-4914-9502-3829cdcaf663', '_collection_name': 'demo_collection'}]
* Is the new iPhone worth the price? Read this review to find out. [{'source': 'website', '_id': 'f907a7c4-63a2-4158-9658-1c0a1da7af67', '_collection_name': 'demo_collection'}]


QdrantVectorStore supports 3 modes for similarity searches. They can be configured using the retrieval_mode parameter.

- Dense Vector Search (default)
- Sparse Vector Search
- Hybrid Search

### Dense Vector Search
Involves calculating similarity via vector-based embeddings.
retrieval_mode should be set to RetrievalMode.DENSE

In [23]:
from langchain_qdrant import QdrantVectorStore, RetrievalMode
from qdrant_client import QdrantClient
from qdrant_client.http.models import Distance, VectorParams

# Connect to a Qdrant server (not local storage); it must be reachable at this URL
client = QdrantClient(url="http://localhost:6333")  # Requires Qdrant server

collection_name = "my_documents"

# Start from a clean collection. Checking for existence first keeps the message
# accurate and avoids a broad `except Exception` that would also hide unrelated
# failures (e.g. the server not being reachable).
if client.collection_exists(collection_name):
    client.delete_collection(collection_name)
    print(f"Deleted existing collection '{collection_name}'")

# Create a collection with dense vectors; size=4096 is the dimension used for
# every collection backed by `embeddings` in this notebook.
client.create_collection(
    collection_name=collection_name,
    vectors_config=VectorParams(size=4096, distance=Distance.COSINE),
)

# Dense mode: similarity is computed from the dense embeddings only.
qdrant = QdrantVectorStore(
    client=client,
    collection_name=collection_name,
    embedding=embeddings,
    retrieval_mode=RetrievalMode.DENSE,
)

qdrant.add_documents(documents=documents, ids=uuids)

query = "How much money did the robbers steal?"
found_docs = qdrant.similarity_search(query)
# Last expression: display the retrieved documents as the cell output
found_docs

Deleted existing collection 'my_documents'


[Document(metadata={'source': 'news', '_id': '2365c3e5-3d13-4aa4-b500-8ba0410ea7cb', '_collection_name': 'my_documents'}, page_content='Robbers broke into the city bank and stole $1 million in cash.'),
 Document(metadata={'source': 'website', '_id': 'f907a7c4-63a2-4158-9658-1c0a1da7af67', '_collection_name': 'my_documents'}, page_content='Is the new iPhone worth the price? Read this review to find out.'),
 Document(metadata={'source': 'tweet', '_id': 'e2ed02a6-8dd4-459b-b1ae-5f0b0e1462b8', '_collection_name': 'my_documents'}, page_content='Building an exciting new project with LangChain - come check it out!'),
 Document(metadata={'source': 'news', '_id': '7651a686-4ffe-4efb-9c2a-b643d248c415', '_collection_name': 'my_documents'}, page_content='The weather forecast for tomorrow is cloudy and overcast, with a high of 62 degrees Fahrenheit.')]

### Sparse Vector Search
retrieval_mode set to RetrievalMode.SPARSE

In [25]:
from langchain_qdrant import FastEmbedSparse, QdrantVectorStore, RetrievalMode
from qdrant_client import QdrantClient, models
from qdrant_client.http.models import Distance, SparseVectorParams, VectorParams

# Sparse (BM25) embedding model used for sparse retrieval
sparse_embeddings = FastEmbedSparse(model_name="Qdrant/bm25")

# Connect to a Qdrant server (not local storage); it must be reachable at this URL
client = QdrantClient(url="http://localhost:6333")  # Requires Qdrant server

collection_name = "my_documents"

# Start from a clean collection. Checking for existence first keeps the message
# accurate and avoids a broad `except Exception` that would also hide unrelated
# failures (e.g. the server not being reachable).
if client.collection_exists(collection_name):
    client.delete_collection(collection_name)
    print(f"Deleted existing collection '{collection_name}'")

# Create a collection with sparse vectors. The "dense" slot is declared with
# size=4096 to stay consistent with the other collections in this notebook;
# no dense embedding model is passed to the store below.
client.create_collection(
    collection_name=collection_name,
    vectors_config={"dense": VectorParams(size=4096, distance=Distance.COSINE)},
    sparse_vectors_config={
        "sparse": SparseVectorParams(index=models.SparseIndexParams(on_disk=False))
    },
)

# Sparse mode: only the sparse embedding model and the "sparse" vector are used.
qdrant = QdrantVectorStore(
    client=client,
    collection_name=collection_name,
    sparse_embedding=sparse_embeddings,
    retrieval_mode=RetrievalMode.SPARSE,
    sparse_vector_name="sparse",
)

qdrant.add_documents(documents=documents, ids=uuids)

query = "How much money did the robbers steal?"
found_docs = qdrant.similarity_search(query)
# Last expression: display the retrieved documents as the cell output
found_docs

Deleted existing collection 'my_documents'


[Document(metadata={'source': 'news', '_id': '2365c3e5-3d13-4aa4-b500-8ba0410ea7cb', '_collection_name': 'my_documents'}, page_content='Robbers broke into the city bank and stole $1 million in cash.')]

### Hybrid Vector Search
Performs hybrid search using dense and sparse vectors with score fusion.
retrieval_mode should be set to RetrievalMode.HYBRID

In [28]:
from langchain_qdrant import FastEmbedSparse, QdrantVectorStore, RetrievalMode
from qdrant_client import QdrantClient, models
from qdrant_client.http.models import Distance, SparseVectorParams, VectorParams

# Sparse (BM25) embedding model used alongside the dense `embeddings`
sparse_embeddings = FastEmbedSparse(model_name="Qdrant/bm25")

# Connect to a Qdrant server (not local storage); it must be reachable at this URL
client = QdrantClient(url="http://localhost:6333")  # Requires Qdrant server

# Defined here so the cell does not depend on a variable left over from an earlier cell
collection_name = "my_documents"

# Start from a clean collection. Checking for existence first keeps the message
# accurate and avoids a broad `except Exception` that would also hide unrelated
# failures (e.g. the server not being reachable).
if client.collection_exists(collection_name):
    client.delete_collection(collection_name)
    print(f"Deleted existing collection '{collection_name}'")

# Create a collection with both dense and sparse vectors
client.create_collection(
    collection_name=collection_name,
    vectors_config={"dense": VectorParams(size=4096, distance=Distance.COSINE)},
    sparse_vectors_config={
        "sparse": SparseVectorParams(index=models.SparseIndexParams(on_disk=False))
    },
)

# Hybrid mode: both embedding models are supplied, and the vector names must
# match the "dense" / "sparse" keys declared in the collection above.
qdrant = QdrantVectorStore(
    client=client,
    collection_name=collection_name,
    embedding=embeddings,
    sparse_embedding=sparse_embeddings,
    retrieval_mode=RetrievalMode.HYBRID,
    vector_name="dense",
    sparse_vector_name="sparse",
)

qdrant.add_documents(documents=documents, ids=uuids)

query = "How much money did the robbers steal?"
found_docs = qdrant.similarity_search(query)
# Last expression: display the retrieved documents as the cell output
found_docs

Deleted existing collection 'my_documents'


[Document(metadata={'source': 'news', '_id': '2365c3e5-3d13-4aa4-b500-8ba0410ea7cb', '_collection_name': 'my_documents'}, page_content='Robbers broke into the city bank and stole $1 million in cash.'),
 Document(metadata={'source': 'website', '_id': 'f907a7c4-63a2-4158-9658-1c0a1da7af67', '_collection_name': 'my_documents'}, page_content='Is the new iPhone worth the price? Read this review to find out.'),
 Document(metadata={'source': 'tweet', '_id': 'e2ed02a6-8dd4-459b-b1ae-5f0b0e1462b8', '_collection_name': 'my_documents'}, page_content='Building an exciting new project with LangChain - come check it out!'),
 Document(metadata={'source': 'news', '_id': '7651a686-4ffe-4efb-9c2a-b643d248c415', '_collection_name': 'my_documents'}, page_content='The weather forecast for tomorrow is cloudy and overcast, with a high of 62 degrees Fahrenheit.')]

To return the similarity scores together with the documents, use similarity_search_with_score:

In [30]:
# Retrieve the single closest document together with its similarity score
results = vector_store.similarity_search_with_score(
    query="Will it be hot tomorrow", k=1
)
for doc, score in results:
    # ".3f" rounds the score to three decimals; the previous "3f" only set a
    # minimum field width and left the default six decimals.
    print(f"* [SIM={score:.3f}] {doc.page_content} [{doc.metadata}]")

* [SIM=0.386851] I have a bad feeling I am going to get deleted :( [{'source': 'tweet', '_id': 'a5e85bf8-1ae6-4bd2-87d3-7d3ce06bef29', '_collection_name': 'demo_collection'}]


# Filtering

It is possible to use filters by passing an additional param to similarity_search_with_score and similarity_search methods.

In [31]:
from qdrant_client import models

# Condition on the stored "page_content" payload field matching the given text.
soccer_condition = models.FieldCondition(
    key="page_content",
    match=models.MatchValue(
        value="The top 10 soccer players in the world right now."
    ),
)

# Pass the condition as a `should` clause of a Qdrant filter to the search.
results = vector_store.similarity_search(
    query="Who are the best soccer players in the world?",
    k=1,
    filter=models.Filter(should=[soccer_condition]),
)

# Print each hit as a bullet with its text followed by its metadata.
for doc in results:
    print(f"* {doc.page_content} [{doc.metadata}]")

* The top 10 soccer players in the world right now. [{'source': 'website', '_id': '2cfe6a7e-f49f-4970-a124-66c98f339666', '_collection_name': 'demo_collection'}]


### Turning vector store into a retriever

"mmr" = maximal marginal relevance, balances relevance (similarity to the query) and diversity (avoids duplicates)
k = 1 (single best result)

In [32]:
# Wrap the vector store as a retriever that uses MMR search and returns one document (k=1)
retriever = vector_store.as_retriever(search_type="mmr", search_kwargs={"k": 1})
# Last expression: the retrieved documents are displayed as the cell output
retriever.invoke("Stealing from the bank is a crime")

[Document(metadata={'source': 'tweet', '_id': 'a5e85bf8-1ae6-4bd2-87d3-7d3ce06bef29', '_collection_name': 'demo_collection'}, page_content='I have a bad feeling I am going to get deleted :(')]