In [1]:
from langchain_community.embeddings import HuggingFaceBgeEmbeddings

import os

# Where the BGE embedding model is loaded from. The default is the original
# local checkout; set the BGE_MODEL_PATH environment variable to point the
# notebook at a different location without editing this cell.
model_name = os.environ.get(
    "BGE_MODEL_PATH",
    "C:\\Home\\Documents\\Projects\\models\\BAAI\\bge-large-en-v1.5",
)
# Forwarded to the embedding wrapper as model construction options.
model_kwargs = {"device": "cpu"}
# Forwarded to the embedding wrapper as encoding options.
encode_kwargs = {"normalize_embeddings": True}
embedding_model = HuggingFaceBgeEmbeddings(
    model_name=model_name, model_kwargs=model_kwargs, encode_kwargs=encode_kwargs
)

  from tqdm.autonotebook import tqdm, trange


In [2]:
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import WebBaseLoader

# Pages that make up the corpus to index
urls = [
    "https://lilianweng.github.io/posts/2023-06-23-agent/",
    "https://lilianweng.github.io/posts/2023-03-15-prompt-engineering/",
    "https://lilianweng.github.io/posts/2023-10-25-adv-attack-llm/",
]

# Fetch each page with its own loader; `docs` holds one result list per URL
docs = [WebBaseLoader(page_url).load() for page_url in urls]
# Flatten the per-URL lists into a single list of documents
docs_list = [document for page_docs in docs for document in page_docs]

# Build the splitter via its tiktoken-encoder constructor, then chunk everything
text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
    chunk_overlap=50,
    chunk_size=500,
)
doc_splits = text_splitter.split_documents(docs_list)

USER_AGENT environment variable not set, consider setting it to identify your requests.


In [3]:
from langchain_qdrant import QdrantVectorStore
from qdrant_client import QdrantClient
from qdrant_client.http.models import Distance, VectorParams

# Single source of truth for the collection used by the client and the store.
COLLECTION_NAME = "agentic_rag"

# Connect to the Qdrant server on localhost:6333.
client = QdrantClient(host="localhost", port=6333)

# Creating a collection that already exists fails, which would break
# re-running this cell (or Restart & Run All) against a server that still
# holds the collection. Only create it when it is missing.
if not client.collection_exists(collection_name=COLLECTION_NAME):
    client.create_collection(
        collection_name=COLLECTION_NAME,
        # NOTE(review): 1024 is assumed to be the output dimension of the
        # embedding model configured earlier -- confirm if the model changes.
        vectors_config=VectorParams(size=1024, distance=Distance.COSINE),
    )

# LangChain vector-store wrapper bound to that collection and embedding model.
vector_store = QdrantVectorStore(
    client=client,
    collection_name=COLLECTION_NAME,
    embedding=embedding_model,
)

In [4]:
from uuid import uuid4

# One freshly generated random UUID string per chunk, in chunk order
ids = [str(uuid4()) for _ in doc_splits]
# Add the chunks to the vector store under those ids; the call's return value
# is the last expression and is therefore shown as the cell output
vector_store.add_documents(documents=doc_splits, ids=ids)

['41329066-a18b-41e0-aae1-76e8622d3957',
 '586bc104-5e47-4706-964d-22b3d0ee3316',
 '46f4b476-9f92-43e4-83d5-915c7b6bec63',
 '4cf86eb6-c87d-40e6-9560-03238c7ec01e',
 '9ce919f0-c5d1-4108-88dd-31595079eb82',
 '7fc63981-178a-4919-8b2b-f9638bd51570',
 'ef926e20-96c3-4bde-979e-76198139b5b8',
 'e4437604-9122-4685-8cbc-3a6e5f77a4e2',
 '3d0cd830-2cd5-4e23-ab38-9ae356e4b711',
 'd042c66a-f7f3-4ed2-ab7a-73ad89a1bd7d',
 '32be5ebd-1a2f-4e9b-9d26-85e5cacaa1d6',
 '68ffcdd9-af9b-46e1-9c21-924640bbb145',
 'c054a467-04c9-484a-bb0d-3c514685e936',
 '521e3e5a-4c26-4c1b-b9e5-d40ccd3747df',
 '9469e671-4736-42b1-a2ef-373bcd45cfef',
 '07210f44-7585-477a-b3ab-37cb41bf1939',
 '192179c7-4d0a-4c06-9d6b-7ec568e41a21',
 '21be78cf-4a4e-46f4-8b8a-26697c891a83',
 'fd56b52b-0022-479f-909d-bb64f34e3fee',
 '66dbc72e-f27a-4e85-9329-31098f3fb632',
 '38387031-9798-44e0-a666-6e599efa9c8a',
 '5d4720d2-f6d3-48b6-94a1-392a35ab7642',
 'a3ba0e49-1d20-4cf7-a66f-c569ccd2b375',
 'd9140a06-ad51-4df8-8264-b629613e134f',
 '8ba8a79b-d70f-