In [None]:
# Start a local Qdrant server before running the cells below (run in a shell):
# sudo docker run -p 6333:6333 -v $(pwd)/qdrant_storage:/qdrant/storage qdrant/qdrant

In [86]:
from uuid import uuid4

from langchain.document_loaders.text import TextLoader
from langchain.schema.document import Document
from langchain.embeddings.openai import OpenAIEmbeddings
from qdrant_client import QdrantClient
from qdrant_client.http import models
from qdrant_client.http.exceptions import UnexpectedResponse

from dotenv import load_dotenv, find_dotenv

# Load environment variables from the nearest .env file before any client
# that reads them is constructed (presumably the OpenAI API key -- confirm).
load_dotenv(find_dotenv())


MEMORY_PATH = "27_qddrant/memory.md"
COLLECTION_NAME = "ai_devs"


# "localhost" can be swapped for os.environ['QDRANT_URL'] to target a remote instance.
client = QdrantClient("localhost", port=6333)

# Embed the question once up front; the vector is reused by the search later on.
embeddings = OpenAIEmbeddings()
query = "Do you know the name of Adam's dog?"
query_embedding = embeddings.embed_query(query)

# Ask the server which collections exist and pick out ours, if present.
result = client.get_collections()
matching_collections = [
    collection
    for collection in result.collections
    if collection.name == COLLECTION_NAME
]
# `indexed` is the matching collection description, or None when it is missing.
indexed = matching_collections[0] if matching_collections else None
print(result)

collections=[CollectionDescription(name='first_test_collection')]


In [88]:
# Create the collection only when the earlier lookup did not find it.
if not indexed:
    # Vector size must equal the embedding model's output dimension
    # (1536 here -- assumed to match the OpenAIEmbeddings default; confirm if the model changes).
    vector_params = models.VectorParams(
        size=1536,
        distance=models.Distance.COSINE,
        on_disk=True,
    )
    client.create_collection(
        collection_name=COLLECTION_NAME,
        vectors_config=vector_params,
    )

In [93]:
collection_info = client.get_collection(COLLECTION_NAME)
# Index the memory file only while the collection reports no points: every run
# generates fresh UUIDs, so indexing again would store duplicate entries.
if not collection_info.points_count:
    # NOTE: this rebinds the MEMORY_PATH set in the configuration cell, so the
    # file is resolved relative to the current working directory.
    MEMORY_PATH = "memory.md"
    loader = TextLoader(MEMORY_PATH)
    memory = loader.load()

    # One document per blank-line-separated paragraph. Whitespace-only chunks
    # (e.g. from trailing newlines or triple blank lines) are skipped so no
    # empty memory is embedded or stored.
    documents = [
        Document(
            page_content=content,
            metadata={
                # The text is duplicated into the payload so search hits carry it.
                "content": content,
                "source": COLLECTION_NAME,
                "uuid": str(uuid4()),
            },
        )
        for content in memory[0].page_content.split("\n\n")
        if content.strip()
    ]

    # Embed all paragraphs in a single batched call instead of one request per
    # paragraph; the returned vectors are in the same order as the inputs.
    document_vectors = (
        embeddings.embed_documents([document.page_content for document in documents])
        if documents
        else []
    )

    points = [
        {
            "id": document.metadata["uuid"],
            "payload": document.metadata,
            "vector": vector,
        }
        for document, vector in zip(documents, document_vectors)
    ]

    # Column-wise views of the points, as expected by models.Batch.
    ids = [point["id"] for point in points]
    vectors = [point["vector"] for point in points]
    payloads = [point["payload"] for point in points]

    # Nothing to store when the memory file held no usable paragraphs.
    if points:
        client.upsert(
            COLLECTION_NAME,
            points=models.Batch(ids=ids, payloads=payloads, vectors=vectors),
        )

# Only consider points whose payload "source" field equals the collection name.
source_condition = models.FieldCondition(
    key="source",
    match=models.MatchValue(value=COLLECTION_NAME),
)
query_filter = models.Filter(must=[source_condition])

# Retrieve the single closest point to the previously computed query embedding.
search = client.search(
    COLLECTION_NAME,
    query_vector=query_embedding,
    query_filter=query_filter,
    limit=1,
)
print(search)

[ScoredPoint(id='852098bb-af02-480a-9c11-0027c6193e83', version=0, score=0.90141785, payload={'content': 'Adam have a dog named Alexa.', 'source': 'ai_devs', 'uuid': '852098bb-af02-480a-9c11-0027c6193e83'}, vector=None)]
