In [2]:
from qdrant_client import QdrantClient
from qdrant_client.models import VectorParams, Distance, PointStruct
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_openai import OpenAIEmbeddings
from dotenv import load_dotenv
import uuid
import os
load_dotenv()

True

In [3]:
client = QdrantClient(url=os.environ['QDRANT_API_URL'], api_key=os.environ['QDRANT_API_KEY'])

In [4]:
client.create_collection(
      collection_name="graphrag-test",
      vectors_config=VectorParams(size=1536, distance=Distance.COSINE),
)

True

In [5]:
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=100,
    length_function=len,
    is_separator_regex=False,
    separators=["\n\n", "\n"]
    )

In [6]:
embeddings_model = OpenAIEmbeddings(api_key=os.environ['GRAPHRAG_API_KEY'])

In [7]:
for file in os.listdir('./data/'):

    with open(f'./data/{file}', 'r') as f:
        text = f.read()

    chunks = text_splitter.split_text(text)
    embeddings = embeddings_model.embed_documents(chunks)

    client.upsert(
        collection_name="graphrag-test",
        points=[
        PointStruct(
            id= uuid.uuid1().hex,
            vector=vector,
            payload={"doc": doc}
            ) for doc, vector in zip(chunks, embeddings)]
        )

## Upload some query vectors

In [8]:
queries = [
    "Que paso en las elecciones de 2024",
    "Quien es Kamala Harris?",
    "Que paso en Valencia?",
    "Cuales son las noticias mas importantes de hoy?",
    "Que paso hoy en el mundo?"
]

In [9]:
_id = 1
embeddings = embeddings_model.embed_documents(queries)
indices = range(_id, _id + len(queries))

client.upsert(
        collection_name="graphrag-test",
        points=[
        PointStruct(
            id=idx,
            vector=vector,
            payload={"doc": doc}
            ) for idx, doc, vector in zip(indices, queries, embeddings)]
        )

UpdateResult(operation_id=200, status=<UpdateStatus.COMPLETED: 'completed'>)