In [11]:
from llama_index.core import Document, GPTVectorStoreIndex, ServiceContext
from llama_index.core.node_parser import SimpleNodeParser
from qdrant_client import QdrantClient
from langchain.embeddings import HuggingFaceBgeEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.document_loaders import PyPDFLoader
from llama_index.vector_stores.qdrant import QdrantVectorStore

In [2]:
# Step 1: Load PDF document and split into chunks
loader = PyPDFLoader("SWE-Bench.pdf")
documents = loader.load()
text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
texts = text_splitter.split_documents(documents)

In [3]:
# Step 2: Load HuggingFace BGE Embedding Model
model_name = "BAAI/bge-large-en"
model_kwargs = {'device': 'cpu'}
encode_kwargs = {'normalize_embeddings': False}

embeddings = HuggingFaceBgeEmbeddings(
    model_name=model_name,
    model_kwargs=model_kwargs,
    encode_kwargs=encode_kwargs
)

  from tqdm.autonotebook import tqdm, trange





modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to see activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development


config_sentence_transformers.json:   0%|          | 0.00/124 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/90.3k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/52.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/720 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/1.34G [00:00<?, ?B/s]

Error while downloading from https://cdn-lfs.huggingface.co/repos/60/5e/605eb2707e17d287d9db515a55d5abd41f99516f676822cdf999ae87d847c1a2/37136ad03a0da3ea220bc31850c5b49f39d56fa0d99ebd48887d0c9bb60ad5d1?response-content-disposition=inline%3B+filename*%3DUTF-8%27%27model.safetensors%3B+filename%3D%22model.safetensors%22%3B&Expires=1726909195&Policy=eyJTdGF0ZW1lbnQiOlt7IkNvbmRpdGlvbiI6eyJEYXRlTGVzc1RoYW4iOnsiQVdTOkVwb2NoVGltZSI6MTcyNjkwOTE5NX19LCJSZXNvdXJjZSI6Imh0dHBzOi8vY2RuLWxmcy5odWdnaW5nZmFjZS5jby9yZXBvcy82MC81ZS82MDVlYjI3MDdlMTdkMjg3ZDlkYjUxNWE1NWQ1YWJkNDFmOTk1MTZmNjc2ODIyY2RmOTk5YWU4N2Q4NDdjMWEyLzM3MTM2YWQwM2EwZGEzZWEyMjBiYzMxODUwYzViNDlmMzlkNTZmYTBkOTllYmQ0ODg4N2QwYzliYjYwYWQ1ZDE%7EcmVzcG9uc2UtY29udGVudC1kaXNwb3NpdGlvbj0qIn1dfQ__&Signature=Ppc9Ug5RtduHxbCcfa%7EZ2jRjSvWBrDHH6lzoaoDhh57Js0B78NhCuNywFHohVN8J-2sijPGQx8f9SF2KeAY8BjfnVMxuR-eNT0E8cHOyxUh2K%7Eniv9Vo4gO4QX0jFLSJCU0GXBatJVNkbXjpXyTq7SoShdqbAnyznOZFnLXwVN1BRnpC4bfLJB3vRnsN%7E5Vek1LTi9xNpspORNm6wYgkrBlIKC2G4LCUJlP4FLU6jEk9zfeh

model.safetensors:  89%|########9 | 1.20G/1.34G [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/366 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/711k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/125 [00:00<?, ?B/s]

1_Pooling/config.json:   0%|          | 0.00/191 [00:00<?, ?B/s]

In [4]:
# Step 3: Initialize Qdrant Client for the vector store
qdrant_client = QdrantClient(
    url="https://c99d7921-24d0-4759-8836-938fa2f15d91.europe-west3-0.gcp.cloud.qdrant.io:6333", 
    api_key="6cz2PvQQIfqLu2ALzGiIOCUSMBEaLV2W4MudoxYXOfywU4kt3Mu6Cw",
)


In [7]:
# Step 4: Convert text chunks into LlamaIndex Document objects
llama_documents = [Document(text=doc.page_content) for doc in texts]

In [8]:
# Step 4: Convert the texts into nodes for LlamaIndex (equivalent of documents in LangChain)
node_parser = SimpleNodeParser()
nodes = node_parser.get_nodes_from_documents(llama_documents)

In [9]:
# Step 5: Initialize the vector store with Qdrant and the HuggingFace embeddings
vector_store = QdrantVectorStore(
    client=qdrant_client,
    collection_name="vector_db",  # The collection name in Qdrant
    embedding_model=embeddings  # Use the HuggingFace BGE embeddings
)

In [13]:
# Step 6: Build the LlamaIndex with the Qdrant vector store
index = GPTVectorStoreIndex(
    nodes=nodes,
    vector_store=vector_store,
    embed_model=embeddings  # Explicitly pass the HuggingFace embeddings model
)

In [30]:
# Step 7: Query the index without using an LLM
query_engine = index.as_query_engine(llm=None)
response = query_engine.query("What is the SWE-Bench benchmark?")
print(response)

ValueError: 
******
Could not load OpenAI model. If you intended to use OpenAI, please check your OPENAI_API_KEY.
Original error:
No API key found for OpenAI.
Please set either the OPENAI_API_KEY environment variable or openai.api_key prior to initialization.
API keys can be found or created at https://platform.openai.com/account/api-keys

To disable the LLM entirely, set llm=None.
******