In [1]:
from llama_index.core import Document, GPTVectorStoreIndex, ServiceContext
from llama_index.core.node_parser import SimpleNodeParser
from qdrant_client import QdrantClient
from langchain.embeddings import HuggingFaceBgeEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.document_loaders import PyPDFLoader
from llama_index.vector_stores.qdrant import QdrantVectorStore




In [2]:
# Step 1: Load PDF document and split into chunks
loader = PyPDFLoader("SWE-Bench.pdf")
documents = loader.load()
text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
texts = text_splitter.split_documents(documents)

In [3]:
# Step 2: Load HuggingFace BGE Embedding Model
model_name = "BAAI/bge-large-en"
model_kwargs = {'device': 'cpu'}
encode_kwargs = {'normalize_embeddings': False}

embeddings = HuggingFaceBgeEmbeddings(
    model_name=model_name,
    model_kwargs=model_kwargs,
    encode_kwargs=encode_kwargs
)

  from tqdm.autonotebook import tqdm, trange





In [4]:
# Step 3: Initialize Qdrant Client for the vector store
qdrant_client = QdrantClient(
    url="https://c99d7921-24d0-4759-8836-938fa2f15d91.europe-west3-0.gcp.cloud.qdrant.io:6333", 
    api_key="6cz2PvQQIfqLu2ALzGiIOCUSMBEaLV2W4MudoxYXOfywU4kt3Mu6Cw",
)


In [5]:
# Step 4: Convert text chunks into LlamaIndex Document objects
llama_documents = [Document(text=doc.page_content) for doc in texts]

In [6]:
# Step 4: Convert the texts into nodes for LlamaIndex (equivalent of documents in LangChain)
node_parser = SimpleNodeParser()
nodes = node_parser.get_nodes_from_documents(llama_documents)

In [7]:
# Step 5: Initialize the vector store with Qdrant and the HuggingFace embeddings
vector_store = QdrantVectorStore(
    client=qdrant_client,
    collection_name="vector_db",  # The collection name in Qdrant
    embedding_model=embeddings  # Use the HuggingFace BGE embeddings
)

In [8]:
# Step 6: Build the LlamaIndex with the Qdrant vector store
index = GPTVectorStoreIndex(
    nodes=nodes,
    vector_store=vector_store,
    embed_model=embeddings  # Explicitly pass the HuggingFace embeddings model
)

In [11]:
from transformers import AutoModelForCausalLM, AutoTokenizer

# Define the HuggingFaceLLM class
class HuggingFaceLLM:
    def __init__(self, model_name: str):
        # Load the tokenizer and model from Hugging Face
        self.tokenizer = AutoTokenizer.from_pretrained(model_name)
        self.model = AutoModelForCausalLM.from_pretrained(model_name)
        
    def complete(self, prompt: str):
        # Tokenize input prompt
        inputs = self.tokenizer(prompt, return_tensors="pt")
        
        # Generate output using the model
        outputs = self.model.generate(**inputs, max_length=500, num_return_sequences=1)
        
        # Decode the generated tokens to get the response
        response = self.tokenizer.decode(outputs[0], skip_special_tokens=True)
        
        return response

# Instantiate the Hugging Face LLM
llm = HuggingFaceLLM("TinyLlama/TinyLlama-1.1B-Chat-v1.0")

# Custom query engine logic (depends on your existing setup)
# Assuming `index` is already built and configured
class CustomQueryEngine:
    def __init__(self, index, llm):
        self.index = index
        self.llm = llm

    def query(self, query_str):
        # Retrieve relevant context (mocking retrieval from index for demonstration)
        context_str = ""
        
        # Use the LLM to complete the query based on the context
        prompt = f"Context: {context_str}\n\nQuery: {query_str}\nAnswer:"
        response = self.llm.complete(prompt)
        return response

# Instantiate the query engine with the Hugging Face LLM
query_engine = CustomQueryEngine(index, llm)

# Query the engine
response = query_engine.query("SWE-bench dataset contains how many number of problem examples?")
print(response)


Context: SWE-bench dataset contains how many number of problem examples?

Query: How does SWE-bench evaluate an LLM?
Answer: SWE-bench evaluates an LLM by generating a set of problem examples and comparing the generated output to the ground truth. The evaluation metric is the F1 score, which measures the proportion of correct predictions.
