1. Set up Asyncio

In [1]:
# The notebook kernel already runs an asyncio event loop; nest_asyncio patches
# asyncio so code that starts its own loop can run inside it (nested loops).
import nest_asyncio
nest_asyncio.apply()

2. Set up the Qdrant vector database

In [2]:
import qdrant_client

# Collection that create_index() writes the document vectors into.
collection_name = "chat_with_docs"

# Expects a Qdrant server to be reachable at localhost:6333 before this cell runs.
client = qdrant_client.QdrantClient(host="localhost", port=6333)

3. Read the documents

In [3]:
from llama_index.core import SimpleDirectoryReader

# Folder (relative to the notebook's working directory) holding the source PDFs.
input_dir_path = "./docs"

# Only ".pdf" files are requested, and sub-directories are searched as well.
loader = SimpleDirectoryReader(
    input_dir=input_dir_path,
    recursive=True,
    required_exts=[".pdf"],
)

# `docs` is the list handed to create_index() further down.
docs = loader.load_data()

In [4]:
# Quick sanity check of the loader result: container type and number of loaded items.
type(docs), len(docs)


(list, 32)

4. A function to index data

In [5]:
from llama_index.vector_stores.qdrant import QdrantVectorStore
from llama_index.core import VectorStoreIndex, ServiceContext, StorageContext

def create_index(documents):
    """Build a vector index over ``documents`` backed by the Qdrant collection.

    Uses the module-level ``client`` and ``collection_name`` defined in the
    Qdrant setup cell, so that cell must have been run first.

    Args:
        documents: Documents to index, e.g. the list returned by
            ``SimpleDirectoryReader.load_data()``.

    Returns:
        The index produced by ``VectorStoreIndex.from_documents``.

    NOTE(review): every call runs ``from_documents`` again; re-running against
    an already populated collection presumably inserts the same documents a
    second time — confirm before re-executing the indexing cell.
    """
    # Wrap the shared Qdrant client so LlamaIndex can read and write the collection.
    qdrant_store = QdrantVectorStore(
        collection_name=collection_name,
        client=client,
    )
    # Route index storage to Qdrant.
    qdrant_context = StorageContext.from_defaults(vector_store=qdrant_store)
    return VectorStoreIndex.from_documents(
        documents,
        storage_context=qdrant_context,
    )

5. Load the embedding model and index data

In [16]:
import torch


def release_accelerator_cache():
    """Free cached accelerator memory on whichever backends are present.

    Each cache is emptied only when its backend reports itself available, so
    the cell runs unchanged on CUDA machines, Apple-silicon (MPS) machines and
    CPU-only machines.

    Returns:
        list[str]: Names of the backends whose cache was emptied, drawn from
        ``"cuda"`` and ``"mps"``; empty when neither backend is available.
    """
    released = []
    if torch.cuda.is_available():
        torch.cuda.empty_cache()
        released.append("cuda")
    # torch.mps.empty_cache() can raise when no MPS backend exists, so it must
    # be guarded rather than called unconditionally.
    if torch.backends.mps.is_available():
        torch.mps.empty_cache()
        released.append("mps")
    return released


# Trailing ';' keeps the returned list out of the cell output.
release_accelerator_cache();

In [6]:
# Environment check: record the installed NumPy version in the cell output.
import numpy as np
print(np.__version__)

1.24.1


In [7]:
# Environment check: convert a small tensor to a NumPy array and print it.
# NOTE(review): presumably here to verify that the installed torch and NumPy
# versions work together — confirm whether this cell is still needed.
import torch

tensor = torch.tensor([1.0, 2.0, 3.0, 6.0])
array = tensor.numpy()
print(array)

[1. 2. 3. 6.]


In [8]:
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.core import Settings

# NOTE(review): trust_remote_code=True permits code shipped with the model
# repository to run at load time — confirm this model actually requires it.
embed_model = HuggingFaceEmbedding(
    trust_remote_code=True,
    model_name="BAAI/bge-large-en-v1.5",
)

# Register the embedder globally before the index is built below.
Settings.embed_model = embed_model

# Index the documents loaded earlier into the Qdrant collection.
index = create_index(docs)

6. Load the LLM

In [9]:
from llama_index.llms.ollama import Ollama

# Model served through Ollama; each request is given a 120-second timeout.
llm = Ollama(
    request_timeout=120.0,
    model="llama3.2:1b",
)

# Register the LLM globally (Settings is imported in the embedding cell).
Settings.llm = llm

7. Define the prompt template

In [10]:
from llama_index.core import PromptTemplate

# QA prompt with two placeholders, {context_str} and {query_str}.
# The indentation of the continuation lines is part of the string literal, so
# it is included verbatim in the text of the prompt.
template = """Context information is below:
              ---------------------
              {context_str}
              ---------------------
              Given the context information above I want you to think
              step by step to answer the query in a crisp manner,
              incase you don't know the answer say 'I don't know!'
            
              Query: {query_str}
        
              Answer:"""

# Wrapped as a PromptTemplate so it can be passed to update_prompts() later.
qa_prompt_tmpl = PromptTemplate(template)

8. Reranking

In [11]:
from llama_index.core.postprocessor import SentenceTransformerRerank

# Cross-encoder reranker, configured with top_n=3; it is attached to the
# query engine as a node postprocessor in the query cell.
rerank = SentenceTransformerRerank(
    top_n=3,
    model="cross-encoder/ms-marco-MiniLM-L-2-v2",
)

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


9. Query the document

In [15]:
# Retrieve with similarity_top_k=10, then pass the candidates through the reranker.
query_engine = index.as_query_engine(
    node_postprocessors=[rerank],
    similarity_top_k=10,
)

# Replace the response synthesizer's QA prompt with the custom template.
query_engine.update_prompts({"response_synthesizer:text_qa_template": qa_prompt_tmpl})

# Run the question; `response` is rendered in the display cell.
response = query_engine.query("What exactly is DSPy?")

10. Display the response

In [16]:
from IPython.display import Markdown, display

# Convert the response to its string form and render it as Markdown in the cell output.
display(Markdown(str(response)))

DSPy stands for Deep Speech Processing, which is an open-source Python library used for natural language processing (NLP) tasks. It provides a framework for implementing various NLP techniques, including text analysis, sentiment analysis, language modeling, and more. The library allows users to define their own custom models and interfaces using a variety of signatures, which are essentially typed declarations of functions that take input fields and output fields as arguments. This enables the creation of customized prompts, models, and interfaces for specific tasks and applications.