In [None]:
import os
from dotenv import load_dotenv

from llama_index.llms.azure_openai import AzureOpenAI
from llama_index.embeddings.azure_openai import AzureOpenAIEmbedding
from llama_index.core import Settings
from llama_index.core import VectorStoreIndex, SimpleDirectoryReader, StorageContext
from llama_index.core.node_parser import TokenTextSplitter
from llama_index.vector_stores.lancedb import LanceDBVectorStore
from llama_index.core.query_engine import RetrieverQueryEngine
from llama_index.core.retrievers import VectorIndexRetriever
from llama_index.retrievers.bm25 import BM25Retriever
from llama_index.core.node_parser import SentenceSplitter
from llama_index.core.retrievers import QueryFusionRetriever
from llama_index.core.response.notebook_utils import (
    display_source_node,
    display_response,
)
from llama_index.core.extractors import (
    SummaryExtractor,
    QuestionsAnsweredExtractor,
    TitleExtractor,
    KeywordExtractor,
)
from llama_index.extractors.entity import EntityExtractor
from llama_index.core.ingestion import IngestionPipeline
import Stemmer
from IPython.display import Markdown, display
from llama_index.llms.ollama import Ollama
from llama_index.embeddings.ollama import OllamaEmbedding

# Apply nest_asyncio so that async code can run nested inside the notebook
# (the notebook environment already drives its own event loop).
import nest_asyncio

nest_asyncio.apply()

# Load environment variables via python-dotenv; the Azure OpenAI settings in
# the next cell are read from the environment with os.getenv.
load_dotenv()

Azure OpenAI LLM and Embedding connection:

In [None]:
# Azure OpenAI chat model. Every connection setting is read from the
# environment (see load_dotenv() in the first cell).
# NOTE(review): `engine` and `model` both receive the deployment name —
# presumably the deployment is named after the underlying model; confirm.
llm = AzureOpenAI(
    azure_endpoint=os.environ.get("AZURE_OPENAI_ENDPOINT"),
    api_key=os.environ.get("AZURE_OPENAI_API_KEY"),
    api_version=os.environ.get("AZURE_OPENAI_LLM_API_VERSION"),
    engine=os.environ.get("AZURE_OPENAI_LLM_DEPLOYMENT"),
    model=os.environ.get("AZURE_OPENAI_LLM_DEPLOYMENT"),
)

# Azure OpenAI embedding model: same endpoint and key as the chat model, but
# with its own deployment and API-version variables.
embed_model = AzureOpenAIEmbedding(
    azure_endpoint=os.environ.get("AZURE_OPENAI_ENDPOINT"),
    api_key=os.environ.get("AZURE_OPENAI_API_KEY"),
    api_version=os.environ.get("AZURE_OPENAI_EMBEDDING_API_VERSION"),
    deployment_name=os.environ.get("AZURE_OPENAI_EMBEDDING_DEPLOYMENT"),
    model=os.environ.get("AZURE_OPENAI_EMBEDDING_DEPLOYMENT"),
)

# Register both models on the global llama-index Settings object.
Settings.llm, Settings.embed_model = llm, embed_model

Ollama for open source LLM and Embedding models:

Ollama for local and open source models: https://ollama.com/

In [None]:
# Optional alternative: uncomment this whole cell to use local Ollama models.
# It reassigns Settings.llm / Settings.embed_model, replacing the Azure OpenAI
# models configured in the previous cell.
# ollama_llm = Ollama(model="llama3.1:latest", request_timeout=600.0)

# ollama_embed_model = OllamaEmbedding(
#     model_name="nomic-embed-text:latest",
#     base_url="http://localhost:11434"
# )

# Settings.llm = ollama_llm
# Settings.embed_model = ollama_embed_model

Load the PDF file(s) from the data folder:

Using the following pdf:
https://arxiv.org/pdf/2501.12948

In [None]:
# Relative path of the folder that holds the PDF(s) to index.
profiles_folder = "../data"

# Load the folder's contents as llama-index documents and report the count.
documents = SimpleDirectoryReader(input_dir=profiles_folder).load_data()
print(len(documents))

Split text by sentence

In [None]:
# initialize node parser
# Sentence-based splitter configured with chunk_size=500 and chunk_overlap=50
# (units are presumably tokens — confirm against the SentenceSplitter docs).
splitter = SentenceSplitter(chunk_size=500, chunk_overlap=50)

# Split the loaded documents into nodes and print how many were produced.
nodes = splitter.get_nodes_from_documents(documents)
print(len(nodes))

In [None]:
# Show the metadata carried by the first node after the plain sentence split.
nodes[0].metadata

Split with more metadata:

https://docs.llamaindex.ai/en/stable/module_guides/indexing/metadata_extraction/

In [None]:
# Ingestion pipeline: a sentence splitter followed by metadata extractors
# (title, answerable questions, prev/self/next summaries, keywords).
# NOTE(review): per the linked metadata-extraction guide these extractors
# presumably call the configured LLM, so this cell may be slow — confirm.
transformations = [
    SentenceSplitter(chunk_size=500, chunk_overlap=50),  # same chunking as the cell above
    TitleExtractor(nodes=3),
    QuestionsAnsweredExtractor(questions=3),
    SummaryExtractor(summaries=["prev", "self", "next"]),
    KeywordExtractor(keywords=10),
    # EntityExtractor(prediction_threshold=0.5),
]
pipeline = IngestionPipeline(transformations=transformations)

# Overwrites the `nodes` produced by the plain splitter earlier; the BM25 and
# vector cells below build on this metadata-enriched list.
nodes = pipeline.run(documents=documents)
print(len(nodes))

In [None]:
# Show the first node's metadata again, this time from the ingestion-pipeline run.
nodes[0].metadata

Questions that we will use for retrieval and querying:

In [None]:
# Candidate questions about the DeepSeek-R1 paper. Exactly one of them
# (`question`) is used by every retrieval and query cell below.
questions = [
    "which reinforcement learning algorithm used in the deepseek r1 training?",
    "can you explain how deepseek r1 model was trained?",
    "can you explain more about the cold start?",
    "list all the unsuccessful attempts mentioned in the paper?",
    # Fixed duplicated word: "over over models" -> "over other models".
    "can you list all the advantages of the deepseek r1 model over other models?",
    # Fixed run-on wording: added the missing "you" and the missing conjunction.
    "can you explain in detail about reward modeling and in which part of the paper it was mentioned?",
    "list all the steps which performed to train the deepseek r1 model?",
]

# Change the index to try a different question; index 3 selects the
# "unsuccessful attempts" question.
question = questions[3]
question

Keyword search using BM25:

https://docs.llamaindex.ai/en/stable/examples/retrievers/bm25_retriever/

In [None]:
# Keyword (BM25) retriever over `nodes`: English stemmer, English language
# setting, and similarity_top_k=10.
bm25_retriever = BM25Retriever.from_defaults(
    language="english",
    stemmer=Stemmer.Stemmer("english"),
    similarity_top_k=10,
    nodes=nodes,
)

# Echo the retriever so the notebook shows its repr.
bm25_retriever

In [None]:
# Keyword-only retrieval: fetch the BM25 matches for `question` and render
# each hit (display_source_node is called with source_length=5000).
bm25_retrieved_nodes = bm25_retriever.retrieve(question)
for node in bm25_retrieved_nodes:
    display_source_node(node, source_length=5000)

In [None]:
# Query engine whose context comes from the BM25 retriever only.
bm25_query_engine = RetrieverQueryEngine.from_args(retriever=bm25_retriever)
response = bm25_query_engine.query(question)

# Render the answer together with its source nodes and their metadata.
display_response(
    response,
    show_source=True,
    show_source_metadata=True,
    source_length=5000,
)

Vector Search:

https://docs.llamaindex.ai/en/v0.10.33/examples/vector_stores/SimpleIndexDemo/

In [None]:
# LanceDB-backed vector store kept in ./lancedb.
# NOTE(review): mode="overwrite" presumably recreates the table on every run,
# so re-running this cell re-embeds all nodes — confirm.
vector_store = LanceDBVectorStore(
    query_type="vector",
    mode="overwrite",
    uri="./lancedb",
)
storage_context = StorageContext.from_defaults(vector_store=vector_store)

# Build the index from `nodes` on top of the LanceDB storage context.
vector_index = VectorStoreIndex(
    nodes=nodes,
    storage_context=storage_context,
    show_progress=True,
)

# Retriever over the index, configured with similarity_top_k=10.
vector_retriever = VectorIndexRetriever(similarity_top_k=10, index=vector_index)

In [None]:
# Vector-only retrieval: fetch the vector retriever's matches for `question`
# and render each hit (display_source_node is called with source_length=5000).
vector_retrieved_nodes = vector_retriever.retrieve(question)
for node in vector_retrieved_nodes:
    display_source_node(node, source_length=5000)

In [None]:
# Query engine whose context comes from the vector retriever only.
vector_query_engine = RetrieverQueryEngine.from_args(retriever=vector_retriever)
response = vector_query_engine.query(question)

# Render the answer together with its source nodes and their metadata.
display_response(
    response,
    show_source=True,
    show_source_metadata=True,
    source_length=5000,
)

Hybrid Search:

https://docs.llamaindex.ai/en/stable/examples/retrievers/reciprocal_rerank_fusion/

In [None]:
# Hybrid retriever: combines the vector and BM25 retrievers using the
# "reciprocal_rerank" mode and is configured with similarity_top_k=10.
hybrid_retriever = QueryFusionRetriever(
    retrievers=[vector_retriever, bm25_retriever],
    mode="reciprocal_rerank",
    num_queries=1,  # set this to 1 to disable query generation
    similarity_top_k=10,
    use_async=False,
    verbose=True,
)

In [None]:
# Hybrid retrieval: fetch the fused vector + BM25 matches for `question` and
# render each hit (display_source_node is called with source_length=5000).
hybrid_retrieved_nodes = hybrid_retriever.retrieve(question)
for node in hybrid_retrieved_nodes:
    display_source_node(node, source_length=5000)

In [None]:
# Query engine whose context comes from the hybrid (vector + BM25) retriever.
hybrid_query_engine = RetrieverQueryEngine.from_args(retriever=hybrid_retriever)
response = hybrid_query_engine.query(question)

# Render the answer together with its source nodes and their metadata.
display_response(
    response,
    show_source=True,
    show_source_metadata=True,
    source_length=5000,
)

View Prompt:

In [None]:
# define prompt viewing function
def display_prompt_dict(prompts_dict):
    """Show every prompt template contained in ``prompts_dict``.

    Args:
        prompts_dict: Mapping of prompt key to a prompt object that exposes
            ``get_template()``.

    For each entry this displays a Markdown header with the key, prints the
    text returned by ``get_template()``, then displays a Markdown spacer.
    """
    spacer_md = "<br><br>"
    for prompt_key, prompt in prompts_dict.items():
        header_md = f"**Prompt Key**: {prompt_key}<br>**Text:** <br>"
        display(Markdown(header_md))
        # The template is printed rather than rendered as Markdown.
        print(prompt.get_template())
        display(Markdown(spacer_md))


# Fetch the prompts used by the hybrid query engine and show each one.
prompts_dict = hybrid_query_engine.get_prompts()
display_prompt_dict(prompts_dict)

Advanced Links:

https://docs.llamaindex.ai/en/stable/examples/retrievers/auto_merging_retriever/

https://docs.llamaindex.ai/en/stable/examples/workflow/rag/