In [1]:
from pathlib import Path
import re, openai, yaml, os
import http.client as httplib
from llama_index.llms import AzureOpenAI
from llama_index.schema import MetadataMode
from llama_index.llm_predictor import LLMPredictor
from llama_index import set_global_service_context
from sentence_transformers import SentenceTransformer
from llama_index.embeddings import HuggingFaceEmbedding
from sentence_transformers.evaluation import InformationRetrievalEvaluator
from llama_index.node_parser import SimpleNodeParser, SentenceWindowNodeParser
from llama_index.indices.postprocessor import MetadataReplacementPostProcessor
from llama_index import VectorStoreIndex, SimpleDirectoryReader, ServiceContext
from llama_index.finetuning import (
                                    generate_qa_embedding_pairs,
                                    EmbeddingQAFinetuneDataset,
                                    SentenceTransformersFinetuneEngine
                                    )

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Location of the YAML credentials file. The default is the original
# machine-specific path; set LLMPRO_CREDENTIALS_PATH to point elsewhere so the
# notebook can run on another machine without being edited.
CREDENTIALS_PATH = Path(
    os.environ.get(
        'LLMPRO_CREDENTIALS_PATH',
        '/Users/1zuu/Desktop/LLM RESEARCH/LLMPro/cadentials.yaml',
    )
)

# NOTE(review): yaml.load with FullLoader is kept unchanged; this assumes the file
# is a trusted local config. If it only holds plain scalars, yaml.safe_load would
# be the stricter choice -- confirm before switching.
with CREDENTIALS_PATH.open() as f:
    credentials = yaml.load(f, Loader=yaml.FullLoader)

# Keys that are copied into the process environment below.
_EXPORTED_KEYS = ('AD_OPENAI_API_KEY', 'HUGGINGFACEHUB_API_TOKEN')

# Fail early with one readable message instead of a bare KeyError on first use.
_missing_keys = [key for key in _EXPORTED_KEYS if key not in credentials]
if _missing_keys:
    raise KeyError(f"Missing keys in {CREDENTIALS_PATH}: {_missing_keys}")

for _key in _EXPORTED_KEYS:
    os.environ[_key] = credentials[_key]

In [3]:
# Relative directories of the two document sets (train / test papers).
train_dir, val_dir = (
    'data/Camel Papers Train/',
    'data/Camel Papers Test/',
)

In [4]:
# Embedding model, selected by its model name.
embedding_llm = HuggingFaceEmbedding(model_name="BAAI/bge-small-en-v1.5")

# Azure OpenAI connection settings, all read from the `credentials` mapping.
azure_openai_kwargs = {
    'deployment_name': credentials['AD_DEPLOYMENT_ID'],
    'model': credentials['AD_ENGINE'],
    'api_key': credentials['AD_OPENAI_API_KEY'],
    'api_version': credentials['AD_OPENAI_API_VERSION'],
    'azure_endpoint': credentials['AD_OPENAI_API_BASE'],
}
llm = AzureOpenAI(**azure_openai_kwargs)

# Predictor wrapper around the Azure LLM.
chat_llm = LLMPredictor(llm)

# Advanced Retrieval Method: Sentence Window Retrieval

Fine-tuning our embeddings is a powerful way to ensure we're better at retrieving the correct context - but we can go a step further and improve the way we actually look at context as well.

In this demonstration, we'll be leveraging the idea of a SentenceWindowNodeParser and metadata replacement to take our retrieval to the next level.

At a high level, what we're doing is straightforward:

1. We parse our document into sentence-wise nodes.
2. We find the most relevant sentence-wise nodes to our query.
3. We add additional context based on a "window" around that base sentence-wise node.
4. We pass that enhanced context to our LLM!


Let's look at this with a visual example:

In [5]:
# Sentence-window parser. The window text is stored under the "window" metadata
# key and the original sentence under "original_text"; the query-time
# postprocessor must use the same "window" key.
# NOTE(review): window_size=6 is presumably the number of sentences kept on each
# side of the matched sentence -- confirm against the llama_index docs.
node_parser = SentenceWindowNodeParser.from_defaults(
    window_size=6,
    window_metadata_key="window",
    original_text_metadata_key="original_text",
)

# Baseline parser built with library defaults.
simple_node_parser = SimpleNodeParser.from_defaults()

# `chat_llm` (the LLMPredictor around the Azure OpenAI LLM) is reused from the
# earlier setup cell; it used to be rebuilt here with identical arguments.

# base Embeddings model
embed_model_base = HuggingFaceEmbedding(model_name="BAAI/bge-small-en")

# fine-tuned Embeddings model
# NOTE(review): "bge-small-finetuned" is presumably a local directory written by a
# previous fine-tuning run -- confirm it exists before running this cell.
embed_model = HuggingFaceEmbedding(model_name="bge-small-finetuned")

# fine-tuned ServiceContext
ctx = ServiceContext.from_defaults(
    llm_predictor=chat_llm,
    embed_model=embed_model,
)

# base ServiceContext
ctx_base = ServiceContext.from_defaults(
    llm_predictor=chat_llm,
    embed_model=embed_model_base,
)

In [6]:
# Load the documents found under the training directory.
reader = SimpleDirectoryReader(train_dir)
documents = reader.load_data()

# Parse the same documents twice: once with the sentence-window parser and once
# with the simple parser.
nodes = node_parser.get_nodes_from_documents(documents)
nodes_base = simple_node_parser.get_nodes_from_documents(documents)

# One vector index per node set, each paired with its own service context.
sentence_index = VectorStoreIndex(nodes=nodes, service_context=ctx)
sentence_index_base = VectorStoreIndex(nodes=nodes_base, service_context=ctx_base)

In [7]:
# Postprocessor keyed on the "window" metadata entry of each retrieved node.
window_postprocessor = MetadataReplacementPostProcessor(target_metadata_key="window")

# Query engine over the sentence-window index, retrieving the top 3 matches.
query_engine = sentence_index.as_query_engine(
    similarity_top_k=3,
    node_postprocessors=[window_postprocessor],
)

In [8]:
# Ask a sample question through the sentence-window query engine and display
# the text of the answer as the cell output.
question = "How do camelid genetics influence wool quality?"
window_response = query_engine.query(question)
window_response.response

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


'Genetics mechanisms controlling fiber traits in llamas and alpacas are not fully understood. However, a few genetic selection programs have been implemented in domestic camelids to improve fleece characteristics. The proteins that form the fiber are encoded by keratin genes (KRT) and keratin-associated proteins (KRTAP) which are expressed in a highly regulated manner during hair follicle growth. The presence of major genes affecting quantitative fiber traits such as fiber diameter, standard deviation of fiber diameter, variation coefficiency, and comfort factor in both Huacaya and Suri alpacas has been proposed based on segregation analysis.'