In [1]:
# Uncomment the lines below to install these packages into the active kernel's environment.
# %pip install --upgrade --quiet llmlingua accelerate ragas
# %pip install --upgrade --quiet faiss-cpu

## Prompt Compression using LLMLingua

In [2]:
import os
from langchain_openai import AzureChatOpenAI, AzureOpenAIEmbeddings
from dotenv import load_dotenv
from langchain_community.document_loaders import TextLoader
from langchain_community.vectorstores import FAISS
from langchain_openai import OpenAIEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_openai.chat_models import AzureChatOpenAI
from langchain_openai.embeddings import AzureOpenAIEmbeddings
from ragas import evaluate
from langchain_core.messages.human import HumanMessage

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
def pretty_print_docs(docs):
    """Print every document's text under a numbered heading.

    Each entry is rendered as ``Document <n>:`` followed by a blank line and the
    document's ``page_content``; consecutive entries are separated by a line of
    100 dashes.

    Args:
        docs: Iterable of objects exposing a string ``page_content`` attribute.
    """
    separator = "\n" + "-" * 100 + "\n"
    sections = []
    for position, doc in enumerate(docs, start=1):
        sections.append(f"Document {position}:\n\n" + doc.page_content)
    print(separator.join(sections))

### Data Processing

In [4]:
# Load the State of the Union text file, decoding it as UTF-8.
speech_loader = TextLoader("../data/state_of_the_union.txt", encoding='utf-8')
documents = speech_loader.load()

In [5]:
# Split the loaded documents into small overlapping chunks. chunk_size and
# chunk_overlap are measured by the splitter's length function
# (presumably characters, the library default — confirm if a custom one is set).
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=100,
    chunk_overlap=50,
)
texts = text_splitter.split_documents(documents)

### Configuring LLMs

In [6]:
# Pull variables from a local .env file (if present) into the process environment.
load_dotenv()

# The API key must be supplied via the environment or .env; it is never hard-coded.
# Assigning os.getenv(...) back into os.environ was a no-op when the key existed and
# raised an opaque "str expected, not NoneType" TypeError when it did not, so fail
# early with an actionable message instead.
if not os.getenv("AZURE_OPENAI_API_KEY"):
    raise RuntimeError(
        "AZURE_OPENAI_API_KEY is not set; export it or add it to your .env file "
        "before running this cell."
    )

# Notebook defaults. setdefault keeps any value already provided by the shell or
# .env, so readers can point the notebook at their own Azure OpenAI resource
# without editing this cell.
os.environ.setdefault("AZURE_OPENAI_ENDPOINT", "https://sriks-openai.openai.azure.com/")
os.environ.setdefault("AZURE_OPENAI_API_VERSION", "2024-05-01-preview")
os.environ.setdefault("AZURE_OPENAI_CHAT_DEPLOYMENT", "gpt-4o")
os.environ.setdefault("AZURE_OPENAI_EMBEDDINGS_DEPLOYMENT", "text-embedding-ada-002")

# Chat model used to answer questions over the retrieved context.
azure_model = AzureChatOpenAI(
    openai_api_version=os.environ["AZURE_OPENAI_API_VERSION"],
    azure_deployment=os.environ["AZURE_OPENAI_CHAT_DEPLOYMENT"],
    validate_base_url=False,
)

# init the embeddings for answer_relevancy, answer_correctness and answer_similarity
azure_embeddings = AzureOpenAIEmbeddings(
    azure_endpoint=os.environ["AZURE_OPENAI_ENDPOINT"],
    azure_deployment=os.environ["AZURE_OPENAI_EMBEDDINGS_DEPLOYMENT"]
)

# Smoke test: send a trivial message to confirm the chat deployment responds.
azure_model.invoke(input=[HumanMessage(content='Hi')])

AIMessage(content='Hello! How can I assist you today?', response_metadata={'token_usage': {'completion_tokens': 9, 'prompt_tokens': 8, 'total_tokens': 17}, 'model_name': 'gpt-4o-2024-05-13', 'system_fingerprint': 'fp_67802d9a6d', 'prompt_filter_results': [{'prompt_index': 0, 'content_filter_results': {'hate': {'filtered': False, 'severity': 'safe'}, 'self_harm': {'filtered': False, 'severity': 'safe'}, 'sexual': {'filtered': False, 'severity': 'safe'}, 'violence': {'filtered': False, 'severity': 'safe'}}}], 'finish_reason': 'stop', 'logprobs': None, 'content_filter_results': {'hate': {'filtered': False, 'severity': 'safe'}, 'self_harm': {'filtered': False, 'severity': 'safe'}, 'sexual': {'filtered': False, 'severity': 'safe'}, 'violence': {'filtered': False, 'severity': 'safe'}}}, id='run-1743ee3f-0901-45b1-9f13-2c8b6b69957a-0', usage_metadata={'input_tokens': 8, 'output_tokens': 9, 'total_tokens': 17})

### RAG with Compressor

In [7]:
# Embed every chunk into a FAISS index and expose it as a retriever that returns
# the top 2 matches (k=2) for a query.
retriever = FAISS.from_documents(texts, azure_embeddings).as_retriever(search_kwargs={"k": 2})
query = "What did the president say about Ketanji Brown Jackson"
# `get_relevant_documents` is deprecated in LangChain and emits a deprecation
# warning; `invoke` is the supported replacement for running a retriever.
docs = retriever.invoke(query)
pretty_print_docs(docs)

  warn_deprecated(


Document 1:

And I did that 4 days ago, when I nominated Circuit Court of Appeals Judge Ketanji Brown Jackson.
----------------------------------------------------------------------------------------------------
Document 2:

a President has is nominating someone to serve on the United States Supreme Court.


In [8]:
from langchain.retrievers import ContextualCompressionRetriever
from langchain_community.document_compressors.llmlingua_filter  import LLMLinguaCompressor

# Compress retrieved context with a small language model (SLM): Microsoft Phi-2, loaded on CPU.
compressor = LLMLinguaCompressor(
    model_name="microsoft/phi-2",
    device_map="cpu",
)

# Alternatively, use the LLMLingua-2 BERT model (per the original note, the model
# below is a large download and may take 20-30 minutes to fetch).
# compressor2 = LLMLinguaCompressor(
#     model_name="microsoft/llmlingua-2-bert-base-multilingual-cased-meetingbank",
#     use_llmlingua2=True,  # Whether to use llmlingua-2
# )

Loading checkpoint shards: 100%|██████████| 2/2 [01:15<00:00, 37.71s/it]


In [9]:
# Pair the base retriever with the compressor in a single contextual-compression retriever.
compression_retriever = ContextualCompressionRetriever(
    base_retriever=retriever,
    base_compressor=compressor,
)

In [11]:
from langchain.chains import RetrievalQA

# Build the QA chain on the compression-enabled retriever so the context handed to
# the LLM goes through the LLMLingua compressor. Passing the plain `retriever` here
# would bypass compression entirely and leave `compression_retriever` unused.
chain = RetrievalQA.from_chain_type(llm=azure_model, retriever=compression_retriever)
chain.invoke({"query": query})

{'query': 'What did the president say about Ketanji Brown Jackson',
 'result': 'The president mentioned nominating Circuit Court of Appeals Judge Ketanji Brown Jackson to serve on the United States Supreme Court.'}

: 