#### **Install required dependencies**

In [1]:
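# Uncomment and run once to install the packages this notebook imports:
# %pip install pypdf
# %pip install langchain_community
# %pip install -U langchain-text-splitters
# %pip install langchain-google-genai
# The Chroma vector store and the NLTK splitter used below also need:
# %pip install chromadb nltk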

In [2]:
from langchain_community.document_loaders import PyPDFLoader

In [3]:
# Paper to index; the relative path is resolved against the notebook's working directory.
pdf_path = "Leave_No_Context_Behind.pdf"
doc = PyPDFLoader(pdf_path)

# `page` holds whatever load_and_split() returns and is handed to the text
# splitter in the next cell.
# NOTE(review): load_and_split() presumably already applies a default splitter,
# so the NLTK split below would be a second pass - confirm this is intended.
page = doc.load_and_split()

In [4]:
from langchain_text_splitters import NLTKTextSplitter
# Splitter configured with a chunk size of 1000 and an overlap of 100.
text_splitter = NLTKTextSplitter(
    chunk_size=1000,
    chunk_overlap=100,
)

# Split the loaded documents; `chunks` is what gets embedded and stored later.
chunks = text_splitter.split_documents(documents=page)

In [5]:
# Sanity check: show the text of the first chunk produced by the splitter.
first_chunk = chunks[0]
print(first_chunk.page_content)

Preprint.

Under review.

Leave No Context Behind:
Efficient Infinite Context Transformers with Infini-attention
Tsendsuren Munkhdalai, Manaal Faruqui and Siddharth Gopal
Google
tsendsuren@google.com
Abstract
This work introduces an efficient method to scale Transformer-based Large
Language Models (LLMs) to infinitely long inputs with bounded memory
and computation.

A key component in our proposed approach is a new at-
tention technique dubbed Infini-attention.

The Infini-attention incorporates
a compressive memory into the vanilla attention mechanism and builds
in both masked local attention and long-term linear attention mechanisms
in a single Transformer block.

We demonstrate the effectiveness of our
approach on long-context language modeling benchmarks, 1M sequence
length passkey context block retrieval and 500K length book summarization
tasks with 1B and 8B LLMs.

Our approach introduces minimal bounded
memory parameters and enables fast streaming inference for LLMs.


In [6]:
import getpass
import os

from langchain_google_genai import GoogleGenerativeAIEmbeddings

# SECURITY: the API key must never be written into the notebook. It is read
# from the GOOGLE_API_KEY environment variable; if that is unset, the user is
# prompted once and the value is cached in the process environment so later
# cells can reuse it. The key that used to be inlined here should be treated as
# leaked and revoked.
if not os.environ.get("GOOGLE_API_KEY"):
    os.environ["GOOGLE_API_KEY"] = getpass.getpass("Google API key: ")

# Embedding model used both when building the vector store and when querying it.
genAI_model = GoogleGenerativeAIEmbeddings(
    google_api_key=os.environ["GOOGLE_API_KEY"],
    model="models/embedding-001",
)

#### Store the chunks in ChromaDB

In [7]:
from langchain_community.vectorstores import Chroma

# Embed every chunk with `genAI_model` and store the vectors in a Chroma
# collection backed by the ./chromadb directory.
# NOTE(review): re-running this cell presumably adds the same chunks again to
# the existing on-disk collection - confirm before re-executing.
db = Chroma.from_documents(
    documents=chunks,
    embedding=genAI_model,
    persist_directory="./chromadb",
)

# Explicitly flush the collection to disk.
db.persist()

In [9]:
# Open the collection stored under ./chromadb, using the same embedding model
# so that queries are embedded the same way as the stored chunks.
db_connection = Chroma(
    embedding_function=genAI_model,
    persist_directory="./chromadb",
)

In [10]:
# Wrap the vector store as a retriever; k=5 is passed through as a search
# argument (presumably the number of chunks returned per query).
retriever = db_connection.as_retriever(
    search_kwargs=dict(k=5),
)

In [None]:
from langchain_core.messages import HumanMessage, AIMessage, SystemMessage
from langchain_core.prompts import ChatPromptTemplate, SystemMessagePromptTemplate, HumanMessagePromptTemplate

In [None]:
# Two-message chat prompt: a fixed system instruction followed by a human turn
# that is filled in with the `context` and `question` variables.
# The prompt strings are whitespace-sensitive (they keep their indentation and
# trailing spaces), so edit them with care.
chat_template = ChatPromptTemplate.from_messages([
    
    # Static system message (no template variables) that sets the assistant's behaviour.
    SystemMessage(content="""You are a nice polite AI Bot. 
    Provide assistance to the user based on the context asked by the user.
    Make sure your answers are relevant to the context."""),
    
    # Human message template; {context} and {question} are substituted when the prompt is invoked.
    HumanMessagePromptTemplate.from_template("""Answer the question based on the given context.
    Context:
    {context}
    
    Question: 
    {question}
    
    Answer: """)])

In [None]:
import getpass
import os

from langchain_google_genai import ChatGoogleGenerativeAI

# SECURITY: the API key must never be written into the notebook. It is read
# from the GOOGLE_API_KEY environment variable; if that is unset, the user is
# prompted once and the value is cached in the process environment. The key
# that used to be inlined here should be treated as leaked and revoked.
if not os.environ.get("GOOGLE_API_KEY"):
    os.environ["GOOGLE_API_KEY"] = getpass.getpass("Google API key: ")

# Chat model that generates the final answer from the filled-in prompt.
chat_model = ChatGoogleGenerativeAI(
    google_api_key=os.environ["GOOGLE_API_KEY"],
    model="gemini-1.5-pro-latest",
)

In [None]:
from langchain_core.output_parsers import StrOutputParser
# Last stage of the RAG chain built below; as the class name suggests, it is
# expected to turn the chat model's reply into a plain string.
output_parser = StrOutputParser()

In [None]:
from langchain_core.runnables import RunnablePassthrough

def format_docs(docs):
    """Concatenate the text of several documents into one context string.

    Args:
        docs: Iterable of objects exposing a ``page_content`` string attribute.

    Returns:
        The ``page_content`` values joined by a blank line (two newlines);
        an empty string when ``docs`` is empty.
    """
    contents = [item.page_content for item in docs]
    separator = "\n\n"
    return separator.join(contents)

# RAG pipeline, one stage per line:
#   1. build the prompt inputs: "context" comes from the retriever piped through
#      format_docs, "question" is the chain input passed through unchanged;
#   2. fill the chat prompt template;
#   3. call the chat model;
#   4. run the output parser on the model's reply.
rag_chain = (
    {
        "context": retriever | format_docs,
        "question": RunnablePassthrough(),
    }
    | chat_template
    | chat_model
    | output_parser
)

In [None]:
from IPython.display import Markdown
# Run the RAG chain on a question and render the answer as Markdown.
question = "explain about Context Transformers with Infini-attention"
model_response = rag_chain.invoke(question)
Markdown(model_response)

In [None]:
# Run the RAG chain on a second question and render the answer as Markdown.
question = "explain about Long-term context injection"
model_response = rag_chain.invoke(question)
Markdown(model_response)

In [None]:
# Ask the chain for a short summary of the whole paper and render it as Markdown.
# The query text is used both for retrieval and in the prompt, so the earlier
# misspelling ("expain") is corrected here.
model_response = rag_chain.invoke("Summarize the paper and explain it briefly")
Markdown(model_response)