In [6]:
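# Dependencies (install once in the kernel's environment before running):
# pip install langchain
# pip install pypdf
# The cells below also import langchain_community, langchain_google_genai and
# langchain_text_splitters, so those packages must be installed as well.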

In [1]:
from langchain_community.vectorstores import Chroma
from langchain_google_genai import GoogleGenerativeAIEmbeddings, ChatGoogleGenerativeAI
from langchain_core.messages import HumanMessage, AIMessage, SystemMessage
from langchain_core.prompts import ChatPromptTemplate, SystemMessagePromptTemplate, HumanMessagePromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough

In [2]:
# Reading document

from langchain_community.document_loaders import PyPDFLoader

# Load the PDF as-is. `load()` is used instead of `load_and_split()` because
# the next cell performs the chunking with NLTKTextSplitter; calling
# `load_and_split()` here would run a default splitter first and the text
# would end up being split twice.
loader = PyPDFLoader("LNCB_Paper.pdf")
data = loader.load()

In [3]:
# splitting documents into chunks

from langchain_text_splitters import NLTKTextSplitter

# Sentence-aware splitter: target 500-character chunks with 100 characters of
# overlap between neighbours. As the warnings printed below this cell show,
# some chunks still come out longer than the 500 target.
text_splitter = NLTKTextSplitter(
    chunk_size=500,
    chunk_overlap=100,
)
chunks = text_splitter.split_documents(documents=data)

Created a chunk of size 568, which is longer than the specified 500
Created a chunk of size 506, which is longer than the specified 500
Created a chunk of size 633, which is longer than the specified 500


In [4]:
# loading googleGenAI embeddings

# Read the Google API key from a local text file.
# - `with` guarantees the file handle is closed once the key has been read.
# - `.strip()` drops the trailing newline/whitespace most editors append, which
#   would otherwise become part of the key passed to the API client.
# NOTE(review): this is an absolute, machine-specific path; consider supplying
# the key through an environment variable instead.
with open(r"C:\Users\nishk\Desktop\Innomatics\LangChain_RAG\Keys\api_key.txt") as f:
    google_api_key = f.read().strip()

# Embedding model used both to index the chunks and to embed incoming queries.
embedding_model = GoogleGenerativeAIEmbeddings(google_api_key = google_api_key,
                                     model = "models/embedding-001")

In [6]:
# creating chunks embeddings
# store the chunks in vector store
from langchain_community.vectorstores import Chroma

# Embed every chunk and write the vectors to the on-disk store in "Chroma_db".
# NOTE(review): re-running this cell presumably appends the same chunks to the
# existing store again — confirm, and clear the directory first if so.
database = Chroma.from_documents(
    documents=chunks,
    embedding=embedding_model,
    persist_directory="Chroma_db",
)

# Flush the collection to disk.
# NOTE(review): newer Chroma releases persist automatically and mark this call
# as deprecated — confirm against the installed version.
database.persist()


In [7]:
# Open the persisted Chroma store from disk. It is given the same embedding
# model that was used when the chunks were indexed.
db_connection = Chroma(
    embedding_function=embedding_model,
    persist_directory="Chroma_db",
)

In [8]:
# Wrap the vector store as a retriever; the search is configured with k = 5.
retriever = db_connection.as_retriever(search_kwargs=dict(k=5))

In [9]:
# Prompt template for the chain: a fixed system instruction followed by a
# human message with two placeholders, {context} (retrieved text) and
# {question} (the user's query).

chat_template = ChatPromptTemplate.from_messages([
    SystemMessage(content="""You are a Helpful AI Bot. 
    You take the context and question from user. Your answer should be based on the specific context."""),
    HumanMessagePromptTemplate.from_template("""Answer the question based on the given context.
    Context:
    {context}
    
    Question: 
    {question}
    
    Answer: """)
])


In [10]:
# Gemini chat model that generates the final answer from the filled-in prompt.
chat_model = ChatGoogleGenerativeAI(
    model="gemini-1.5-pro-latest",
    google_api_key=google_api_key,
)

In [11]:
# Output parser placed as the last step of the chain, after the chat model.
output_parser = StrOutputParser()

In [12]:
# creating chain

def format_docs(docs):
    """Combine retrieved documents into a single context string.

    Args:
        docs: Iterable of objects exposing a ``page_content`` string attribute.

    Returns:
        The ``page_content`` of every document, in order, separated by a blank
        line. An empty iterable yields an empty string.
    """
    page_texts = [doc.page_content for doc in docs]
    return "\n\n".join(page_texts)


# Inputs for the prompt: "context" is the retriever's results joined by
# format_docs, and "question" is the raw user query passed through unchanged.
chain_inputs = {
    "context": retriever | format_docs,
    "question": RunnablePassthrough(),
}

# Full RAG pipeline: build the inputs, fill the prompt, call the model, parse the output.
rag_chain = chain_inputs | chat_template | chat_model | output_parser

In [14]:
from IPython.display import Markdown as markdown

# Run the chain on a single question and render the answer as Markdown.
question = "give me the short summary of the paper"
response = rag_chain.invoke(question)
markdown(response)

## Short Summary of the Paper:

This paper explores the challenge of balancing efficiency and quality when applying compressive memory techniques to large language models (LLMs). While recent efforts have focused on system-level optimizations for attention mechanisms, there's still a need for simpler and more effective memory compression methods. 

The authors propose a novel approach that leverages linear attention mechanisms for memory update and retrieval processes. This method draws inspiration from Katharopoulos et al. (2020) due to its simplicity and performance. The paper also highlights the use of a 32K input length during fine-tuning and a 500K input length for evaluation, along with specific decoding parameters for generating summaries. 
