Requirements Installation

In [None]:
!pip install langchain langchain_community langchain_google_genai chroma sentence_transformers pypdf faiss-gpu rank_bm25

Loading a Single PDF File using PyPDFLoader

In [None]:
from langchain_community.document_loaders import PyPDFLoader

# Read a single PDF from disk and keep the loaded documents in `docs`.
loader = PyPDFLoader(file_path="./langchian.pdf")
docs = loader.load()

# Display the first loaded document as the cell output.
docs[0]

In [None]:
# Print the metadata of the first loaded document (for example, its source file).
print(docs[0].metadata)

Loading Multiple PDF Files using PyPDFDirectoryLoader

In [None]:
from langchain_community.document_loaders import PyPDFDirectoryLoader
# Point the loader at a folder instead of a single file.
# NOTE: this rebinds `loader` and `docs`, replacing the result of the
# single-PDF cell above.
loader = PyPDFDirectoryLoader("./example_data") # path of the folder to load
docs = loader.load()

Loading CSV File using CSVLoader

In [None]:
from langchain_community.document_loaders.csv_loader import CSVLoader
# Load a CSV file from disk.
# NOTE: `docs` is rebound again here, so when the cells are run top to bottom
# this CSV result is what the splitting step below receives.
loader = CSVLoader(file_path="./data.csv")
docs = loader.load()

### Split Documents and Chunking

In [None]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Break the loaded documents into smaller pieces: chunk_size caps the length
# of each piece and chunk_overlap is how much consecutive pieces may share.
text_split = RecursiveCharacterTextSplitter(chunk_overlap=50, chunk_size=500)
chunks = text_split.split_documents(docs)

In [None]:
# Total number of chunks produced by the splitter
print(len(chunks))

In [None]:
# Display the chunk at index 0
chunks[0]

Embedding Model

In [None]:
from langchain_community.embeddings import HuggingFaceEmbeddings

# Embedding model loaded by name from Hugging Face; swap in a different
# model name if needed.
embeddings = HuggingFaceEmbeddings(
    model_name="BAAI/bge-large-en-v1.5",
    model_kwargs={"device": "cuda"},  # set "cpu" if no GPU is available
)

Embedding the chunk data and storing it in a vector store

In [None]:
from langchain_community.vectorstores import FAISS
# Build a FAISS vector store from the chunks, using `embeddings` to embed them.
vectorstore = FAISS.from_documents(chunks, embeddings)

Saving locally and loading from local

In [None]:
# Directory the FAISS index is written to and read back from. It was never
# defined in the notebook, which made this cell fail with a NameError.
index_dir = "faiss_index"

vectorstore.save_local(index_dir)

# Loading needs the explicit allow_dangerous_deserialization opt-in because
# the saved index is deserialized with pickle: only load index directories
# that you created yourself.
vector_db = FAISS.load_local(index_dir, embeddings, allow_dangerous_deserialization=True)

Loading Gemini LLM model

In [None]:
from langchain_google_genai import ChatGoogleGenerativeAI
import os
from getpass import getpass

# Do not paste a real key into the notebook. Reuse GOOGLE_API_KEY when it is
# already set in the environment, otherwise ask for it at run time (the typed
# value is not echoed and is not saved in the notebook).
GOOGLE_API_KEY = os.environ.get("GOOGLE_API_KEY") or getpass("Google API key: ")
os.environ["GOOGLE_API_KEY"] = GOOGLE_API_KEY

In [None]:
# Adjust the parameters as needed. temperature controls how random the
# generated text is: 0 is the least random, values closer to 1 are more random.
llm = ChatGoogleGenerativeAI(model="gemini-pro", temperature=0)

Chat Memory

In [None]:
from langchain.memory import ConversationBufferMemory
# Conversation memory whose contents are exposed under the "chat_history" key.
# NOTE(review): `memory` is not referenced by the chain built later in this
# notebook, so it currently has no effect on the answers.
memory = ConversationBufferMemory(memory_key="chat_history")

Prompt Template from the LangChain Hub

In [None]:
# Pull a ready-made RAG prompt from the LangChain Hub.
# NOTE(review): `prompt` is reassigned by the custom template cell further
# down, so when the cells run in order this hub prompt is not the one the
# final chain uses.
from langchain import hub
prompt = hub.pull("rlm/rag-prompt")

Custom Prompt Template

In [None]:
from langchain_core.prompts import ChatPromptTemplate

# Custom prompt: reword it freely, but keep the {context} and {query}
# placeholders, because those are the two keys the chain fills in.
template = """
<|system|>
You are a helpful AI Assistant that follows instructions extremely well.
Answer the following question from the given context.

CONTEXT: {context}
</s>
<|user|>
{query}
</s>
<|assistant|>
"""

prompt = ChatPromptTemplate.from_template(template)

Hybrid Search

In [None]:
from langchain.retrievers import BM25Retriever, EnsembleRetriever
# Keyword (BM25) retriever built from the same chunks that feed the vector store.
keyword_retriever = BM25Retriever.from_documents(chunks)
# k: how many results this retriever returns per query.
keyword_retriever.k =  3

### Ensemble Retriever

In [None]:
# Embedding-based retriever over the FAISS vector store. It was never defined
# in the notebook, which made this cell fail with a NameError. k=3 mirrors the
# setting of keyword_retriever so both sides return the same number of results.
vectorstore_retreiver = vectorstore.as_retriever(search_kwargs={"k": 3})

# Hybrid search: merge the vector and keyword results, weighting both equally.
ensemble_retriever = EnsembleRetriever(retrievers=[vectorstore_retreiver,
                                                   keyword_retriever],
                                       weights=[0.5, 0.5])

OutputParser

In [None]:
from langchain_core.output_parsers import StrOutputParser
from langchain.output_parsers import CommaSeparatedListOutputParser

# Return the model output as a plain string.
output_parser = StrOutputParser()
# Alternative parser, left disabled:
# output_parser = CommaSeparatedListOutputParser()

Setting cache

In [None]:
from langchain.globals import set_llm_cache
from langchain.cache import InMemoryCache
# Register an in-memory LLM cache so that a repeated identical LLM call can be
# answered from memory instead of calling the model again.
set_llm_cache(InMemoryCache())

<h3>Pipeline</h3>

In [None]:
from langchain_core.runnables import RunnablePassthrough


def format_docs(documents):
    """Concatenate the text of the retrieved documents into one context string.

    Args:
        documents: iterable of objects exposing a ``page_content`` string
            (the documents returned by the retriever).

    Returns:
        The ``page_content`` values joined by blank lines.
    """
    return "\n\n".join(doc.page_content for doc in documents)


# The input question goes to the retriever to build the context and is also
# passed through unchanged as the query; the filled prompt is sent to the LLM
# and the parser turns the reply into the final value.
# Piping the retriever into format_docs gives the prompt plain text instead of
# the repr of a list of Document objects.
chain = (
    {"context": ensemble_retriever | format_docs, "query": RunnablePassthrough()}
    | prompt
    | llm
    | output_parser
)

In [None]:
# Run the whole pipeline on a sample question (a plain string: there is
# nothing to interpolate, so no f-string is needed).
chain.invoke("Explain Agents in Langchain")