In [5]:
# pip install openai
# pip install langchain
# pip install langchain_community
# pip install faiss-cpu
# pip install pypdf
# pip install tiktoken

### Installing libraries

In [6]:
pip install openai



In [18]:
pip install langchain



In [19]:
pip install langchain_community



In [20]:
pip install pypdf

Collecting pypdf
  Downloading pypdf-4.2.0-py3-none-any.whl (290 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m290.4/290.4 kB[0m [31m5.6 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: pypdf
Successfully installed pypdf-4.2.0


In [23]:
pip install tiktoken

Collecting tiktoken
  Downloading tiktoken-0.7.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.1 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.1/1.1 MB[0m [31m7.7 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: tiktoken
Successfully installed tiktoken-0.7.0


### Importing libraries

In [8]:
import openai
from langchain.document_loaders import PyPDFLoader
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter

In [13]:
import os

# Read the key from the OPENAI_API_KEY environment variable so a real credential
# never has to be typed into (and saved with) the notebook. The original
# placeholder is kept as the fallback when the variable is not set.
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY", "YOUR KEY HERE")

In [14]:
from langchain.llms import OpenAI

# Build the LangChain OpenAI LLM wrapper, authenticating with the key defined above.
# `llm` is reused later when the conversational retrieval chain is created.
llm = OpenAI(openai_api_key=OPENAI_API_KEY)

In [15]:
# Quick sanity check of the LLM connection: send one prompt and let the returned
# completion string render as the cell output.
llm.invoke("Explain EDA in just 2 lines")

'\n\nEDA (Exploratory Data Analysis) is the process of examining and visualizing data to gain insights and understand the underlying patterns and relationships. It involves using statistical methods and data visualization techniques to extract meaningful information from the data.'

### Using a **custom** dataset

#### RecursiveCharacterTextSplitter is a text splitter that splits the text into chunks, trying to keep paragraphs together and avoid losing context across pages

In [21]:
# Input document and chunking parameters, named so they can be tuned in one place.
PDF_PATH = "/content/RAGPaper.pdf"
CHUNK_SIZE = 1000
CHUNK_OVERLAP = 200

# Load the PDF into LangChain documents.
pdf_reader = PyPDFLoader(PDF_PATH)
documents = pdf_reader.load()

# Split the loaded documents into overlapping chunks; `chunks` feeds the vector store.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=CHUNK_SIZE,
    chunk_overlap=CHUNK_OVERLAP,
)
chunks = text_splitter.split_documents(documents)

In [25]:
from langchain.vectorstores import FAISS
from langchain.embeddings import OpenAIEmbeddings

# Create embeddings
# The key is passed as `openai_api_key`, the same keyword used for the OpenAI LLM
# wrapper earlier in this notebook, so both clients are configured consistently.
embeddings = OpenAIEmbeddings(openai_api_key=OPENAI_API_KEY)

# Build the FAISS vector store from the document chunks using those embeddings.
db = FAISS.from_documents(documents=chunks, embedding=embeddings)

# FAISS: Facebook AI Similarity Search --> Powerful library for similarity search and clustering of dense vectors

In [26]:
from langchain.chains import ConversationalRetrievalChain
from langchain.prompts import PromptTemplate

# Prompt text that asks the model to rewrite a follow-up question as a standalone
# question, given the chat history. Placeholders: {chat_history} and {question}.
_CONDENSE_TEMPLATE = """Given the following conversation and a follow up question, rephrase the follow up question to be a standalone question.

Chat History:
{chat_history}
Follow up Input: {question}
Standalone questions: """

CONDENSE_QUESTION_PROMPT = PromptTemplate.from_template(_CONDENSE_TEMPLATE)

# Conversational retrieval chain wired to the LLM and the FAISS-backed retriever.
qa = ConversationalRetrievalChain.from_llm(
    llm=llm,
    retriever=db.as_retriever(),
    condense_question_prompt=CONDENSE_QUESTION_PROMPT,
    return_source_documents=True,
    verbose=False,
)

### Ask a query

In [32]:
# Start from an empty history so the question is sent without any prior turns.
chat_history = []
query = """Who is Sachin Tendulkar"""

# Call the chain through .invoke() (as done for `llm` earlier) instead of calling
# the chain object directly, which LangChain has deprecated.
result = qa.invoke({"question": query, "chat_history": chat_history})
print(result["answer"])

 I don't know.


In [33]:
# Start from an empty history so the question is sent without any prior turns.
chat_history = []
query = """What is RAGs and tell me more about use cases of RAGs, in a detailed manner"""

# Call the chain through .invoke() (as done for `llm` earlier) instead of calling
# the chain object directly, which LangChain has deprecated.
result = qa.invoke({"question": query, "chat_history": chat_history})
print(result["answer"])

 RAGs (Retrieval-Augmented Generative models) are a type of natural language processing model that combines the best of both worlds from "closed-book" and "open-book" approaches to question answering. It uses both parametric and non-parametric knowledge to generate answers to open-ended questions.

Some use cases of RAGs include open-domain question answering, where it has shown to outperform state-of-the-art models on various datasets. It can also be used for abstractive question answering, where it generates free-form, factually correct answers to questions. Additionally, RAGs can be employed in specific domains, such as medical knowledge, to assist in answering questions and providing relevant information.

The benefits of RAGs include its grounding in real factual knowledge, which reduces the chances of generating false or biased information. It also offers more control and interpretability compared to other language models. This makes it useful in a wide range of scenarios, such a