In [None]:
!pip install -qU langchain faiss-cpu openai pypdf tiktoken

In [None]:
!pip install -qU python-dotenv

## Get the OpenAI API key

In [None]:
import os

import dotenv

# Load variables from the .env file into the process environment.
dotenv.load_dotenv('/content/variables/.env')

# This notebook reads the key as OPEN_AI_API_KEY, but the LangChain OpenAI
# wrappers look for OPENAI_API_KEY when no key is passed to them explicitly.
# Accept either spelling so the key actually reaches the API clients.
openai_api_key = os.getenv('OPEN_AI_API_KEY') or os.getenv('OPENAI_API_KEY')
if not openai_api_key:
    # Fail here with a clear message instead of a later authentication error.
    raise RuntimeError(
        "No OpenAI API key found: set OPEN_AI_API_KEY (or OPENAI_API_KEY) "
        "in /content/variables/.env or in the environment."
    )

# Export under the standard name; setdefault keeps an OPENAI_API_KEY that was
# already configured in the environment.
os.environ.setdefault('OPENAI_API_KEY', openai_api_key)

## Import the packages

In [None]:
from langchain.llms import OpenAI
from langchain.chains import RetrievalQA
from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import FAISS

In [None]:
# Read the PDF from disk into a list of documents.
pdf_path = "/content/variables/Attention.pdf"
loader = PyPDFLoader(pdf_path)
documents = loader.load()

In [None]:
# Number of pages in the document (one entry in `documents` per page).
len(documents)

In [None]:
# Split the loaded pages into chunks (chunk_size=1000, chunk_overlap=200);
# the overlap lets neighbouring chunks share some context.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=200,
)
texts = text_splitter.split_documents(documents)

In [None]:
# Number of chunks produced by the splitter.
len(texts)

In [None]:
# Inspect the first chunk to sanity-check the split.
texts[0]

## Get Embeddings

In [None]:
# Pass the key loaded earlier explicitly instead of relying on the default
# OPENAI_API_KEY environment lookup: this notebook reads the key from
# OPEN_AI_API_KEY, which the wrapper does not check on its own.
embeddings = OpenAIEmbeddings(openai_api_key=openai_api_key)

In [None]:
# Embed every chunk and build a FAISS vector index over them.
embedded_docs = FAISS.from_documents(documents=texts, embedding=embeddings)

# Expose the index as a retriever that returns the 3 best-matching chunks.
retriever = embedded_docs.as_retriever(
    search_kwargs={"k": 3},
)

## Create a chain to answer the questions

In [None]:
# Build a retrieval-augmented QA chain over the retriever defined above.
# - temperature=0.0 keeps the answers as deterministic as the model allows.
# - The API key is passed explicitly because this notebook reads it from
#   OPEN_AI_API_KEY, not the OPENAI_API_KEY name the wrapper looks up by default.
# - return_source_documents=True makes the response include the retrieved
#   chunks alongside the answer.
chain = RetrievalQA.from_chain_type(
    llm=OpenAI(temperature=0.0, openai_api_key=openai_api_key),
    chain_type="stuff",
    retriever=retriever,
    return_source_documents=True,
)

In [None]:
# First question put to the chain.
query = "What dataset was used?"

# Calling the chain runs it on the query using the retriever and LLM configured above.
response = chain(query)

In [None]:
# Show the full chain output; the retrieved source documents are included
# because the chain was built with return_source_documents=True.
response

In [None]:
# Second question; `query` and `response` are reused, so the previous
# answer is overwritten.
query = "Who are the authors of Attention is all you need?"
response = chain(query)

In [None]:
# Show the full chain output for the second question (answer plus the
# source documents requested via return_source_documents=True).
response