In [1]:
from langchain_community.document_loaders import PyPDFLoader,PyPDFDirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import FAISS
from langchain_community.embeddings import HuggingFaceBgeEmbeddings
from langchain.prompts import PromptTemplate
from langchain.chains import RetrievalQA
import numpy as np

In [2]:
# Point the directory loader at the local "paper/" folder and load its PDFs
# into a list of LangChain Document objects.
loader = PyPDFDirectoryLoader("paper/")
documents = loader.load()
# NOTE(review): the bare expression below renders the entire list (full page
# text of every loaded page) as the cell output; consider showing only
# len(documents) and a single element to keep the notebook readable.
documents

[Document(metadata={'source': 'paper\\attention.pdf', 'page': 0}, page_content='Provided proper attribution is provided, Google hereby grants permission to\nreproduce the tables and figures in this paper solely for use in journalistic or\nscholarly works.\nAttention Is All You Need\nAshish Vaswani∗\nGoogle Brain\navaswani@google.comNoam Shazeer∗\nGoogle Brain\nnoam@google.comNiki Parmar∗\nGoogle Research\nnikip@google.comJakob Uszkoreit∗\nGoogle Research\nusz@google.com\nLlion Jones∗\nGoogle Research\nllion@google.comAidan N. Gomez∗ †\nUniversity of Toronto\naidan@cs.toronto.eduŁukasz Kaiser∗\nGoogle Brain\nlukaszkaiser@google.com\nIllia Polosukhin∗ ‡\nillia.polosukhin@gmail.com\nAbstract\nThe dominant sequence transduction models are based on complex recurrent or\nconvolutional neural networks that include an encoder and a decoder. The best\nperforming models also connect the encoder and decoder through an attention\nmechanism. We propose a new simple network architecture, the Transfo

In [3]:
# Split the loaded documents into chunks of size 1000 with an overlap of 200
# between consecutive chunks.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=200,
)
final_document = text_splitter.split_documents(documents)

In [4]:
## Embedding using Hugging Face
# Embedding model: BAAI/bge-small-en-v1.5, loaded on CPU, with
# normalize_embeddings enabled at encode time.
# Alternative model name kept for reference: sentence-transformers/all-MiniLM-l6-v2
huggingface_embeddings = HuggingFaceBgeEmbeddings(
    model_name="BAAI/bge-small-en-v1.5",
    model_kwargs={"device": "cpu"},
    encode_kwargs={"normalize_embeddings": True},
)

  from tqdm.autonotebook import tqdm, trange


In [5]:
# Embed the first chunk exactly once and reuse the vector for both prints.
# The previous version called embed_query twice on the same text, doubling the
# model inference cost only to display the vector and then its shape.
sample_embedding = np.array(
    huggingface_embeddings.embed_query(final_document[0].page_content)
)
print(sample_embedding)
print(sample_embedding.shape)

[-4.11435254e-02  1.12881670e-02 -4.82023098e-02 -3.65781151e-02
 -8.84164125e-03  2.74901595e-02  5.55983605e-03  1.57355685e-02
  6.67438582e-02 -5.19652618e-03  1.07394194e-03 -2.29805727e-02
  3.89843956e-02  8.15375373e-02  2.59759314e-02  1.12256631e-02
 -3.63948904e-02  4.50761989e-02  5.00951111e-02 -4.60564941e-02
  4.19073589e-02 -3.62798870e-02 -5.30159753e-03  9.43684019e-03
 -1.14307776e-02 -3.25555503e-02 -1.22781927e-02 -3.20052616e-02
 -4.57952395e-02 -2.32240140e-01  1.70852281e-02 -3.34782898e-02
  7.46601820e-02  1.71358362e-02 -2.87839230e-02 -2.62469184e-02
 -5.89441210e-02 -4.59725782e-02 -6.23235032e-02 -1.15564102e-02
  1.20267980e-02  2.43485980e-02 -1.76616460e-02 -3.62666883e-02
 -2.98957098e-02 -3.75974216e-02 -1.93930101e-02 -7.48388916e-02
 -3.85977365e-02 -1.19003141e-02 -4.76373062e-02 -1.20431213e-02
 -2.57740971e-02  8.40908736e-02  7.88563583e-03 -2.14243233e-02
  2.86728125e-02  3.72647122e-02  6.03902079e-02 -1.17800534e-02
  1.38523495e-02  6.23644

In [6]:
# Build the FAISS vector store from the first 150 chunks only, embedding them
# with the Hugging Face model configured above.
vectorstore = FAISS.from_documents(
    final_document[:150],
    huggingface_embeddings,
)

In [7]:
# Quick retrieval check: run a similarity search for a sample question and
# print the text of the first document that comes back.
query = "What is Transformer?"
relevant_docments = vectorstore.similarity_search(query)

first_match = relevant_docments[0]
print(first_match.page_content)

Figure 1: The Transformer - model architecture.
The Transformer follows this overall architecture using stacked self-attention and point-wise, fully
connected layers for both the encoder and decoder, shown in the left and right halves of Figure 1,
respectively.
3.1 Encoder and Decoder Stacks
Encoder: The encoder is composed of a stack of N= 6 identical layers. Each layer has two
sub-layers. The first is a multi-head self-attention mechanism, and the second is a simple, position-
wise fully connected feed-forward network. We employ a residual connection [ 11] around each of
the two sub-layers, followed by layer normalization [ 1]. That is, the output of each sub-layer is
LayerNorm( x+ Sublayer( x)), where Sublayer( x)is the function implemented by the sub-layer
itself. To facilitate these residual connections, all sub-layers in the model, as well as the embedding
layers, produce outputs of dimension dmodel = 512 .


In [8]:
# NOTE(review): this repeats the index build from the In [6] cell with the
# same arguments (first 150 chunks, same embedding model); it presumably
# re-embeds every chunk. Consider removing this cell if no rebuild is intended.
vectorstore = FAISS.from_documents(
    final_document[:150],
    huggingface_embeddings,
)

In [9]:
# Wrap the vector store as a retriever using similarity search with k=3,
# then print its configuration.
retriever = vectorstore.as_retriever(
    search_type="similarity",
    search_kwargs={"k": 3},
)
print(retriever)

tags=['FAISS', 'HuggingFaceBgeEmbeddings'] vectorstore=<langchain_community.vectorstores.faiss.FAISS object at 0x0000020E951BED10> search_kwargs={'k': 3}


In [10]:
import os
from dotenv import load_dotenv

# Load variables from a local .env file (if present) into the environment.
load_dotenv()

# Resolve the Hugging Face token. os.getenv returns None for a missing key and
# os.environ only accepts str values, so assigning the raw lookup would fail
# with an opaque "str expected, not NoneType" TypeError. Fall back to an
# already exported HUGGINGFACEHUB_API_TOKEN, and fail with a clear message
# when neither is available.
hf_token = os.getenv("hf_token") or os.getenv("HUGGINGFACEHUB_API_TOKEN")
if not hf_token:
    raise RuntimeError(
        "Hugging Face token not found: define `hf_token` in your .env file "
        "or export HUGGINGFACEHUB_API_TOKEN before running this cell."
    )
os.environ["HUGGINGFACEHUB_API_TOKEN"] = hf_token

In [11]:
from langchain_community.llms import HuggingFaceHub

# Hosted LLM used for generation: Mistral-7B-v0.1 with temperature 0.1.
# NOTE(review): the recorded output of this cell is a deprecation warning
# (`warn_deprecated`); check which replacement class the installed LangChain
# version recommends.
# NOTE(review): the recorded answer of the next cell stops mid-sentence, so
# `max_length` may not be the parameter that bounds generation length for this
# endpoint — confirm against the Hugging Face Inference API parameters.
hf1 = HuggingFaceHub(
    repo_id="mistralai/Mistral-7B-v0.1",
    model_kwargs={"temperature": 0.1, "max_length": 500},
)

  warn_deprecated(


In [12]:
# Baseline: send the question straight to the LLM, without the retriever or
# the custom prompt. The bare call is the cell's last expression, so its
# return value is rendered as the cell output.
query = "what is transformer model architecture?"
hf1.invoke(query)

'what is transformer model architecture?\n\nTransformer is a neural network architecture that is used for natural language processing (NLP) tasks. It was introduced in 2017 by Google researchers and has since become one of the most popular architectures for NLP.\n\nThe transformer model is composed of two main components: an encoder and a decoder. The encoder takes as input a sequence of words and outputs a representation of the words in the sequence. The decoder takes as input the representation of'

In [13]:
# Raw prompt text for the retrieval QA chain. {context} and {question} are the
# two placeholders declared as input variables when the PromptTemplate is
# built. The literal's whitespace (leading newline, trailing newline + space)
# is part of the prompt sent to the model, so it is kept exactly as written.
prompt_template="""
Use the following piece of context to answer the question asked.
Please try to provide the answer only based on the context

{context}
Question:{question}

Helpful Answers:
 """

In [14]:
# Wrap the raw template text in a PromptTemplate that expects the
# "context" and "question" variables.
prompt = PromptTemplate(
    input_variables=["context", "question"],
    template=prompt_template,
)

In [16]:
# Assemble the retrieval QA chain: the hosted LLM, the FAISS-backed retriever,
# chain_type "stuff", and the custom prompt handed over via chain_type_kwargs.
# return_source_documents=True is requested so the retrieved chunks come back
# alongside the answer.
retrievalQA = RetrievalQA.from_chain_type(
    llm=hf1,
    retriever=retriever,
    chain_type="stuff",
    chain_type_kwargs={"prompt": prompt},
    return_source_documents=True,
)

In [17]:
# Question to run through the retrieval QA chain.
query = "How Transformer model works?"

In [18]:
# Run the question through the retrieval QA chain and print the text stored
# under the "result" key of the response.
# NOTE(review): the recorded output starts with the prompt text itself, so the
# model response appears to echo the full prompt before the answer.
result = retrievalQA.invoke({"query": query})
answer_text = result["result"]
print(answer_text)


Use the following piece of context to answer the question asked.
Please try to provide the answer only based on the context

Figure 1: The Transformer - model architecture.
The Transformer follows this overall architecture using stacked self-attention and point-wise, fully
connected layers for both the encoder and decoder, shown in the left and right halves of Figure 1,
respectively.
3.1 Encoder and Decoder Stacks
Encoder: The encoder is composed of a stack of N= 6 identical layers. Each layer has two
sub-layers. The first is a multi-head self-attention mechanism, and the second is a simple, position-
wise fully connected feed-forward network. We employ a residual connection [ 11] around each of
the two sub-layers, followed by layer normalization [ 1]. That is, the output of each sub-layer is
LayerNorm( x+ Sublayer( x)), where Sublayer( x)is the function implemented by the sub-layer
itself. To facilitate these residual connections, all sub-layers in the model, as well as the embedding