In [1]:
import os 
import json 
from langchain_openai import ChatOpenAI
from langchain_openai import OpenAIEmbeddings
from langchain_core.prompts import ChatPromptTemplate 
from langchain_core.output_parsers import StrOutputParser
from langchain.memory import ConversationBufferMemory
from langchain_community.vectorstores import Chroma
from dotenv import load_dotenv 
import PyPDF2


In [2]:
load_dotenv()

OPENAI_API_KEY = os.getenv('OPENAI_API_KEY')
LANGCHAIN_API_KEY = os.getenv('LANGCHAIN_API_KEY')

os.environ["OPENAI_API_KEY"] = OPENAI_API_KEY
os.environ["LANGCHAIN_API_KEY"] = LANGCHAIN_API_KEY

In [3]:
llm=ChatOpenAI(model="gpt-4")
output=StrOutputParser()
memory=ConversationBufferMemory(memory_key="hsitory")

  memory=ConversationBufferMemory(memory_key="hsitory")


In [4]:
print(llm.invoke("hi").content)


Hello! How can I assist you today?


In [5]:
import PyPDF2

def extract_text_from_pdf(pdf_path):
    with open(pdf_path, "rb") as file:
        reader = PyPDF2.PdfReader(file)
        text = ""
        for page in range(len(reader.pages)):
            text += reader.pages[page].extract_text() + "\n"
        return text,reader

pdf_path = "attention.pdf"  
pdf_text,pdf = extract_text_from_pdf(pdf_path)
print(pdf_text)  


Provided proper attribution is provided, Google hereby grants permission to
reproduce the tables and figures in this paper solely for use in journalistic or
scholarly works.
Attention Is All You Need
Ashish Vaswani∗
Google Brain
avaswani@google.comNoam Shazeer∗
Google Brain
noam@google.comNiki Parmar∗
Google Research
nikip@google.comJakob Uszkoreit∗
Google Research
usz@google.com
Llion Jones∗
Google Research
llion@google.comAidan N. Gomez∗ †
University of Toronto
aidan@cs.toronto.eduŁukasz Kaiser∗
Google Brain
lukaszkaiser@google.com
Illia Polosukhin∗ ‡
illia.polosukhin@gmail.com
Abstract
The dominant sequence transduction models are based on complex recurrent or
convolutional neural networks that include an encoder and a decoder. The best
performing models also connect the encoder and decoder through an attention
mechanism. We propose a new simple network architecture, the Transformer,
based solely on attention mechanisms, dispensing with recurrence and convolutions
entirely. Experime

In [6]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

text_splitter=RecursiveCharacterTextSplitter(separators='/n',chunk_size=1000,chunk_overlap=10)

In [7]:
text=text_splitter.split_text(pdf_text)

In [8]:
text

['Provided proper attribution is provided, Google hereby grants permission to\nreproduce the tables and figures in this paper solely for use in journalistic or\nscholarly works.\nAttention Is All You Need\nAshish Vaswani∗\nGoogle Brain\navaswani@google.comNoam Shazeer∗\nGoogle Brain\nnoam@google.comNiki Parmar∗\nGoogle Research\nnikip@google.comJakob Uszkoreit∗\nGoogle Research\nusz@google.com\nLlion Jones∗\nGoogle Research\nllion@google.comAidan N. Gomez∗ †\nUniversity of Toronto\naidan@cs.toronto.eduŁukasz Kaiser∗\nGoogle Brain\nlukaszkaiser@google.com\nIllia Polosukhin∗ ‡\nillia.polosukhin@gmail.com\nAbstract\nThe dominant sequence transduction models are based on complex recurrent or\nconvolutional neural networks that include an encoder and a decoder. The best\nperforming models also connect the encoder and decoder through an attention\nmechanism. We propose a new simple network architecture, the Transformer,\nbased solely on attention mechanisms, dispensing with recurrence and co

In [9]:
for i, chunk in enumerate(text[:5]):
    print(f"Chunk {i+1}:\n{chunk}\n{'-'*50}")

Chunk 1:
Provided proper attribution is provided, Google hereby grants permission to
reproduce the tables and figures in this paper solely for use in journalistic or
scholarly works.
Attention Is All You Need
Ashish Vaswani∗
Google Brain
avaswani@google.comNoam Shazeer∗
Google Brain
noam@google.comNiki Parmar∗
Google Research
nikip@google.comJakob Uszkoreit∗
Google Research
usz@google.com
Llion Jones∗
Google Research
llion@google.comAidan N. Gomez∗ †
University of Toronto
aidan@cs.toronto.eduŁukasz Kaiser∗
Google Brain
lukaszkaiser@google.com
Illia Polosukhin∗ ‡
illia.polosukhin@gmail.com
Abstract
The dominant sequence transduction models are based on complex recurrent or
convolutional neural networks that include an encoder and a decoder. The best
performing models also connect the encoder and decoder through an attention
mechanism. We propose a new simple network architecture, the Transformer,
based solely on attention mechanisms, dispensing with recurrence and convolutions
entirely.

In [10]:
# DOCUMENT WAY 

from langchain.schema import Document
document = Document(page_content=pdf_text)


In [11]:
doc_text=text_splitter.split_documents([document])

In [12]:
doc_text

[Document(metadata={}, page_content='Provided proper attribution is provided, Google hereby grants permission to\nreproduce the tables and figures in this paper solely for use in journalistic or\nscholarly works.\nAttention Is All You Need\nAshish Vaswani∗\nGoogle Brain\navaswani@google.comNoam Shazeer∗\nGoogle Brain\nnoam@google.comNiki Parmar∗\nGoogle Research\nnikip@google.comJakob Uszkoreit∗\nGoogle Research\nusz@google.com\nLlion Jones∗\nGoogle Research\nllion@google.comAidan N. Gomez∗ †\nUniversity of Toronto\naidan@cs.toronto.eduŁukasz Kaiser∗\nGoogle Brain\nlukaszkaiser@google.com\nIllia Polosukhin∗ ‡\nillia.polosukhin@gmail.com\nAbstract\nThe dominant sequence transduction models are based on complex recurrent or\nconvolutional neural networks that include an encoder and a decoder. The best\nperforming models also connect the encoder and decoder through an attention\nmechanism. We propose a new simple network architecture, the Transformer,\nbased solely on attention mechanisms

In [13]:
embeddings=OpenAIEmbeddings()
persist_directory='db'

vectordb = Chroma.from_documents(
    documents=doc_text,
    embedding=embeddings,
    persist_directory=persist_directory
)

In [14]:
retriever=vectordb.as_retriever()

In [15]:
docs=retriever.invoke("What is attention")

In [16]:
docs

[Document(metadata={}, page_content='Provided proper attribution is provided, Google hereby grants permission to\nreproduce the tables and figures in this paper solely for use in journalistic or\nscholarly works.\nAttention Is All You Need\nAshish Vaswani∗\nGoogle Brain\navaswani@google.comNoam Shazeer∗\nGoogle Brain\nnoam@google.comNiki Parmar∗\nGoogle Research\nnikip@google.comJakob Uszkoreit∗\nGoogle Research\nusz@google.com\nLlion Jones∗\nGoogle Research\nllion@google.comAidan N. Gomez∗ †\nUniversity of Toronto\naidan@cs.toronto.eduŁukasz Kaiser∗\nGoogle Brain\nlukaszkaiser@google.com\nIllia Polosukhin∗ ‡\nillia.polosukhin@gmail.com\nAbstract\nThe dominant sequence transduction models are based on complex recurrent or\nconvolutional neural networks that include an encoder and a decoder. The best\nperforming models also connect the encoder and decoder through an attention\nmechanism. We propose a new simple network architecture, the Transformer,\nbased solely on attention mechanisms

In [17]:
retriver=vectordb.as_retriever(search_kwargs={"k":2})
docs=retriver.invoke("What is attention")

In [18]:
docs

[Document(metadata={}, page_content='Provided proper attribution is provided, Google hereby grants permission to\nreproduce the tables and figures in this paper solely for use in journalistic or\nscholarly works.\nAttention Is All You Need\nAshish Vaswani∗\nGoogle Brain\navaswani@google.comNoam Shazeer∗\nGoogle Brain\nnoam@google.comNiki Parmar∗\nGoogle Research\nnikip@google.comJakob Uszkoreit∗\nGoogle Research\nusz@google.com\nLlion Jones∗\nGoogle Research\nllion@google.comAidan N. Gomez∗ †\nUniversity of Toronto\naidan@cs.toronto.eduŁukasz Kaiser∗\nGoogle Brain\nlukaszkaiser@google.com\nIllia Polosukhin∗ ‡\nillia.polosukhin@gmail.com\nAbstract\nThe dominant sequence transduction models are based on complex recurrent or\nconvolutional neural networks that include an encoder and a decoder. The best\nperforming models also connect the encoder and decoder through an attention\nmechanism. We propose a new simple network architecture, the Transformer,\nbased solely on attention mechanisms

In [19]:
from langchain.chains import RetrievalQA

qa_chain = RetrievalQA.from_chain_type(
    llm=llm, 
    chain_type="stuff",  
    retriever=retriever  
)


In [20]:
qa_chain.invoke("What is attention")

{'query': 'What is attention',
 'result': 'In the context of the provided information, "attention" refers to a mechanism in neural networks, more specifically in sequence transduction models. This mechanism helps to connect the encoder and the decoder in the models. Attention mechanisms have become a key component in many state-of-the-art models as they help the system focus on relevant parts of the input data, enhancing the performance of the model.'}

# Using different chains 

In [21]:
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import MessagesPlaceholder
from langchain import hub


In [22]:
system_prompt = (
    "You are an assistant for question-answering tasks. "
    "Use the following pieces of retrieved context to answer the question "
    "If you don't know the answer, say that you don't know."
    "Use three sentences maximum and keep the answer concise."
    "\n\n"
    "{context}"
)

In [23]:

chat_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system_prompt),
        ("user", "{input}"),
    ]
)

question_answering_chain=create_stuff_documents_chain(llm, chat_prompt)

rag_chain = create_retrieval_chain(retriever, question_answering_chain)
response = rag_chain.invoke({"input":"what is attention?"})

print(f"Response generated: {response}")

Response generated: {'input': 'what is attention?', 'context': [Document(metadata={}, page_content='Provided proper attribution is provided, Google hereby grants permission to\nreproduce the tables and figures in this paper solely for use in journalistic or\nscholarly works.\nAttention Is All You Need\nAshish Vaswani∗\nGoogle Brain\navaswani@google.comNoam Shazeer∗\nGoogle Brain\nnoam@google.comNiki Parmar∗\nGoogle Research\nnikip@google.comJakob Uszkoreit∗\nGoogle Research\nusz@google.com\nLlion Jones∗\nGoogle Research\nllion@google.comAidan N. Gomez∗ †\nUniversity of Toronto\naidan@cs.toronto.eduŁukasz Kaiser∗\nGoogle Brain\nlukaszkaiser@google.com\nIllia Polosukhin∗ ‡\nillia.polosukhin@gmail.com\nAbstract\nThe dominant sequence transduction models are based on complex recurrent or\nconvolutional neural networks that include an encoder and a decoder. The best\nperforming models also connect the encoder and decoder through an attention\nmechanism. We propose a new simple network archi

# Using buffer 


In [24]:
query="What is Attetion?"


memory.save_context({"input": query}, {"output": ""})  

conversation_context = memory.load_memory_variables({})
print(f"Conversation context from memory: {conversation_context}")

chat_prompt = ChatPromptTemplate.from_messages([
        ("system", system_prompt),
        ("user", f"Question:{query}\n\n{conversation_context.get('history', '')}")
])

            

Conversation context from memory: {'hsitory': 'Human: What is Attetion?\nAI: '}


In [25]:
question_answering_chain=create_stuff_documents_chain(llm, chat_prompt)


In [26]:
rag_chain = create_retrieval_chain(retriever, question_answering_chain)
# response = rag_chain.invoke({"input":"what is attention?"})


response = rag_chain.invoke({'input': query})
print(f"Response generated: {response}")
            
memory.save_context({"input": query}, {"output": str(response)})

Response generated: {'input': 'What is Attetion?', 'context': [Document(metadata={}, page_content='Provided proper attribution is provided, Google hereby grants permission to\nreproduce the tables and figures in this paper solely for use in journalistic or\nscholarly works.\nAttention Is All You Need\nAshish Vaswani∗\nGoogle Brain\navaswani@google.comNoam Shazeer∗\nGoogle Brain\nnoam@google.comNiki Parmar∗\nGoogle Research\nnikip@google.comJakob Uszkoreit∗\nGoogle Research\nusz@google.com\nLlion Jones∗\nGoogle Research\nllion@google.comAidan N. Gomez∗ †\nUniversity of Toronto\naidan@cs.toronto.eduŁukasz Kaiser∗\nGoogle Brain\nlukaszkaiser@google.com\nIllia Polosukhin∗ ‡\nillia.polosukhin@gmail.com\nAbstract\nThe dominant sequence transduction models are based on complex recurrent or\nconvolutional neural networks that include an encoder and a decoder. The best\nperforming models also connect the encoder and decoder through an attention\nmechanism. We propose a new simple network archit

In [27]:
response

{'input': 'What is Attetion?',
 'context': [Document(metadata={}, page_content='Provided proper attribution is provided, Google hereby grants permission to\nreproduce the tables and figures in this paper solely for use in journalistic or\nscholarly works.\nAttention Is All You Need\nAshish Vaswani∗\nGoogle Brain\navaswani@google.comNoam Shazeer∗\nGoogle Brain\nnoam@google.comNiki Parmar∗\nGoogle Research\nnikip@google.comJakob Uszkoreit∗\nGoogle Research\nusz@google.com\nLlion Jones∗\nGoogle Research\nllion@google.comAidan N. Gomez∗ †\nUniversity of Toronto\naidan@cs.toronto.eduŁukasz Kaiser∗\nGoogle Brain\nlukaszkaiser@google.com\nIllia Polosukhin∗ ‡\nillia.polosukhin@gmail.com\nAbstract\nThe dominant sequence transduction models are based on complex recurrent or\nconvolutional neural networks that include an encoder and a decoder. The best\nperforming models also connect the encoder and decoder through an attention\nmechanism. We propose a new simple network architecture, the Transfo

In [28]:

query2="Explain in depth"
response2 = rag_chain.invoke({'input': query2})
print(f"Response generated: {response2}")
            
memory.save_context({"input": query2}, {"output": str(response2)})

Response generated: {'input': 'Explain in depth', 'context': [Document(metadata={}, page_content='n\nshould\nbe\njust\n-\nthis\nis\nwhat\nwe\nare\nmissing\n,\nin\nmy\nopinion\n.\n<EOS>\n<pad>\nThe\nLaw\nwill\nnever\nbe\nperfect\n,\nbut\nits\napplication\nshould\nbe\njust\n-\nthis\nis\nwhat\nwe\nare\nmissing\n,\nin\nmy\nopinion\n.\n<EOS>\n<pad>\nInput-Input Layer5\nThe\nLaw\nwill\nnever\nbe\nperfect\n,\nbut\nits\napplication\nshould\nbe\njust\n-\nthis\nis\nwhat\nwe\nare\nmissing\n,\nin\nmy\nopinion\n.\n<EOS>\n<pad>\nThe\nLaw\nwill\nnever\nbe\nperfect\n,\nbut\nits\napplication\nshould\nbe\njust\n-\nthis\nis\nwhat\nwe\nare\nmissing\n,\nin\nmy\nopinion\n.\n<EOS>\n<pad>Figure 5: Many of the attention heads exhibit behaviour that seems related to the structure of the\nsentence. We give two such examples above, from two different heads from the encoder self-attention\nat layer 5 of 6. The heads clearly learned to perform different tasks.\n15'), Document(metadata={}, page_content='n\nshould\nb

In [29]:
response2['answer']

'Attention, in the context of the retrieved information, refers to a mechanism in deep learning models, particularly in the field of natural language processing. The attention mechanism enables the model to focus on particular parts of the input when generating the output, much like human attention. It is an essential component of models like transformers, used in sentence structure understanding tasks.\n'

# Trying now with  create_history_aware_retriever 

In [30]:
from langchain.chains import create_history_aware_retriever


In [31]:
retriever_prompt = (
    "Given a chat history and the latest user question which might reference context in the chat history,"
    "formulate a standalone question which can be understood without the chat history."
    "Do NOT answer the question, just reformulate it if needed and otherwise return it as is."
)
     

contextualize_q_prompt  = ChatPromptTemplate.from_messages(
    [
        ("system", retriever_prompt),
        MessagesPlaceholder(variable_name="chat_history"),
        ("human", "{input}"),


     ]
)
     

history_aware_retriever = create_history_aware_retriever(llm,retriever,contextualize_q_prompt)
     

In [32]:
from langchain_core.messages import HumanMessage, AIMessage


qa_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system_prompt),
        MessagesPlaceholder("chat_history"),
        ("human", "{input}"),
    ]
)
     

question_answer_chain = create_stuff_documents_chain(llm, qa_prompt)
     

rag_chain = create_retrieval_chain(history_aware_retriever, question_answer_chain)
     


chat_history = []
     

question1 = "what is Encoder?"
     

message1= rag_chain.invoke({"input": question1, "chat_history": chat_history})
     

message1["answer"]

'The context provided does not include a specific definition or explanation of what an encoder is.'

In [33]:
chat_history.extend(
    [
        HumanMessage(content=question1),
        AIMessage(content=message1["answer"]),
    ]
)
    
chat_history



[HumanMessage(content='what is Encoder?', additional_kwargs={}, response_metadata={}),
 AIMessage(content='The context provided does not include a specific definition or explanation of what an encoder is.', additional_kwargs={}, response_metadata={})]

In [34]:
second_question = "How it is related with decoder ?"
message2 = rag_chain.invoke({"input": second_question, "chat_history": chat_history})

print(message2["answer"])

In the context of machine learning and language modeling, an encoder is part of an encoder-decoder architecture. The encoder processes the input data and generates a condensed representation, often called a context or hidden state. This output is then fed into the decoder, which uses it to generate the final output. This structure is often used in tasks such as machine translation or sequence-to-sequence prediction.


In [35]:
third_question = "How both are used in attention ?"
message3 = rag_chain.invoke({"input": third_question, "chat_history": chat_history})

print(message3["answer"])

In the attention model, both encoder and decoder are used in different ways. In "encoder-decoder attention" layers, the queries come from the previous decoder layer, and the keys and values come from the output of the encoder. This allows each position in the decoder to attend over all positions in the input sequence. The encoder contains self-attention layers where all of the keys, values, and queries come from the same place, which is the output of the previous layer in the encoder.


# Creating a session so it goes on using runnable chain 

In [36]:
from langchain_community.chat_message_histories import ChatMessageHistory
from langchain_core.chat_history import BaseChatMessageHistory
from langchain_core.runnables.history import RunnableWithMessageHistory

In [37]:
store = {}

In [38]:
def get_session_history(session_id: str) -> BaseChatMessageHistory:
    if session_id not in store:
        store[session_id] = ChatMessageHistory()
    return store[session_id]

In [39]:
conversational_rag_chain = RunnableWithMessageHistory(
    rag_chain,
    get_session_history,
    input_messages_key="input",
    history_messages_key="chat_history",
    output_messages_key="answer",
)
     

In [40]:
conversational_rag_chain.invoke(
    {"input": "What is decoder?"},
    config={
        "configurable": {"session_id": "abc123"}
    },  # constructs a key "abc123" in `store`.
)["answer"]

'The decoder in this context is composed of a stack of six identical layers. In addition to the two sub-layers in each encoder layer, the decoder inserts a third sub-layer, which performs multi-head attention over the output of the encoder stack. It also uses residual connections around each of the sub-layers, followed by layer normalization, and modifies the self-attention sub-layer to prevent positions from attending to subsequent positions, ensuring predictions for each position depend only on the known outputs at positions less than it.'

In [41]:
store


{'abc123': InMemoryChatMessageHistory(messages=[HumanMessage(content='What is decoder?', additional_kwargs={}, response_metadata={}), AIMessage(content='The decoder in this context is composed of a stack of six identical layers. In addition to the two sub-layers in each encoder layer, the decoder inserts a third sub-layer, which performs multi-head attention over the output of the encoder stack. It also uses residual connections around each of the sub-layers, followed by layer normalization, and modifies the self-attention sub-layer to prevent positions from attending to subsequent positions, ensuring predictions for each position depend only on the known outputs at positions less than it.', additional_kwargs={}, response_metadata={})])}

In [42]:
conversational_rag_chain.invoke(
    {"input": "What is opposite of this?"},
    config={"configurable": {"session_id": "abc123"}},
)["answer"]

'The opposite of a decoder in this context would be an encoder. An encoder in sequence modeling is designed to understand and transform the sequential input data into a compressed representation, often capturing the dependencies in the data. While the decoder generates output sequences based on these representations, an encoder provides the required input for the decoder by condensing the essential information from the source data.'

In [43]:
for message in store["abc123"].messages:
    if isinstance(message, AIMessage):
        prefix = "AI"
    else:
        prefix = "User"

    print(f"{prefix}: {message.content}\n")

User: What is decoder?

AI: The decoder in this context is composed of a stack of six identical layers. In addition to the two sub-layers in each encoder layer, the decoder inserts a third sub-layer, which performs multi-head attention over the output of the encoder stack. It also uses residual connections around each of the sub-layers, followed by layer normalization, and modifies the self-attention sub-layer to prevent positions from attending to subsequent positions, ensuring predictions for each position depend only on the known outputs at positions less than it.

User: What is opposite of this?

AI: The opposite of a decoder in this context would be an encoder. An encoder in sequence modeling is designed to understand and transform the sequential input data into a compressed representation, often capturing the dependencies in the data. While the decoder generates output sequences based on these representations, an encoder provides the required input for the decoder by condensing t

# Using PGVector 

In [56]:
!pip3 install pgvector psycopg2 langchain langchain-community


Collecting psycopg2
  Using cached psycopg2-2.9.10.tar.gz (385 kB)
  Preparing metadata (setup.py) ... [?25ldone
Using legacy 'setup.py install' for psycopg2, since package 'wheel' is not installed.
Installing collected packages: psycopg2
  Running setup.py install for psycopg2 ... [?25lerror
  [1;31merror[0m: [1msubprocess-exited-with-error[0m
  
  [31m×[0m [32mRunning setup.py install for psycopg2[0m did not run successfully.
  [31m│[0m exit code: [1;36m1[0m
  [31m╰─>[0m [31m[40 lines of output][0m
  [31m   [0m running install
  [31m   [0m running build
  [31m   [0m running build_py
  [31m   [0m creating build
  [31m   [0m creating build/lib.linux-x86_64-3.10
  [31m   [0m creating build/lib.linux-x86_64-3.10/psycopg2
  [31m   [0m copying lib/pool.py -> build/lib.linux-x86_64-3.10/psycopg2
  [31m   [0m copying lib/_ipaddress.py -> build/lib.linux-x86_64-3.10/psycopg2
  [31m   [0m copying lib/sql.py -> build/lib.linux-x86_64-3.10/psycopg2
  [31m   

In [44]:
from langchain_openai import OpenAIEmbeddings
from langchain.vectorstores.pgvector import PGVector

In [45]:
import PyPDF2

def extract_text_from_pdf(pdf_path):
    with open(pdf_path, "rb") as file:
        reader = PyPDF2.PdfReader(file)
        text = ""
        for page in range(len(reader.pages)):
            text += reader.pages[page].extract_text() + "\n"
        return text,reader

pdf_path = "attention.pdf"  
pdf_text,pdf = extract_text_from_pdf(pdf_path)


In [46]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

text_splitter=RecursiveCharacterTextSplitter(separators='/n',chunk_size=1000,chunk_overlap=10)

In [47]:
from langchain.schema import Document
document = Document(page_content=pdf_text)
doc_text=text_splitter.split_documents([document])

In [48]:
print(doc_text)

[Document(metadata={}, page_content='Provided proper attribution is provided, Google hereby grants permission to\nreproduce the tables and figures in this paper solely for use in journalistic or\nscholarly works.\nAttention Is All You Need\nAshish Vaswani∗\nGoogle Brain\navaswani@google.comNoam Shazeer∗\nGoogle Brain\nnoam@google.comNiki Parmar∗\nGoogle Research\nnikip@google.comJakob Uszkoreit∗\nGoogle Research\nusz@google.com\nLlion Jones∗\nGoogle Research\nllion@google.comAidan N. Gomez∗ †\nUniversity of Toronto\naidan@cs.toronto.eduŁukasz Kaiser∗\nGoogle Brain\nlukaszkaiser@google.com\nIllia Polosukhin∗ ‡\nillia.polosukhin@gmail.com\nAbstract\nThe dominant sequence transduction models are based on complex recurrent or\nconvolutional neural networks that include an encoder and a decoder. The best\nperforming models also connect the encoder and decoder through an attention\nmechanism. We propose a new simple network architecture, the Transformer,\nbased solely on attention mechanisms

In [58]:
import psycopg2

# Replace with your actual credentials
conn = psycopg2.connect("postgresql://mysuer:mypassword@localhost:5432/mydatabase")

print("Connected to PostgreSQL successfully!")
conn.close()


Connected to PostgreSQL successfully!


In [63]:
connection = "postgresql+psycopg2://postgres:password@localhost:5432/mydatabase"  
collection_name = "my_docs"

embeddings=OpenAIEmbeddings()

vector_store = PGVector(
    embedding_function=embeddings,
    collection_name=collection_name,
    connection_string=connection,
    use_jsonb=True,
)

In [65]:
vector_store.add_documents(doc_text)

['86f7031e-cd24-4256-b471-e916f71bcdb8',
 '9587b297-5950-4e2e-ab0b-08644061b216',
 '96740451-23c3-4287-8907-e108b9bddf0b',
 '03726700-1f3c-4694-8eb4-880956d0f13a',
 '875b0eaf-026f-4585-836e-2de21a7df170',
 'b672df4e-e14e-48c6-a377-b56bdd4b2e5f',
 '267e5337-2c28-45fc-9380-7f99e2fd5330',
 '84ac944e-99f4-4a76-be57-783bb82a471c',
 'a795bf43-03ac-48fb-a75a-c84ec0caefc5',
 '0824a2f4-127e-41ef-ac31-840b0b2adb92',
 '4587005e-bb3b-4c0a-8d52-bbb211a6f339',
 '045f1bbd-cbc4-42eb-926b-13b0ac101634',
 'e675b8e1-5516-4385-920e-460ba36afe28',
 'f6009b44-f7a1-4b2a-af42-72b4210926d5',
 '717e80ad-7b89-4ad5-aa1d-18cfdfceef0b',
 '0d2ec381-586a-4eab-91ff-889250f4d6cf',
 'dc1d092f-e1cf-49ef-8416-fa57f6f2f078',
 '1952ccfd-d272-476d-8b40-22933c67fc06',
 'a544f5f7-fdb4-42a6-adef-c595b09db13d',
 '254c294a-ff91-4c9c-b2a1-b7dc88dae621',
 '80b43aca-1534-42ba-b86b-e3781e82bdec',
 '9420077b-e9c5-4cb3-b786-cb0acbe7e4e7',
 'fcc0f854-4fb0-47a2-9a23-3c451cf46218',
 '741273e0-639a-4198-a560-13303d9accbd',
 '51199f83-d06d-

In [67]:
results = vector_store.similarity_search_with_score(query="encoder")
for doc, score in results:
    print(f"* [SIM={score:3f}] {doc.page_content} [{doc.metadata}]")

* [SIM=0.207727] networks
in particular, have been firmly established as state of the art approaches in sequence modeling and
transduction problems such as language modeling and machine translation [ 35,2,5]. Numerous
efforts have since continued to push the boundaries of recurrent language models and encoder-decoder
architectures [38, 24, 15].
Recurrent models typically factor computation along the symbol positions of the input and output
sequences. Aligning the positions to steps in computation time, they generate a sequence of hidden
states ht, as a function of the previous hidden state ht−1and the input for position t. This inherently
sequential nature precludes parallelization within training examples, which becomes critical at longer
sequence lengths, as memory constraints limit batching across examples. Recent work has achieved
significant improvements in computational efficiency through factorization tricks [ 21] and conditional
computation [ 32], while also improving model per

In [68]:
from langchain.chains import RetrievalQA

retriever=vector_store.as_retriever()

qa_chain = RetrievalQA.from_chain_type(
    llm=llm, 
    chain_type="stuff",  
    retriever=retriever  
)


In [69]:
qa_chain.invoke('what is attention')

{'query': 'what is attention',
 'result': 'In the context of this paper, "attention" refers to an attention mechanism in the field of machine learning, more specifically in the structure of neural networks. Attention mechanisms are used in sequence modeling and transduction models, allowing modeling of dependencies without concern for their distance in the input or output sequences. This means that the mechanism can focus on different aspects, or inputs, at different times, much like how human attention works—it focuses on one task at a time, ignoring other inputs until they become relevant. In the Transformer model proposed in this paper, the architecture relies entirely on an attention mechanism to draw global dependencies between input and output.'}

In [70]:
qa_chain.invoke('what is encoder')

{'query': 'what is encoder',
 'result': 'In the context provided, an encoder is part of a neural sequence transduction model. It maps an input sequence of symbol representations to a sequence of continuous representations. This type of encoder is composed of a stack of identical layers. Each layer has two sub-layers: a multi-head self-attention mechanism and a simple, position-wise fully connected feed-forward network. It uses a residual connection around each of these sub-layers, followed by layer normalization. The encoder plays an essential role in transforming the input data before it is processed by the decoder in the model.'}

In [72]:
qa_chain.invoke('what is decoder')

{'query': 'what is decoder',
 'result': 'In the given context, the decoder is a component of the "Transformer", a type of neural network used primarily for language translation tasks. The decoder is composed of a stack of six identical layers, each having three sub-layers. One of these sub-layers performs multi-head attention over the output of the encoder stack, and another is a self-attention sub-layer modified to prevent positions from attending to subsequent positions, so that the predictions for a position i can depend only on the known outputs at positions less than i. The decoder also has a residual connection around each of the sub-layers, followed by layer normalization. The output of each sub-layer is of dimension dmodel = 512.'}