In [3]:
# 1. Install required packages
!pip install openai langchain chromadb pypdf langchain_community langchain_openai

Collecting langchain_openai
  Downloading langchain_openai-0.3.25-py3-none-any.whl.metadata (2.3 kB)
Downloading langchain_openai-0.3.25-py3-none-any.whl (69 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m69.2/69.2 kB[0m [31m1.8 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: langchain_openai
Successfully installed langchain_openai-0.3.25


In [4]:
from google.colab import userdata
openai_token = userdata.get('OPENAI_API_KEY')
import os
os.environ['OPENAI_API_KEY'] = openai_token

In [5]:
import os
import getpass
from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_openai import OpenAIEmbeddings
from langchain.vectorstores import Chroma
from langchain.schema import Document
import numpy as np

In [6]:
pdf_url = "https://arxiv.org/pdf/1706.03762.pdf"
pdf_path = "attention_is_all_you_need.pdf"

In [7]:
import requests
if not os.path.exists(pdf_path):
    r = requests.get(pdf_url)
    with open(pdf_path, "wb") as f:
        f.write(r.content)

In [8]:
loader = PyPDFLoader(pdf_path)
pages = loader.load()

In [9]:
pages

[Document(metadata={'producer': 'pdfTeX-1.40.25', 'creator': 'LaTeX with hyperref', 'creationdate': '2024-04-10T21:11:43+00:00', 'author': '', 'keywords': '', 'moddate': '2024-04-10T21:11:43+00:00', 'ptex.fullbanner': 'This is pdfTeX, Version 3.141592653-2.6-1.40.25 (TeX Live 2023) kpathsea version 6.3.5', 'subject': '', 'title': '', 'trapped': '/False', 'source': 'attention_is_all_you_need.pdf', 'total_pages': 15, 'page': 0, 'page_label': '1'}, page_content='Provided proper attribution is provided, Google hereby grants permission to\nreproduce the tables and figures in this paper solely for use in journalistic or\nscholarly works.\nAttention Is All You Need\nAshish Vaswani∗\nGoogle Brain\navaswani@google.com\nNoam Shazeer∗\nGoogle Brain\nnoam@google.com\nNiki Parmar∗\nGoogle Research\nnikip@google.com\nJakob Uszkoreit∗\nGoogle Research\nusz@google.com\nLlion Jones∗\nGoogle Research\nllion@google.com\nAidan N. Gomez∗ †\nUniversity of Toronto\naidan@cs.toronto.edu\nŁukasz Kaiser∗\nGoogl

In [10]:
splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
docs = splitter.split_documents(pages)

In [11]:
for i, doc in enumerate(docs):
    doc.metadata["source"] = "attention_is_all_you_need"
    doc.metadata["chunk_id"] = i

In [14]:
doc

Document(metadata={'producer': 'pdfTeX-1.40.25', 'creator': 'LaTeX with hyperref', 'creationdate': '2024-04-10T21:11:43+00:00', 'author': '', 'keywords': '', 'moddate': '2024-04-10T21:11:43+00:00', 'ptex.fullbanner': 'This is pdfTeX, Version 3.141592653-2.6-1.40.25 (TeX Live 2023) kpathsea version 6.3.5', 'subject': '', 'title': '', 'trapped': '/False', 'source': 'attention_is_all_you_need', 'total_pages': 15, 'page': 14, 'page_label': '15', 'chunk_id': 51}, page_content='Input-Input Layer5\nThe\nLaw\nwill\nnever\nbe\nperfect\n,\nbut\nits\napplication\nshould\nbe\njust\n-\nthis\nis\nwhat\nwe\nare\nmissing\n,\nin\nmy\nopinion\n.\n<EOS>\n<pad>\nThe\nLaw\nwill\nnever\nbe\nperfect\n,\nbut\nits\napplication\nshould\nbe\njust\n-\nthis\nis\nwhat\nwe\nare\nmissing\n,\nin\nmy\nopinion\n.\n<EOS>\n<pad>\nInput-Input Layer5\nThe\nLaw\nwill\nnever\nbe\nperfect\n,\nbut\nits\napplication\nshould\nbe\njust\n-\nthis\nis\nwhat\nwe\nare\nmissing\n,\nin\nmy\nopinion\n.\n<EOS>\n<pad>\nThe\nLaw\nwill\nnever

In [12]:
embeddings = OpenAIEmbeddings()
db = Chroma.from_documents(
    documents=docs,
    embedding=embeddings,
    persist_directory="./chromadb_tutorial"
)

In [13]:
db

<langchain_community.vectorstores.chroma.Chroma at 0x7c4adc4d6cd0>

In [15]:
query = "What is self-attention mechanism in transformers?"
results = db.similarity_search(query, k=3)
print("\nTop 3 relevant chunks for the query:")
for i, doc in enumerate(results):
    print(f"\n--- Result {i+1} ---\n", doc.page_content[:500])


Top 3 relevant chunks for the query:

--- Result 1 ---
 The Transformer uses multi-head attention in three different ways:
• In "encoder-decoder attention" layers, the queries come from the previous decoder layer,
and the memory keys and values come from the output of the encoder. This allows every
position in the decoder to attend over all positions in the input sequence. This mimics the
typical encoder-decoder attention mechanisms in sequence-to-sequence models such as
[38, 2, 9].
• The encoder contains self-attention layers. In a self-attention la

--- Result 2 ---
 in the distance between positions, linearly for ConvS2S and logarithmically for ByteNet. This makes
it more difficult to learn dependencies between distant positions [ 12]. In the Transformer this is
reduced to a constant number of operations, albeit at the cost of reduced effective resolution due
to averaging attention-weighted positions, an effect we counteract with Multi-Head Attention as
described in section 3.2.
Se

In [16]:
#Metadata filtering: Only retrieve chunks with chunk_id < 5
filtered_results = db.similarity_search(
    query,
    k=2,
    filter={"chunk_id": {"$lt": 5}}
)
print("\nFiltered by metadata (chunk_id < 5):")
for doc in filtered_results:
    print("\n", doc.page_content[:300])


Filtered by metadata (chunk_id < 5):

 Provided proper attribution is provided, Google hereby grants permission to
reproduce the tables and figures in this paper solely for use in journalistic or
scholarly works.
Attention Is All You Need
Ashish Vaswani∗
Google Brain
avaswani@google.com
Noam Shazeer∗
Google Brain
noam@google.com
Niki Par

 mechanism. We propose a new simple network architecture, the Transformer,
based solely on attention mechanisms, dispensing with recurrence and convolutions
entirely. Experiments on two machine translation tasks show these models to
be superior in quality while being more parallelizable and requiring


In [17]:
# Add a new document
new_doc = Document(
    page_content="This is a custom note about attention mechanisms.",
    metadata={"source": "custom_note", "chunk_id": 999}
)
db.add_documents([new_doc])
print("\nAdded a custom note.")


Added a custom note.


In [20]:
# Update: Chroma does not support in-place update, so delete and re-add

# First, retrieve the IDs of the documents to delete based on the filter
docs_to_delete = db.get(where={"chunk_id": 999})
ids_to_delete = docs_to_delete["ids"]

# Now, delete the documents using their IDs
if ids_to_delete:
    db.delete(ids=ids_to_delete)

updated_doc = Document(
    page_content="This is an UPDATED note about attention mechanisms.",
    metadata={"source": "custom_note", "chunk_id": 999}
)
db.add_documents([updated_doc])
print("Updated the custom note.")

Updated the custom note.


In [22]:
# First, retrieve the IDs of the documents to delete based on the filter
docs_to_delete = db.get(where={"chunk_id": 999})
ids_to_delete = docs_to_delete["ids"]

# Now, delete the documents using their IDs
if ids_to_delete:
    db.delete(ids=ids_to_delete)

print("Deleted the custom note.")

Deleted the custom note.


In [23]:
# Get embeddings for two texts
vec1 = embeddings.embed_query("Self-attention in transformers")
vec2 = embeddings.embed_query("The attention mechanism in neural networks")

def euclidean_distance(a, b):
    return np.linalg.norm(np.array(a) - np.array(b))

distance = euclidean_distance(vec1, vec2)
print(f"\nEuclidean distance between the two queries: {distance:.4f}")


Euclidean distance between the two queries: 0.5362


In [24]:
print(vec1)
print(vec2)

[-0.030005134642124176, -0.01743168570101261, 0.026609890162944794, -0.016368946060538292, 0.0036919841077178717, 0.003086429787799716, -0.005945129785686731, -0.023214643821120262, 0.0027051547076553106, -0.013739701360464096, 0.040135663002729416, 0.026278646662831306, 0.005810562055557966, 0.013767305761575699, 0.017017630860209465, -0.0007172627956606448, 0.029039008542895317, 0.02764502540230751, -0.004858237225562334, -0.022000085562467575, -0.01663118042051792, 0.009357627481222153, -0.014643720351159573, -0.017542099580168724, -0.009399032220244408, -0.012959899380803108, 0.02107536420226097, -0.03188218176364899, -0.006562760565429926, -0.04256478324532509, 0.022814391180872917, -0.0034452767577022314, -0.0006353145581670105, -0.0213375985622406, -0.002762087155133486, 0.00033210605033673346, 0.018384011462330818, -0.016934821382164955, 0.03406286612153053, -0.005092867650091648, 0.012676962651312351, 0.020136840641498566, -0.00408533588051796, 0.008764149621129036, -0.0215308

In [25]:
#RAG: Retrieval-Augmented Generation
from langchain.chains import RetrievalQA
from langchain_openai import ChatOpenAI

# Create a retriever from the vector database
retriever = db.as_retriever(search_kwargs={"k": 3})

# Set up RetrievalQA chain
qa_chain = RetrievalQA.from_chain_type(
    llm=ChatOpenAI(model="gpt-3.5-turbo", temperature=0),
    retriever=retriever,
    return_source_documents=True
)

# Ask a question using RAG
question = "Explain the concept of self-attention in transformers."
rag_result = qa_chain({"query": question})

print("\nRAG Answer:")
print(rag_result["result"])
print("\nSources used:")
for doc in rag_result["source_documents"]:
    print("-", doc.metadata)

  rag_result = qa_chain({"query": question})



RAG Answer:
Self-attention, also known as intra-attention, is an attention mechanism that allows different positions within a single sequence to interact with each other. In the context of transformers, self-attention enables each position in the sequence to attend to all other positions, capturing dependencies and relationships between different parts of the input sequence. This mechanism helps the transformer model compute representations of the input and output sequences without relying on traditional recurrent neural networks (RNNs) or convolutional structures. By using self-attention, transformers can effectively learn long-range dependencies and relationships within the input data, making them powerful for various natural language processing tasks like reading comprehension, summarization, and textual entailment.

Sources used:
- {'author': '', 'ptex.fullbanner': 'This is pdfTeX, Version 3.141592653-2.6-1.40.25 (TeX Live 2023) kpathsea version 6.3.5', 'producer': 'pdfTeX-1.40.25