In [1]:
# Data ingestion: plain-text file
from langchain_community.document_loaders import TextLoader

# Load speech.txt (path is relative to the working directory) via TextLoader.
loader = TextLoader(file_path="speech.txt")
text_doc = loader.load()

1

In [2]:
import os
from dotenv import load_dotenv

# Read variables from a .env file (if present) into the process environment.
load_dotenv()

# Fail fast with a readable message when the key is missing or empty.
# Writing os.getenv(...) straight back into os.environ raises an opaque
# "str expected, not NoneType" TypeError when the variable is unset, and is a
# no-op when it is set, so an explicit check replaces that assignment.
if not os.getenv("OPENAI_API_KEY"):
    raise RuntimeError(
        "OPENAI_API_KEY is not set; define it in the environment or in a .env file."
    )


In [6]:
# Data ingestion: web page
from langchain_community.document_loaders import WebBaseLoader
import bs4

# Load the HTML page, restricting parsing to elements with the CSS class
# "aws-page-content-main" via a BeautifulSoup SoupStrainer.
# NOTE(review): the recorded output of this cell shows an empty page_content,
# so this class filter may not match anything on the page — confirm the selector.
loader = WebBaseLoader(
    web_path="https://aws.amazon.com/what-is/application-performance-monitoring/",
    bs_kwargs={
        "parse_only": bs4.SoupStrainer(class_="aws-page-content-main"),
    },
)

text_doc = loader.load()
text_doc

[Document(metadata={'source': 'https://aws.amazon.com/what-is/application-performance-monitoring/'}, page_content='')]

In [10]:
from langchain_community.document_loaders import PyPDFLoader

# Data ingestion: PDF. Rebinds `loader` and `text_doc` to the contents of Attention.pdf.
loader = PyPDFLoader(file_path="Attention.pdf")
text_doc = loader.load()

In [13]:
# Split the loaded documents into overlapping chunks
from langchain.text_splitter import RecursiveCharacterTextSplitter

# chunk_size=1000 with chunk_overlap=200, so neighbouring chunks share some text.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=200,
)
docs = text_splitter.split_documents(text_doc)


In [15]:
# Embed the chunks with OpenAI embeddings and store them in a Chroma vector store
from langchain_community.embeddings import OpenAIEmbeddings
from langchain_community.vectorstores import Chroma

db = Chroma.from_documents(
    documents=docs,
    embedding=OpenAIEmbeddings(),
)


In [21]:
# Run a similarity search against the vector store and display the text of
# the first returned document.
query = "Authors"

result = db.similarity_search(query=query)
result[0].page_content

'[25] Mitchell P Marcus, Mary Ann Marcinkiewicz, and Beatrice Santorini. Building a large annotated\ncorpus of english: The penn treebank. Computational linguistics, 19(2):313–330, 1993.\n[26] David McClosky, Eugene Charniak, and Mark Johnson. Effective self-training for parsing. In\nProceedings of the Human Language Technology Conference of the NAACL, Main Conference,\npages 152–159. ACL, June 2006.\n[27] Ankur Parikh, Oscar Täckström, Dipanjan Das, and Jakob Uszkoreit. A decomposable attention\nmodel. In Empirical Methods in Natural Language Processing, 2016.\n[28] Romain Paulus, Caiming Xiong, and Richard Socher. A deep reinforced model for abstractive\nsummarization. arXiv preprint arXiv:1705.04304, 2017.\n[29] Slav Petrov, Leon Barrett, Romain Thibaux, and Dan Klein. Learning accurate, compact,\nand interpretable tree annotation. In Proceedings of the 21st International Conference on\nComputational Linguistics and 44th Annual Meeting of the ACL, pages 433–440. ACL, July\n2006.'

In [None]:
# FAISS vector store
from langchain_community.vectorstores import FAISS

# Rebinds `db` to a FAISS store built from the same chunks.
# OpenAIEmbeddings is not imported in this cell; it must already be in scope.
db = FAISS.from_documents(
    documents=docs,
    embedding=OpenAIEmbeddings(),
)



In [19]:
# Run a similarity search for a broad question and display the first
# returned Document (metadata and content).
query = "What is this about"

result = db.similarity_search(query=query)
result[0]

Document(metadata={'page': 13, 'source': 'Attention.pdf'}, page_content='Input-Input Layer5\nThe\nLaw\nwill\nnever\nbe\nperfect\n,\nbut\nits\napplication\nshould\nbe\njust\n-\nthis\nis\nwhat\nwe\nare\nmissing\n,\nin\nmy\nopinion\n.\n<EOS>\n<pad>\nThe\nLaw\nwill\nnever\nbe\nperfect\n,\nbut\nits\napplication\nshould\nbe\njust\n-\nthis\nis\nwhat\nwe\nare\nmissing\n,\nin\nmy\nopinion\n.\n<EOS>\n<pad>\nInput-Input Layer5\nThe\nLaw\nwill\nnever\nbe\nperfect\n,\nbut\nits\napplication\nshould\nbe\njust\n-\nthis\nis\nwhat\nwe\nare\nmissing\n,\nin\nmy\nopinion\n.\n<EOS>\n<pad>\nThe\nLaw\nwill\nnever\nbe\nperfect\n,\nbut\nits\napplication\nshould\nbe\njust\n-\nthis\nis\nwhat\nwe\nare\nmissing\n,\nin\nmy\nopinion\n.\n<EOS>\n<pad>\nFigure 4: Two attention heads, also in layer 5 of 6, apparently involved in anaphora resolution. Top:\nFull attentions for head 5. Bottom: Isolated attentions from just the word ‘its’ for attention heads 5\nand 6. Note that the attentions are very sharp for this word.\n1

In [22]:
from langchain_community.llms import Ollama

# Instantiate the Ollama LLM wrapper for the "llama3.2" model.
# presumably this needs a reachable Ollama server with that model pulled — verify.
llm = Ollama(model="llama3.2")

  llm = Ollama(model="llama3.2")


In [23]:
# Prompt template for the LLM, with {context} and {input} placeholders
from langchain_core.prompts import ChatPromptTemplate

prompt = ChatPromptTemplate.from_template(
    template="""

Answer the following question based only on the provided context. 
Think step by step before providing a detailed answer.
<context>
{context}
</context>

Question: {input} 
"""
)

In [24]:
# Chain that takes the documents we provide and puts them into the prompt's context
from langchain.chains.combine_documents import create_stuff_documents_chain

# Combines the LLM with the prompt; the queried documents are supplied when the chain runs.
document_chain = create_stuff_documents_chain(
    llm=llm,
    prompt=prompt,
)

In [25]:
# Retriever: returns documents for an unstructured query, using the vector store `db` as its backbone
retriever = db.as_retriever()


In [26]:
"""
Combine the retriever with the document chain: the retrieval chain takes a user query, passes it to the retriever to fetch the relevant documents, and then passes those documents together with the query to the LLM to generate a response.
"""

from langchain.chains import create_retrieval_chain

# Join the retriever and the document chain into a single retrieval chain.
retrieval_chain = create_retrieval_chain(
    retriever=retriever,
    combine_docs_chain=document_chain,
)



In [27]:
# Send a question through the retrieval chain and display the "answer" entry of the response.
response = retrieval_chain.invoke(
    {"input": "Scaled Dot Product attention"}
)
response["answer"]

"Based on the provided context, Scaled Dot-Product Attention is a technique used in the Transformer model to compute attention weights. Here's how it works step by step:\n\n1. The input consists of queries (Q) and keys (K) of dimension dk, and values (V) of dimension dv.\n2. The dot product of each query with all keys is computed.\n3. Each dot product is then divided by √dk to reduce the magnitude.\n4. A softmax function is applied to each set of dot products to obtain the weights on the values.\n5. The output matrix is obtained by multiplying the scaled dot products with the values, as shown in the equation:\nAttention(Q, K, V) = softmax(QKT√dk)V\n\nIn practice, queries, keys, and values are packed together into matrices Q, K, and V, respectively.\n\nThe scaling factor of √dk is used to prevent extremely large gradients when computing dot products for larger values of dk."