In [1]:
## load API keys from the .env file into the environment
import os
from dotenv import load_dotenv

# Load variables from a .env file (python-dotenv) into the process environment.
load_dotenv()

import warnings

# load_dotenv() and the shell have already populated os.environ, so copying
# os.getenv(key) back into os.environ[key] changes nothing when the key exists
# and raises "TypeError: str expected, not NoneType" when it does not.
# Check for the keys instead and report any that are missing.
REQUIRED_API_KEYS = ("LANGCHAIN_API_KEY", "OPENAI_API_KEY")
missing_api_keys = [key for key in REQUIRED_API_KEYS if os.getenv(key) is None]
if missing_api_keys:
    warnings.warn(
        "Missing environment variable(s): "
        + ", ".join(missing_api_keys)
        + ". Add them to the .env file before running the cells that use them."
    )

#### 1. RAG Project: Load Data

In [2]:
## read a text file

from langchain_community.document_loaders import TextLoader

## Point a loader at the local text file, then load its contents
text_loader = TextLoader("./alexander.txt")
text_document = text_loader.load()
## text_document


In [3]:
## read a web page

import bs4
from langchain_community.document_loaders import WebBaseLoader

## Limit parsing to elements with the post-title, post-content or post-header class
post_sections = bs4.SoupStrainer(
    class_=("post-title", "post-content", "post-header")
)

## web_paths takes the page URL(s); bs_kwargs is forwarded to BeautifulSoup
web_loader = WebBaseLoader(
    web_paths=("https://lilianweng.github.io/posts/2024-04-12-diffusion-video/",),
    bs_kwargs={"parse_only": post_sections},
)

web_document = web_loader.load()
## web_document

USER_AGENT environment variable not set, consider setting it to identify your requests.


In [4]:
## read a PDF file

from langchain_community.document_loaders import PyPDFLoader
# Point a loader at the PDF in the working directory, then load it.
pdf_loader = PyPDFLoader('./Attention-Need.pdf')
pdf_document = pdf_loader.load()

#pdf_document


#### 2. RAG Project: Transform Data

In [5]:
### convert document to chunks

from langchain.text_splitter import RecursiveCharacterTextSplitter

## Splitter settings: chunk_size=1000 with a chunk_overlap of 200 between neighbouring chunks
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=200,
)

## Preview the first five chunks of the PDF
text_splitter.split_documents(pdf_document)[:5]

[Document(metadata={'source': './Attention-Need.pdf', 'page': 0}, page_content='Attention Is All You Need\nAshish Vaswani∗\nGoogle Brain\navaswani@google.comNoam Shazeer∗\nGoogle Brain\nnoam@google.comNiki Parmar∗\nGoogle Research\nnikip@google.comJakob Uszkoreit∗\nGoogle Research\nusz@google.com\nLlion Jones∗\nGoogle Research\nllion@google.comAidan N. Gomez∗†\nUniversity of Toronto\naidan@cs.toronto.eduŁukasz Kaiser∗\nGoogle Brain\nlukaszkaiser@google.com\nIllia Polosukhin∗‡\nillia.polosukhin@gmail.com\nAbstract\nThe dominant sequence transduction models are based on complex recurrent or\nconvolutional neural networks that include an encoder and a decoder. The best\nperforming models also connect the encoder and decoder through an attention\nmechanism. We propose a new simple network architecture, the Transformer,\nbased solely on attention mechanisms, dispensing with recurrence and convolutions\nentirely. Experiments on two machine translation tasks show these models to\nbe superior 

In [6]:
# Keep the full list of PDF chunks; the vector-store cell reads from `documents`.
documents = text_splitter.split_documents(pdf_document)
#documents

#### 3. RAG Project: Convert Data Chunks to Vector Embeddings and Build the Vector Store

In [None]:
#from langchain_community.embeddings import OpenAIEmbeddings
#from langchain_community.embeddings import OllamaEmbeddings

#embeddings = OpenAIEmbeddings(model="text-embedding-ada-002", chunk_size=1, max_retries=1)
#db = Chroma.from_documents(documents=documents, embedding=OllamaEmbeddings(), persist_directory="./chroma_db")

## The Chroma code below works, but it is left commented out because this notebook uses FAISS only
# # # from langchain_openai import OpenAIEmbeddings
# # # from langchain_community.vectorstores import Chroma

# # # db = Chroma.from_documents(documents=documents, embedding=OpenAIEmbeddings(), persist_directory="./chroma_db")


In [None]:
## vector database querying

## The query code below works, but it is left commented out because this notebook uses FAISS only
# # # prompt          = "An attention function can be described as mapping"
# # # prompt_results  = db.similarity_search(prompt)
# # # print (prompt_results[0].page_content)

In [7]:
## FAISS Vector Database
from langchain_openai import OpenAIEmbeddings
from langchain_community.vectorstores import FAISS

# Embed only the first 15 chunks with OpenAI embeddings and index them in FAISS.
# NOTE(review): the 15-chunk cap looks like a cost/time limit -- confirm it is intentional.
indexed_chunks = documents[:15]
db = FAISS.from_documents(indexed_chunks, OpenAIEmbeddings())

In [8]:
# Display the vector store object to confirm it was created (shows only its repr).
db

<langchain_community.vectorstores.faiss.FAISS at 0x266d9161ae0>

In [9]:
# Search the index with a sentence fragment from the paper and show the top match.
query = "An attention function can be described as mapping a query "
result = db.similarity_search(query)
result[0].page_content

'around each of the sub-layers, followed by layer normalization. We also modify the self-attention\nsub-layer in the decoder stack to prevent positions from attending to subsequent positions. This\nmasking, combined with fact that the output embeddings are offset by one position, ensures that the\npredictions for position ican depend only on the known outputs at positions less than i.\n3.2 Attention\nAn attention function can be described as mapping a query and a set of key-value pairs to an output,\nwhere the query, keys, values, and output are all vectors. The output is computed as a weighted sum\nof the values, where the weight assigned to each value is computed by a compatibility function of the\nquery with the corresponding key.\n3.2.1 Scaled Dot-Product Attention\nWe call our particular attention "Scaled Dot-Product Attention" (Figure 2). The input consists of\nqueries and keys of dimension dk, and values of dimension dv. We compute the dot products of the\n3'

In [10]:
from langchain_community.llms import Ollama
from langchain_openai import ChatOpenAI
## Disabled alternative: a local Llama 2 model served through Ollama
# llm = Ollama(model="llama2")
# llm

## Active model: OpenAI chat model
llm = ChatOpenAI(
    model="gpt-3.5-turbo-1106",
    temperature=0.6,
)
llm

ChatOpenAI(client=<openai.resources.chat.completions.Completions object at 0x00000266D9398CD0>, async_client=<openai.resources.chat.completions.AsyncCompletions object at 0x00000266D9399660>, root_client=<openai.OpenAI object at 0x00000266D91637C0>, root_async_client=<openai.AsyncOpenAI object at 0x00000266D9398D30>, model_name='gpt-3.5-turbo-1106', temperature=0.6, model_kwargs={}, openai_api_key=SecretStr('**********'))

In [11]:
## prompt template
## The template has two placeholders: {context} and {input}.
## {input} matches the "input" key passed to retrieval_chain.invoke; {context} is
## presumably filled with the retrieved documents by the chain -- confirm against the LangChain docs.
from langchain_core.prompts import ChatPromptTemplate
prompt = ChatPromptTemplate.from_template("""
Please answer the following question with the provided context. 
Analyze the context step by step before giving an answer. 
You are an expert assistant. 
<context>
{context}
</context>
Question: {input}""")

In [13]:
## create a chain (a chain is a sequence of calls to an LLM or data-preprocessing steps; we use LCEL, the LangChain Expression Language)
from pydantic import BaseModel
from langchain.chains.combine_documents import create_stuff_documents_chain

# Combine the chat model and the prompt template into a single document chain.
document_chain = create_stuff_documents_chain(llm, prompt)


In [14]:
"""
Retrievers: A retriever is an interface that returns documents given
 an unstructured query. It is more general than a vector store.
 A retriever does not need to be able to store documents, only to 
 return (or retrieve) them. Vector stores can be used as the backbone
 of a retriever, but there are other types of retrievers as well. 
 https://python.langchain.com/docs/modules/data_connection/retrievers/   
"""

# Expose the FAISS store through the retriever interface, then display it.
retriever = db.as_retriever()
retriever

VectorStoreRetriever(tags=['FAISS', 'OpenAIEmbeddings'], vectorstore=<langchain_community.vectorstores.faiss.FAISS object at 0x00000266D9161AE0>, search_kwargs={})

In [15]:
"""
Retrieval chain:This chain takes in a user inquiry, which is then
passed to the retriever to fetch relevant documents. Those documents 
(and original inputs) are then passed to an LLM to generate a response
https://python.langchain.com/docs/modules/chains/
"""
from langchain.chains import create_retrieval_chain
# Put the retriever in front of the document chain.
retrieval_chain = create_retrieval_chain(
    retriever,
    document_chain,
)

In [16]:
# Send one question through the retrieval chain; the question goes under the "input" key.
question_payload = {"input": "What is Multi-Head Attention"}
response = retrieval_chain.invoke(question_payload)

In [17]:
# Display the value stored under the 'answer' key of the chain's response.
response ['answer']

'Multi-Head Attention consists of several attention layers running in parallel. It allows the model to jointly attend to information from different representation subspaces at different positions. With a single attention head, averaging inhibits this. Multi-Head Attention is a mechanism used in the Transformer model to compute representations of its input and output without using sequence-aligned RNNs or convolution. It involves linearly projecting the queries, keys, and values multiple times with different, learned linear projections, and then performing the attention function in parallel on each of these projected versions. This yields output values that are concatenated and projected again to obtain the final values.'