In [4]:
import os
from dotenv import load_dotenv

# Pull configuration from a local .env file (if present) into the process
# environment, so secrets never have to be written into the notebook.
load_dotenv()

# SECURITY: the Pinecone key must come from the environment / .env file.
# A literal key was previously committed in this cell; treat that key as
# compromised and revoke it in the Pinecone console.
pinecone_api_key = os.getenv('PINECONE_API_KEY')
if not pinecone_api_key:
    raise RuntimeError(
        "PINECONE_API_KEY is not set. Export it or add it to your .env file."
    )

# May be None; it is only passed to the OpenAI classes when MODEL names a
# 'gpt' model.
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")

# Name of the LLM to use, e.g. 'gpt-3.5-turbo' or 'llama3'.
MODEL = 'llama3'

In [5]:
from langchain_openai import ChatOpenAI
from langchain_community.llms import Ollama
from langchain_openai.embeddings import OpenAIEmbeddings
from langchain_community.embeddings import OllamaEmbeddings



# Choose the chat model and the embedding backend from the configured MODEL
# name: names beginning with 'gpt' use the OpenAI classes, every other name
# is handed to the Ollama classes.
if not MODEL.startswith('gpt'):
    model = Ollama(model=MODEL)
    embeddings = OllamaEmbeddings(model=MODEL)
else:
    model = ChatOpenAI(openai_api_key=OPENAI_API_KEY, model_name=MODEL)
    embeddings = OpenAIEmbeddings(openai_api_key=OPENAI_API_KEY)

In [6]:
from langchain_community.document_loaders import PyPDFLoader
from langchain_text_splitters import MarkdownHeaderTextSplitter


# Splitter configuration: break the text at level-2 markdown headers and keep
# the header line inside each resulting chunk.
headers_to_split_on = [("##", "Header 2")]
markdown_splitter = MarkdownHeaderTextSplitter(
    headers_to_split_on=headers_to_split_on,
    strip_headers=False,
)

# Load the PDF and collect the text content of every loaded document.
loader = PyPDFLoader("1706.03762v7.pdf")
pdf_text = loader.load()
pdf_text_list = [page.page_content for page in pdf_text]

# Merge everything into one newline-separated string before splitting.
doc = "\n".join(pdf_text_list)

# NOTE(review): text extracted from a PDF may not contain any '##' markdown
# headers, in which case this splitter presumably returns the paper largely
# unsplit -- confirm by checking len(md_header_splits).
md_header_splits = markdown_splitter.split_text(doc)

In [7]:
from langchain.prompts import PromptTemplate

# Prompt layout: an instruction line, two blank lines, then the retrieved
# context followed by the user's question.
template = (
    "\n"
    "Answer the questions based on the context from the given data. "
    "If you don't know the answer, just say that you don't know.\n"
    "\n"
    "\n"
    "context: {context}\n"
    "question: {question}\n"
)

prompt = PromptTemplate(template=template)

# Render once with placeholder values so the cell output shows the final text.
prompt.format(
    context='Here is some context',
    question='here is a question',
)


"\nAnswer the questions based on the context from the given data. If you don't know the answer, just say that you don't know.\n\n\ncontext: Here is some context\nquestion: here is a question\n"

In [16]:
from pinecone.grpc import PineconeGRPC as Pinecone
from pinecone import ServerlessSpec

from langchain_pinecone import PineconeEmbeddings
from langchain_pinecone import PineconeVectorStore
import os
import time


pc = Pinecone(api_key=pinecone_api_key)

namespace = "RAG_Chatbot_vectors"

# An index is the structure where the embeddings (vector representations of
# the data) are stored and managed, so that similar vectors can be searched
# efficiently at query time.
index_name = "docs-quickstart-index"

# Model used to convert text into dense vectors.
model_name = "multilingual-e5-large"

# Dimension of the vectors stored in the index. It has to agree with what
# `model_name` produces (1024 for multilingual-e5-large per the Pinecone docs).
embedding_dim = 1024

# Upper bound, in seconds, on how long to wait for the new index to be ready.
index_ready_timeout_s = 120

# Drop any existing index so that previous state is wiped clean on every run.
if index_name in pc.list_indexes().names():
    pc.delete_index(name=index_name)

# Re-create the index. (The former second "create if missing" guard was
# unreachable after this unconditional create and has been removed.)
pc.create_index(
    name=index_name,
    dimension=embedding_dim,
    metric="cosine",  # cosine similarity
    # ServerlessSpec specifies the cloud provider and region for the index.
    spec=ServerlessSpec(
        cloud='aws',
        region='us-east-1'
    )
)

# Do not upsert until Pinecone reports the index as ready; give up after a
# bounded wait instead of looping forever.
ready_deadline = time.monotonic() + index_ready_timeout_s
while not pc.describe_index(index_name).status['ready']:
    if time.monotonic() > ready_deadline:
        raise TimeoutError(
            f"Pinecone index '{index_name}' was not ready after "
            f"{index_ready_timeout_s} seconds"
        )
    time.sleep(1)

# NOTE: from here on `embeddings` refers to the Pinecone-hosted embedding
# model; it replaces any `embeddings` object created in an earlier cell.
embeddings = PineconeEmbeddings(
    model=model_name,
    pinecone_api_key=pinecone_api_key
)

# Embed the document chunks and upsert them into the index under `namespace`.
docsearch = PineconeVectorStore.from_documents(
    documents=md_header_splits,
    index_name=index_name,
    embedding=embeddings,
    namespace=namespace
)

# Short pause kept from the original cell -- presumably to let the upserted
# vectors become visible to the queries that follow; confirm whether needed.
time.sleep(1)

In [17]:
index = pc.Index(index_name)

# Options shared by every lookup below: return the single best match together
# with its vector values and its stored metadata.
lookup_options = dict(
    namespace=namespace,
    top_k=1,
    include_values=True,
    include_metadata=True,
)

# Walk over what index.list() yields for this namespace. Each yielded item is
# indexed with [0], so only its first id is queried and printed.
for ids in index.list(namespace=namespace):
    query = index.query(id=ids[0], **lookup_options)
    print(query)

{'matches': [{'id': 'a4a9c950-cbb9-42d5-9a11-5569fb74d214',
              'metadata': {'text': 'Provided proper attribution is provided, '
                                   'Google hereby grants permission to\n'
                                   'reproduce the tables and figures in this '
                                   'paper solely for use in journalistic or\n'
                                   'scholarly works.\n'
                                   'Attention Is All You Need\n'
                                   'Ashish Vaswani∗\n'
                                   'Google Brain\n'
                                   'avaswani@google.comNoam Shazeer∗\n'
                                   'Google Brain\n'
                                   'noam@google.comNiki Parmar∗\n'
                                   'Google Research\n'
                                   'nikip@google.comJakob Uszkoreit∗\n'
                                   'Google Research\n'
                            

In [18]:
from langchain.chains import RetrievalQA  

# Open the already-populated Pinecone index as a LangChain vector store.
knowledge = PineconeVectorStore.from_existing_index(
    embedding=embeddings,
    index_name=index_name,
    namespace=namespace,
)

# Expose the vector store as a retriever for the question-answering chain.
retriever = knowledge.as_retriever()

# "stuff" chain type: all retrieved documents are combined into one and
# passed to the model in a single step.
# NOTE(review): no custom prompt is passed to the chain here.
qa = RetrievalQA.from_chain_type(
    llm=model,
    retriever=retriever,
    chain_type="stuff",
)

Unclosed client session
client_session: <aiohttp.client.ClientSession object at 0x0000021E64ED3CD0>


In [19]:
query1 = """What are the first 3 important facts about transformers"""

query2 = """explain Multihead attention"""


def compare_answers(question):
    """Print the answer to `question` with and without retrieved context.

    The first answer comes from the retrieval QA chain (`qa`), the second
    from calling the bare model (`model`) with the same question.
    """
    print("Chat with knowledge:")
    print(qa.invoke(question).get("result"))
    print("\nChat without knowledge:")
    print(model.invoke(question))


# Same printed layout as before; the repeated print sequence now lives in
# compare_answers() instead of being copy-pasted for each query.
print("Query 1\n")
compare_answers(query1)
print("\nQuery 2\n")
compare_answers(query2)


Query 1

Chat with knowledge:
Based on the provided context, which appears to be related to a paper on neural machine translation and attention mechanisms in transformers, here are three important facts about transformers:

1. **Self-Attention Mechanism**: Transformers use self-attention mechanisms to allow the model to attend to different parts of the input sequence simultaneously and weigh their importance. This is demonstrated in Figures 3 and 4, which show the attention heads attending to distant dependencies in the encoder self-attention at layer 5.

2. **Multi-Head Attention**: The transformer architecture uses multi-head attention, where multiple attention heads are used to capture different aspects of the input sequence. These attention heads can perform different tasks, as shown in Figure 5, which demonstrates two examples of attention heads exhibiting behavior related to sentence structure.

3. **Layer-by-Layer Processing**: Transformers process input sequences layer by layer