In [59]:
# %pip install transformers
# %pip install tensorflow
# %pip install torch
# %pip install pydantic==1.10.8
# %pip install pydantic
# %pip install "pinecone-client[grpc]"
# %pip install langchain_pinecone


In [60]:
import os
from dotenv import load_dotenv

# Load secrets from a local .env file (if present) into the process environment.
load_dotenv()

# SECURITY: the Pinecone key must come from the environment / .env file and
# must never be written into the notebook. The key that was previously
# hard-coded in this cell should be treated as leaked and rotated.
pinecone_api_key = os.environ.get('PINECONE_API_KEY')
if not pinecone_api_key:
    raise RuntimeError(
        "PINECONE_API_KEY is not set. Add it to your .env file or export it "
        "in the environment before running this notebook."
    )

# May be None; it is only passed on when MODEL selects an OpenAI ('gpt*') model.
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")

# Name of the chat model to use. Names starting with 'gpt' are routed to
# OpenAI further down; anything else is served through Ollama.
# MODEL = 'gpt-3.5-turbo'
MODEL = 'llama3'

In [61]:
from langchain_openai import ChatOpenAI
from langchain_community.llms import Ollama
from langchain_openai.embeddings import OpenAIEmbeddings
from langchain_community.embeddings import OllamaEmbeddings



# Choose the chat model and the matching embedding backend from MODEL:
# names beginning with 'gpt' go to OpenAI, every other name goes to Ollama.
use_openai = MODEL.startswith('gpt')

if not use_openai:
    model = Ollama(model=MODEL)
    embeddings = OllamaEmbeddings(model=MODEL)
else:
    model = ChatOpenAI(model_name=MODEL, openai_api_key=OPENAI_API_KEY)
    embeddings = OpenAIEmbeddings(openai_api_key=OPENAI_API_KEY)


# model.invoke('Tell me a joke')

"Here's one:\n\nWhy couldn't the bicycle stand up by itself?\n\n(wait for it...)\n\nBecause it was two-tired!\n\nHope that made you laugh!"

In [62]:
from langchain_core.output_parsers import StrOutputParser

# Pipe the model's output through a string output parser.
parser = StrOutputParser()
chain = model | parser

# Quick smoke test of the two-step chain; being the last expression of the
# cell, the generated story is displayed as the cell output.
story_request = 'tell a 5 line story'
chain.invoke(story_request)

'As the clock struck midnight, Emma crept into the abandoned mansion to explore. She had always been drawn to the whispers of hidden treasures within its walls. Suddenly, she stumbled upon a dusty old key and a faint light flickered to life in front of her. The room began to glow with an otherworldly radiance, revealing a secret garden hidden for decades. In the center of the garden, Emma found a beautiful music box playing her favorite lullaby.'

In [63]:
from langchain_community.document_loaders import PyPDFLoader
from langchain_text_splitters import MarkdownHeaderTextSplitter


# Local import so this cell stays self-contained; the class lives in the same
# package as MarkdownHeaderTextSplitter imported above.
from langchain_text_splitters import RecursiveCharacterTextSplitter

# Read the PDF page by page and keep only the extracted text of each page.
loader = PyPDFLoader("1706.03762v7.pdf")
pdf_text = loader.load()
pdf_text_list = [doc.page_content for doc in pdf_text]

# Join all pages into a single string so sections can span page breaks.
doc = "\n".join(pdf_text_list)

# First pass: split on markdown level-2 headers, keeping the header line in
# the chunk text (strip_headers=False).
headers_to_split_on = [
    ("##", "Header 2")
]
markdown_splitter = MarkdownHeaderTextSplitter(
    headers_to_split_on=headers_to_split_on, strip_headers=False
)
header_sections = markdown_splitter.split_text(doc)

# Second pass: text extracted from a PDF is plain text and will usually not
# contain any "##" headers, in which case the header split above returns the
# whole paper as one oversized document. Bounding the chunk size keeps every
# piece small enough to embed and retrieve individually.
# NOTE(review): 1000/100 characters are starting values -- tune them against
# the input limit of the embedding model in use.
chunk_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
md_header_splits = chunk_splitter.split_documents(header_sections)

In [64]:
from langchain.prompts import PromptTemplate

# Prompt used for question answering: the model is told to answer from the
# supplied context only and to admit when it does not know the answer.
template = (
    "\n"
    "Answer the questions based on the context from the given data. "
    "If you don't know the answer, just say that you don't know.\n"
    "\n"
    "\n"
    "context: {context}\n"
    "question: {question}\n"
)

prompt = PromptTemplate(template=template)

# Render the template once with placeholder values to check the formatting;
# the rendered string is shown as the cell output.
prompt.format(question='here is a question', context='Here is some context')




"\nAnswer the questions based on the context from the given data. If you don't know the answer, just say that you don't know.\n\n\ncontext: Here is some context\nquestion: here is a question\n"

In [65]:
# Rebind `chain` to the full pipeline: fill the prompt, send it to the model,
# then pass the model output through the output parser.
chain = (
    prompt
    | model
    | parser
)


In [66]:
# Try the prompt chain with hand-written context and a question, supplying
# both variables the prompt template expects.
sample_inputs = dict(
    context='the data from the previous 10 years show that the temperature is on a rise',
    question='temperature this year is 20 degrees. What will be the temperature next year?',
)
chain.invoke(sample_inputs)

"Based on the context that the temperature has been rising over the past 10 years, I would answer that I don't know what the temperature will be next year. The trend in the data suggests an upward trajectory, but there is no specific information provided to predict exactly what the temperature will be next year."

In [67]:
# Display the schema of the input the chain expects; the output below lists
# 'context' and 'question' as required string fields.
chain.input_schema.schema()

{'title': 'PromptInput',
 'type': 'object',
 'properties': {'context': {'title': 'Context', 'type': 'string'},
  'question': {'title': 'Question', 'type': 'string'}},
 'required': ['context', 'question']}

In [68]:
from pinecone.grpc import PineconeGRPC as Pinecone
from pinecone import ServerlessSpec

# Build the Pinecone client from the key held in `pinecone_api_key` (set in
# the configuration cell) instead of embedding the secret in this cell.
pc = Pinecone(api_key=pinecone_api_key)

In [69]:
# Name of the Pinecone index used by this notebook and the dimension of the
# vectors stored in it.
# NOTE(review): 1024 presumably matches the output size of the embedding model
# configured further down -- confirm if the model is changed.
index_name = "docs-quickstart-index"
embedding_dim = 1024

# Start from a clean slate: remove the index if a previous run left one behind.
existing_indexes = pc.list_indexes().names()
if index_name in existing_indexes:
    pc.delete_index(name=index_name)

# Create the index as a serverless index using cosine similarity.
serverless_spec = ServerlessSpec(cloud='aws', region='us-east-1')
pc.create_index(
    name=index_name,
    metric="cosine",
    dimension=embedding_dim,
    spec=serverless_spec,
)

In [70]:
# from langchain_community.vectorstores import DocArrayInMemorySearch
# # used for making small vectorstores, for large ones use Pinecone

# vectorstore = DocArrayInMemorySearch.from_documents(pages,
#                                                     embedding=embeddings,)
from langchain_pinecone import PineconeEmbeddings
from langchain_pinecone import PineconeVectorStore
import os
import time




# Safety net: create the index here as well if it does not exist yet, so this
# cell also works when the index-creation cell was not run.
index_missing = index_name not in pc.list_indexes().names()
if index_missing:
    pc.create_index(
        name=index_name,
        metric="cosine",
        dimension=embedding_dim,
        spec=ServerlessSpec(cloud='aws', region='us-east-1'),
    )

# Embed with a Pinecone-hosted model. This rebinds `embeddings`, replacing
# the embedding object that was chosen together with the chat model earlier.
model_name = "multilingual-e5-large"
embeddings = PineconeEmbeddings(
    pinecone_api_key=pinecone_api_key,
    model=model_name,
)

# Embed the document chunks and store them in the index under one namespace.
docsearch = PineconeVectorStore.from_documents(
    namespace="wondervector5000",
    index_name=index_name,
    embedding=embeddings,
    documents=md_header_splits,
)

# Short pause after the upload -- presumably to give the index a moment to
# make the new vectors queryable.
time.sleep(1)

In [71]:
# A retriever retrieves information from a source; in this case the source is the vector store, but it can be any other object.

# documents = vectorstore.as_retriever().invoke("MultiHead Attention")

# Open the index and look at what was stored in the namespace.
index = pc.Index(index_name)
namespace = "wondervector5000"

# Each item yielded by index.list() is indexed with [0] below, i.e. it is
# treated as a batch of ids; only the first id of every batch is looked up.
for id_batch in index.list(namespace=namespace):
    first_id = id_batch[0]
    # Query by id with top_k=1 and print the match including its vector
    # values and metadata.
    query = index.query(
        namespace=namespace,
        id=first_id,
        top_k=1,
        include_metadata=True,
        include_values=True,
    )
    print(query)


In [72]:
from langchain.chains import RetrievalQA  


# Re-open the already populated index as a vector store, using the same
# namespace and embedding object that were used when the chunks were stored.
knowledge = PineconeVectorStore.from_existing_index(
    embedding=embeddings,
    index_name=index_name,
    namespace="wondervector5000",
)

# Question-answering chain: the retriever fetches chunks from the vector
# store and the chat model answers with them ("stuff" chain type).
retriever = knowledge.as_retriever()
qa = RetrievalQA.from_chain_type(
    llm=model,
    chain_type="stuff",
    retriever=retriever,
)

Unclosed client session
client_session: <aiohttp.client.ClientSession object at 0x000001ED09CFA710>


In [58]:
# cleaned_docs = []
# for doc in documents:
#     content = doc.page_content
#     cleaned_content = ' '.join(content.split())
#     cleaned_docs.append(cleaned_content)
# with open('cleaned_output.txt', 'w') as f:
#     for doc in cleaned_docs:
#         f.write(doc + '\n\n')  


# Questions about transformers used to compare the two answering modes.
query1 = """What are the first 3 important facts about transformers"""

query2 = """explain Multihead attention"""

# Send each query to the LLM twice: first through the retrieval chain, which
# adds knowledge from Pinecone, and then to the bare model with no additional
# knowledge.
for query_number, question in enumerate((query1, query2), start=1):
    header = f"Query {query_number}\n"
    # Every header after the first one is preceded by a blank line.
    print(header if query_number == 1 else "\n" + header)
    print("Chat with knowledge:")
    print(qa.invoke(question).get("result"))
    print("\nChat without knowledge:")
    print(model.invoke(question))


Query 1

Chat with knowledge:
Based on the provided context, here are three important facts about Transformers:

1. **Attention Mechanism**: Transformers use an attention mechanism that allows them to focus on specific parts of the input sequence (in this case, sentences) and weigh their importance. This is demonstrated in Figures 3-5, which show how different attention heads attend to distant dependencies or perform tasks like anaphora resolution.
2. **Encoder Self-Attention**: The attention mechanism is used in the encoder self-attention layer, which allows the model to consider long-range dependencies within a sentence. This is shown in Figure 3, where multiple attention heads attend to a distant dependency of the verb "making".
3. **Multi-Head Attention**: Transformers use multi-head attention, which allows different attention heads to perform different tasks and weigh their importance. This is demonstrated in Figures 4 and 5, which show two attention heads involved in anaphora res