In [41]:
from langchain_community.tools import WikipediaQueryRun
from langchain_community.utilities import WikipediaAPIWrapper
from langchain_community.utilities import ArxivAPIWrapper
from langchain_community.tools import ArxivQueryRun
from langchain_community.document_loaders import WebBaseLoader
from langchain_community.document_loaders import PyPDFLoader, PyPDFDirectoryLoader
from langchain_community.vectorstores import FAISS
from langchain_openai import OpenAIEmbeddings
from langchain_community.embeddings import HuggingFaceBgeEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_huggingface.llms import HuggingFacePipeline
#from langchain.chains import RetrievalQA
from langchain.prompts import PromptTemplate
from langchain.agents import create_openai_tools_agent, AgentExecutor
from langchain.tools.retriever import create_retriever_tool
from langchain_groq import ChatGroq
from langchain_core.prompts import ChatPromptTemplate
#from transformers import pipeline
from langchain import hub
from IPython.display import HTML, display, Markdown
import torch
import os
from dotenv import load_dotenv

In [2]:
api_wrapper=WikipediaAPIWrapper(top_k_results=1,doc_content_chars_max=200)
wiki=WikipediaQueryRun(api_wrapper=api_wrapper)

In [3]:
wiki.name

'wikipedia'

In [4]:
# Check if CUDA is available and set the device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)


cpu


  return torch._C._cuda_getDeviceCount() > 0


In [5]:
print("CUDA available:", torch.cuda.is_available())
print("Number of GPUs:", torch.cuda.device_count())

CUDA available: False
Number of GPUs: 1


In [6]:
# load .env file to environment
load_dotenv()

os.environ["HUGGINGFACEHUB_API_TOKEN"]=os.getenv("HUGGINGFACEHUB_API_TOKEN")
## Langsmith tracking
os.environ["LANGCHAIN_TRACING_V2"]="true"
os.environ["LANGCHAIN_API_KEY"]=os.getenv("LANGCHAIN_API_KEY")
os.environ["GROQ_API_KEY"]=os.getenv("GROQ_API_KEY")
groq_api_key=os.environ['GROQ_API_KEY']

In [7]:
## Embedding Using Huggingface
huggingface_embeddings=HuggingFaceBgeEmbeddings(
    #model_name="BAAI/bge-small-en-v1.5",  #sentence-transformers/all-MiniLM-l6-v2
    model_name="all-MiniLM-L6-v2",    
    model_kwargs={'device':device},
    encode_kwargs={'normalize_embeddings':True}
)

  from tqdm.autonotebook import tqdm, trange


In [8]:
text = "This is a test document."
query_result = huggingface_embeddings.embed_query(text)

In [9]:
import numpy as np
print(np.array(query_result).shape)
#print(np.array(query_result))

(384,)


In [10]:
#doc_result = huggingface_embeddings.embed_documents([text, "This is not a test document."])
#print(np.array(doc_result))

In [11]:
loader=WebBaseLoader("https://docs.smith.langchain.com/")
docs=loader.load()
documents=RecursiveCharacterTextSplitter(chunk_size=1000,chunk_overlap=200).split_documents(docs)
#vectordb=FAISS.from_documents(documents,OpenAIEmbeddings())
vectordb=FAISS.from_documents(documents,huggingface_embeddings)
retriever=vectordb.as_retriever()
retriever

VectorStoreRetriever(tags=['FAISS', 'HuggingFaceBgeEmbeddings'], vectorstore=<langchain_community.vectorstores.faiss.FAISS object at 0x7f8671211f30>)

In [12]:
from langchain.tools.retriever import create_retriever_tool
retriever_tool=create_retriever_tool(retriever,"langsmith_search",
                      "Search for information about LangSmith. For any questions about LangSmith, you must use this tool!")

In [13]:
retriever_tool.name

'langsmith_search'

In [14]:
## Arxiv Tool
from langchain_community.utilities import ArxivAPIWrapper
from langchain_community.tools import ArxivQueryRun

arxiv_wrapper=ArxivAPIWrapper(top_k_results=1, doc_content_chars_max=200)
arxiv=ArxivQueryRun(api_wrapper=arxiv_wrapper)
arxiv.name

'arxiv'

In [45]:
pdfloader=PyPDFLoader("../huggingface/papers/1706.03762v7.pdf")

pdfdocs=pdfloader.load()
print(len(pdfdocs))
pdfdocuments=RecursiveCharacterTextSplitter(chunk_size=1000,chunk_overlap=200).split_documents(pdfdocs)
print(pdfdocs[3])

15
page_content='Scaled Dot-Product Attention
 Multi-Head Attention
Figure 2: (left) Scaled Dot-Product Attention. (right) Multi-Head Attention consists of several
attention layers running in parallel.
of the values, where the weight assigned to each value is computed by a compatibility function of the
query with the corresponding key.
3.2.1 Scaled Dot-Product Attention
We call our particular attention "Scaled Dot-Product Attention" (Figure 2). The input consists of
queries and keys of dimension dk, and values of dimension dv. We compute the dot products of the
query with all keys, divide each by√dk, and apply a softmax function to obtain the weights on the
values.
In practice, we compute the attention function on a set of queries simultaneously, packed together
into a matrix Q. The keys and values are also packed together into matrices KandV. We compute
the matrix of outputs as:
Attention( Q, K, V ) = softmax(QKT
√dk)V (1)
The two most commonly used attention functions are additive at

In [16]:
##custom pdf tool

## Read the ppdfs from the folder
pdf_loader=PyPDFDirectoryLoader("../huggingface/papers")

pdf_docs=pdf_loader.load()
print(len(pdf_docs))
pdf_documents=RecursiveCharacterTextSplitter(chunk_size=1000,chunk_overlap=200).split_documents(pdf_docs)
print(pdf_docs[3])

190
page_content='Figure 1.2: Larger models make increasingly efﬁcient use of in-context information. We show in-context learning
performance on a simple task requiring the model to remove random symbols from a word, both with and without a
natural language task description (see Sec. 3.9.2). The steeper “in-context learning curves” for large models demonstrate
improved ability to learn a task from contextual information. We see qualitatively similar behavior across a wide range
of tasks.
sufﬁcient to enable a human to perform a new task to at least a reasonable degree of competence. Aside from pointing
to a conceptual limitation in our current NLP techniques, this adaptability has practical advantages – it allows humans
to seamlessly mix together or switch between many tasks and skills, for example performing addition during a lengthy
dialogue. To be broadly useful, we would someday like our NLP systems to have this same ﬂuidity and generality.
One potential route towards addressing th

In [17]:
## VectorStore Creation
pdf_vectorstore=FAISS.from_documents(pdf_documents,huggingface_embeddings)
pdf_retriever=pdf_vectorstore.as_retriever()
pdf_retriever

VectorStoreRetriever(tags=['FAISS', 'HuggingFaceBgeEmbeddings'], vectorstore=<langchain_community.vectorstores.faiss.FAISS object at 0x7f8677bdd540>)

In [18]:
pdf_retriever_tool=create_retriever_tool(pdf_retriever,"pdf_search",
                      "Search for information about transfomer, GPT-3 and GPT-4 model. For any questions about text generation of languange model, you must use this tool!")

In [19]:
pdf_retriever_tool.name

'pdf_search'

In [20]:
tools=[wiki,arxiv,retriever_tool,pdf_retriever_tool]

In [21]:
tools

[WikipediaQueryRun(api_wrapper=WikipediaAPIWrapper(wiki_client=<module 'wikipedia' from '/home/kousik/Desktop/Learning_projects/Langchain_tutorial/venv/lib/python3.10/site-packages/wikipedia/__init__.py'>, top_k_results=1, lang='en', load_all_available_meta=False, doc_content_chars_max=200)),
 ArxivQueryRun(api_wrapper=ArxivAPIWrapper(arxiv_search=<class 'arxiv.Search'>, arxiv_exceptions=(<class 'arxiv.ArxivError'>, <class 'arxiv.UnexpectedEmptyPageError'>, <class 'arxiv.HTTPError'>), top_k_results=1, ARXIV_MAX_QUERY_LENGTH=300, continue_on_failure=False, load_max_docs=100, load_all_available_meta=False, doc_content_chars_max=200, arxiv_result=<class 'arxiv.Result'>)),
 Tool(name='langsmith_search', description='Search for information about LangSmith. For any questions about LangSmith, you must use this tool!', args_schema=<class 'langchain_core.tools.RetrieverInput'>, func=functools.partial(<function _get_relevant_documents at 0x7f87729003a0>, retriever=VectorStoreRetriever(tags=['FAI

In [22]:
'''
from dotenv import load_dotenv

load_dotenv()
import os
os.environ['OPENAI_API_KEY']=os.getenv("OPENAI_API_KEY")
from langchain_openai import ChatOpenAI

llm = ChatOpenAI(model="gpt-3.5-turbo-0125", temperature=0)
'''

'\nfrom dotenv import load_dotenv\n\nload_dotenv()\nimport os\nos.environ[\'OPENAI_API_KEY\']=os.getenv("OPENAI_API_KEY")\nfrom langchain_openai import ChatOpenAI\n\nllm = ChatOpenAI(model="gpt-3.5-turbo-0125", temperature=0)\n'

In [23]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)

cpu


In [24]:
#device_id = 0 if torch.cuda.is_available() else -1
#print(device_id)
'''
from langchain_huggingface.llms import HuggingFacePipeline
# Create the HuggingFacePipeline with GPU support
llm = HuggingFacePipeline.from_model_id(
    #model_id="openai-community/gpt2",
    #model_id="mistralai/Mistral-7B-v0.1",
    model_id="google/flan-t5-xl",
    task="text-generation",
    #device=device_id,
    device_map="auto",
    pipeline_kwargs={
        "temperature":0,
        "max_new_tokens": 150
    }
)
'''
llm=ChatGroq(groq_api_key=groq_api_key,
         model_name="mixtral-8x7b-32768")

In [25]:


from langchain import hub
# Get the prompt to use - you can modify this!
prompt = hub.pull("hwchase17/openai-functions-agent")
prompt.messages
'''
prompt = ChatPromptTemplate.from_template("""
Answer the following question based only on the provided context. 
Think step by step before providing a detailed answer. 
I will tip you $1000 if the user finds the answer helpful. 
<context>
{context}
</context>

Question: {input}""")
'''


'\nprompt = ChatPromptTemplate.from_template("""\nAnswer the following question based only on the provided context. \nThink step by step before providing a detailed answer. \nI will tip you $1000 if the user finds the answer helpful. \n<context>\n{context}\n</context>\n\nQuestion: {input}""")\n'

In [26]:
### Agents
agent=create_openai_tools_agent(llm,tools,prompt)

In [27]:
agent

RunnableAssign(mapper={
  agent_scratchpad: RunnableLambda(lambda x: format_to_openai_tool_messages(x['intermediate_steps']))
})
| ChatPromptTemplate(input_variables=['agent_scratchpad', 'input'], input_types={'chat_history': typing.List[typing.Union[langchain_core.messages.ai.AIMessage, langchain_core.messages.human.HumanMessage, langchain_core.messages.chat.ChatMessage, langchain_core.messages.system.SystemMessage, langchain_core.messages.function.FunctionMessage, langchain_core.messages.tool.ToolMessage]], 'agent_scratchpad': typing.List[typing.Union[langchain_core.messages.ai.AIMessage, langchain_core.messages.human.HumanMessage, langchain_core.messages.chat.ChatMessage, langchain_core.messages.system.SystemMessage, langchain_core.messages.function.FunctionMessage, langchain_core.messages.tool.ToolMessage]]}, metadata={'lc_hub_owner': 'hwchase17', 'lc_hub_repo': 'openai-functions-agent', 'lc_hub_commit_hash': 'a1655024b06afbd95d17449f21316291e0726f13dcfaf990cc0d18087ad689a5'}, mess

In [28]:
## Agent Executer
from langchain.agents import AgentExecutor
agent_executor=AgentExecutor(agent=agent,tools=tools,verbose=True)
agent_executor

AgentExecutor(verbose=True, agent=RunnableMultiActionAgent(runnable=RunnableAssign(mapper={
  agent_scratchpad: RunnableLambda(lambda x: format_to_openai_tool_messages(x['intermediate_steps']))
})
| ChatPromptTemplate(input_variables=['agent_scratchpad', 'input'], input_types={'chat_history': typing.List[typing.Union[langchain_core.messages.ai.AIMessage, langchain_core.messages.human.HumanMessage, langchain_core.messages.chat.ChatMessage, langchain_core.messages.system.SystemMessage, langchain_core.messages.function.FunctionMessage, langchain_core.messages.tool.ToolMessage]], 'agent_scratchpad': typing.List[typing.Union[langchain_core.messages.ai.AIMessage, langchain_core.messages.human.HumanMessage, langchain_core.messages.chat.ChatMessage, langchain_core.messages.system.SystemMessage, langchain_core.messages.function.FunctionMessage, langchain_core.messages.tool.ToolMessage]]}, metadata={'lc_hub_owner': 'hwchase17', 'lc_hub_repo': 'openai-functions-agent', 'lc_hub_commit_hash': 'a165

In [29]:
agent_executor.invoke({"input":"Tell me about Langsmith"})



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m
Invoking: `langsmith_search` with `{'query': 'Langsmith'}`


[0m[38;5;200m[1;3mGet started with LangSmith | ü¶úÔ∏èüõ†Ô∏è LangSmith

Skip to main contentGo to API DocsSearchRegionUSEUGo to AppQuick startTutorialsHow-to guidesConceptsReferencePricingSelf-hostingLangGraph CloudQuick startOn this pageGet started with LangSmithLangSmith is a platform for building production-grade LLM applications. It allows you to closely monitor and evaluate your application, so you can ship quickly and with confidence. Use of LangChain is not necessary - LangSmith works on its own!1. Install LangSmith‚ÄãPythonTypeScriptpip install -U langsmithyarn add langchain langsmith2. Create an API key‚ÄãTo create an API key head to the Settings page. Then click Create API Key.3. Set up your environment‚ÄãShellexport LANGCHAIN_TRACING_V2=trueexport LANGCHAIN_API_KEY=<your-api-key># The below examples use the OpenAI API, though it's not necessary in ge

{'input': 'Tell me about Langsmith',
 'output': 'LangSmith is a platform for building production-grade LLM (large language model) applications. It allows you to closely monitor and evaluate your application, so you can ship quickly and with confidence. You can get started with LangSmith by installing it via pip or npm, creating an API key, and setting up your environment. Once set up, you can log traces using the provided methods and take advantage of LangSmith\'s features, such as using their wrappers for LLM calls, auto-tracing, and evaluation.\n\nFor more information on how to use LangSmith and its capabilities, you can refer to the documentation at <https://langsmith.com/docs/>.\n\n(Note: I do not have the ability to browse the web, so I cannot directly link you to the LangSmith documentation. However, you can easily find it by searching "LangSmith documentation" in your web browser.)\n\nConfidence: 90%'}

In [30]:
agent_executor.invoke({"input":"What's the paper 1706.03762v7 about?"})



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m
Invoking: `arxiv` with `{'query': '1706.03762v7'}`


[0m[33;1m[1;3mPublished: 2023-08-02
Title: Attention Is All You Need
Authors: Ashish Vaswani, Noam Shazeer, Niki Parmar, Jakob Uszkoreit, Llion Jones, Aidan N. Gomez, Lukasz Kaiser, Illia Polosukhin
Summary: The do[0m[32;1m[1;3mAttention Is All You Need is a paper published on 2023-08-02, written by Ashish Vaswani, Noam Shazeer, Niki Parmar, Jakob Uszkoreit, Llion Jones, Aidan N. Gomez, Lukasz Kaiser, and Illia Polosukhin. The paper introduces a new type of neural network architecture called the Transformer, which relies solely on attention mechanisms to perform machine translation tasks. This architecture has been influential in the development of language models like GPT-3 and GPT-4.[0m

[1m> Finished chain.[0m


{'input': "What's the paper 1706.03762v7 about?",
 'output': 'Attention Is All You Need is a paper published on 2023-08-02, written by Ashish Vaswani, Noam Shazeer, Niki Parmar, Jakob Uszkoreit, Llion Jones, Aidan N. Gomez, Lukasz Kaiser, and Illia Polosukhin. The paper introduces a new type of neural network architecture called the Transformer, which relies solely on attention mechanisms to perform machine translation tasks. This architecture has been influential in the development of language models like GPT-3 and GPT-4.'}

In [31]:
#from langchain.memory import ConversationBufferWindowMemory
from langchain.memory import ConversationBufferMemory

In [32]:
# Step 1: Create an instance of the memory with initial chat history
memory = ConversationBufferMemory(memory_key = "chat_history", k=5, return_messages=True)

# Step 2: Set up the conversational retrieval chain
'''conversational_retrieval_chain = ConversationalRetrievalChain.from_llm(
    llm=llm,
    retriever=retriever,  # Use the previously created retriever
    memory=memory,
    verbose=True
)'''

'conversational_retrieval_chain = ConversationalRetrievalChain.from_llm(\n    llm=llm,\n    retriever=retriever,  # Use the previously created retriever\n    memory=memory,\n    verbose=True\n)'

In [33]:
# Step 3: Create an agent that uses the conversational retrieval chain
try:
    agent = create_openai_tools_agent(llm, tools, prompt)
    print("Agent created successfully with conversational retrieval chain")
except Exception as e:
    raise RuntimeError("Failed to create the agent with conversational retrieval chain.") from e

Agent created successfully with conversational retrieval chain


In [34]:
# Step 4: Create Agent Executor
try:
    agent_executor = AgentExecutor(agent=agent, tools=tools, memory=memory, verbose=True)
    print("Agent executor successfully created with conversational retrieval chain.")
except Exception as e:
    raise RuntimeError("Failed to create the agent executor with conversational retrieval chain.") from e

Agent executor successfully created with conversational retrieval chain.


In [35]:
# Step 5: Invoke the agent with some input
try:
    response = agent_executor.invoke({"input":"Tell me about Langsmith?"})

    # Print the response
    print(response)
except Exception as e:
    raise RuntimeError("Failed to invoke the agent.") from e



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m
Invoking: `langsmith_search` with `{'query': 'Langsmith'}`


[0m[38;5;200m[1;3mGet started with LangSmith | ü¶úÔ∏èüõ†Ô∏è LangSmith

Skip to main contentGo to API DocsSearchRegionUSEUGo to AppQuick startTutorialsHow-to guidesConceptsReferencePricingSelf-hostingLangGraph CloudQuick startOn this pageGet started with LangSmithLangSmith is a platform for building production-grade LLM applications. It allows you to closely monitor and evaluate your application, so you can ship quickly and with confidence. Use of LangChain is not necessary - LangSmith works on its own!1. Install LangSmith‚ÄãPythonTypeScriptpip install -U langsmithyarn add langchain langsmith2. Create an API key‚ÄãTo create an API key head to the Settings page. Then click Create API Key.3. Set up your environment‚ÄãShellexport LANGCHAIN_TRACING_V2=trueexport LANGCHAIN_API_KEY=<your-api-key># The below examples use the OpenAI API, though it's not necessary in ge

In [44]:
try:
    response = agent_executor.invoke({"input":"What's the paper 1706.03762v7 about?"})
# Print the response
    print(response)
except Exception as e:
    raise RuntimeError("Failed to invoke the agent.") from e



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mThe paper with the title "Attention Is All You Need" and the ID 1706.03762v7 is a groundbreaking work in the field of natural language processing (NLP). It was published by Ashish Vaswani and his colleagues at Google Brain in 2017.

In this paper, the authors introduce the Transformer model, a novel architecture for NLP tasks that eliminates the need for recurrence and convolutions. Instead, it relies entirely on self-attention mechanisms to process input sequences.

The Transformer model has several benefits compared to traditional architectures like LSTMs and GRUs. For example, it is parallelizable and can process input sequences of arbitrary lengths. This makes it well-suited for tasks like machine translation and text summarization, where input and output sequences can be very long.

The paper presents the Transformer model in detail, including its architecture and training procedure. It also includes experiments that dem