In [73]:
from langchain_groq import ChatGroq
from crewai import Agent
from crewai import Crew
from crewai import Task
from langchain.document_loaders import PyPDFLoader
from langchain_community.tools.tavily_search import TavilySearchResults

In [74]:
import os
GROQ_API_KEY = os.environ["GROQ_API_KEY"]
os.environ["GROQ_API_KEY"] = GROQ_API_KEY

In [75]:
#model 
llm=ChatGroq(
        temperature = 0.5,
        model="llama3-8b-8192",
        api_key= GROQ_API_KEY,
        max_tokens=1024,
       
)
response = llm.invoke("What is attention in transformers?")
print(response)

content='In the context of transformers, attention refers to a mechanism that allows the model to focus on specific parts of the input sequence when processing it. This is particularly useful when dealing with long-range dependencies or when the input sequence has a complex structure.\n\nIn traditional recurrent neural networks (RNNs), the model processes the input sequence sequentially, one element at a time. However, this can lead to difficulties when dealing with long-range dependencies, as the model may not be able to capture information that is relevant but far away in the sequence.\n\nTransformers, on the other hand, use self-attention mechanisms to allow the model to attend to different parts of the input sequence simultaneously. This is achieved by computing a weighted sum of the input sequence, where the weights are learned during training and reflect the importance of each element in the sequence.\n\nThe attention mechanism in transformers is typically implemented using three

In [76]:
import requests

pdf_url = "https://proceedings.neurips.cc/paper_files/paper/2017/file/3f5ee243547dee91fbd053c1c4a845aa-Paper.pdf"
response = requests.get(pdf_url)

with open("attenstion_is_all_you_need.pdf", "wb") as file:
    file.write(response.content)

In [77]:
# Load the PDF
loader = PyPDFLoader("attenstion_is_all_you_need.pdf")
documents = loader.load()

In [78]:
from langchain.vectorstores import FAISS
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.chains import RetrievalQA

In [79]:
splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
chunks = splitter.split_documents(documents)

In [80]:
from langchain.embeddings import HuggingFaceEmbeddings
def download_hugging_face_embeddings():
    embeddings=HuggingFaceEmbeddings(model_name='sentence-transformers/all-MiniLM-L6-v2')
    return embeddings

embeddings = download_hugging_face_embeddings()

In [81]:
vectorstore = FAISS.from_documents(chunks, embeddings)
retriever = vectorstore.as_retriever()

In [82]:
rag_chain = RetrievalQA.from_chain_type(
    llm=llm,
    retriever=retriever,
    return_source_documents=True
)

In [83]:
response = rag_chain.invoke("What is the main contribution of the paper 'Attention Is All You Need'?",
)

In [84]:
response['result']

'The paper "Attention Is All You Need" by Vaswani et al. (2017) introduces the Transformer model, which is a novel approach to neural machine translation that relies entirely on self-attention mechanisms, without using traditional recurrent or convolutional neural networks. The main contribution of the paper is the development of this Transformer model, which achieves state-of-the-art results in machine translation tasks and has since become a widely used and influential architecture in natural language processing.'

In [85]:
from dotenv import load_dotenv
load_dotenv()
TAVILY_API_KEY = os.environ["TAVILY_API_KEY"]
os.environ["TAVILY_API_KEY"] = TAVILY_API_KEY
tavily_tool = TavilySearchResults(k=3)

In [86]:
reuslt=tavily_tool.run("What is the main contribution of the paper 'Attention Is All You Need'?"  )
print(reuslt[0]['content'])

"Attention Is All You Need"( is a 2017 landmark( paper in machine learning authored by eight scientists working at Google. The paper introduced a new deep learning architecture known as the transformer "Transformer (machine learning model)"), based on the attention mechanism proposed in 2014 by Bahdanau et al.( It is considered a foundational( paper in modern artificial intelligence, and a main contributor to the AI boom, as the transformer approach has become the main architecture of a wide [...] In 2017, the original (100M-sized) encoder-decoder transformer model was proposed in the "Attention is all you need" paper. At the time, the focus of the research was on improving seq2seq for machine translation, by removing its recurrence to process all tokens in parallel, but preserving its dot-product attention mechanism to keep its text processing performance.( This led to the introduction of a multi-head attention model that was easier to parallelize due to the use of independent heads a

In [87]:
from langchain_core.tools import tool
@tool
def router_tool(question):
  """Router Function"""
  if 'self-attention' in question:
    return 'vectorstore'
  else:
    return 'web_search'

In [105]:
#creste router agent
router_agent = Agent(
     role='Router',
    goal='Route user question to a vectorstore or web search',
    backstory=(
    "You are an expert at routing a user question to a vectorstore or web search."
    "Use the vectorstore for questions on concept related to Retrieval-Augmented Generation."
    "You do not need to be stringent with the keywords in the question related to these topics. Otherwise, use web-search."
  ),
  verbose=True,
  allow_delegation=False,
  llm=ChatGroq(
        temperature = 0.5,
        model="llama3-8b-8192",
        api_key= GROQ_API_KEY,
        max_tokens=1024,
    ),
)

In [107]:
#Creste Retriever Agent
Retriever_Agent = Agent(
role="Retriever",
goal="Use the information retrieved from the vectorstore to answer the question",
backstory=(
    "You are an assistant for question-answering tasks."
    "Use the information present in the retrieved context to answer the question."
    "You have to provide a clear concise answer."
),
verbose=True,
allow_delegation=False,
llm=ChatGroq(
        temperature = 0.5,
        model="llama3-8b-8192",
        api_key= GROQ_API_KEY,
        max_tokens=1024,
    ),
)

In [108]:
Grader_agent =  Agent(
  role='Answer Grader',
  goal='Filter out erroneous retrievals',
  backstory=(
    "You are a grader assessing relevance of a retrieved document to a user question."
    "If the document contains keywords related to the user question, grade it as relevant."
    "It does not need to be a stringent test.You have to make sure that the answer is relevant to the question."
  ),
  verbose=True,
  allow_delegation=False,
  llm=ChatGroq(
        temperature = 0.5,
        model="llama3-8b-8192",
        api_key= GROQ_API_KEY,
        max_tokens=1024,
    ),
)

In [109]:
hallucination_grader = Agent(
    role="Hallucination Grader",
    goal="Filter out hallucination",
    backstory=(
        "You are a hallucination grader assessing whether an answer is grounded in / supported by a set of facts."
        "Make sure you meticulously review the answer and check if the response provided is in alignmnet with the question asked"
    ),
    verbose=True,
    allow_delegation=False,
    llm=ChatGroq(
        temperature = 0.5,
        model="llama3-8b-8192",
        api_key= GROQ_API_KEY,
        max_tokens=1024,
    ),
)

In [110]:
answer_grader = Agent(
    role="Answer Grader",
    goal="Filter out hallucination from the answer.",
    backstory=(
        "You are a grader assessing whether an answer is useful to resolve a question."
        "Make sure you meticulously review the answer and check if it makes sense for the question asked"
        "If the answer is relevant generate a clear and concise response."
        "If the answer gnerated is not relevant then perform a websearch using 'web_search_tool'"
    ),
    verbose=True,
    allow_delegation=False,
    llm=ChatGroq(
        temperature = 0.5,
        model="llama3-8b-8192",
        api_key= GROQ_API_KEY,
        max_tokens=1024,
    ),
)

In [111]:
#router task
router_task = Task(
    name="Router Task",
    description=(
        "Analyse the keywords in the question {question}."
        "Based on the keywords decide whether it is eligible for a vectorstore search or a web search."
        "Return a single word 'vectorstore' if it is eligible for vectorstore search."
        "Return a single word 'websearch' if it is eligible for web search."
        "Do not provide any other preamble or explanation."
    ),
    expected_output=(
        "Give a binary choice 'websearch' or 'vectorstore' based on the question."
        "Do not provide any other preamble or explanation."
    ),
    agent=router_agent,
    tool=router_tool,
)

In [112]:
retriever_task = Task(
    description=("Based on the response from the router task extract information for the question {question} with the help of the respective tool."
    "Use the web_serach_tool to retrieve information from the web in case the router task output is 'websearch'."
    "Use the rag_tool to retrieve information from the vectorstore in case the router task output is 'vectorstore'."
    ),
    expected_output=("You should analyse the output of the 'router_task'"
    "If the response is 'websearch' then use the web_search_tool to retrieve information from the web."
    "If the response is 'vectorstore' then use the rag_tool to retrieve information from the vectorstore."
    "Return a claer and consise text as response."),
    agent=Retriever_Agent,
    context=[router_task],
)

In [113]:
grader_task = Task(
    description=("Based on the response from the retriever task for the quetion {question} evaluate whether the retrieved content is relevant to the question."
    ),
    expected_output=("Binary score 'yes' or 'no' score to indicate whether the document is relevant to the question"
    "You must answer 'yes' if the response from the 'retriever_task' is in alignment with the question asked."
    "You must answer 'no' if the response from the 'retriever_task' is not in alignment with the question asked."
    "Do not provide any preamble or explanations except for 'yes' or 'no'."),
    agent=Grader_agent,
    context=[retriever_task],
)

In [114]:
hallucination_task = Task(
    description=("Based on the response from the grader task for the quetion {question} evaluate whether the answer is grounded in / supported by a set of facts."),
    expected_output=("Binary score 'yes' or 'no' score to indicate whether the answer is sync with the question asked"
    "Respond 'yes' if the answer is in useful and contains fact about the question asked."
    "Respond 'no' if the answer is not useful and does not contains fact about the question asked."
    "Do not provide any preamble or explanations except for 'yes' or 'no'."),
    agent=hallucination_grader,
    context=[grader_task],
)

In [115]:
answer_task = Task(
    description=("Based on the response from the hallucination task for the quetion {question} evaluate whether the answer is useful to resolve the question."
    "If the answer is 'yes' return a clear and concise answer."
    "If the answer is 'no' then perform a 'websearch' and return the response"),
    expected_output=("Return a clear and concise response if the response from 'hallucination_task' is 'yes'."
    "Perform a web search using 'web_search_tool' and return ta clear and concise response only if the response from 'hallucination_task' is 'no'."
    "Otherwise respond as 'Sorry! unable to find a valid response'."),
    context=[hallucination_task],
    agent=answer_grader,

)

In [116]:
# Create a Crew
rag_crew = Crew(
    agents=[router_agent, Retriever_Agent, Grader_agent, hallucination_grader, answer_grader],
    tasks=[router_task, retriever_task, grader_task, hallucination_task, answer_task],
    verbose=True,

)

In [119]:
inputs = {"question": "Tell me about self-attention mechanism in Transformers?"}
result = rag_crew.kickoff(inputs=inputs)

BadRequestError: litellm.BadRequestError: LLM Provider NOT provided. Pass in the LLM provider you are trying to call. You passed model=llama3-8b-8192
 Pass model as E.g. For 'Huggingface' inference endpoints pass in `completion(model='huggingface/starcoder',..)` Learn more: https://docs.litellm.ai/docs/providers