<a href="https://colab.research.google.com/github/sheharyarakhtar/MultiAgent/blob/main/RFP_Assesser_CrewAI.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
%pip -q install langchain langchain_community langchain_google_genai crewai crewai_tools faiss-gpu fastembed
%pip -q install llama-index-embeddings-huggingface
%pip -q install llama-index-embeddings-instructor
%pip -q install -U duckduckgo-search
%pip -q install langchain_huggingface

In [None]:
from langchain_community.document_loaders import PyPDFLoader
from langchain_community.embeddings.fastembed import FastEmbedEmbeddings
from langchain.vectorstores import FAISS
from langchain_core.vectorstores import VectorStoreRetriever
from langchain.chains import RetrievalQA
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain.chains.retrieval_qa.base import RetrievalQA
from langchain.vectorstores import FAISS
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_google_genai import ChatGoogleGenerativeAI
import os
from crewai import Agent, Task, Crew, Process
from crewai_tools import SerperDevTool, tool, PDFSearchTool
import pathlib
import textwrap
import google.generativeai as genai
from langchain_community.document_loaders import PyPDFLoader
from google.colab import userdata
from langchain.tools import DuckDuckGoSearchRun
# Fetch the Gemini API key via google.colab's `userdata` helper and pass it to
# the google.generativeai client configuration.
GOOGLE_API_KEY=userdata.get('GOOGLE_API_KEY')
genai.configure(api_key=GOOGLE_API_KEY)
from IPython.display import display, Markdown, Latex

## Simple RAG (Retrieval-Augmented Generation) using PDFs

In [None]:
class RFPAssesser:
    """Question-answering helper over a set of PDF documents.

    Pages loaded from the PDFs are embedded and stored in a FAISS vector
    store; prompts are answered by a ``RetrievalQA`` chain that combines
    that store with a Gemini chat model.
    """

    def __init__(self, model_name, embeddings_model_name):
        """Create the chat model and the embedding model.

        Args:
            model_name: Model identifier passed to ``ChatGoogleGenerativeAI``.
            embeddings_model_name: Model name passed to ``HuggingFaceEmbeddings``.
        """
        self.llm = ChatGoogleGenerativeAI(model=model_name,
                                          verbose=True,
                                          google_api_key=userdata.get('GOOGLE_API_KEY'),
                                          temperature = 0.3)
        self.embeddings = HuggingFaceEmbeddings(
            model_name=embeddings_model_name,
            show_progress=False
            )
        self.all_pages = []      # pages accumulated by EmbedDocuments()
        self.vectorDB = None     # FAISS store, set by createVectorDB()
        self.retriever = None    # set by createRetriever()
        self.assesser = None     # RetrievalQA chain, set by createRetriever()

    def EmbedDocuments(self, paths):
        """Load each PDF in ``paths`` and append its split pages to ``self.all_pages``."""
        for path in paths:
            loader = PyPDFLoader(path)
            pages = loader.load_and_split()
            self.all_pages.extend(pages)
            print(f"Documents {path} loaded successfully!")

    def createVectorDB(self):
        """Embed the loaded pages and build the FAISS vector store.

        Prints a message and leaves ``self.vectorDB`` untouched when there are
        no pages, no embedding model, or the embedding call returns nothing.
        """
        if not self.all_pages:
            print("No documents loaded.")
            return
        if not self.embeddings:
            print("No embeddings provided.")
            return
        texts = [page.page_content for page in self.all_pages]
        vectors = self.embeddings.embed_documents(texts)
        if not vectors:
            print("Failed to generate embeddings.")
            return
        # Reuse the vectors computed above. FAISS.from_documents would run the
        # embedding model over every page a second time.
        self.vectorDB = FAISS.from_embeddings(
            text_embeddings=list(zip(texts, vectors)),
            embedding=self.embeddings,
            metadatas=[page.metadata for page in self.all_pages],
        )
        print("Vector store created!")

    def createRetriever(self):
        """Wrap the vector store in a retriever and build the RetrievalQA chain.

        Does nothing (apart from printing a message) when no vector store has
        been created, instead of building a retriever around ``None``.
        """
        if self.vectorDB is None:
            print("No vector store available; retriever not created.")
            return
        self.retriever = VectorStoreRetriever(vectorstore=self.vectorDB)
        self.assesser = RetrievalQA.from_chain_type(llm=self.llm, retriever=self.retriever)
        print("Retriever created!")

    def setup(self, pdfs):
        """Run the full pipeline: load ``pdfs``, build the store, build the chain."""
        self.EmbedDocuments(paths=pdfs)
        self.createVectorDB()
        self.createRetriever()

    def run(self, prompt):
        """Answer ``prompt`` with the RetrievalQA chain.

        Raises:
            RuntimeError: if the chain has not been built (``setup`` was not
                called or found nothing to index).
        """
        if self.assesser is None:
            raise RuntimeError(
                "RFPAssesser is not set up: call setup() with at least one PDF first."
            )
        return self.assesser.run(prompt)
# Index every PDF in the working directory. Match on the file extension
# (case-insensitively) rather than the substring 'pdf', which would also pick
# up names such as 'pdf_notes.txt' and miss 'REPORT.PDF'.
pdfs = [i for i in os.listdir() if i.lower().endswith('.pdf')]
assesser = RFPAssesser(
    model_name='gemini-1.5-flash',
    embeddings_model_name="sentence-transformers/msmarco-distilbert-base-v4"
    )
assesser.setup(pdfs)



## Creating tools to be used by LLM Agents

In [None]:
@tool
def Assesser(question: str, assesser = assesser) -> str:
  """
  This tool allows you to query the provided documents.
  You can ask it relevant questions and it will return the answer.
  """
  # NOTE: the docstring above is left unchanged because the @tool decorator
  # is expected to expose it to the agent as the tool description.
  # The prompt opens with exactly three quotes; the original had four, which
  # put a stray '"' character at the start of every prompt sent to the chain.
  prompt = f"""
  Context: You are the owner of the RFP documents you have access to.
  You will be asked questions regarding these documents.
  You answer these questions by assessing the documents you have access to.

  Question: {question}
  """
  return assesser.run(prompt)

@tool
def DuckDuckGoSearch(search_query: str):
    """Search the web for information on a given topic"""
    # A new DuckDuckGoSearchRun is built for every call and the query is
    # forwarded to it unchanged; whatever it returns is handed back as-is.
    search_runner = DuckDuckGoSearchRun()
    search_output = search_runner.run(search_query)
    return search_output

# SerperDevTool instance from crewai_tools; no agent or task in the visible
# cells references it.
serper_tool = SerperDevTool()

import json

@tool
def SearchRFP(keywords: str, assesser = assesser) -> str:
    """
    Search the RFP documents with keywords

    keywords: str: Search keyword in the attached documents
    conducts a similarity search of the documents and returns the relevant content rephrased as plain text
    """
    # The docstring now describes what is actually returned (rephrased text);
    # it previously promised a dictionary with page numbers that was built
    # but never returned.
    results = assesser.vectorDB.similarity_search(keywords)
    # Concatenate the matching chunks, each followed by a blank-line separator.
    # The former per-source dict / JSON dump was dead code (its value was
    # discarded and entries from the same source overwrote each other).
    content = "".join(result.page_content + "\n\n\n" for result in results)
    # rephrase_text is defined later in the file and resolved at call time.
    return rephrase_text(content)

def rephrase_text(input_string, model = assesser.llm):
    """Ask ``model`` to tidy and paraphrase ``input_string``.

    Args:
        input_string: Raw text to be cleaned up.
        model: Chat model exposing ``invoke``; defaults to ``assesser.llm``
            as it was when this function was defined.

    Returns:
        The ``content`` attribute of the model's reply.
    """
    cleanup_prompt = f"""
        Context: You are a language model used to rephrase messy text.
        You will be given some messy text and your job is to format it and paraphrase it.
        Do not add anything yourself.
        Remove things that are not relevant i.e. any tables or idle letters. Just rephrase the content in a single or max 2 paragraphs

        Text: {input_string}
        """
    reply = model.invoke(cleanup_prompt)
    return reply.content

@tool
def write_to_file(input_string, file_path):
    """
    Write input_string to a text file at file_path.

    Parameters:
    - input_string (str): The string to write to the file.
    - file_path (str): The path where the file will be created or overwritten.

    Returns:
    - None
    """
    try:
        # Explicit UTF-8: with the platform default encoding, non-ASCII text
        # can raise UnicodeEncodeError, which the handler below does not catch.
        with open(file_path, 'w', encoding='utf-8') as file:
            file.write(input_string)
        print(f"Successfully wrote to file: {file_path}")
    except OSError as e:  # IOError is an alias of OSError in Python 3
        # Best-effort by design: report the failure instead of raising.
        print(f"Error writing to file {file_path}: {e}")


@tool
def pdf_search(query: str, pdf_path: str) -> str:
  """
  Pass in the query you wish you ask and the path to the PDF you wish to search the query in
  query: string
  pdf_path: string
  The function will return a string explaining what the pdf says about the query
  """
  # The original body called `rag.run(query)`, but no `rag` exists at module
  # level, so every call raised NameError. Use the module-level `assesser`
  # (the RFPAssesser built over the PDFs in the working directory) instead.
  # NOTE(review): pdf_path is accepted for interface compatibility but is not
  # used to restrict the search; the index spans all loaded PDFs.
  return assesser.run(query)

## Creating Agents and Tasks for Multi Agent Workflow

In [None]:
# Chat model shared by the agents defined in this cell.
llm = ChatGoogleGenerativeAI(model='gemini-1.5-flash', verbose=True,google_api_key=userdata.get('GOOGLE_API_KEY'))
# Select files by extension (case-insensitive); the substring test `'pdf' in i`
# also matched non-PDF names that merely contain "pdf".
pdfs = [i for i in os.listdir() if i.lower().endswith('.pdf')]
# Agent that interrogates the RFP documents on behalf of a given team.
rfp_assesser = Agent(
    role = 'RFP Assesser',
    goal="You are a Consultant in the {team_name} team. You need to ask relevant questions to the tool to assess if there are any use-cases related to you",
    backstory = (
        # Adjacent string literals are joined verbatim, so every sentence needs
        # its own trailing separator; without it they fuse ("...teamClients...").
        "You are a consultant in the {team_name} team. "
        "Clients come to you with their request for proposals. "
        "These documents contain their vision for their company, or problems they would like to solve. "
        "You are assigned a Assesser tool which you can use to interact with each pdf"
    ),
    verbose = True,
    memory = True,
    llm = llm,
    # Tools must be the decorated tool objects. The original passed the
    # RFPAssesser class itself, which is not a tool. write_to_file is included
    # because the task's expected output asks the agent to use it.
    tools = [Assesser, write_to_file]
)
# Task: find and summarise the use-cases in the RFP that are relevant to the team.
summarise_usecases = Task(
    description=(
        # Trailing separators keep the concatenated sentences from fusing.
        "Ask questions relevant to you from the data using the tools assigned. You need to find all cases relevant to you as part of the {team_name} team. "
        "The RFP does not answer all questions. Where the answers are unavailable, you make educated assumptions and move on. "
        "You assess these documents to see where you, being part of the {team_name} team, can help solve some, if any, of these problems"
        ),
    expected_output = "A summary in markdown syntax detailing the use-cases that are relevant to your field, along with their page-numbers. Write these files using write_to_file tool",
    agent = rfp_assesser,
    # Pass the tool objects, not the RFPAssesser class; write_to_file is
    # required by expected_output above.
    tools = [Assesser, write_to_file]
)
# Single-agent crew running its one task sequentially. The kickoff call stays
# the last expression of the cell so the notebook displays its result.
crew = Crew(agents=[rfp_assesser], tasks=[summarise_usecases], process=Process.sequential)
crew.kickoff(
    inputs=dict(
        team_name='Data Science',
        team_tools='Data Science',
        pdfs=', '.join(pdfs),
    )
)

In [None]:
# List the PDFs in the working directory by extension (case-insensitive); the
# substring test `'pdf' in i` also matched unrelated names containing "pdf".
pdfs = [i for i in os.listdir() if i.lower().endswith('.pdf')]
', '.join(pdfs)

In [None]:
# Agent that interrogates the RFP through the Assesser / SearchRFP tools and
# writes its findings to a file.
rfp_assesser = Agent(
    role = 'RFP Assesser',
    goal="You are a Consultant in the {team_name} team. You need to ask relevant questions to the tool to assess if there are any use-cases related to you",
    backstory = (
        # Adjacent string literals are joined verbatim, so every sentence needs
        # its own trailing separator; without it they fuse ("...teamClients...").
        "You are a consultant in the {team_name} team. "
        "Clients come to you with their request for proposals. "
        "These documents contain their vision for their company, or problems they would like to solve. "
        "You may ask relevant questions to the documents to assess its relevance to your field using the Assesser tool. "
        "You dont present a solution for a problem that the client has not explicitly asked you to solve. "
        "If there is no problem mentioned explicitly related to your field, you may use the Assesser tool to find which potential use-cases can be created. "
        "You also have direct access to the RFP using the SearchRFP tool which results content directly from the document in a string format. "
        "DO NOT ask for the entire RFP, only interact with it by asking questions and searching keywords. "
        "DO NOT repeat questions, only ask one question once and only search a single keyword once. "
        "Ask a maximum of 2-3 questions"
    ),
    verbose = True,
    memory = True,
    llm = assesser.llm,
    # SearchRFP is listed because the backstory tells the agent it has it.
    tools = [Assesser, SearchRFP, write_to_file],
    # CrewAI's Agent names this limit `max_iter`; `max_iterations` is not an
    # Agent field, so the intended cap of 5 was not applied.
    max_iter = 5
)
# Task: find and summarise the use-cases in the RFP that are relevant to the team.
summarise_usecases = Task(
    description=(
        # Trailing separators keep the concatenated sentences from fusing.
        "Ask questions relevant to you from the data using the tools assigned. You need to find all cases relevant to you as part of the {team_name} team. "
        "The RFP does not answer all questions. Where the answers are unavailable, you make educated assumptions and move on. "
        "You assess these documents to see where you, being part of the {team_name} team, can help solve some, if any, of these problems"
        ),
    expected_output = "A summary in markdown syntax detailing the use-cases that are relevant to your field, along with their page-numbers. Write these files using write_to_file tool",
    # The original wrote `agent = agent`, a name that is not defined at this
    # point (it is only bound later, to a customAgent). The task belongs to
    # the RFP assesser agent.
    agent = rfp_assesser,
    tools = [Assesser, SearchRFP, write_to_file]
)

# Agent that turns the assessed use-cases into concrete, literature-backed solutions.
researcher = Agent(
    role = "Research Assistant",
    goal = "You will provide cutting edge solutions to the problems presented to you by RFP Assesser",
    backstory = (
        # Adjacent string literals are joined verbatim; the trailing ". " keeps
        # the sentences from fusing into one another.
        "Your coworker, RFP Assesser, has assessed the client's RFP and come up with potential places where {team_name} can be used. "
        "You have access to a search tool which you can use to find relevant literature online. "
        "You need to summarise each use-case into an achievable target. "
        "Then you will search online to find relevant literature that will help you orchestrate each of these solutions"
    ),
    verbose = True,
    memory = True,
    llm = assesser.llm
)

# Task: narrow the assessed use-cases down to {num_solutions} specific solutions.
specifying_task = Task(
    description = (
        # The second sentence was missing its verb ("You need to {num_solutions}
        # solutions"); trailing spaces keep the joined sentences from fusing.
        "You are provided with a holistic view of where the client wants to make improvements. "
        "You need to propose {num_solutions} solutions from these potential use-cases. "
        "You need to be very specific about what problem is being solved and how its being solved"
    ),
    expected_output = "A title and a summary for each usecase presented, using the information gathered by your coworker RFP Assesser",
    agent = researcher
)

# Task: back each proposed solution with literature found through web search.
literature_review = Task(
    description = (
        # Trailing ". " keeps the two concatenated sentences from fusing
        # ("...literatureSearch whatever...").
        "Now that you have specific tasks, you have been given the tools to search online for relevant literature. "
        "Search whatever you think is relevant and support your use-cases with proper literature review"
    ),
    expected_output = "Title for each usecase with a paragraph and a URL to link each solution with relevant literature",
    agent = researcher,
    tools = [DuckDuckGoSearch]
)

# Two-agent crew: assess the RFP, pick specific solutions, then review the
# literature. Tasks run in the listed order and the outcome is kept in `result`.
crew = Crew(
    agents=[rfp_assesser, researcher],
    tasks=[summarise_usecases, specifying_task, literature_review],
    process=Process.sequential,
)
result = crew.kickoff(
    inputs=dict(
        team_name='Data Science, AI and Machine Learning',
        team_tools='Data Science, AI and ML',
        num_solutions='4',
    )
)


In [None]:
# Markdown() requires text. Depending on the CrewAI version, kickoff() may
# return a result object rather than a plain string, so coerce explicitly
# (a no-op when `result` is already a str).
display(Markdown(str(result)))

# Custom multi-agent workflow I built myself using OOP

In [None]:

# Sample questions for the RAG helper; nothing in the visible cells reads this list.
questions = [
    "Can you give requirements of the client?",
    "What platform does client expect us to use? If the client does not mention explicitly, can you recommend one?",
    ]
# Commented-out direct queries against `assesser`, kept for reference.
# result = assesser.run("Can you give requirements of the client?")
# result = assesser.run("What platform does client expect us to use? If the client does not mention explicitly, can you recommend one?")
# result = assesser.run("""
# Assume you are a data science consultant assessing the RFP.
# Create a list of all usecases that can be proposed to the client
# based on what they have mentioned about their needs in the RFP.""")

# result = assesser.run("""
# Assume you are a data science consultant assessing the RFP.
# Create a list of all usecases that can be proposed to the client relating to data science, ai or machine learning
# based on what they have mentioned about their needs in the RFP.
# If there arent any usecases by the client, please recommend some""")
class customAgent:
    """Interview-style agent: questions a RAG helper, then proposes a solution.

    ``ask_question`` has the LLM generate ``max_iterations`` questions, sends
    each one to ``rag.run``, records the exchange in
    ``previous_interactions`` and finally asks the LLM for a solution, which
    is stored in ``final_output``.
    """

    def __init__(self, llm, rag, max_iterations, goal, backstory):
        # Collaborators: `rag` must expose run(str); `llm` must expose
        # invoke(str) returning an object with a `.content` attribute.
        self.rag, self.llm = rag, llm
        self.max_iterations = max_iterations
        self.goal, self.backstory = goal, backstory
        # Transcript of question/answer pairs, fed back into every prompt.
        self.previous_interactions = ""
        # Scratch state, overwritten at each step of ask_question().
        self.prompt = self.task = self.question = None
        self.answer = self.final_output = None

    def _briefing(self):
        # Preamble shared by the question prompts and the final prompt; it
        # reflects the transcript as it stands at the time of the call.
        return f"""
        You are an agent and you will be assigned a task.
        Your backstory is: {self.backstory}
        Your goal is: {self.goal}
        Your previous interactions are: {self.previous_interactions}
        You get to ask the client only {self.max_iterations} questions so be very specific and concise.
      """

    def ask_question(self):
        """Run the question/answer rounds, then store the final proposal.

        Returns nothing; results are left on ``previous_interactions`` and
        ``final_output``.
        """
        for _ in range(self.max_iterations):
            self.task = "Please respond with a single question"
            self.prompt = self._briefing() + self.task
            reply = self.llm.invoke(self.prompt)
            self.question = reply.content
            self.answer = self.rag.run(self.question)
            exchange = f"Question: {self.question}\nAnswer: {self.answer}\n\n"
            self.previous_interactions = self.previous_interactions + exchange

        # All rounds done: ask for the solution, with the full transcript in the prompt.
        self.task = "Please suggest your solution now based on your assessment of the client based on your interactions. Ask any relevant questions that can be forwarded to the client"
        self.prompt = self._briefing() + self.task
        self.final_output = self.llm.invoke(self.prompt).content




# Build the custom agent on top of the RAG helper (its chat model generates
# the questions, the helper itself answers them), run 15 question rounds and
# render the final proposal.
agent = customAgent(
    goal="""
        Your goal is to find use cases related to data science, AI and machine learning.
        Find atleast 6 usecases.
        Do not be generic, be specific with your usecases.
        Give your output in the format <Usecase Name>: <Brief Detail>
        """,
    backstory="""
    You are a data science consultant.
    You have access to a tool which is a retrieval augmented generator
    This tool has read the documents sent over by the client and acts as the owner of the PDFs
    You can ask it relevant questions related to the document to determine where you can intervene.
    You cannot ask the same question more than once.
    """,
    max_iterations=15,
    rag=assesser,
    llm=assesser.llm,
)
agent.ask_question()
display(Markdown(agent.final_output))


In [None]:
# Render the question/answer transcript accumulated by agent.ask_question().
display(Markdown(agent.previous_interactions))