# Autogen

## Use Case

- First, we upload a PDF or other document.
- Next, we ask one or more questions in plain text.
- Then the agents come into action: the first agent accepts the question and checks whether it is relevant to the given document.
- Then the retrieval agent retrieves candidate passages and ranks them, keeping the top 3.
- Next comes answer generation.
- Next comes validation.
- If the answer is valid, it is passed to the human agent for feedback and then stored in the vector DB.
- If the answer is not valid, the process starts again from retrieval.


In [None]:
pip install autogen PyPDF2 chromadb groq langchain-community huggingface pypdf

In [None]:
import autogen
import chromadb
from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import Chroma
from sentence_transformers import SentenceTransformer, util
import torch

# Load Llama model (Assuming Llama2 is hosted locally or via API)
from transformers import pipeline
# Groq-hosted Mixtral configuration shared by every AutoGen agent in this cell.
# The API key is read from the GROQ_API_KEY environment variable so that the
# secret is never committed with the notebook; export it before running.
import os

config_list = [
    {
        "model": "mixtral-8x7b-32768",
        "api_key": os.environ.get("GROQ_API_KEY"),
        "api_type": "groq",
        "max_completion_tokens": 2000,
    }
]

# Load the sentence-transformers model intended for semantic similarity.
# NOTE(review): nothing else in this cell references embedding_model — confirm
# it is needed here.
embedding_model = SentenceTransformer("all-MiniLM-L6-v2")

# Open the on-disk ChromaDB store and get (or create) the chunk collection.
chroma_client = chromadb.PersistentClient(path="./chroma_db")
chroma_collection = chroma_client.get_or_create_collection(name="document_qa")

# Load the PDF and split it into overlapping chunks
# (chunk_size=2000, chunk_overlap=50).
file_path = "/content/10-Q_-_Tesla_INC_-_10-24-2024.pdf"
loader = PyPDFLoader(file_path)
docs = loader.load()
text_splitter = RecursiveCharacterTextSplitter(chunk_size=2000, chunk_overlap=50)
texts = text_splitter.split_documents(docs)

# Store document chunks in ChromaDB and keep a plain-text copy in `documents`.
# Each chunk is added under its enumeration index as id, tagged with the page
# number taken from the chunk's metadata.
# NOTE(review): ids restart at "0" on every run against the same persistent
# collection — confirm how chromadb treats re-adding existing ids.
documents = []
for idx, text in enumerate(texts):
    documents.append(text.page_content)
    chroma_collection.add(
        ids=[str(idx)],
        documents=[text.page_content],
        metadatas=[{"source": f"Page {text.metadata['page']}"}]
    )

# Define AutoGen Agents
# One AssistantAgent per pipeline stage; all of them share config_list and
# differ only in name and system message.

# Stage 1: decides whether the question relates to the document.
question_verification_agent = autogen.AssistantAgent(
    name="QuestionVerificationAgent",
    llm_config={"config_list": config_list},
    system_message="Verify if the given question is relevant to the provided document.")

# Stage 2: asked to pick the most relevant passages.
retrieval_agent = autogen.AssistantAgent(
    name="RetrievalAgent",
    llm_config={"config_list": config_list},
    system_message="Retrieve the most relevant passages from the document.")

# Stage 3: writes the answer from the retrieved passages.
answer_generation_agent = autogen.AssistantAgent(
    name="AnswerGenerationAgent",
    llm_config={"config_list": config_list},
    system_message="Generate a refined answer based on retrieved document chunks.")

# Stage 4: checks the answer against the retrieved text.
validation_agent = autogen.AssistantAgent(
    name="ValidationAgent",
    llm_config={"config_list": config_list},
    system_message="Validate the generated answer against the retrieved text.")

# Stage 5: nominally stores validated answers.
# NOTE(review): this is a plain AssistantAgent with no tools registered in this
# cell, so it presumably cannot write to ChromaDB itself — confirm.
human_feedback_agent = autogen.AssistantAgent(
    name="HumanFeedbackAgent",
    llm_config={"config_list": config_list},
    system_message="Store validated answers in ChromaDB.")

# Ask a question and run it through the verification -> retrieval ->
# generation -> validation pipeline.
def _reply_text(reply):
    """Return the text of an agent reply.

    ``generate_reply`` may return a plain string, a message dict with a
    ``"content"`` entry, or ``None``; all three are normalised to ``str`` so
    that substring checks look at the message text instead of dict keys.
    """
    if reply is None:
        return ""
    if isinstance(reply, dict):
        return reply.get("content") or ""
    return str(reply)


def _looks_valid(verdict):
    """Return True when the validator's text says "valid" without negating it."""
    verdict = verdict.lower()
    # "invalid" contains "valid", so negations must be ruled out first.
    if "invalid" in verdict or "not valid" in verdict:
        return False
    return "valid" in verdict


question = "What are the key findings in the document?"
relevance_check = question_verification_agent.generate_reply(messages=[{"role": "user", "content": question}])
if "not relevant" in _reply_text(relevance_check).lower():
    print("The question is not relevant to the document.")
else:
    # The raw replies stay bound to their original names; only their text is
    # forwarded to the next agent.
    retrieved_texts = retrieval_agent.generate_reply(messages=[{"role": "user", "content": f"Documents: {documents}\n\nQuestion: {question}"}])
    retrieved_content = _reply_text(retrieved_texts)
    generated_answer = answer_generation_agent.generate_reply(messages=[{"role": "user", "content": f"{retrieved_content}\n\nQuestion: {question}"}])
    answer_content = _reply_text(generated_answer)
    validation_result = validation_agent.generate_reply(messages=[{"role": "user", "content": f"Answer: {answer_content}\nRetrieved: {retrieved_content}"}])

    if _looks_valid(_reply_text(validation_result)):
        human_feedback_agent.generate_reply(messages=[{"role": "user", "content": f"Question: {question}\nAnswer: {answer_content}"}])
        print("Answer:", answer_content)
    else:
        print("Answer validation failed. Restarting retrieval...")


## Autogen version 1 (without human feedback)

In [None]:
import autogen
import chromadb
from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import Chroma
from sentence_transformers import SentenceTransformer, util
import torch

# Load Llama model (Assuming Llama2 is hosted locally or via API)
from transformers import pipeline
# Groq-hosted Mixtral configuration shared by every AutoGen agent in this cell.
# The API key is read from the GROQ_API_KEY environment variable so that the
# secret is never committed with the notebook; export it before running.
import os

config_list = [
    {
        "model": "mixtral-8x7b-32768",
        "api_key": os.environ.get("GROQ_API_KEY"),
        "api_type": "groq",
        "max_completion_tokens": 2000,
    }
]

# Load the sentence-transformers model used by retrieve_relevant_chunks to
# embed the question and the chunks.
embedding_model = SentenceTransformer("all-MiniLM-L6-v2")

# Open the on-disk ChromaDB store and get (or create) the chunk collection.
chroma_client = chromadb.PersistentClient(path="./chroma_db")
chroma_collection = chroma_client.get_or_create_collection(name="document_qa")

# Load the PDF and split it into overlapping chunks
# (chunk_size=2000, chunk_overlap=50).
file_path = "/content/10-Q_-_Tesla_INC_-_10-24-2024.pdf"
loader = PyPDFLoader(file_path)
docs = loader.load()
text_splitter = RecursiveCharacterTextSplitter(chunk_size=2000, chunk_overlap=50)
texts = text_splitter.split_documents(docs)

# Store document chunks in ChromaDB and keep a plain-text copy in `documents`,
# which is what the in-memory similarity search below iterates over.
# NOTE(review): ids restart at "0" on every run against the same persistent
# collection — confirm how chromadb treats re-adding existing ids.
documents = []
for idx, text in enumerate(texts):
    documents.append(text.page_content)
    chroma_collection.add(
        ids=[str(idx)],
        documents=[text.page_content],
        metadatas=[{"source": f"Page {text.metadata['page']}"}]
    )

# Function to retrieve the most relevant chunks (top 3 by default)
def retrieve_relevant_chunks(question, top_k=3):
    """Return the chunks of ``documents`` most similar to ``question``.

    The question and every chunk are embedded with ``embedding_model`` and
    ranked by cosine similarity. All chunks are encoded in a single batched
    ``encode`` call instead of one call per chunk.

    Args:
        question: The user's question as plain text.
        top_k: Maximum number of chunks to return (default 3).

    Returns:
        The selected chunk texts, most similar first, joined by blank lines.
        An empty string when there are no chunks or ``top_k`` is not positive.
    """
    if not documents or top_k <= 0:
        return ""

    query_embedding = embedding_model.encode(question, convert_to_tensor=True)
    doc_embeddings = embedding_model.encode(documents, convert_to_tensor=True)

    # One row of similarities (question vs. every chunk) as plain floats.
    scores = util.pytorch_cos_sim(query_embedding, doc_embeddings)[0].tolist()

    # sorted() is stable, so equally scored chunks keep document order.
    ranked = sorted(range(len(documents)), key=scores.__getitem__, reverse=True)
    return "\n\n".join(documents[i] for i in ranked[:top_k])

# Define AutoGen Agents
# One AssistantAgent per pipeline stage; all of them share config_list and
# differ only in name and system message.

# Stage 1: decides whether the question relates to the document.
question_verification_agent = autogen.AssistantAgent(
    name="QuestionVerificationAgent",
    llm_config={"config_list": config_list},
    system_message="Verify if the given question is relevant to the provided document."
)

# Stage 2: asked to pick the most relevant passages.
retrieval_agent = autogen.AssistantAgent(
    name="RetrievalAgent",
    llm_config={"config_list": config_list},
    system_message="Retrieve the most relevant passages from the document."
)

# Stage 3: writes the answer from the retrieved passages.
answer_generation_agent = autogen.AssistantAgent(
    name="AnswerGenerationAgent",
    llm_config={"config_list": config_list},
    system_message="Generate a refined answer based on retrieved document chunks."
)

# Stage 4: checks the answer against the retrieved text.
validation_agent = autogen.AssistantAgent(
    name="ValidationAgent",
    llm_config={"config_list": config_list},
    system_message="Validate the generated answer against the retrieved text."
)

# Stage 5: nominally stores validated answers.
# NOTE(review): this is a plain AssistantAgent with no tools registered in this
# cell, so it presumably cannot write to ChromaDB itself — confirm.
human_feedback_agent = autogen.AssistantAgent(
    name="HumanFeedbackAgent",
    llm_config={"config_list": config_list},
    system_message="Store validated answers in ChromaDB."
)

# Ask a question and run it through the verification -> retrieval ->
# generation -> validation pipeline.
def _reply_text(reply):
    """Return the text of an agent reply.

    ``generate_reply`` may return a plain string, a message dict with a
    ``"content"`` entry, or ``None``; all three are normalised to ``str`` so
    that substring checks look at the message text instead of dict keys.
    """
    if reply is None:
        return ""
    if isinstance(reply, dict):
        return reply.get("content") or ""
    return str(reply)


def _looks_valid(verdict):
    """Return True when the validator's text says "valid" without negating it."""
    verdict = verdict.lower()
    # "invalid" contains "valid", so negations must be ruled out first.
    if "invalid" in verdict or "not valid" in verdict:
        return False
    return "valid" in verdict


question = "What is the future growth of tesla? what are the accounting qualities in tesla? tell me the growth in the last 4 months? what are the different revenue segments in tesla"
relevance_check = question_verification_agent.generate_reply(
    messages=[{"role": "user", "content": f"Document:\n{documents[:5]}\n\nQuestion: {question}"}]  # Passing first 5 chunks
)

if "not relevant" in _reply_text(relevance_check).lower():
    print("The question is not relevant to the document.")
else:
    retrieved_texts = retrieve_relevant_chunks(question)  # Get only top 3 relevant chunks
    # The raw replies stay bound to their original names so they can still be
    # inspected afterwards; only their text is forwarded to the next agent.
    retrieved_reply = retrieval_agent.generate_reply(messages=[{"role": "user", "content": f"Relevant Documents: {retrieved_texts}\n\nQuestion: {question}"}])
    retrieved_content = _reply_text(retrieved_reply)
    generated_answer = answer_generation_agent.generate_reply(messages=[{"role": "user", "content": f"{retrieved_content}\n\nQuestion: {question}"}])
    answer_content = _reply_text(generated_answer)
    validation_result = validation_agent.generate_reply(messages=[{"role": "user", "content": f"Answer: {answer_content}\nRetrieved: {retrieved_content}"}])

    if _looks_valid(_reply_text(validation_result)):
        human_feedback_agent.generate_reply(messages=[{"role": "user", "content": f"Question: {question}\nAnswer: {answer_content}"}])
        print("Answer:", answer_content)
    else:
        print("Answer validation failed.")

In [None]:
# Inspect the raw reply returned by QuestionVerificationAgent.
relevance_check

{'content': "The provided document is a Form 10-Q filed by Tesla, Inc. with the US Securities and Exchange Commission (SEC) for the quarter ended September 30, 2024. This document primarily focuses on Tesla's financial statements, management's discussion of financial condition and results of operations, risk factors, and other relevant information. However, it doesn't provide specific information regarding the future growth of Tesla, accounting qualities, the growth in the last 4 months, or different revenue segments.\n\nTo answer these questions, one would need to refer to additional sources that provide Tesla's strategic plans, financial performance analysis, and segment-wise revenue information. It is also important to note that the company's growth prospects, accounting qualities, and past performance can be assessed by analyzing historical financial reports, industry trends, and expert opinions.",
 'refusal': None,
 'role': 'assistant',
 'audio': None,
 'function_call': None,
 'to

In [None]:
# Inspect the raw reply returned by ValidationAgent.
validation_result

{'content': "The generated answer is valid as it accurately summarizes the information provided in the retrieved text. Both the generated answer and the retrieved text discuss Tesla's strong growth in total revenues, the preparation and auditing of Tesla's financial statements, and the company's three main revenue segments.\n\nHowever, there are a few minor differences between the two texts. The generated answer states that total revenues increased from $23,350 million in Q3 2023 to $71,983 million in Q3 2024 when comparing the nine-month periods. However, the retrieved text correctly states that the increase is from Q3 2023 to the same period in 2024. The generated answer also mentions that revenues increased in the last 4 months, while the retrieved text states that the information is not provided. These differences do not affect the validity of the generated answer, as the main points and information are accurately presented.",
 'refusal': None,
 'role': 'assistant',
 'audio': None,

In [None]:
# Inspect the raw reply returned by AnswerGenerationAgent.
generated_answer

{'content': "The future growth of Tesla has seen an increase in total revenues from $23,350 million in the nine months ended September 30, 2023, to $71,983 million in the same period of 2024, indicating strong growth.\n\nTesla's financial statements are prepared in accordance with generally accepted accounting principles (GAAP), audited by an independent auditing firm, and present financial information in a clear and transparent manner with notes providing additional details and explanations.\n\nIn the last 4 months, Tesla's total revenues have increased from $57,879 million in the three months ended June 30, 2024, to $71,983 million in the nine months ended September 30, 2024, indicating strong growth over this period.\n\nTesla has three main revenue segments:\n1. Automotive: This segment includes the sale of electric vehicles and related regulatory credits, as well as leasing and financing services.\n2. Energy generation and storage: This segment includes the sale of solar energy sys

## Autogen version 3 (without using GroupChat)

In [None]:
import autogen
import chromadb
from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from sentence_transformers import SentenceTransformer, util

# Groq-hosted Mixtral configuration shared by every AutoGen agent in this cell.
# The API key is read from the GROQ_API_KEY environment variable so that the
# secret is never committed with the notebook; export it before running.
import os

config_list = [
    {
        "model": "mixtral-8x7b-32768",
        "api_key": os.environ.get("GROQ_API_KEY"),
        "api_type": "groq",
        "max_completion_tokens": 3500,
    }
]

# Load the sentence-transformers model used by retrieve_relevant_chunks to
# embed the question and the chunks.
embedding_model = SentenceTransformer("all-MiniLM-L6-v2")

# Open the on-disk ChromaDB store and get (or create) the chunk collection.
chroma_client = chromadb.PersistentClient(path="./chroma_db")
chroma_collection = chroma_client.get_or_create_collection(name="document_qa")

# Load the PDF (path relative to the working directory) and split it into
# overlapping chunks (chunk_size=2000, chunk_overlap=50).
file_path = "10-Q_-_Tesla_INC_-_10-24-2024.pdf"
loader = PyPDFLoader(file_path)
docs = loader.load()
text_splitter = RecursiveCharacterTextSplitter(chunk_size=2000, chunk_overlap=50)
texts = text_splitter.split_documents(docs)

# Store document chunks in ChromaDB and keep a plain-text copy in `documents`,
# which is what the in-memory similarity search below iterates over.
# NOTE(review): ids restart at "0" on every run against the same persistent
# collection — confirm how chromadb treats re-adding existing ids.
documents = []
for idx, text in enumerate(texts):
    documents.append(text.page_content)
    chroma_collection.add(
        ids=[str(idx)],
        documents=[text.page_content],
        metadatas=[{"source": f"Page {text.metadata['page']}"}]
    )

# Function to retrieve the most relevant chunks (top 3 by default)
def retrieve_relevant_chunks(question, top_k=3):
    """Return the chunks of ``documents`` most similar to ``question``.

    The question and every chunk are embedded with ``embedding_model`` and
    ranked by cosine similarity. All chunks are encoded in a single batched
    ``encode`` call instead of one call per chunk.

    Args:
        question: The user's question as plain text.
        top_k: Maximum number of chunks to return (default 3).

    Returns:
        The selected chunk texts, most similar first, joined by blank lines.
        An empty string when there are no chunks or ``top_k`` is not positive.
    """
    if not documents or top_k <= 0:
        return ""

    query_embedding = embedding_model.encode(question, convert_to_tensor=True)
    doc_embeddings = embedding_model.encode(documents, convert_to_tensor=True)

    # One row of similarities (question vs. every chunk) as plain floats.
    scores = util.pytorch_cos_sim(query_embedding, doc_embeddings)[0].tolist()

    # sorted() is stable, so equally scored chunks keep document order.
    ranked = sorted(range(len(documents)), key=scores.__getitem__, reverse=True)
    return "\n\n".join(documents[i] for i in ranked[:top_k])

# One LLM-backed AssistantAgent per pipeline stage; all share config_list and
# differ only in name and system message.

# Stage 1: decides whether the question relates to the document.
question_verification_agent = autogen.AssistantAgent(name="QuestionVerificationAgent",llm_config={"config_list": config_list},
                                                     system_message="Verify if the given question is relevant to the provided document.")

# Stage 2: asked to pick the most relevant passages.
retrieval_agent = autogen.AssistantAgent(name="RetrievalAgent",llm_config={"config_list": config_list},
                                         system_message="Retrieve the most relevant passages from the document.")

# Stage 3: writes the answer from the retrieved passages.
answer_generation_agent = autogen.AssistantAgent(name="AnswerGenerationAgent",llm_config={"config_list": config_list},
                                                 system_message="Generate a refined answer based on retrieved document chunks.")

# Stage 4: checks the answer against the retrieved text.
validation_agent = autogen.AssistantAgent(name="ValidationAgent",llm_config={"config_list": config_list},
                                          system_message="Validate the generated answer against the retrieved text.")

# Human-in-the-loop agent, created as a UserProxyAgent without an llm_config.
# NOTE(review): ask_question below collects feedback with input() and never
# calls this agent — confirm whether it is still needed.
human_feedback_agent = autogen.UserProxyAgent(name="HumanFeedbackAgent",system_message="Provide human feedback on the generated answer. Reply 'Yes' if correct, 'No' if incorrect.")


def ask_question(question, max_attempts=3):
    """Answer ``question`` from the document, with validation and human sign-off.

    Pipeline: relevance check -> retrieval -> answer generation -> validation
    -> human feedback. A round is retried when no answer is produced, when the
    validator flags the answer as invalid, or when the human rejects it.

    Args:
        question: The user's question as plain text.
        max_attempts: Upper bound on retrieval/generation rounds, so that a
            run of rejections cannot loop forever (default 3).

    Returns:
        None. Progress is printed, and an answer approved by the human is
        added to ``chroma_collection``.
    """
    import uuid  # local import: this cell's import block does not include it

    def reply_text(reply):
        # generate_reply may hand back a message dict, a plain string, or None.
        if reply is None:
            return ""
        if isinstance(reply, dict):
            return reply.get("content") or ""
        return str(reply)

    # Step 1: Question relevance check
    relevance_check = question_verification_agent.generate_reply(messages=[{"role": "user", "content": f"Document:\n{documents[:5]}\n\nQuestion: {question}"}])

    if "not relevant" in reply_text(relevance_check).lower():
        print("The question is not relevant to the document.")
        return

    for _ in range(max_attempts):
        # Step 2: Retrieve relevant document chunks and generate an answer
        retrieved_texts = retrieve_relevant_chunks(question)
        retrieved_reply = reply_text(retrieval_agent.generate_reply(messages=[{"role": "user", "content": f"Relevant Documents: {retrieved_texts}\n\nQuestion: {question}"}]))
        answer_text = reply_text(answer_generation_agent.generate_reply(messages=[{"role": "user", "content": f"{retrieved_reply}\n\nQuestion: {question}"}]))

        if not answer_text:
            print(" No answer was generated. Re-attempting retrieval...")
            continue

        # Step 3: Validation; only answers not flagged invalid reach the human.
        verdict = reply_text(validation_agent.generate_reply(messages=[{"role": "user", "content": f"Answer: {answer_text}\nRetrieved: {retrieved_reply}"}])).lower()
        if "invalid" in verdict or "not valid" in verdict:
            print(" Answer validation failed. Re-attempting retrieval...")
            continue

        # Step 4: Human Feedback
        print("\n Generated Answer:")
        print(answer_text)

        user_feedback = ""
        while user_feedback not in ("yes", "no"):
            user_feedback = input("\n Is this answer correct? (yes/no): ").strip().lower()
            if user_feedback not in ("yes", "no"):
                print(" Invalid input. Please type 'yes' or 'no'.")

        if user_feedback == "yes":
            # A random id keeps repeated approvals from colliding with each
            # other or with the numeric chunk ids.
            chroma_collection.add(
                ids=[f"answer-{uuid.uuid4()}"],
                documents=[answer_text],
            )
            print(" ✅ Answer stored in ChromaDB.")
            return

        print(" Feedback rejected. Re-attempting retrieval...")

    print("Max retries reached. Please refine your question.")

# Example question: several sub-questions about the Tesla 10-Q, sent through
# the interactive ask_question loop (prompts for yes/no feedback on stdin).
question = "What is the future growth of Tesla? What are the accounting qualities in Tesla? Tell me the growth in the last 4 months? What are the different revenue segments in Tesla?"
ask_question(question)





 Generated Answer:
The future growth of Tesla, as of September 30, 2024, includes total revenues of $71,983 million for the nine months ended, an increase of 16.7% compared to the same period in 2023. The net income attributable to common stockholders for the same period was $4,774 million, an increase of 166.1% compared to 2023.

In terms of accounting qualities, Tesla's consolidated financial statements are prepared in accordance with U.S. GAAP. The company has a history of generating positive net income and cash flows from operations. However, Tesla has a significant amount of intangible assets, such as goodwill and trademarks, which are subject to impairment.

In the last 4 months, specifically the third quarter of 2024, Tesla's total revenues were $25,182 million, an increase of 7.8% compared to the same period in 2023. The net income attributable to common stockholders for this quarter was $2,167 million, an increase of 15.7% compared to 2023.

The different revenue segments in 

## Autogen using round robin groupchat version 4 and main version

In [None]:
import os
import autogen
import chromadb
from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from sentence_transformers import SentenceTransformer, util

# Load the Groq API key from the GROQ_API_KEY environment variable only.
# No literal fallback: a real key must never be committed with the notebook.
api_key = os.getenv("GROQ_API_KEY")
config_list = [
    {
        "model": "mixtral-8x7b-32768",
        "api_key": api_key,
        "api_type": "groq",
        "max_completion_tokens": 3500,
    }
]

# Load embeddings model (used to embed the question in retrieve_relevant_chunks)
embedding_model = SentenceTransformer("all-MiniLM-L6-v2")

# Initialize ChromaDB (fresh start)
chroma_client = chromadb.PersistentClient(path="./chroma_db")
# delete_collection raises when the collection does not exist yet (e.g. on the
# very first run), so only drop it when it is actually there. list_collections
# yields Collection objects in some chromadb releases and bare names in
# others; getattr covers both.
existing_collections = {getattr(c, "name", c) for c in chroma_client.list_collections()}
if "document_qa" in existing_collections:
    chroma_client.delete_collection(name="document_qa")  # Reset to avoid old data
chroma_collection = chroma_client.create_collection(name="document_qa")

# Load and process document
file_path = "10-Q_-_Tesla_INC_-_10-24-2024.pdf"
loader = PyPDFLoader(file_path)
docs = loader.load()
print(f"Loaded {len(docs)} pages from {file_path}")
if docs:  # an empty PDF has no first page to preview
    print("Sample content:", docs[0].page_content[:200])

text_splitter = RecursiveCharacterTextSplitter(chunk_size=2000, chunk_overlap=50)
texts = text_splitter.split_documents(docs)

# Store document chunks in ChromaDB, keeping a plain-text copy in `documents`.
# Each chunk is added under its enumeration index as id, tagged with the page
# number taken from the chunk's metadata.
documents = []
for idx, text in enumerate(texts):
    documents.append(text.page_content)
    chroma_collection.add(
        ids=[str(idx)],
        documents=[text.page_content],
        metadatas=[{"source": f"Page {text.metadata['page']}"}]
    )

# Look up the 3 closest chunks for a question in the ChromaDB collection
def retrieve_relevant_chunks(question):
    """Return the top 3 stored chunks for ``question`` as one string.

    The question is embedded with ``embedding_model`` and the vector is used to
    query ``chroma_collection``. The matching chunk texts are joined by blank
    lines; when the query yields no documents the sentinel text
    "No relevant chunks found." is returned instead.
    """
    not_found = "No relevant chunks found."

    question_vector = embedding_model.encode(question, convert_to_tensor=False).tolist()
    response = chroma_collection.query(query_embeddings=[question_vector], n_results=3)

    # Guard clauses mirror the two "nothing came back" shapes of the result.
    if "documents" not in response or not response["documents"]:
        return not_found

    top_chunks = response["documents"][0]
    if not top_chunks:
        return not_found

    return "\n\n".join(top_chunks)

# Define AutoGen agents
# One LLM-backed AssistantAgent per pipeline stage; all share config_list and
# differ only in name and system message.

# Stage 1: decides whether the question relates to the document.
question_verification_agent = autogen.AssistantAgent(
    name="QuestionVerificationAgent",llm_config={"config_list": config_list},
    system_message="Verify if the given question is relevant to the provided document."
)

# Stage 2: asked to pick the most relevant passages.
retrieval_agent = autogen.AssistantAgent(
    name="RetrievalAgent",llm_config={"config_list": config_list},
    system_message="Retrieve the most relevant passages from the document."
)

# Stage 3: writes the answer from the retrieved passages.
answer_generation_agent = autogen.AssistantAgent(
    name="AnswerGenerationAgent",llm_config={"config_list": config_list},
    system_message="Generate a refined answer based on retrieved document chunks."
)

# Stage 4: checks the answer against the retrieved text.
validation_agent = autogen.AssistantAgent(
    name="ValidationAgent",llm_config={"config_list": config_list},
    system_message="Validate the generated answer against the retrieved text."
)

# Human-in-the-loop agent, created as a UserProxyAgent without an llm_config.
# NOTE(review): presumably it prompts on stdin when its turn comes in the
# group chat — confirm the default human_input_mode.
human_feedback_agent = autogen.UserProxyAgent(
    name="HumanFeedbackAgent",
    system_message="Provide human feedback on the generated answer. Reply 'Yes' if correct, 'No' if incorrect."
)

# Define GroupChat and Manager
# The group contains all five agents and starts with an empty message list.
# speaker_selection_method="round_robin" — presumably the agents speak in the
# order listed here; confirm against the AutoGen GroupChat documentation.
group_chat = autogen.GroupChat(
    agents=[question_verification_agent, retrieval_agent, answer_generation_agent, validation_agent, human_feedback_agent],messages=[],
    speaker_selection_method="round_robin"
)

# The manager drives group_chat and uses the same LLM configuration as the agents.
manager = autogen.GroupChatManager(
    groupchat=group_chat,
    llm_config={"config_list": config_list}
)

# Function to ask a question
def ask_question(question, retries=3):
    """Answer ``question`` via the group chat and store it after human approval.

    Each round retrieves the top chunks, starts a chat through ``manager``,
    picks the first message authored by AnswerGenerationAgent from the chat
    history, and asks the human for a yes/no verdict. A "no" starts another
    round until ``retries`` rounds have been used.

    Args:
        question: The user's question as plain text.
        retries: Maximum number of rounds to run (default 3).

    Returns:
        None. Progress is printed, and an approved answer is added to
        ``chroma_collection``.
    """
    import uuid  # local import: this cell's import block does not include it

    # Iterative retry loop: same bound as before, without growing the stack.
    while retries > 0:
        retrieved_texts = retrieve_relevant_chunks(question)
        if "No relevant chunks" in retrieved_texts:
            print(retrieved_texts)
            return

        initial_message = f"Relevant Documents:\n{retrieved_texts}\n\nQuestion: {question}"
        print(f"\nInitiating chat with question: {question}")

        # NOTE(review): the manager opens the chat with a single agent here;
        # the usual pattern is an agent initiating the chat with the manager —
        # confirm this drives the whole group as intended.
        chat_result = manager.initiate_chat(
            recipient=question_verification_agent,
            message=initial_message,
            max_turns=5,
        )

        # Not every history entry is guaranteed to carry "name" or "content",
        # so read both with .get instead of indexing.
        generated_answer = next(
            (
                msg.get("content") or ""
                for msg in chat_result.chat_history
                if msg.get("name") == "AnswerGenerationAgent"
            ),
            "",
        )

        if not generated_answer:
            print("No answer generated by AnswerGenerationAgent.")
            return

        print("\nGenerated Answer:")
        print(generated_answer)

        # Keep asking until the reply is usable, instead of silently ending
        # the run on a typo.
        user_feedback = ""
        while user_feedback not in ("yes", "no"):
            user_feedback = input("\nIs this answer correct? (yes/no): ").strip().lower()
            if user_feedback not in ("yes", "no"):
                print("Invalid input. Please type 'yes' or 'no'.")

        if user_feedback == "yes":
            # A random id keeps repeated approvals from colliding with each
            # other or with the numeric chunk ids.
            new_id = f"answer-{uuid.uuid4()}"
            chroma_collection.add(
                ids=[new_id],
                documents=[generated_answer]
            )
            print(f"✅ Answer stored in ChromaDB with ID {new_id}.")
            return

        print("Feedback rejected. Re-attempting retrieval...")
        retries -= 1

    print("Max retries reached. Please refine your question.")

# Example question, run through ask_question with its default of 3 retries
# (prompts for yes/no feedback on stdin).
question = "What are the Material Cash Requirements in Tesla according to the 10-Q?"
ask_question(question)

## CrewAI (didn't work: it asks for an OpenAI key)

In [None]:
pip install crewai

In [None]:
import chromadb
from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import Chroma
from sentence_transformers import SentenceTransformer, util
import torch
from crewai import Task, Agent, Crew
from transformers import pipeline

# Groq-hosted Gemma configuration. The API key is read from the GROQ_API_KEY
# environment variable so that the secret is never committed with the
# notebook; export it before running.
# NOTE(review): this is an AutoGen-style list of dicts; CrewAI presumably
# expects an LLM object or model string instead — confirm.
import os

llm = [
    {
        "model": "gemma2-9b-it",
        "api_key": os.environ.get("GROQ_API_KEY"),
        "api_type": "groq",
        "max_completion_tokens": 2000,
    }
]

# Load the sentence-transformers model.
# NOTE(review): nothing else in this cell references embedding_model — confirm
# it is needed here.
embedding_model = SentenceTransformer("all-MiniLM-L6-v2")

# Open the on-disk ChromaDB store and get (or create) the chunk collection.
chroma_client = chromadb.PersistentClient(path="./chroma_db")
chroma_collection = chroma_client.get_or_create_collection(name="document_qa")

# Load the PDF and split it into overlapping chunks
# (chunk_size=512, chunk_overlap=50 — smaller chunks than the AutoGen cells).
loader = PyPDFLoader("/content/10-Q_-_Tesla_INC_-_10-24-2024.pdf")
docs = loader.load()
text_splitter = RecursiveCharacterTextSplitter(chunk_size=512, chunk_overlap=50)
texts = text_splitter.split_documents(docs)

# Add each chunk to the collection under its enumeration index as id, tagged
# with the page number taken from the chunk's metadata.
# NOTE(review): ids restart at "0" on every run against the same persistent
# collection — confirm how chromadb treats re-adding existing ids.
for idx, text in enumerate(texts):
    chroma_collection.add(
        ids=[str(idx)],
        documents=[text.page_content],
        metadatas=[{"source": f"Page {text.metadata['page']}"}]
    )

# CrewAI agents, one per pipeline stage. Each is described only by name, role,
# goal and backstory; no llm or tools are passed to any of them here.

# Stage 1: relevance check of the question.
question_verification_agent = Agent(
    name="Question Verification Agent",
    role="Verifier",
    goal="Verify if the given question is relevant to the provided document.",
    backstory="An AI-powered verifier trained to assess whether a question aligns with the document content, ensuring relevant queries are processed further."
)

# Stage 2: retrieval of the top 3 chunks.
# NOTE(review): no retrieval tool is attached, so presumably this agent cannot
# query chroma_collection — confirm.
retrieval_agent = Agent(
    name="Retrieval Agent",
    role="Retriever",
    goal="Retrieve the most relevant passages from the document. Return only the top 3 ranked chunks.",
    backstory="A highly efficient AI-powered retriever designed to fetch the most relevant document sections using advanced semantic search techniques."
)

# Stage 3: answer generation from the retrieved chunks.
answer_generation_agent = Agent(
    name="Answer Generation Agent",
    role="Generator",
    goal="Generate a refined answer based on the retrieved document chunks. Keep responses concise.",
    backstory="An advanced language model-based agent that synthesizes information from retrieved text and generates well-structured, accurate answers."
)

# Stage 4: validation of the answer against the retrieved text.
validation_agent = Agent(
    name="Validation Agent",
    role="Validator",
    goal="Validate the generated answer against the retrieved text. If invalid, request another retrieval cycle.",
    backstory="A strict AI reviewer ensuring the generated answers align with the retrieved content, preventing misinformation."
)

# Stage 5: storage of validated answers.
human_feedback_agent = Agent(
    name="Human Feedback Agent",
    role="Reviewer",
    goal="Store validated answers in ChromaDB.",
    backstory="A human-in-the-loop AI responsible for collecting and incorporating expert feedback into the database for continuous improvement."
)

# CrewAI tasks, one per agent, in pipeline order. Each task carries a static
# description and expected output; none of the descriptions contains a
# placeholder for the question or for a previous step's result.

# Task 1: relevance check.
question_verification_task = Task(
    description="Check if the question is relevant to the document.",
    agent=question_verification_agent,
    expected_output="A response indicating whether the question is relevant or not."
)

# Task 2: retrieval.
retrieval_task = Task(
    description="Retrieve top 3 relevant document chunks.",
    agent=retrieval_agent,
    expected_output="A list of the top 3 relevant document chunks from the database."
)

# Task 3: answer generation.
answer_generation_task = Task(
    description="Generate a concise answer using retrieved chunks.",
    agent=answer_generation_agent,
    expected_output="A well-structured answer generated from the retrieved text."
)

# Task 4: validation.
validation_task = Task(
    description="Validate the answer against retrieved text.",
    agent=validation_agent,
    expected_output="A confirmation if the generated answer is valid or a request to redo retrieval."
)

# Task 5: storing the validated answer.
feedback_task = Task(
    description="Store validated answers in ChromaDB.",
    agent=human_feedback_agent,
    expected_output="The validated answer is stored in ChromaDB with feedback."
)

# Create CrewAI workflow: all five agents and their tasks, in pipeline order.
# NOTE(review): `llm` is passed to the Crew as a list of dicts, and the section
# heading reports that this cell asked for an OpenAI key; presumably the Groq
# settings are not picked up this way — confirm against the CrewAI docs.
crew = Crew(
    agents=[
        question_verification_agent, retrieval_agent, answer_generation_agent, validation_agent, human_feedback_agent
    ],
    tasks=[question_verification_task, retrieval_task, answer_generation_task, validation_task, feedback_task],
    llm=llm,
)

# Main Process Execution
# Each step below calls crew.kickoff on the same crew with different inputs.
# NOTE(review): the task descriptions defined above contain no {placeholders},
# so presumably these inputs are never interpolated, and every kickoff runs the
# crew's full task list rather than a single stage — confirm.
question = "What are the key findings in the document?"
verification_result = crew.kickoff(inputs={"question": question})

# NOTE(review): the `in` checks assume kickoff returns a plain string — confirm
# the actual return type.
if "not relevant" in verification_result:
    print("The question is not relevant to the document.")
else:
    retrieved_texts = crew.kickoff(inputs={"question": question})
    generated_answer = crew.kickoff(inputs={"retrieved_texts": retrieved_texts})
    validation_result = crew.kickoff(inputs={"answer": generated_answer})

    # NOTE(review): "valid" is also a substring of "invalid", so this check
    # passes for both verdicts.
    if "valid" in validation_result:
        # NOTE(review): confirm that kickoff accepts a `tasks` argument.
        crew.kickoff(inputs={"answer": generated_answer}, tasks=[feedback_task])
        print("Answer:", generated_answer)
    else:
        # Only reports the failure; no retrieval is re-run here.
        print("Answer validation failed. Reattempting retrieval.")
