### Importing Necessary Libraries


In [1]:
# from langchain.chat_models import ChatOpenAI
from langchain_openai import ChatOpenAI 

from langchain.document_loaders import  PyPDFLoader
from langchain.vectorstores import  FAISS
from langchain.text_splitter import  RecursiveCharacterTextSplitter
from langchain.embeddings import OpenAIEmbeddings 
from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain
from langchain.docstore.document import Document
from langchain.chains.summarize import load_summarize_chain
from time import monotonic
from langchain_groq import ChatGroq
from langchain_core.pydantic_v1 import BaseModel, Field
from langchain_core.output_parsers import JsonOutputParser
from dotenv import load_dotenv
from langchain.agents import initialize_agent, Tool
from langchain.chains.question_answering import load_qa_chain
from pprint import pprint
import textwrap
import os
from datasets import Dataset
from ragas import evaluate
from ragas.metrics import (
    answer_correctness,
    faithfulness,
    answer_relevancy,
    context_recall,
    answer_similarity
)

from langgraph.graph import END, StateGraph
from typing_extensions import TypedDict
from langchain_core.runnables.graph import MermaidDrawMethod
from IPython.display import display, Image
from langchain.schema import AIMessage
from langchain import hub
from langgraph.prebuilt import create_react_agent
from typing import List, Tuple, Annotated, TypedDict
from langchain_core.pydantic_v1 import BaseModel, Field

import operator
import langgraph

### Helper functions for the notebook
from helper_functions import num_tokens_from_string, replace_t_with_space, replace_double_lines_with_one_line, split_into_chapters,\
analyse_metric_results, escape_quotes, format_state_past_steps, clean_empty_fields_dictionary, process_replanner_output

load_dotenv()

os.environ["PYDEVD_WARN_EVALUATION_TIMEOUT"] = "100000"

  from .autonotebook import tqdm as notebook_tqdm


### Setting Preferred Encoding for PyPDF on Google Colab


In [2]:
import locale


def getpreferredencoding(do_setlocale=True):
    """Report UTF-8 as the preferred encoding; the argument is ignored."""
    return "UTF-8"


# Replace the locale lookup so PyPDF can be used on Google Colab.
locale.getpreferredencoding = getpreferredencoding

### Setting OPENAI and GROQ API keys

In [2]:
# Only re-export the OpenAI key when it is actually configured: assigning
# None into os.environ raises TypeError, which hides the real problem
# (a missing key) behind an unrelated-looking error.
openai_api_key = os.getenv("OPENAI_API_KEY")
if openai_api_key is not None:
    os.environ["OPENAI_API_KEY"] = openai_api_key

# The Groq key is passed explicitly to each ChatGroq instance below.
groq_api_key = os.getenv("GROQ_API_KEY")

### Defining Path to Harry Potter PDF


In [4]:
# PDF of the book that is split into chapters and encoded below.
hp_pdf_path = "Harry_Potter_Book_1_The_Sorcerers_Stone.pdf"

### Splitting the PDF into Chapters and Preprocessing


In [5]:
# Split the book into chapters, pass the result through replace_t_with_space,
# then report how many chapters were produced.
chapters = replace_t_with_space(split_into_chapters(hp_pdf_path))
print(len(chapters))

17


### Defining Prompt Template for Summarization


In [6]:
# Instruction sent to the summarization chain; {text} is replaced with the
# content to summarize. (The previous wording, "summary of about of the
# following", was a garbled sentence.)
summarization_prompt_template = """Write an extensive summary of the following:

{text}

SUMMARY:"""

# Wrap the template so the summarize chains can fill in its single "text" variable.
summarization_prompt = PromptTemplate(
    input_variables=["text"],
    template=summarization_prompt_template,
)

### Defining Function to Create Chapter Summaries using LLMs


In [7]:
def create_chapter_summary(chapter):
    """
    Creates a summary of a chapter using a large language model (LLM).

    Chapters below the token limit are summarized in a single "stuff" call.
    Longer chapters are first split into smaller documents so that the
    "map_reduce" chain has several pieces to map over; handing it the whole
    chapter as one document would still send the full text in one request.

    Args:
        chapter: A Document object representing the chapter to summarize.

    Returns:
        A Document object containing the summary of the chapter, carrying the
        metadata of the input chapter.
    """

    chapter_txt = chapter.page_content  # Extract chapter text
    model_name = "gpt-3.5-turbo-0125"  # Specify LLM model
    llm = ChatOpenAI(temperature=0, model_name=model_name)  # Create LLM instance
    gpt_35_turbo_max_tokens = 16000  # Maximum token limit for the LLM
    verbose = False  # Set to True for more detailed output

    # Calculate number of tokens in the chapter text
    num_tokens = num_tokens_from_string(chapter_txt, model_name)

    # Choose appropriate chain type (and input documents) based on token count
    if num_tokens < gpt_35_turbo_max_tokens:
        chain = load_summarize_chain(llm, chain_type="stuff", prompt=summarization_prompt, verbose=verbose)
        docs = [Document(page_content=chapter_txt)]
    else:
        chain = load_summarize_chain(llm, chain_type="map_reduce", map_prompt=summarization_prompt, combine_prompt=summarization_prompt, verbose=verbose)
        # map_reduce only helps when there is more than one document to map
        # over, so break the oversized chapter into pieces first.
        # NOTE(review): chunk_size is measured in characters; 20000 characters
        # is assumed to stay well below the token limit - confirm for your text.
        splitter = RecursiveCharacterTextSplitter(
            chunk_size=20000, chunk_overlap=500, length_function=len
        )
        docs = splitter.create_documents([chapter_txt])

    start_time = monotonic()  # Start timer
    summary = chain.invoke(docs)  # Generate summary using the chain
    print(f"Chain type: {chain.__class__.__name__}")  # Print chain type
    print(f"Run time: {monotonic() - start_time}")  # Print execution time

    # Clean up summary text
    summary = replace_double_lines_with_one_line(summary["output_text"])

    # Create Document object for summary, keeping the chapter's metadata
    doc_summary = Document(page_content=summary, metadata=chapter.metadata)

    return doc_summary

### Generating Summaries for Each Chapter


In [None]:
# One summary Document per chapter, in chapter order.
chapter_summaries = [create_chapter_summary(chapter) for chapter in chapters]

### Function to Encode a Book into a Vector Store using OpenAI Embeddings


In [9]:
def encode_book(path, chunk_size=1000, chunk_overlap=200):
    """
    Builds a FAISS vector store from a PDF book using OpenAI embeddings.

    Args:
        path: The path to the PDF file.
        chunk_size: Size of each text chunk, measured with len().
        chunk_overlap: Overlap between consecutive chunks, measured with len().

    Returns:
        A FAISS vector store built from the cleaned chunks of the book.
    """

    # Read the PDF into documents
    pages = PyPDFLoader(path).load()

    # Cut the documents into overlapping chunks and clean them
    splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
        length_function=len,
    )
    chunks = replace_t_with_space(splitter.split_documents(pages))

    # Embed the chunks and index them
    return FAISS.from_documents(chunks, OpenAIEmbeddings())

### Encoding Chapter Summaries into Vector Store


In [10]:
def encode_chapter_summaries(chapter_summaries):
    """
    Builds a FAISS vector store from chapter summaries using OpenAI embeddings.

    Args:
        chapter_summaries: A list of Document objects, one per chapter summary.

    Returns:
        A FAISS vector store built from the given summaries.
    """

    # Embed the summaries and index them in one step
    return FAISS.from_documents(chapter_summaries, OpenAIEmbeddings())

### Creating Vector Stores and Retrievers for Book and Chapter Summaries


In [3]:
# ### IF VECTOR STORES ALREADY EXIST, LOAD THEM
if not (os.path.exists("chunks_vector_store") and os.path.exists("chapter_summaries_vector_store")):
    # At least one store is missing on disk: build both from scratch ...
    chunks_vector_store = encode_book(hp_pdf_path, chunk_size=1000, chunk_overlap=200)
    chapter_summaries_vector_store = encode_chapter_summaries(chapter_summaries)

    # ... and persist them so later runs can load instead of re-encoding.
    chunks_vector_store.save_local("chunks_vector_store")
    chapter_summaries_vector_store.save_local("chapter_summaries_vector_store")

else:
    # Both stores already exist: load them with the same embedding model.
    # NOTE(review): allow_dangerous_deserialization=True is only appropriate if
    # these directories were written by this notebook - confirm before loading
    # stores obtained from elsewhere.
    embeddings = OpenAIEmbeddings()
    chunks_vector_store = FAISS.load_local(
        "chunks_vector_store",
        embeddings,
        allow_dangerous_deserialization=True,
    )
    chapter_summaries_vector_store = FAISS.load_local(
        "chapter_summaries_vector_store",
        embeddings,
        allow_dangerous_deserialization=True,
    )


  warn_deprecated(


### Create retrievers from the vector stores

In [4]:
# Both retrievers are configured with k=1 in their search kwargs.
chunks_query_retriever = chunks_vector_store.as_retriever(
    search_kwargs={"k": 1},
)
chapter_summaries_query_retriever = chapter_summaries_vector_store.as_retriever(
    search_kwargs={"k": 1},
)

## Create graph nodes and LLM function for the nodes

### Aggregate retrieved content as string context

In [214]:
def retrieve_context_per_question(state):
    """
    Retrieves book chunks and chapter summaries relevant to the question.

    Args:
        state: Mapping with a "question" entry holding the query text.

    Returns:
        A dict with "context" (the retrieved chunk text followed by the
        retrieved chapter summaries, passed through escape_quotes) and
        "question" (the unchanged question).
    """
    question = state["question"]

    # Retrieve relevant documents
    print("Retrieving relevant chunks...")
    docs = chunks_query_retriever.get_relevant_documents(question)

    # Concatenate document content
    context = " ".join(doc.page_content for doc in docs)

    print("Retrieving relevant chapter summaries...")
    docs_summaries = chapter_summaries_query_retriever.get_relevant_documents(question)

    # Concatenate chapter summaries with citation information
    context_summaries = " ".join(
        f"{doc.page_content} (Chapter {doc.metadata['chapter']})" for doc in docs_summaries
    )

    # Join the two parts with a space so the last word of the chunks is not
    # fused with the first word of the summaries; empty parts are skipped so
    # no stray leading/trailing space is introduced.
    all_contexts = " ".join(part for part in (context, context_summaries) if part)
    all_contexts = escape_quotes(all_contexts)

    return {"context": all_contexts, "question": question}



### LLM based function to distill only relevant retrieved content

In [243]:
class KeepRelevantContent(BaseModel):
    # Schema used to generate the JSON format instructions for the prompt.
    relevant_content: str = Field(description="The relevant content from the retrieved documents that is relevant to the query.")
    explanation: str = Field(description="An explanation of why the content is relevant to the query")


# Parser that turns the model's reply into a dict and supplies the format instructions.
keep_only_relevant_content_json_parser = JsonOutputParser(pydantic_object=KeepRelevantContent)

# Prompt text: {query} and {retrieved_documents} are filled per call,
# {format_instructions} is pre-filled from the parser.
keep_only_relevant_content_prompt_template = """you receive a query: {query} and retrieved docuemnts: {retrieved_documents} from a vector store.
 You need to filter the retrieved data and keep only the sentences that are relevant, but all of them.
 you should output the distilled content in a json format. 
 REMEMBER: the output has to be a json containing ALL the relevant sentences, and not the answer to the query. output
  **ONLY** the json format without any additional text {format_instructions}"""

keep_only_relevant_content_prompt = PromptTemplate(
    input_variables=["query", "retrieved_documents"],
    partial_variables={
        "format_instructions": keep_only_relevant_content_json_parser.get_format_instructions(),
    },
    template=keep_only_relevant_content_prompt_template,
)

# Alternative kept for reference:
# keep_only_relevant_content_llm = ChatGroq(temperature=0, model_name="llama3-70b-8192", groq_api_key=groq_api_key, max_tokens=4000)
keep_only_relevant_content_llm = ChatOpenAI(
    temperature=0,
    model_name="gpt-3.5-turbo-0125",
    max_tokens=2000,
)

# prompt -> LLM -> JSON parser
keep_only_relevant_content_chain = (
    keep_only_relevant_content_prompt
    | keep_only_relevant_content_llm
    | keep_only_relevant_content_json_parser
)
# keep_only_relevant_content_chain = keep_only_relevant_content_prompt | keep_only_relevant_content_llm.with_structured_output(RelevantContent)



def keep_only_relevant_content(state):
    """
    Distills the retrieved context down to the parts relevant to the question.

    Args:
        state: Mapping with "question" (the query) and "context" (the
            retrieved documents as text).

    Returns:
        A dict with "context" replaced by the distilled content (passed
        through escape_quotes) and the unchanged "question".
    """
    question = state["question"]
    context = state["context"]

    input_data = {
        "query": question,
        "retrieved_documents": context,
    }
    print("keeping only the relevant content...")
    # Invoke the chain to keep only the relevant content
    output = keep_only_relevant_content_chain.invoke(input_data)
    relevant_content = output["relevant_content"]

    # The schema asks for a single string, but the model may still return a
    # list of sentences (NOTE(review): the schema appears to act only as a
    # prompt hint - confirm). Join such a list with spaces so sentences are
    # not fused together; a plain string is left untouched.
    if not isinstance(relevant_content, str):
        relevant_content = " ".join(str(part) for part in relevant_content)
    relevant_content = escape_quotes(relevant_content)

    return {"context": relevant_content, "question": question}



### LLM based function to re-write a question

In [207]:
### Question Re-writer

class RewriteQuestion(BaseModel):
    """
    Schema describing the JSON the question re-writer is asked to produce.
    """
    rewritten_question: str = Field(description="The improved question optimized for vectorstore retrieval.")
    explanation: str = Field(description="The explanation of the rewritten question.")


# Parser that supplies the format instructions and parses the reply into a dict.
rewrite_question_string_parser = JsonOutputParser(pydantic_object=RewriteQuestion)

# {question} is filled per call; {format_instructions} comes from the parser.
rewrite_prompt_template = """You are a question re-writer that converts an input question to a better version optimized for vectorstore retrieval.
 Analyze the input question {question} and try to reason about the underlying semantic intent / meaning.
 {format_instructions}
 """

rewrite_prompt = PromptTemplate(
    input_variables=["question"],
    partial_variables={
        "format_instructions": rewrite_question_string_parser.get_format_instructions(),
    },
    template=rewrite_prompt_template,
)

rewrite_llm = ChatGroq(
    temperature=0,
    model_name="llama3-70b-8192",
    groq_api_key=groq_api_key,
    max_tokens=4000,
)

# prompt -> LLM -> JSON parser
question_rewriter = rewrite_prompt | rewrite_llm | rewrite_question_string_parser

def rewrite_question(state):
    """
    Rewrites the question held in the state using the question_rewriter chain.

    Args:
        state: Mapping with a "question" entry.

    Returns:
        A dict whose "question" is the chain's "rewritten_question" output.
    """
    original_question = state["question"]
    print("Rewriting the question...")
    rewritten = question_rewriter.invoke({"question": original_question})
    return {"question": rewritten["rewritten_question"]}

### LLM based function to answer a question given context

In [8]:
class QuestionAnswerFromContext(BaseModel):
    # Schema used to generate the JSON format instructions for the prompt.
    answer_based_on_content: str = Field(description="generates an answer to a query based on a given context.")
    explanation: str = Field(description="The explanation of the answer.")


# Parser that supplies the format instructions and parses the reply into a dict.
question_answer_from_context_json_parser = JsonOutputParser(pydantic_object=QuestionAnswerFromContext)

# {query} and {context} are filled per call; {format_instructions} comes from the parser.
question_answer_from_context_prompt_template = """you receive a query: {query} and a context: {context}. 
You need to answer the query from the context. the output has to be a json containing the answer to the query.
 {format_instructions}"""

question_answer_from_context_prompt = PromptTemplate(
    input_variables=["query", "context"],
    partial_variables={
        "format_instructions": question_answer_from_context_json_parser.get_format_instructions(),
    },
    template=question_answer_from_context_prompt_template,
)

question_answer_from_context_llm = ChatGroq(
    temperature=0,
    model_name="llama3-70b-8192",
    groq_api_key=groq_api_key,
    max_tokens=4000,
)

# prompt -> LLM -> JSON parser
question_answer_from_context_chain = (
    question_answer_from_context_prompt
    | question_answer_from_context_llm
    | question_answer_from_context_json_parser
)

def answer_question_from_context(state):
    """
    Answers the question in the state using only the context in the state.

    Args:
        state: Mapping with "question" (the query) and "context" (the text
            to answer from).

    Returns:
        A dict with "answer" (the chain's "answer_based_on_content" output)
        plus the unchanged "context" and "question".
    """
    question, context = state["question"], state["context"]

    print("Answering the question from the retrieved context...")

    # Run prompt -> LLM -> parser and pull the answer field out of the result
    result = question_answer_from_context_chain.invoke(
        {"query": question, "context": context}
    )
    return {
        "answer": result["answer_based_on_content"],
        "context": context,
        "question": question,
    }


## Create graph edges

### LLM based function to determine if retrieved content is relevant to question

In [81]:
class Relevance(BaseModel):
    # Schema used to generate the JSON format instructions for the prompt.
    is_relevant: bool = Field(description="Whether the document is relevant to the query.")
    explanation: str = Field(description="An explanation of why the document is relevant or not.")


# Parser that supplies the format instructions and parses the reply into a dict.
is_relevant_json_parser = JsonOutputParser(pydantic_object=Relevance)

# {query} and {context} are filled per call; {format_instructions} comes from the parser.
is_relevant_content_prompt_template = """you receive a query: {query} and a context: {context} retrieved from a vector store. 
You need to determine if the document is relevant to the query. 

{format_instructions}"""

is_relevant_content_prompt = PromptTemplate(
    input_variables=["query", "context"],
    partial_variables={
        "format_instructions": is_relevant_json_parser.get_format_instructions(),
    },
    template=is_relevant_content_prompt_template,
)

is_relevant_llm = ChatGroq(
    temperature=0,
    model_name="llama3-70b-8192",
    groq_api_key=groq_api_key,
    max_tokens=4000,
)

# prompt -> LLM -> JSON parser
is_relevant_content_chain = is_relevant_content_prompt | is_relevant_llm | is_relevant_json_parser

def is_relevant_content(state):
    """
    Determines if the retrieved context is relevant to the question.

    Args:
        state: Mapping with "question" and "context" entries.

    Returns:
        "relevant" when the chain's "is_relevant" output equals True,
        otherwise "not relevant".
    """

    question = state["question"]
    context = state["context"]

    input_data = {
        "query": question,
        "context": context,
    }

    # Announce the check before the LLM call so the message appears while the
    # work is in progress rather than after it has already finished.
    print("Determining if the document is relevant...")

    # Invoke the chain to determine if the document is relevant
    output = is_relevant_content_chain.invoke(input_data)

    # Compare against True explicitly instead of relying on truthiness: a
    # non-empty string such as "false" would otherwise count as relevant.
    if output["is_relevant"] == True:
        print("The document is relevant.")
        return "relevant"
    else:
        print("The document is not relevant.")
        return "not relevant"
    


### LLM chain to check if an answer is hallucination

In [10]:
class is_grounded_on_facts(BaseModel):
    """
    Output schema for the fact-grounding check of an answer.
    """
    # The description is injected into the prompt's format instructions. It
    # asks for a JSON boolean on purpose: a reply of "no" (a string) is truthy
    # in Python and would be read by `not result["grounded_on_facts"]` as
    # grounded.
    grounded_on_facts: bool = Field(description="Whether the answer is grounded in the facts: true or false.")
    explanation: str = Field(description="An explanation of why the answer is grounded in the facts or not.")

# Parser that supplies the format instructions and parses the reply into a dict.
grounded_on_facts_parser = JsonOutputParser(pydantic_object=is_grounded_on_facts)
is_grounded_on_facts_llm = ChatGroq(temperature=0, model_name="llama3-70b-8192", groq_api_key=groq_api_key, max_tokens=4000)
# {context} and {answer} are filled per call; {format_instructions} comes from the parser.
is_grounded_on_facts_prompt_template = """You are a fact-checker that determines if the answer to the question is grounded in the facts.
 Analyze the input context {context} and the answer {answer} and determine if the answer is grounded in the facts.
 {format_instructions}
 """
is_grounded_on_facts_prompt = PromptTemplate(
    template=is_grounded_on_facts_prompt_template,
    input_variables=["context", "answer"],
    partial_variables={"format_instructions": grounded_on_facts_parser.get_format_instructions()},
)
# prompt -> LLM -> JSON parser
is_grounded_on_facts_chain = is_grounded_on_facts_prompt | is_grounded_on_facts_llm | grounded_on_facts_parser




### LLM chain to determine if a question can be fully answered given a context

In [11]:
class QuestionAnswer(BaseModel):
    # Schema used to generate the JSON format instructions for the prompt.
    can_be_answered: bool = Field(description="binary result of whether the question can be fully answered or not")
    explanation: str = Field(description="An explanation of why the question can be fully answered or not.")


# Parser that supplies the format instructions and parses the reply into a dict.
can_be_answered_json_parser = JsonOutputParser(pydantic_object=QuestionAnswer)

# {question} and {context} are filled per call; {format_instructions} comes from the parser.
can_be_answered_prompt_template = """You receive a query: {question} and a context: {context}. 
You need to determine if the question can be fully answered based on the context.
{format_instructions}
"""

answer_question_prompt = PromptTemplate(
    input_variables=["question", "context"],
    partial_variables={
        "format_instructions": can_be_answered_json_parser.get_format_instructions(),
    },
    template=can_be_answered_prompt_template,
)

can_be_answered_llm = ChatGroq(
    temperature=0,
    model_name="llama3-70b-8192",
    groq_api_key=groq_api_key,
    max_tokens=4000,
)

# prompt -> LLM -> JSON parser
can_be_answered_chain = answer_question_prompt | can_be_answered_llm | can_be_answered_json_parser

    

### function to check both cases - hallucination and full answer

In [12]:
def grade_generation_v_documents_and_question(state):
    """
    Grades an answer: first for grounding in the context, then for completeness.

    Args:
        state: Mapping with "context", "answer" and "question" entries.

    Returns:
        "hallucination" when the grounding chain reports the answer as not
        grounded; otherwise "useful" when the can-be-answered chain's
        "can_be_answered" output equals True, else "not_useful".
    """
    print("Checking if the answer is grounded in the facts...")
    context, answer, question = state["context"], state["answer"], state["question"]

    grounding = is_grounded_on_facts_chain.invoke({"context": context, "answer": answer})
    if not grounding["grounded_on_facts"]:
        print("The answer is hallucination.")
        return "hallucination"

    print("The answer is grounded in the facts.")

    # Second check: can the question be fully answered from this context?
    print("Determining if the question is fully answered...")
    verdict = can_be_answered_chain.invoke({"question": question, "context": context})
    if verdict["can_be_answered"] == True:
        print("The question can be fully answered.")
        return "useful"

    print("The question cannot be fully answered.")
    return "not_useful"
     

### Test a pipeline of all parts

In [None]:
# Run every stage once, by hand, on a sample question.
init_state = {"question": "who is harry?"}
context_state = retrieve_context_per_question(init_state)
relevant_content_state = keep_only_relevant_content(context_state)
is_relevant_content_state = is_relevant_content(relevant_content_state)
answer_state = answer_question_from_context(relevant_content_state)

# The grader returns a routing label ("hallucination", "useful" or
# "not_useful"), not a state dict, so it cannot be indexed with ["answer"];
# the answer itself lives in answer_state.
generation_grade = grade_generation_v_documents_and_question(answer_state)
print(generation_grade)
final_answer = answer_state["answer"]
print(final_answer)

## Build the Graph

In [None]:
class QualitativeRetievalAnswerGraphState(TypedDict):
    """
    State shared by the nodes of the combined retrieve-and-answer graph.

    Keys:
        question: the question being processed
        context: the context gathered for the question
        answer: the answer produced from the context
    """

    question: str
    context: str
    answer: str

qualitative_retrieval_answer_workflow = StateGraph(QualitativeRetievalAnswerGraphState)

# Register the node functions under their own names
qualitative_retrieval_answer_workflow.add_node(
    "retrieve_context_per_question", retrieve_context_per_question
)
qualitative_retrieval_answer_workflow.add_node(
    "keep_only_relevant_content", keep_only_relevant_content
)
qualitative_retrieval_answer_workflow.add_node(
    "rewrite_question", rewrite_question
)
qualitative_retrieval_answer_workflow.add_node(
    "answer_question_from_context", answer_question_from_context
)

# Wire the graph: retrieve -> distill -> (answer | rewrite -> retrieve)
qualitative_retrieval_answer_workflow.set_entry_point("retrieve_context_per_question")
qualitative_retrieval_answer_workflow.add_edge(
    "retrieve_context_per_question", "keep_only_relevant_content"
)
qualitative_retrieval_answer_workflow.add_conditional_edges(
    "keep_only_relevant_content",
    is_relevant_content,
    {
        "relevant": "answer_question_from_context",
        "not relevant": "rewrite_question",
    },
)
qualitative_retrieval_answer_workflow.add_edge(
    "rewrite_question", "retrieve_context_per_question"
)
# After answering, the grader's label decides: retry the answer, rewrite the
# question, or finish.
qualitative_retrieval_answer_workflow.add_conditional_edges(
    "answer_question_from_context",
    grade_generation_v_documents_and_question,
    {
        "hallucination": "answer_question_from_context",
        "not_useful": "rewrite_question",
        "useful": END,
    },
)

qualitative_retrieval_answer_retrival_app = qualitative_retrieval_answer_workflow.compile()

# Render the compiled graph as a Mermaid PNG
display(
    Image(
        qualitative_retrieval_answer_retrival_app.get_graph().draw_mermaid_png(
            draw_method=MermaidDrawMethod.API,
        )
    )
)



<IPython.core.display.Image object>

### This graph can solve questions that can be deduced from data that was drawn directly by retrieving information using semantic similarity. For more complex tasks, we need a more sophisticated agent. For this, we may first break down this graph into two subgraphs that will serve as tools for a more intelligent agent.

### First sub graph would be for retrieval and distillation of the relevant information

In [184]:
class QualitativeRetrievalGraphState(TypedDict):
    """
    State shared by the nodes of the retrieval-only graph.

    Keys:
        question: the question being processed
        context: the context gathered for the question
    """

    question: str
    context: str

qualitative_retrieval_workflow = StateGraph(QualitativeRetrievalGraphState)

# Register the node functions under their own names
qualitative_retrieval_workflow.add_node(
    "retrieve_context_per_question", retrieve_context_per_question
)
qualitative_retrieval_workflow.add_node(
    "keep_only_relevant_content", keep_only_relevant_content
)
qualitative_retrieval_workflow.add_node(
    "rewrite_question", rewrite_question
)

# Wire the graph: retrieve -> distill -> (END | rewrite -> retrieve)
qualitative_retrieval_workflow.set_entry_point("retrieve_context_per_question")
qualitative_retrieval_workflow.add_edge(
    "retrieve_context_per_question", "keep_only_relevant_content"
)
qualitative_retrieval_workflow.add_edge(
    "rewrite_question", "retrieve_context_per_question"
)

# Finish once the distilled content is judged relevant; otherwise rewrite
# the question and retrieve again.
qualitative_retrieval_workflow.add_conditional_edges(
    "keep_only_relevant_content",
    is_relevant_content,
    {
        "relevant": END,
        "not relevant": "rewrite_question",
    },
)

qualitative_retrieval_workflow_app = qualitative_retrieval_workflow.compile()

# Render the compiled graph as a Mermaid PNG
display(
    Image(
        qualitative_retrieval_workflow_app.get_graph().draw_mermaid_png(
            draw_method=MermaidDrawMethod.API,
        )
    )
)

<IPython.core.display.Image object>

### The second sub graph will be a component that answers a question given context, without hallucinations

In [185]:
class QualitativeAnswerGraphState(TypedDict):
    """
    State shared by the nodes of the answer-only graph.

    Keys:
        question: the question being processed
        context: the context to answer from
        answer: the answer produced from the context
    """

    question: str
    context: str
    answer: str

qualitative_answer_workflow = StateGraph(QualitativeAnswerGraphState)

# Single node: answer the question from the supplied context
qualitative_answer_workflow.add_node(
    "answer_question_from_context", answer_question_from_context
)

# The same node is also the entry point
qualitative_answer_workflow.set_entry_point("answer_question_from_context")

# NOTE(review): both "hallucination" and "not_useful" route back to the same
# node, so the graph only reaches END on "useful" - confirm that a recursion
# limit ends the run when the grader never returns it.
qualitative_answer_workflow.add_conditional_edges(
    "answer_question_from_context",
    grade_generation_v_documents_and_question,
    {
        "hallucination": "answer_question_from_context",
        "not_useful": "answer_question_from_context",
        "useful": END,
    },
)

qualitative_answer_workflow_app = qualitative_answer_workflow.compile()

# Render the compiled graph as a Mermaid PNG
display(
    Image(
        qualitative_answer_workflow_app.get_graph().draw_mermaid_png(
            draw_method=MermaidDrawMethod.API,
        )
    )
)

<IPython.core.display.Image object>

In [16]:
# def run_qualitative_retrieval_answer_graph(question: str):
#     inputs = {"question": question}
#     for output in qualitative_retrieval_answer_retrival_app.stream(inputs):
#         for key, value in output.items():
#             pass  # Node
#             # ... (your existing code)
#         pprint("--------------------")
#     print(value)
#     return AIMessage(
#         content=value,  # The actual answer
#     )

In [271]:
# def run_qualitative_retrieval_workflow(question: str):
#     print("Running the qualitative retrieval workflow...")
#     inputs = {"question": question}
#     for output in qualitative_retrieval_workflow_app.stream(inputs):
#         for key, value in output.items():
#             pass  # Node
#             # ... (your existing code)
#         pprint("--------------------")
#     print(f' the question was {question} and the context is {value}')
#     return AIMessage(
#         content=value, # The distilled retrieved content
#     )

def run_qualitative_retrieval_workflow(state):
    """
    Runs the retrieval graph for a question and returns the distilled context.

    Args:
        state: Either a mapping with a "question" entry, or the question text
            itself. The plain-string form is accepted because this function is
            registered as a single-input Tool, which passes its argument as a
            string.

    Returns:
        An AIMessage whose content is the "context" of the last state update
        streamed by the graph (an empty string if nothing was streamed).
    """
    print("Running the qualitative retrieval workflow...")
    question = state["question"] if isinstance(state, dict) else state
    inputs = {"question": question}

    # Start from an empty update so the code below still works when the graph
    # streams nothing.
    value = {}
    for output in qualitative_retrieval_workflow_app.stream(inputs):
        # Each streamed item maps a node name to that node's state update;
        # keep the most recent update.
        for value in output.values():
            pass
        pprint("--------------------")
    print(f' the question was {question} and the context is {value}')

    # Message content must be text, so hand back the distilled context rather
    # than the whole update dict.
    context = value.get("context", "") if isinstance(value, dict) else str(value)
    return AIMessage(
        content=context,  # The distilled retrieved content
    )

In [272]:
def run_qualtative_answer_workflow(state):
    """
    Runs the answer graph for a question and context and returns the answer.

    Args:
        state: Mapping with "question" and "context" entries.

    Returns:
        An AIMessage whose content is the "answer" of the last state update
        streamed by the graph (an empty string if nothing was streamed).
    """
    print("Running the qualitative answer workflow...")
    question = state["question"]
    context = state["context"]
    inputs = {"question": question, "context": context}

    # Start from an empty update so the code below still works when the graph
    # streams nothing.
    value = {}
    for output in qualitative_answer_workflow_app.stream(inputs):
        # Each streamed item maps a node name to that node's state update;
        # keep the most recent update.
        for value in output.values():
            pass
        pprint("--------------------")
    print(f' the question was {question} and the context is {context} and the answer is {value}')

    # Message content must be text, so hand back the answer rather than the
    # whole update dict.
    answer = value.get("answer", "") if isinstance(value, dict) else str(value)
    return AIMessage(
        content=answer,  # The answer to a question based on a given context
    )

# class QualitativeAnswerInput(BaseModel):
#     question: str = Field(description="The question to be answered.")
#     context: str = Field(description="The context from which to answer the question.")

# def run_qualtative_answer_workflow(inputs: QualitativeAnswerInput):
#     """Answers a question from a given context."""
#     print("Running the qualitative answer workflow...")
#     question = inputs.question
#     context = inputs.context
#     inputs = {"question": question, "context": context}
#     for output in qualitative_answer_workflow_app.stream(inputs):
#         for key, value in output.items():
#             pass  # Node
#             # ... (your existing code)
#         print("--------------------")
#     print(
#         f" the question was {question} and the context is {context} and the answer is {value}"
#     )
#     return value  # Return just the answer




In [273]:
from langchain.tools import StructuredTool

# Tools exposed to the agent; each wraps one of the sub-graph runners.
# NOTE(review): a StructuredTool with an explicit args schema was previously
# sketched here as an alternative for the two-argument answer tool.
tools = [
    Tool(
        name="run_qualitative_retrieval_graph",
        func=run_qualitative_retrieval_workflow,
        description="retrieves relevant content based on query from vector stores, and distills the relevant information.",
    ),
    Tool(
        name="run_qualtative_answer_workflow",
        func=run_qualtative_answer_workflow,
        description="answers a question from a given context. use this tool with two arguments: question and context.",
    ),
]


In [19]:
# tools = [
#     Tool(
#         name = "retrieval_graph",
#         description="A graph that retrieves relevant information from a vector store based on a given question.",
#         func = lambda query: run_retrieval_graph(query),
#     )
# ]

# ### TODO: the tools below is for debugging only, change it to the above tools

# def run_retrieve_context(question: str):
#     inputs = {"question": question}
#     state = retrieve_context_per_question(inputs)
#     return state
# tools = [
# Tool(
#     name="run_retrieve_context",
#     func=lambda question: run_retrieve_context(question),
#     description="Retrieves relevant documents based on a given question.",
# ),
#  Tool(
#         name = "retrieval_graph",
#         description="A graph that retrieves relevant information from a vector store based on a given question and answers the question.",
#         func = lambda query: run_qualitative_retrieval_answer_graph(query),
#     )
# ]

In [274]:
# System-style instructions passed to the agent as its messages modifier.
agent_llm_prompt = """ you are a helpful assistant. your goal is to assist the user in finding the information they need.
some of the tools you have at your disposal are:
tool A: a tool that retrieves relevant information based on a given question.
Tool B: a tool that answers a question from a given context.
Always before you decide to respond your answer to the user, construct the answer using Tool B.
"""


class Response(BaseModel):
    """Schema for the final reply given to the user."""

    response: str = Field(description="Response to user.")


response_parser = JsonOutputParser(pydantic_object=Response)

# LLM that drives the agent.
# Alternatives kept for reference:
# model_name = "gpt-3.5-turbo-0125"
# agent_llm = ChatGroq(temperature=0, model_name="llama3-70b-8192", groq_api_key=groq_api_key, max_tokens=2000)
model_name = "gpt-4o"

agent_llm = ChatOpenAI(
    temperature=0,
    model=model_name,
    max_tokens=2000,
)

# ReAct agent wired to the tools and the instructions above
agent_executor = create_react_agent(
    agent_llm,
    tools,
    messages_modifier=agent_llm_prompt,
)


In [281]:
class PlanExecute(TypedDict):
    """State carried through the plan-and-execute graph."""

    question: str
    anonymized_question: str
    plan: List[str]
    # past_steps used to be declared twice (first as an Annotated list with an
    # operator.add reducer, then again as List[str]). The later declaration
    # silently replaced the earlier one, so List[str] was the type actually in
    # effect; it is now declared once, in its original position.
    past_steps: List[str]
    response: str
    mapping: dict
    context: str
    aggregated_context: str
    tool: str

In [283]:
tasks_handler_prompt_template = """You are a task handler that receives a task {curr_task} and have to decide with tool to use to execute the task.
You have the following tools at your disposal:
Tool A: a tool that retrieves relevant information from a vector store based on a given query.
Tool B: a tool that answers a question from a given context.
You also have aggregated context {aggregated_context} that you can use to make decisions, and use the information from that context as input to the tools.
You also have the past steps {past_steps} that you can use to make decisions and understand the context of the task.
You also have the initial user's question {input} that you can use to make decisions and understand the context of the task.
if you decide to use Tool A, output the query to be used for the tool and also that the tool to be used is Tool A.
if you decide to use Tool B, output the question to be used for the tool, the context, and also that the tool to be used is Tool B.
"""

# Structured-output schema for the task handler chain (used with
# `with_structured_output` where the chain is assembled).
# NOTE(review): the `tool` description names 'retrieve' / 'answer_from_context',
# while the prompt text speaks of "Tool A" / "Tool B" - confirm which labels
# downstream code expects.
class TaskHandlerOutput(BaseModel):
    """Output schema for the task handler."""
    query: str = Field(description="The query to be either retrieved from the vector store, or the question that should be answered from context.")
    context: str = Field(description="The context to be based on in order to answer the query.")
    tool: str = Field(description="The tool to be used should be either 'retrieve' or 'answer_from_context'.")


# Wrap the template; all four placeholders are declared as required inputs.
task_handler_prompt = PromptTemplate(
    template=tasks_handler_prompt_template,
    input_variables=["curr_task", "aggregated_context", "past_steps", "input"],
)

# Chain: prompt -> LLM constrained to return a `TaskHandlerOutput`.
task_handler_llm = ChatOpenAI(temperature=0, model_name="gpt-3.5-turbo-0125", max_tokens=2000)
task_handler_chain = task_handler_prompt | task_handler_llm.with_structured_output(TaskHandlerOutput)

In [None]:
### test the task handler chain
def run_task_handler_chain(curr_task: str, aggregated_context: str, past_steps: List[str], input: str):
    """Invoke the task handler chain for a single task.

    Args:
        curr_task: The task the handler must choose a tool for.
        aggregated_context: Context gathered so far, made available to the prompt.
        past_steps: Steps already executed, made available to the prompt.
        input: The user's original question.

    Returns:
        Whatever `task_handler_chain.invoke` returns (the chain is built with
        `with_structured_output(TaskHandlerOutput)`).
    """
    # The prompt declares all four variables as required; leaving any of them
    # out makes the prompt fail with a missing-variable error before the LLM
    # is ever called.
    inputs = {
        "curr_task": curr_task,
        "aggregated_context": aggregated_context,
        "past_steps": past_steps,
        "input": input,
    }
    return task_handler_chain.invoke(inputs)





In [263]:
# Schema holding an ordered list of step strings; used as the structured output
# of the planner and as the `plan` field of `ActPossibleResults`.
class Plan(BaseModel):
    """Plan to follow in future"""

    steps: List[str] = Field(
        description="different steps to follow, should be in sorted order"
    )

# Planner prompt text; its only placeholder is {question}.
planner_prompt =""" For the given query {question}, come up with a simple step by step plan of how to figure out the answer. 

This plan should involve individual tasks, that if executed correctly will yield the correct answer. Do not add any superfluous steps. 
The result of the final step should be the final answer. Make sure that each step has all the information needed - do not skip steps.

when building the plan take in account that you have is access to TWO TOOLS:
1) function A: that retrieves both vector stores of chunks of a book and vector stores of the same book chapter summaries for a given query and distills the relevant information.
use this tool to retrieve the relevant documents based on every query you want.

2) function B: answers a question from a given context, based on the retrieved documents, without hallucination.
use this tool to answer the question based on the retrieved documents.

"""

# The name `planner_prompt` is rebound here from the raw string to the PromptTemplate.
planner_prompt = PromptTemplate(
    template=planner_prompt,
      input_variables=["question"], 
     )

planner_llm = ChatOpenAI(temperature=0, model_name="gpt-4o", max_tokens=2000)

# Chain: prompt -> LLM constrained to return a `Plan`; invoke it with {"question": ...}.
planner = planner_prompt | planner_llm.with_structured_output(Plan)


In [56]:
# Smoke test of the planner: invoke it with a sample question and print the resulting plan.
question = {"question": "how did harry beat quirrell?"}
my_plan = planner.invoke(question)
print(my_plan)

steps=["Use function A to retrieve relevant documents and chapter summaries for the query 'how did Harry beat Quirrell?'.", "Use function B to answer the question 'how did Harry beat Quirrell?' based on the retrieved documents and summaries from function A."]


In [264]:
# Rebinds the name `Response` (an earlier class with the same name exists in this
# file); this version declares `response` without a Field description and is the
# one referenced by `ActPossibleResults`.
class Response(BaseModel):
    """Response to user."""

    response: str


# Schema describing the replanner's JSON output: a final `response`, an updated
# `plan`, and a free-text `explanation`. Used to build the parser's format
# instructions.
class ActPossibleResults(BaseModel):
    """Possible results of the action."""
    response: Response = Field(description="Response to user.")
    plan: Plan = Field(description="Plan to follow in future.")
    explanation: str = Field(description="Explanation of the action.")
    

# JSON parser whose format instructions are derived from `ActPossibleResults`.
act_possible_results_parser = JsonOutputParser(pydantic_object=ActPossibleResults)

# Replanner prompt text. Placeholders: {question}, {plan}, {past_steps} and
# {format_instructions} (the last one is filled in as a partial variable).
replanner_prompt_template =""" For the given objective, come up with a simple step by step plan of how to figure out the answer. 
This plan should involve individual tasks, that if executed correctly will yield the correct answer. Do not add any superfluous steps. 
The result of the final step should be the final answer. Make sure that each step has all the information needed - do not skip steps.

when building the plan take in account that you have is access to TWO TOOLS:
1) function A: that retrieves both vector stores of chunks of a book and vector stores of the same book chapter summaries for a given query and distills the relevant information.
use this tool to retrieve the relevant documents based on every query you want.

2) function B: answers a question from a given context, based on the retrieved documents, without hallucination.
use this tool to answer the question based on the retrieved documents.


Your objective was this:
{question}

Your original plan was this:
{plan}

You have currently done the follow steps:
{past_steps}

Update your plan accordingly. If no more steps are needed and you can return to the user,
then respond with only the final answer. If further steps are needed, fill out the plan with only those steps.
Do not return previously done steps as part of the plan.

the format is json so escape quotes and new lines.

{format_instructions}

"""

# Callers must supply `question`, `plan` and `past_steps`; the format
# instructions are pre-filled from the parser.
replanner_prompt = PromptTemplate(
    template=replanner_prompt_template,
    input_variables=["question", "plan", "past_steps"],
    partial_variables={"format_instructions": act_possible_results_parser.get_format_instructions()},
)

replanner_llm = ChatOpenAI(temperature=0, model_name="gpt-4o", max_tokens=2000)



# Chain: prompt -> LLM -> JSON parser.
replanner = replanner_prompt | replanner_llm | act_possible_results_parser


In [265]:
# Schema for the anonymizer's JSON output. Note the field is spelled
# `anonymize_question` (no trailing "d"); callers index the parsed output with
# exactly that key.
class AnonymizeQuestion(BaseModel):
    """Anonymized question and mapping."""
    anonymize_question : str = Field(description="Anonymized question.")
    mapping: dict = Field(description="Mapping of original words to variables.")
    explanation: str = Field(description="Explanation of the action.")

# JSON parser whose format instructions are derived from `AnonymizeQuestion`.
anonymize_question_parser = JsonOutputParser(pydantic_object=AnonymizeQuestion)


# Prompt text with placeholders {question} and {format_instructions}.
# NOTE(review): the prose says "anonymized_question" while the schema field is
# `anonymize_question`; the format instructions carry the schema name - confirm
# the model reliably emits the schema key.
anonymize_question_prompt_template = """ You are a question anonymizer. The input You receive is a string containing several words that
 construct a question {question}. Your goal is to changes all name entities in the input strings to variables. keep those variables
in the mapping dictionary do it here, don't write a code that does this. .just output the anonymized_question and mapping dictionary in this format: {format_instructions}"""


anonymize_question_prompt = PromptTemplate(
    template=anonymize_question_prompt_template,
    input_variables=["question"],
    partial_variables={"format_instructions": anonymize_question_parser.get_format_instructions()},
)

# Chain: prompt -> Groq-hosted LLM -> JSON parser. `groq_api_key` is defined outside this cell.
anonymize_question_llm = ChatGroq(temperature=0, model_name="llama3-70b-8192", groq_api_key=groq_api_key, max_tokens=2000)
anonymize_question_chain = anonymize_question_prompt | anonymize_question_llm | anonymize_question_parser


In [266]:
# Schema for the de-anonymizer's JSON output: the rewritten `plan` list plus a
# free-text `explanation`.
class DeAnonymizePlan(BaseModel):
    """Possible results of the action."""
    plan: List = Field(description="Plan to follow in future.")
    explanation: str = Field(description="Explanation of the action.")

# JSON parser whose format instructions are derived from `DeAnonymizePlan`.
deanonymize_plan_parser = JsonOutputParser(pydantic_object=DeAnonymizePlan)


# Prompt text with placeholders {plan}, {mapping} and {format_instructions}; it asks
# the model to substitute mapped variables in the task list back to their words.
de_anonymize_plan_prompt_template = """ you receive a list of tasks: {plan}, where some of the words are replaced with mapped variables. you also receive
 the mapping for those variables to words {mapping}. you should create the new list of tasks, where all the mapped variables are now their mapped words. do it here, don't write a code that does this.
 if no variables are present, return the original list of tasks. in any case, just output the updated list of tasks in a json format as described here, without any additional text apart from the 
  json format. {format_instructions}"""


de_anonymize_plan_prompt = PromptTemplate(
    template=de_anonymize_plan_prompt_template,
    input_variables=["plan", "mapping"],
    partial_variables={"format_instructions": deanonymize_plan_parser.get_format_instructions()},
)

# Chain: prompt -> Groq-hosted LLM -> JSON parser. `groq_api_key` is defined outside this cell.
de_anonymize_plan_llm = ChatGroq(temperature=0, model_name="llama3-70b-8192", groq_api_key=groq_api_key, max_tokens=2000)
de_anonymize_plan_chain = de_anonymize_plan_prompt | de_anonymize_plan_llm | deanonymize_plan_parser

In [None]:
# Manual end-to-end check of the three chains: anonymize the question, plan on the
# anonymized text, then map the plan's variables back to the original words.
state1 = {'question': "how did harry beat quirrell?"}
anonymized_question_output = anonymize_question_chain.invoke(state1)
# The parsed output is indexed by the schema's field names.
anonymized_question = anonymized_question_output["anonymize_question"]
mapping = anonymized_question_output["mapping"]
print(f'anonimized_querry: {anonymized_question}')
print(f'mapping: {mapping}')
plan = planner.invoke({"question": anonymized_question})
print(f'plan: {plan}')
deanonimzed_plan = de_anonymize_plan_chain.invoke({"plan": plan.steps, "mapping": mapping})
print(f'deanonimized_plan: {deanonimzed_plan}')

In [276]:
from typing import Literal


def execute_step(state: PlanExecute):
    """Hand the first step of the current plan to the ReAct agent.

    The whole plan is rendered as a numbered list so the agent sees the full
    context, but it is always asked to execute the first entry of
    ``state["plan"]``.

    Args:
        state: Graph state; only ``state["plan"]`` is read.

    Returns:
        A state update ``{"past_steps": (task, answer)}`` where ``answer`` is
        the content of the last message in the agent's response.
    """
    print("Executing step")
    steps = state["plan"]
    numbered_lines = [f"{idx+1}. {step}" for idx, step in enumerate(steps)]
    plan_str = "\n".join(numbered_lines)
    current_task = steps[0]
    task_formatted = f"""For the following plan:
{plan_str}\n\nYou are tasked with executing step {1}, {current_task}."""
    print(f'task_formatted: {task_formatted}')

    agent_request = {"messages": [("user", task_formatted)]}
    agent_response = agent_executor.invoke(agent_request)
    last_message = agent_response["messages"][-1]
    return {"past_steps": (current_task, last_message.content)}

def anonymize_queries(state: PlanExecute):
    """Anonymize the user's question through `anonymize_question_chain`.

    Args:
        state: Graph state; ``state['input']`` is passed to the chain.

    Returns:
        A state update with the anonymized question under ``"anonymized_input"``
        and the variable-to-word mapping under ``"mapping"``.
    """
    print("Anonymizing question")
    chain_output = anonymize_question_chain.invoke(state['input'])
    # Keys follow the field names of the `AnonymizeQuestion` schema.
    anonymized_text = chain_output["anonymize_question"]
    variable_map = chain_output["mapping"]
    return {"anonymized_input": anonymized_text, "mapping": variable_map}

def deanonymize_queries(state: PlanExecute):
    """Map the variables in the plan back to their original words.

    Args:
        state: Graph state; ``state["plan"]`` and ``state["mapping"]`` are read.

    Returns:
        A state update ``{"plan": ...}`` holding the ``"plan"`` entry of the
        de-anonymization chain's output.
    """
    print("De-anonymizing plan")
    chain_inputs = {"plan": state["plan"], "mapping": state["mapping"]}
    chain_output = de_anonymize_plan_chain.invoke(chain_inputs)
    return {"plan": chain_output["plan"]}


def plan_step(state: PlanExecute):
    """Create the initial plan from the anonymized question.

    Args:
        state: Graph state; ``state['anonymized_input']`` is read.

    Returns:
        A state update ``{"plan": [...]}`` with the planner's list of steps.
    """
    print("Planning step")
    # The planner prompt's only input variable is `question`; passing the text
    # under any other key makes the prompt fail with a missing-variable error.
    plan = planner.invoke({"question": state['anonymized_input']})

    return {"plan": plan.steps}


def replan_step(state: PlanExecute):
    """Ask the replanner to either finish or produce the remaining steps.

    The state is passed through `format_state_past_steps` and
    `clean_empty_fields_dictionary`, printed, sent to the replanner chain, and
    the chain's output is converted by `process_replanner_output`.

    NOTE(review): the replanner prompt requires `question`, `plan` and
    `past_steps`; whether the helper functions guarantee a `question` key
    cannot be seen from here - confirm.

    Args:
        state: Current graph state.

    Returns:
        Whatever `process_replanner_output` returns for the replanner's output.
    """
    print("Replanning step")
    formatted_state = format_state_past_steps(state)
    cleaned_state = clean_empty_fields_dictionary(formatted_state)
    print("state:")
    print(cleaned_state)
    replanner_output = replanner.invoke(cleaned_state)
    return process_replanner_output(replanner_output)
  

def should_end(state: PlanExecute) -> Literal["agent", "__end__"]:
    """Decide where the graph goes after replanning.

    Args:
        state: Current graph state.

    Returns:
        ``"__end__"`` when the state holds a truthy ``"response"``, otherwise
        ``"agent"``.
    """
    print("Checking if should end")
    has_response = "response" in state and state["response"]
    return "__end__" if has_response else "agent"

In [277]:
from langgraph.graph import StateGraph

# Build the plan-and-execute graph over the `PlanExecute` state schema.
# Flow: anonymize -> planner -> de-anonymize -> agent -> replan, then either back
# to agent or to the end, as decided by `should_end`.
agent_workflow = StateGraph(PlanExecute)

# Add the anonymize node
agent_workflow.add_node("anonymize", anonymize_queries)
# Add the plan node
agent_workflow.add_node("planner", plan_step)
# Add the deanonymize node
agent_workflow.add_node("de-anonymize", deanonymize_queries)


# Add the execution step
agent_workflow.add_node("agent", execute_step)

# Add a replan node
agent_workflow.add_node("replan", replan_step)


# The graph starts by anonymizing the question.
agent_workflow.set_entry_point("anonymize")

# From anonymize we go to plan
agent_workflow.add_edge("anonymize", "planner")

# From plan we go to deanonymize
agent_workflow.add_edge("planner", "de-anonymize")

# From deanonymize we go to agent
agent_workflow.add_edge("de-anonymize", "agent")

# From agent, we replan
agent_workflow.add_edge("agent", "replan")

agent_workflow.add_conditional_edges(
    "replan",
    # Next, we pass in the function that will determine which node is called next.
    # `should_end` returns either "__end__" or "agent".
    should_end,
)

# Finally, we compile it!
# This compiles it into a LangChain Runnable,
# meaning you can use it as you would any other runnable
plan_and_execute_app = agent_workflow.compile()

# Render the compiled graph as a Mermaid PNG inside the notebook.
display(Image(plan_and_execute_app.get_graph(xray=True).draw_mermaid_png()))

<IPython.core.display.Image object>

In [278]:
def execute_plan_and_print_steps(inputs, recursion_limit=8):
    """Stream the plan-and-execute graph, printing each node's state update.

    Args:
        inputs: Initial state passed to ``plan_and_execute_app.stream``.
        recursion_limit: Value for the ``"recursion_limit"`` entry of the run
            config.

    Returns:
        The last chunk yielded by the stream, ``None`` if the stream yielded
        nothing, or the string "The answer wasn't found in the data." when the
        graph raises ``GraphRecursionError``.
    """
    config = {"recursion_limit": recursion_limit}
    # Pre-bind so an empty stream returns None instead of raising
    # UnboundLocalError at the return statement.
    plan_output = None
    try:
        for plan_output in plan_and_execute_app.stream(inputs, config=config):
            # Each chunk is iterated as a mapping; only its values are printed.
            for agent_state_value in plan_output.values():
                print(f' curr step: {agent_state_value}')
    except langgraph.pregel.GraphRecursionError:
        return "The answer wasn't found in the data."
    return plan_output

In [280]:
# Run the full graph on a sample question; the initial state carries the question under "input".
inputs = {"input": "how did harry beat quirrell?"}
final_answer = execute_plan_and_print_steps(inputs)
print(f'The final answer is: {final_answer}')

Anonymizing question
 curr step: {'anonymized_input': 'how did X beat Y?', 'mapping': {'X': 'Harry', 'Y': 'Quirrell'}}
Planning step
 curr step: {'plan': ["Formulate the query 'How did X beat Y?' clearly.", 'Use function A to retrieve relevant documents and chapter summaries based on the query.', 'Review the retrieved documents and chapter summaries to ensure they are relevant to the query.', "Use function B to answer the question 'How did X beat Y?' based on the context provided by the retrieved documents.", 'Review the answer provided by function B to ensure it directly addresses the query.']}
De-anonymizing plan
 curr step: {'plan': ["Formulate the query 'How did Harry beat Quirrell?' clearly.", 'Use function A to retrieve relevant documents and chapter summaries based on the query.', 'Review the retrieved documents and chapter summaries to ensure they are relevant to the query.', "Use function B to answer the question 'How did Harry beat Quirrell?' based on the context provided by 

KeyboardInterrupt: 

In [None]:
# Run the full graph on a second sample question.
inputs = {"input": "who is charles darwin?"}
final_answer = execute_plan_and_print_steps(inputs)
print(f'The final answer is: {final_answer}')


In [78]:
# Run the full graph on a third sample question.
# The variable is named `inputs` (not `input`) so the builtin `input()` is not
# rebound to a dict; `chat_with_data` later calls `input(...)` to read from the user.
inputs = {"input": "what did professor lupin teach?"}
final_answer = execute_plan_and_print_steps(inputs)
print(f'The final answer is: {final_answer}')

Anonymizing question
 curr step: {'anonymized_input': 'what did professor VAR1 teach?', 'mapping': {'VAR1': 'Lupin'}}
Planning step
 curr step: {'plan': ["Formulate the query to retrieve relevant documents about professor VAR1's teachings.", 'Use function A to retrieve both vector stores of chunks of a book and vector stores of the same book chapter summaries based on the query.', "Review the retrieved documents to ensure they contain relevant information about professor VAR1's teachings.", "Use function B to answer the question 'What did professor VAR1 teach?' based on the retrieved documents."]}
De-anonymizing plan
 curr step: {'plan': ["Formulate the query to retrieve relevant documents about professor Lupin's teachings.", 'Use function A to retrieve both vector stores of chunks of a book and vector stores of the same book chapter summaries based on the query.', "Review the retrieved documents to ensure they contain relevant information about professor Lupin's teachings.", "Use func

### Template for Answering Questions Using Context-Specific Information


### Model Evaluation


In [None]:
# Evaluation set: `questions[i]` is paired with `ground_truth_answers[i]`.
# Only the first four pairs are active; the remaining entries are commented out
# in both lists.
questions = [
    "Who gave Harry Potter his first broomstick?",
    "What is the name of the three-headed dog guarding the Sorcerer's Stone?",
    "Which house did the Sorting Hat initially consider for Harry?",
    "What is the name of Harry's owl?"
]
#     "How did Harry and his friends get past Fluffy?",
#     "What is the Mirror of Erised?",
#     "Who tried to steal the Sorcerer's Stone?",
#     "How did Harry defeat Quirrell/Voldemort?",
#     "What is Harry's parent's secret weapon against Voldemort?",
# ]

ground_truth_answers = [
    "Professor McGonagall",
    "Fluffy",
    "Slytherin",
    "Hedwig",
    # "They played music to put Fluffy to sleep.",
    # "A magical mirror that shows the 'deepest, most desperate desire of our hearts.'",
    # "Professor Quirrell, possessed by Voldemort",
    # "Harry's mother's love protected him, causing Quirrell/Voldemort pain when they touched him.",
    # "Love",
]

### Generating Answers and Retrieving Documents for Predefined Questions


In [None]:
# Run the answering pipeline once per evaluation question and collect, in order,
# the answer text and the combined book + summary context.
generated_answers = []
retrieved_documents = []
for question in questions:
    result, all_context_book, all_context_summaries = answer_question_pipeline(question, chunks_retriever, chapter_summaries_retriever, answer_from_context_llm_chain, multi_query_retriver_llm)
    generated_answers.append(result['text'])
    # Book-chunk context and chapter-summary context are concatenated with `+`.
    retrieved_documents.append(all_context_book + all_context_summaries)


### Displaying Retrieved Documents and Generated Answers


In [None]:
# Dump the collected contexts and answers for manual inspection.
print(f'retrieved_documents: {retrieved_documents}\n')
print(f'generated_answers: {generated_answers}')

### Preparing Data and Conducting Ragas Evaluation


In [None]:
# Prepare data for Ragas evaluation
# Prepare data for Ragas evaluation
data_samples = {
    'question': questions,  # Replace with your list of questions
    'answer': generated_answers,  # Replace with your list of generated answers
    'contexts': retrieved_documents,  # Your retrieved_documents list
    'ground_truth': ground_truth_answers  # Replace with your list of ground truth answers
}

# Convert contexts to list of strings (if necessary)
# NOTE(review): `list(context)` only yields a list of strings when each entry is
# already an iterable of strings; if an entry is a single string it would be
# split into characters - confirm the types returned by the answering pipeline.
data_samples['contexts'] = [list(context) for context in data_samples['contexts']]

dataset = Dataset.from_dict(data_samples)

# Evaluate using Ragas with the specified metrics
metrics = [
    answer_correctness,
    faithfulness,
    answer_relevancy,
    context_recall,
    answer_similarity
]
# LLM handed to Ragas for the evaluation run.
llm = ChatOpenAI(temperature=0, model_name="gpt-4-1106-preview", max_tokens=4000)
score = evaluate(dataset, metrics=metrics, llm=llm)

# Print results and explanations
results_df = score.to_pandas()
print(results_df)

### Analyzing Metric Results from Ragas Evaluation


In [None]:
# Pass the Ragas results DataFrame to the helper imported from `helper_functions`.
analyse_metric_results(results_df)

### Interactive Chat Interface for Harry Potter Inquiries


In [None]:
def chat_with_data(chunks_retriever, chapter_summaries_retriever, answer_from_context_llm_chain, multi_query_retriver_llm):
    """
    Provides an interactive chat interface for answering questions about Harry Potter.

    Reads questions from standard input in a loop, answers each one through
    `answer_question_pipeline`, and stops when the user types 'exit'
    (case-insensitive, surrounding whitespace ignored). Blank input is skipped.

    Args:
        chunks_retriever: A retriever for retrieving relevant documents.
        chapter_summaries_retriever: A retriever for retrieving relevant chapter summaries.
        answer_from_context_llm_chain: An LLM chain for answering questions based on context.
        multi_query_retriver_llm: An LLM for use in the MultiQueryRetriever.
    """

    print("You can start chatting with me about Harry Potter. Type 'exit' to stop.")

    while True:
        # Prompt the user for a question; strip so "exit " or " exit" still quits
        question = input("What's your question? \n").strip()

        # Check if the user wants to exit
        if question.lower() == 'exit':
            print("Exiting chat. Goodbye!")
            break

        # Do not spend retrieval and LLM calls on an empty question
        if not question:
            continue

        # Answer the question using the pipeline; the two context values are not needed here
        result, _, _ = answer_question_pipeline(
            question, chunks_retriever, chapter_summaries_retriever, answer_from_context_llm_chain, multi_query_retriver_llm
        )

        # Print the answer
        print("Answer:")
        wrapped_result = textwrap.fill(result['text'], width=120)  # Wrap text for readability
        print(wrapped_result)
        print("-" * 80)  # Print a separator line for readability

### Calling the chat_with_data function

In [None]:
# Start the interactive chat loop with the retrievers and chains built earlier in the notebook.
chat_with_data(chunks_retriever,chapter_summaries_retriever, answer_from_context_llm_chain, multi_query_retriver_llm)