# Installing the required dependencies

In [1]:
# installing dependencies (in quiet mode)
!pip install langchain_community tiktoken langchain-openai lancedb langchain langchainhub langgraph tavily-python sentence-transformers -q

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m91.0/91.0 kB[0m [31m7.1 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m43.5/43.5 kB[0m [31m3.6 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.5/2.5 MB[0m [31m54.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.2/1.2 MB[0m [31m63.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m61.2/61.2 kB[0m [31m5.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m33.2/33.2 MB[0m [31m54.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m137.9/137.9 kB[0m [31m13.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m43.8/43.8 kB[0m [31m3.6 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

In [2]:
import os
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import WebBaseLoader
from langchain_community.vectorstores import LanceDB
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
import lancedb
from typing import Dict, TypedDict
from langchain_core.messages import BaseMessage
import json
import operator
from typing import Annotated, Sequence, TypedDict
from langchain import hub
from langchain.output_parsers.openai_tools import PydanticToolsParser
from langchain.prompts import PromptTemplate
from langchain.schema import Document
from langchain_community.tools.tavily_search import TavilySearchResults
from langchain_core.messages import BaseMessage, FunctionMessage
from langchain_core.output_parsers import StrOutputParser
from langchain_core.pydantic_v1 import BaseModel, Field
from langchain_core.runnables import RunnablePassthrough
from langchain_core.utils.function_calling import convert_to_openai_tool
import pprint
from langgraph.graph import END, StateGraph


For example, replace imports like: `from langchain_core.pydantic_v1 import BaseModel`
with: `from pydantic import BaseModel`
or the v1 compatibility namespace if you are working in a code base that has not been fully upgraded to pydantic 2 yet. 	from pydantic.v1 import BaseModel

  exec(code_obj, self.user_global_ns, self.user_ns)


In [3]:
os.environ["TAVILY_API_KEY"] = "Enter Tavily API Key"
os.environ["OPENAI_API_KEY"] = "Enter OpenAi API Key"

# Building Retriever

In [4]:
# Define a list of URLs pointing to Jay Alammar's articles on Transformers, BERT, and retrieval using transformers.
urls = [
    "https://link.springer.com/article/10.1007/s42979-021-00592-x",
]

# For each URL, create a WebBaseLoader instance and load the content.
# This returns a list of documents for each URL.
docs = [WebBaseLoader(url).load() for url in urls]

# Since each element in 'docs' is itself a list (if the loader splits the page into multiple parts),
# we flatten the list of lists into a single list containing all document parts.
docs_list = [item for sublist in docs for item in sublist]

# Document Chunking:
# Initialize a text splitter that uses a TikToken encoder. The splitter is configured to:
# - Create chunks with a maximum of 250 characters.
# - Have no overlap between consecutive chunks.
text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
    chunk_size=250,  # Maximum characters per chunk
    chunk_overlap=0  # No overlap between chunks
)

# Split the documents into smaller chunks. This helps when processing long documents,
# as many downstream models (like language models) have input length limitations.
doc_splits = text_splitter.split_documents(docs_list)


In [9]:
def lanceDBConnection(embed):
    # Connect to a LanceDB database located at the specified path
    db = lancedb.connect("/tmp/lancedb")

    # Method to initialze a LanceDB database
    # Create a table named "crag_demo" in the database.
    # The table is created with initial data: a dictionary containing two keys:
    # - "vector": computed by embedding the string "Hello World" using the embed object
    # - "text": the actual string "Hello World"
    # The mode "overwrite" ensures that if a table with the same name exists, it will be replaced.
    table = db.create_table(
        "crag_demo",
        data=[{"vector": embed.embed_query("Hello World"), "text": "Hello World"}],
        mode="overwrite",
    )

    # Return the created table for further use
    return db


In [10]:
# Initialize the embedding model (in this case, using OpenAI's embeddings).
# This object converts text into vector representations.
embedder = OpenAIEmbeddings()

# Initialize the LanceDB database by calling the custom connection function.
# This creates a table and sets up the connection using the embedder.
table = lanceDBConnection(embedder)

# Create a vector store using LanceDB from the pre-split documents.
# - documents: the list of document chunks (doc_splits) created earlier.
# - embedding: the embedder object that will convert each chunk into a vector.
# - connection: the LanceDB table (or connection) established above.
vectorstore = LanceDB.from_documents(
    documents=doc_splits,
    embedding=embedder,
    connection=table,
)

# Convert the vector store into a retriever.
# This retriever can now be used to perform similarity searches over the stored document embeddings.
retriever = vectorstore.as_retriever()


# Define Langraph

In [11]:
#The GraphState class is defined using TypedDict,
#which specifies that the keys attribute must be a dictionary where each key is a string and the corresponding value can be of any type.

class GraphState(TypedDict):
    """
    Represents the state of our graph.

    Attributes:
        keys: A dictionary where each key is a string.
    """

    keys: Dict[str, any]

In [12]:
def retrieve(state):
    """
    Helper function for retrieving documents.

    Args:
        state (dict): The current graph state. It is expected to be a dictionary with a nested "keys" dictionary.
                      This "keys" dictionary should already contain a "question" key, representing the query text.

    Returns:
        dict: A new state where the "keys" dictionary is updated to include:
              - "question": The original query text.
              - "documents": The documents that are relevant to that query, as returned by the retriever.
    """
    # Print a header message to indicate that the retrieval process is starting.
    # The asterisks provide a clear visual marker in the console output.
    print("*" * 5, " RETRIEVE ", "*" * 5)

    # Extract the inner dictionary stored under the "keys" key from the state.
    # This inner dictionary holds our state information such as the question.
    state_dict = state["keys"]

    # Retrieve the question from the state. This "question" is the user's query text that we want to process.
    question = state_dict["question"]

    # Use the retriever to fetch documents relevant to the question.
    # The retriever (assumed to be set up elsewhere in the code) searches for documents whose content semantically matches the question.
    documents = retriever.get_relevant_documents(question)

    # Create and return a new state dictionary.
    # Here we package the original question along with the retrieved documents back into a dictionary under the "keys" key.
    # This updated state can then be passed along to subsequent functions or steps in a processing pipeline.
    return {"keys": {"documents": documents, "question": question}}


In [13]:
def generate(state):
    """
    Helper function for generating answers

    Args:
        state (dict): The current graph state, expected to contain a "keys" dictionary
                      with a "question" key and a "documents" key (retrieved earlier).

    Returns:
        dict: A new state where the "keys" dictionary is updated to include the generated answer.
              The updated state contains the original "question", "documents", and a new "generation" key.
    """
    # Print a visual marker to indicate that the generation process has started.
    print("*" * 5, " GENERATE ", "*" * 5)

    # Extract the inner state dictionary from the overall state.
    state_dict = state["keys"]

    # Retrieve the question and documents from the state.
    question = state_dict["question"]
    documents = state_dict["documents"]

    # Retrieve a prompt template from a hub. Here, "rlm/rag-prompt" likely represents a template
    # used to combine the context (documents) with the question in a format that the LLM expects.
    prompt = hub.pull("rlm/rag-prompt")

    # Initialize the language model (LLM) using ChatOpenAI.
    # - model_name: Specifies the particular model version (here, a preview version of GPT-4).
    # - temperature: Set to 0 for deterministic outputs.
    # - streaming: Set to True to allow the LLM to return outputs as they are generated.
    # llm = ChatOpenAI(model_name="gpt-4-0125-preview", temperature=0, streaming=True)
    llm = ChatOpenAI(model_name="gpt-4o-mini", temperature=0, streaming=True)

    # Define a nested helper function for formatting retrieved documents.
    # This function concatenates the 'page_content' of each document in the list, separated by two newlines.
    # Note: In the current code snippet, this function is defined but not explicitly used.
    def format_docs(docs):
        return "\n\n".join(doc.page_content for doc in docs)

    # Construct the RAG (Retrieval-Augmented Generation) chain.
    # The pipeline is created by chaining together:
    # 1. The prompt (template)
    # 2. The language model (LLM)
    # 3. A string output parser (StrOutputParser) that likely cleans or formats the LLM output.
    # The "|" operator is used here to create a pipeline where the output of one component
    # feeds into the next.
    rag_chain = prompt | llm | StrOutputParser()

    # Invoke the RAG chain with a dictionary containing:
    # - "context": The retrieved documents which provide supporting information.
    # - "question": The original query that needs to be answered.
    # The chain processes this input, applies the prompt, passes it to the LLM, and finally parses the output.
    generation = rag_chain.invoke({"context": documents, "question": question})

    # Return a new state with the same "question" and "documents", and add the "generation" key
    # which holds the output from the LLM (the final answer).
    return {
        "keys": {"documents": documents, "question": question, "generation": generation}
    }


In [14]:
def grade_documents(state):
    """
    Determines whether the retrieved documents are relevant to the question.

    Args:
        state (dict): The current graph state, expected to have a "keys" dictionary
                      containing at least "question" and "documents".

    Returns:
        dict: A new state where the "keys" dictionary is updated with:
              - "documents": Only the documents deemed relevant.
              - "question": The original question.
              - "run_web_search": A flag indicating whether to perform a web search.
    """
    # Print a header message to indicate that the relevance check is starting.
    print("*" * 5, " DOCS RELEVANCE CHECK", "*" * 5)

    # Extract the inner dictionary from the overall state.
    state_dict = state["keys"]

    # Retrieve the question (user query) and the list of retrieved documents.
    question = state_dict["question"]
    documents = state_dict["documents"]

    # Define a data model using Pydantic's BaseModel to represent the relevance grade.
    # This model expects a single field, binary_score, which should be either 'yes' or 'no'.
    class grade(BaseModel):
        """Binary score for relevance check."""
        binary_score: str = Field(description="Relevance score 'yes' or 'no'")

    # Initialize a language model (LLM) with deterministic behavior.
    # The model used here is a GPT-4 preview, with streaming enabled for real-time output.
    # model = ChatOpenAI(temperature=0, model="gpt-4-0125-preview", streaming=True)
    model = ChatOpenAI(temperature=0, model="gpt-4o-mini", streaming=True)

    # Convert the Pydantic data model into an OpenAI tool.
    # This allows the LLM to interact with our defined function schema.
    grade_tool_oai = convert_to_openai_tool(grade)

    # Bind the LLM with the tool, enforcing that the tool is used during the invocation.
    # This means that the LLM will call a function named "grade" when processing the prompt.
    llm_with_tool = model.bind(
        tools=[convert_to_openai_tool(grade_tool_oai)],
        tool_choice={"type": "function", "function": {"name": "grade"}},
    )

    # Create a parser that uses the Pydantic model to parse the LLM's output.
    # It will convert the raw output from the LLM into a structured format based on our grade model.
    parser_tool = PydanticToolsParser(tools=[grade])

    # Define the prompt template for the grading task.
    # The prompt instructs the LLM to assess whether the given document (context) is relevant
    # to the provided user question. The LLM should return a binary score: 'yes' or 'no'.
    prompt = PromptTemplate(
        template="""You are a grader assessing relevance of a retrieved document to a user question. \n
        Here is the retrieved document: \n\n {context} \n\n
        Here is the user question: {question} \n
        If the document contains keyword(s) or semantic meaning related to the user question, grade it as relevant. \n
        Give a binary score 'yes' or 'no' score to indicate whether the document is relevant to the question.""",
        input_variables=["context", "question"],
    )

    # Create the RAG (Retrieval-Augmented Generation) chain by chaining together:
    # 1. The prompt to format the input.
    # 2. The LLM with the tool bound for invoking the grading function.
    # 3. The parser to convert the LLM output into our structured grade model.
    chain = prompt | llm_with_tool | parser_tool

    # Initialize a list to hold documents that are considered relevant.
    filtered_docs = []

    # Initialize a flag for web search. Default is "No", meaning no additional web search is needed.
    search = "No"

    # Loop through each retrieved document.
    for d in documents:
        # Invoke the chain for each document with the document's content and the question.
        # The chain processes the prompt, gets a response from the LLM, and then parses the output.
        score = chain.invoke({"question": question, "context": d.page_content})

        # Extract the binary score from the parsed result.
        grade_result = score[0].binary_score

        # If the document is graded as relevant ('yes'), add it to the filtered list.
        if grade_result == "yes":
            print("*" * 5, " RATED DOCUMENT: RELEVANT", "*" * 5)
            filtered_docs.append(d)
        else:
            # If not relevant, log the result and set the flag to "Yes",
            # which might indicate that a web search should be performed to supplement the retrieval.
            print("*" * 5, " RATED DOCUMENT: NOT RELEVANT", "*" * 5)
            search = "Yes"
            continue  # Skip adding this document

    # Return the updated state with:
    # - filtered_docs: Only the documents rated as relevant.
    # - question: The original user question.
    # - run_web_search: A flag indicating whether additional web search is needed.
    return {
        "keys": {
            "documents": filtered_docs,
            "question": question,
            "run_web_search": search,
        }
    }


In [15]:
def transform_query(state):
    """
    Helper function for transforming the query to produce a better question.

    Args:
        state (dict): The current graph state containing at least a "question" and "documents" in its "keys".

    Returns:
        dict: A new state where the "question" key is updated with a re-phrased version of the original question.
    """

    # Print a header message to indicate that the query transformation process has started.
    print("*" * 5, "TRANSFORM QUERY", "*" * 5)

    # Extract the inner dictionary from the state that holds our relevant data.
    state_dict = state["keys"]

    # Retrieve the current question from the state.
    question = state_dict["question"]

    # Retrieve the documents; these are carried over unchanged.
    documents = state_dict["documents"]

    # Create a prompt template designed to instruct the LLM to rephrase the original question.
    # The template provides clear instructions to generate an improved, optimized question for retrieval.
    prompt = PromptTemplate(
        template="""You are generating questions that is well optimized for retrieval. \n
        Look at the input and try to reason about the underlying semantic intent / meaning. \n
        Here is the initial question:
        \n --------- \n
        {question}
        \n --------- \n
        Formulate an improved question: """,
        input_variables=["question"],
    )

    # Initialize the language model (LLM) using ChatOpenAI with a specific GPT-4 preview model.
    # Temperature is set to 0 for deterministic output, and streaming is enabled.
    model = ChatOpenAI(temperature=0, model="gpt-4-0125-preview", streaming=True)

    # Build a chain that links the prompt, the language model, and a string output parser.
    # The chain takes the input prompt, gets processed by the LLM, and then the output is parsed into a string.
    chain = prompt | model | StrOutputParser()

    # Invoke the chain with the current question to get a rephrased, better version of the question.
    better_question = chain.invoke({"question": question})

    # Return an updated state containing the original documents and the improved question.
    return {"keys": {"documents": documents, "question": better_question}}


In [16]:
def web_search(state):
    """
    Helper function to do Web search based on the re-phrased question using Tavily API.

    Args:
        state (dict): The current graph state containing at least "question" and "documents" in its "keys".

    Returns:
        dict: A new state where the "documents" key is updated with additional web search results appended.
    """

    # Print a header message to signal the start of the web search process.
    print("*" * 5, " WEB SEARCH ", "*" * 5)

    # Extract the inner state dictionary from the overall state.
    state_dict = state["keys"]

    # Retrieve the current question (which is assumed to be the re-phrased question).
    question = state_dict["question"]

    # Retrieve the current list of documents.
    documents = state_dict["documents"]

    # Initialize the web search tool. Here, TavilySearchResults is assumed to be a tool
    # that interfaces with the Tavily API to perform web searches.
    tool = TavilySearchResults()

    # Invoke the tool with the question as the query.
    # This should return a list of search results, where each result is expected to be a dictionary
    # containing at least a "content" field.
    docs = tool.invoke({"query": question})

    # Extract the "content" from each search result and join them into a single string.
    # The newline character ("\n") separates the content from different search results.
    web_results = "\n".join([d["content"] for d in docs])

    # Wrap the concatenated web results in a Document object.
    # This makes the web results compatible with the other document objects in the system.
    web_results = Document(page_content=web_results)

    # Append the newly created Document (containing the web search results) to the existing list of documents.
    documents.append(web_results)

    # Return the updated state with the appended web results.
    # The question is preserved while the documents list now includes both the original documents and the new web search result.
    return {"keys": {"documents": documents, "question": question}}


# Graph Edges

In [17]:
def decide_to_generate(state):
    """
    Helper function to determine whether to generate an answer or re-generate a question for web search.

    Args:
        state (dict): The current state of the agent, including all keys in its "keys" dictionary.

    Returns:
        str: The name of the next node to call, either "transform_query" or "generate".
    """

    # Print a header message to signal that the decision-making process has started.
    print("*" * 5, " DECIDE TO GENERATE ", "*" * 5)

    # Extract the inner dictionary from the overall state, which holds the relevant keys.
    state_dict = state["keys"]

    # Retrieve the current question from the state.
    question = state_dict["question"]

    # Retrieve the filtered documents from the state. These documents have been previously processed.
    filtered_documents = state_dict["documents"]

    # Retrieve the flag indicating whether a web search should be run.
    # This flag ("run_web_search") was set in a previous step, based on document relevance.
    search = state_dict["run_web_search"]

    # Decision-making logic:
    # If the flag "search" is "Yes", it indicates that the documents were not sufficiently relevant.
    # In that case, we should re-generate a new query (and subsequently perform a web search).
    if search == "Yes":
        # Log the decision.
        print("*" * 5, " DECISION: TRANSFORM QUERY and RUN WEB SEARCH ", "*" * 5)
        # Return the identifier for the next node that will transform the query.
        return "transform_query"
    else:
        # If the flag is not "Yes", it means we have enough relevant documents.
        # Log the decision.
        print("*" * 5, " DECISION: GENERATE ", "*" * 5)
        # Return the identifier for the node that will generate an answer using the relevant documents.
        return "generate"


# Build Graph

The workflow defines the order in which nodes are executed:

--> The workflow begins at the retrieve node.

--> After retrieving documents, the next step is to grade their relevance.

--> Here, decide_to_generate inspects the state (especially the run_web_search flag) and returns either "transform_query" or "generate". Depending on that result:

1. If the decision is "transform_query", the workflow continues at the transform_query node.

2. If it is "generate", it directly goes to the generate node.

--> If the query was transformed, it then flows from transform_query to web_search to get additional results.

--> After the web search, the process goes to generate where an answer is produced.
Finally, once the answer is generated, the workflow reaches the end (marked by END).

--> The compile() function finalizes the workflow. This step converts the graph into an executable application (or pipeline), which can then be run with an initial state. The compiled app manages the flow, ensuring that state is passed between nodes as defined by the edges and conditional logic.


In [18]:
workflow = StateGraph(GraphState)

# Define the nodes
workflow.add_node("retrieve", retrieve)  # Retrieve documents from the source.
workflow.add_node("grade_documents", grade_documents)  # Grade the relevance of the retrieved documents.
workflow.add_node("generate", generate)  # Generate an answer using the documents.
workflow.add_node("transform_query", transform_query)  # Transform the query to optimize it for retrieval.
workflow.add_node("web_search", web_search)  # Perform a web search to supplement documents.


# Build graph
workflow.set_entry_point("retrieve")
workflow.add_edge("retrieve", "grade_documents")

# decide_to_generate inspects the state and returns either transform_query or generate depending on the result
# If the decision is transform_query, the workflow continues at the transform_query node, if its generate, it directly goes to the generate node
workflow.add_conditional_edges(
    "grade_documents",
    decide_to_generate,
    {
        "transform_query": "transform_query",
        "generate": "generate",
    },
)
workflow.add_edge("transform_query", "web_search")
workflow.add_edge("web_search", "generate")
workflow.add_edge("generate", END)

# Compile
app = workflow.compile()

In [19]:
# Run
# Set the initial query prompt.
# query_prompt = "How Transformers work?"
query_prompt = "What is Machine Learning?"

# Build the initial input state as a dictionary.
# The structure uses a "keys" key to store all state values, following our GraphState definition.
inputs = {"keys": {"question": query_prompt}}

# Stream through the workflow.
# app.stream(inputs) executes the graph node-by-node, yielding the state at each step.
for output in app.stream(inputs):
    # For each output (which could contain multiple nodes in parallel or sequentially),
    # iterate over its items. Each key represents a node, and its value holds the state.
    for key, value in output.items():
        # Print the full state (stored under the "keys" key) at the current node.
        # This helps in understanding how the state evolves as it passes through each node.
        pprint.pprint(value["keys"], indent=2, width=80, depth=None)
    # Print a separator between outputs from different nodes for clarity.
    pprint.pprint("------------------------")

# After processing the pipeline, print the final generated answer.
print("*" * 5, " Generated Answer ", "*" * 5)
# The final state is held in 'value' from the last iteration of the loop.
# We access the "generation" key to get the final answer produced by the generate node.
pprint.pprint(value["keys"]["generation"])


*****  RETRIEVE  *****


  documents = retriever.get_relevant_documents(question)


{ 'documents': [ Document(metadata={'description': 'In the current age of the Fourth Industrial Revolution (4IR or Industry 4.0), the digital world has a wealth of data, such as Internet of Things (IoT) data', 'language': 'en', 'source': 'https://link.springer.com/article/10.1007/s42979-021-00592-x', 'title': 'Machine Learning: Algorithms, Real-World Applications and Research Directions | SN Computer Science\n        '}, page_content='AbstractIn the current age of the Fourth Industrial Revolution (4IR or Industry 4.0), the digital world has a wealth of data, such as Internet of Things (IoT) data, cybersecurity data, mobile data, business data, social media data, health data, etc. To intelligently analyze these data and develop the corresponding smart and automated\xa0applications, the knowledge of artificial intelligence (AI), particularly, machine learning (ML) is the key. Various types of machine learning algorithms such as supervised, unsupervised, semi-supervised, and reinforcement



{ 'documents': [ Document(metadata={'description': 'In the current age of the Fourth Industrial Revolution (4IR or Industry 4.0), the digital world has a wealth of data, such as Internet of Things (IoT) data', 'language': 'en', 'source': 'https://link.springer.com/article/10.1007/s42979-021-00592-x', 'title': 'Machine Learning: Algorithms, Real-World Applications and Research Directions | SN Computer Science\n        '}, page_content='AbstractIn the current age of the Fourth Industrial Revolution (4IR or Industry 4.0), the digital world has a wealth of data, such as Internet of Things (IoT) data, cybersecurity data, mobile data, business data, social media data, health data, etc. To intelligently analyze these data and develop the corresponding smart and automated\xa0applications, the knowledge of artificial intelligence (AI), particularly, machine learning (ML) is the key. Various types of machine learning algorithms such as supervised, unsupervised, semi-supervised, and reinforcement

In [20]:
# Define the input state with a question that might not have related context
inputs = {
    "keys": {
        "question": "Explain a framework called React.Js?"
    }
}

# Run the workflow using the stream() method.
# The workflow processes the input through all defined nodes, yielding the state at each step.
for output in app.stream(inputs):
    # For each output (which represents one or more nodes in the pipeline),
    # iterate over the items to inspect the state of each node.
    for key, value in output.items():
        # Print the full state stored under the "keys" dictionary.
        # This allows you to see how the state evolves through each node (e.g., retrieval, grading, etc.).
        pprint.pprint(value["keys"], indent=2, width=80, depth=None)
    # Print a separator line to distinguish the output of different nodes.
    pprint.pprint("------------------------")

# After streaming through the workflow, the final state is obtained.
# Print the final generated answer.
print("*" * 5, " Generated Answer ", "*" * 5)
pprint.pprint(value["keys"]["generation"])


*****  RETRIEVE  *****
{ 'documents': [ Document(metadata={'description': 'In the current age of the Fourth Industrial Revolution (4IR or Industry 4.0), the digital world has a wealth of data, such as Internet of Things (IoT) data', 'language': 'en', 'source': 'https://link.springer.com/article/10.1007/s42979-021-00592-x', 'title': 'Machine Learning: Algorithms, Real-World Applications and Research Directions | SN Computer Science\n        '}, page_content='“Machine Learning Tasks and Algorithms” can help to build context-aware adaptive and smart applications according to the preferences of the mobile phone users.'),
                 Document(metadata={'description': 'In the current age of the Fourth Industrial Revolution (4IR or Industry 4.0), the digital world has a wealth of data, such as Internet of Things (IoT) data', 'language': 'en', 'source': 'https://link.springer.com/article/10.1007/s42979-021-00592-x', 'title': 'Machine Learning: Algorithms, Real-World Applications and Resea



{ 'documents': [ Document(metadata={}, page_content="ReactJS is a popular JavaScript library developed by Facebook for building dynamic and interactive user interfaces, particularly for\nReact is a JavaScript library used for building user interfaces, especially for single-page applications where the user interacts with the\nReact is a framework that employs Webpack to automatically compile React, JSX, and ES6 code while handling CSS file prefixes. React is a JavaScript-based UI development library. Although React is a library rather than a language, it is widely used in web development. The library first appeared in May 2013 and is now one of the most commonly used frontend libraries for web development. [...] ReactJS is a free, element front-end toolkit that is exclusively in charge of the software's layered architecture.\n\n### 4\\. What is the difference between React and React JS?\n\nAlthough Reactjs is essentially a Software framework and React Native is the whole framework, the 