<a href="https://colab.research.google.com/github/robertheubanks/AI-Engineering-Demo-Day/blob/main/BioMedical_Research_AI_RAG_with_Agent_v032824.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
 # This code block installs the required libraries for the project.
 !pip install -qU langchain langchain_openai langgraph arxiv duckduckgo-search pubmed wolframalpha xmltodict


[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m810.5/810.5 kB[0m [31m3.2 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m52.4/52.4 kB[0m [31m2.5 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.8/1.8 MB[0m [31m12.9 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m273.0/273.0 kB[0m [31m15.5 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m86.9/86.9 kB[0m [31m3.9 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m262.9/262.9 kB[0m [31m11.9 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.8/1.8 MB[0m [31m19.1 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m81.1/81.1 kB[0m [31m5.4 MB/s

In [2]:
# This code block installs additional libraries:
!pip install -qU faiss-cpu pymupdf

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m27.0/27.0 MB[0m [31m31.5 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.9/3.9 MB[0m [31m52.5 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m30.8/30.8 MB[0m [31m23.3 MB/s[0m eta [36m0:00:00[0m
[?25h

In [3]:
# This code block imports the necessary modules: os for interacting with the operating system and getpass for securely prompting the user for input without echoing the input to the console.
import os
import getpass

# Prompt without echoing the input and keep the key only in this process's environment,
# so it is never written into the notebook source.
os.environ["OPENAI_API_KEY"] = getpass.getpass("OpenAI API Key:")

OpenAI API Key:··········


In [4]:
# Similar to the previous code block, this one prompts the user to enter their Wolfram Alpha App ID using getpass.getpass().
import os

# Same no-echo prompt pattern as the OpenAI key above.
# NOTE(review): no cell visible in this notebook reads this variable (the Wolfram Alpha
# entry in the tool list is commented out) — confirm it is still needed.
os.environ["WOLFRAM_ALPHA_APPID"] = getpass.getpass("WOLFRAM_ALPHA_APPID")

WOLFRAM_ALPHA_APPID··········


In [5]:
# This code block imports the uuid4 function from the uuid module to generate unique identifiers.
# It sets the environment variable "LANGCHAIN_TRACING_V2" to "true" to enable tracing in langchain.
# It generates a unique project name using uuid4().hex[0:8] and sets it as the "LANGCHAIN_PROJECT" environment variable.
# It prompts the user to enter their LangSmith API key using getpass.getpass() and stores it in the "LANGCHAIN_API_KEY" environment variable.
from uuid import uuid4

# Switch tracing on for everything that runs below.
os.environ["LANGCHAIN_TRACING_V2"] = "true"
# An 8-hex-character random suffix gives each session its own project name.
os.environ["LANGCHAIN_PROJECT"] = "AIE1 - LangGraph - " + uuid4().hex[:8]
# Prompted without echo so the key never appears in the notebook output.
os.environ["LANGCHAIN_API_KEY"] = getpass.getpass("LangSmith API Key: ")

LangSmith API Key: ··········


In [6]:
# This code block imports necessary classes from langchain and langchain_community libraries.
# It uses the ArxivLoader to load a maximum of 5 documents from arXiv related to the query "CRISPR".
# The loaded documents are then split into chunks using the RecursiveCharacterTextSplitter. Because the splitter is created with from_tiktoken_encoder(), the chunk size of 500 and the overlap of 50 are measured in tokens, not characters.
# The chunked documents are then used to create a vector store using the FAISS class from langchain_community.
# The OpenAIEmbeddings class is used to generate embeddings for the documents using the "text-embedding-3-small" model.
# Finally, a retriever object is created from the faiss_vectorstore to enable retrieval of relevant documents based on queries.
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import ArxivLoader
from langchain_community.vectorstores import FAISS
from langchain_openai import OpenAIEmbeddings

# Corpus and indexing parameters, gathered here so they can be tuned in one place.
ARXIV_QUERY = "CRISPR"
ARXIV_MAX_DOCS = 5
# The splitter is built with `from_tiktoken_encoder`, so both sizes below are
# measured in tokens, not characters.
CHUNK_SIZE_TOKENS = 500
CHUNK_OVERLAP_TOKENS = 50
EMBEDDING_MODEL = "text-embedding-3-small"

# Load the source papers from arXiv (network call).
docs = ArxivLoader(query=ARXIV_QUERY, load_max_docs=ARXIV_MAX_DOCS).load()

text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
    chunk_size=CHUNK_SIZE_TOKENS, chunk_overlap=CHUNK_OVERLAP_TOKENS
)

chunked_documents = text_splitter.split_documents(docs)

# Embed every chunk and index it in FAISS (calls the OpenAI embeddings API).
faiss_vectorstore = FAISS.from_documents(
    documents=chunked_documents,
    embedding=OpenAIEmbeddings(model=EMBEDDING_MODEL),
)

# Retriever with default search settings; used by the RAG chain below.
retriever = faiss_vectorstore.as_retriever()

In [7]:
# This code block imports the ChatPromptTemplate class from langchain_core.prompts.
# It defines a prompt template named RAG_PROMPT using a multi-line string. The prompt template includes placeholders for the user's question ({question}) and the relevant context ({context}).
# The ChatPromptTemplate.from_template() method is used to create a prompt object named rag_prompt from the RAG_PROMPT template.
from langchain_core.prompts import ChatPromptTemplate

# Template text for the RAG step; {question} and {context} are filled in at run time.
RAG_PROMPT = (
    "Use the following context to answer the user's query. "
    "If you cannot answer the question, please respond with 'I don't know'.\n"
    "\n"
    "Question:\n"
    "{question}\n"
    "\n"
    "Context:\n"
    "{context}\n"
)

# Chat prompt object built from the template above.
rag_prompt = ChatPromptTemplate.from_template(RAG_PROMPT)

In [8]:
# This code block imports the ChatOpenAI class from langchain_openai.
# It creates an instance of the ChatOpenAI class named openai_chat_model with the model set to "gpt-3.5-turbo".
from langchain_openai import ChatOpenAI

# Chat model used by the RAG chain. No temperature is passed, so the library default applies.
openai_chat_model = ChatOpenAI(model="gpt-3.5-turbo")

In [9]:
# This code block imports necessary classes and functions from langchain.
# It creates a retrieval-augmented generation chain named retrieval_augmented_generation_chain.
# The chain is defined using the LCEL (LangChain Expression Language) syntax.
# It starts by retrieving the context based on the user's question using the retriever object and the itemgetter function.
# The retrieved context is then assigned to the context variable using the RunnablePassthrough.assign() method.
# The rag_prompt is then used to generate a response using the openai_chat_model based on the user's question and the retrieved context.
from operator import itemgetter
from langchain.schema.output_parser import StrOutputParser
from langchain.schema.runnable import RunnablePassthrough

# Input: {"question": str}. Output: {"response": <chat model message>, "context": <retrieved documents>}.
retrieval_augmented_generation_chain = (
    # Step 1: fan the input out into "context" (retriever output for the question) and "question".
    {"context": itemgetter("question") | retriever, "question": itemgetter("question")}
    # Step 2: re-assigns "context" to the value it already holds.
    # NOTE(review): data-wise this looks redundant, but do not simply delete it — without a
    # Runnable between them, the two dict literals would be combined by Python's own dict `|`
    # operator instead of being composed as chain steps.
    | RunnablePassthrough.assign(context=itemgetter("context"))
    # Step 3: generate the answer under "response" and pass the retrieved context through unchanged.
    | {"response": rag_prompt | openai_chat_model, "context": itemgetter("context")}
)

In [10]:
# This code block invokes the retrieval_augmented_generation_chain using the ainvoke() method, which is an asynchronous version of the invoke() method.
# It passes a dictionary containing the user's question ("What is CRISPR?") as the input to the chain.
# The chain processes the input, retrieves the relevant context, generates a response using the OpenAI chat model, and returns the generated response.
# Smoke test of the RAG chain; the cell's value (displayed below) is a dict with the
# model's "response" message and the retrieved "context" documents.
await retrieval_augmented_generation_chain.ainvoke({"question" : "What is CRISPR?"})

{'response': AIMessage(content='CRISPR stands for Clustered Regularly Interspaced Short Palindromic Repeats.', response_metadata={'token_usage': {'completion_tokens': 20, 'prompt_tokens': 3043, 'total_tokens': 3063}, 'model_name': 'gpt-3.5-turbo', 'system_fingerprint': 'fp_3bc1b5746c', 'finish_reason': 'stop', 'logprobs': None}),
 'context': [Document(page_content='the bacterial genome from foreign mobile elements such as phage and plasmids [11, 50].\nThe building blocks of CRISPR systems were ﬁrst identiﬁed thirty years ago, when inter-\nspaced DNA repeats were found in speciﬁc regions of the E. coli genome [30]. Research\neﬀorts quantiﬁed the variation in these sequences, their lengths and positions, in bacterial\nand archaea genomes [5, 9, 39] before the name CRISPR was ﬁrst applied [31]. CRISPR\nsystems have now been identiﬁed in nearly 45% of bacterial strains and have been classiﬁed\ninto three types that are further divided according to the phylogeny of the CRISPR sequences\nand

In [11]:
# This code block imports various tool classes from the langchain_community library.
# It creates a list named tool_belt that contains instances of the following tools:
# ArxivQueryRun(): A tool for running queries on the arXiv database.
# DuckDuckGoSearchRun(): A tool for performing searches using the DuckDuckGo search engine.
# The PubmedQueryRun() and WolframAlphaAPIWrapper() tools are commented out and not included in the tool_belt.

# from langchain_community.tools.pubmed.tool import PubmedQueryRun
from langchain_community.tools.arxiv.tool import ArxivQueryRun
from langchain_community.tools.ddg_search import DuckDuckGoSearchRun
# from langchain_community.utilities.wolfram_alpha import WolframAlphaAPIWrapper

# Tools the agent may call. Only the arXiv and DuckDuckGo tools are active.
tool_belt = [
    # PubmedQueryRun(),
    ArxivQueryRun(),
    DuckDuckGoSearchRun(),
    # NOTE(review): WolframAlphaAPIWrapper comes from a `utilities` module, so it is presumably
    # an API wrapper rather than a tool — confirm before re-enabling it in this list.
    # WolframAlphaAPIWrapper(),
]

In [12]:
# This code block imports the ToolExecutor class from the langgraph.prebuilt module.
# It creates an instance of the ToolExecutor class named tool_executor and passes the tool_belt as an argument.
# The ToolExecutor is responsible for executing the tools in the tool_belt based on the provided input.
from langgraph.prebuilt import ToolExecutor

# Executor over the tool list; call_tool() invokes it with a ToolInvocation to run a tool.
tool_executor = ToolExecutor(tool_belt)

In [13]:
# This code block imports the ChatOpenAI class from the langchain_openai module.
# It creates an instance of the ChatOpenAI class named model with the temperature parameter set to 0.
# The temperature parameter controls the randomness of the generated responses. A value of 0 minimizes sampling randomness, so responses are as repeatable as possible (though not strictly guaranteed to be identical).
from langchain_openai import ChatOpenAI

# Agent model. No model name is passed, so the library default is used
# (the recorded outputs in this notebook show 'gpt-3.5-turbo').
model = ChatOpenAI(temperature=0)

In [14]:
# This code block imports the convert_to_openai_function function from the langchain_core.utils.function_calling module.
# It creates a list named functions by applying the convert_to_openai_function function to each tool in the tool_belt.
# The convert_to_openai_function function converts the tools into a format compatible with OpenAI's function calling API.
# It then binds the converted functions to the model using the bind_functions() method, allowing the model to utilize the tools during generation.
from langchain_core.utils.function_calling import convert_to_openai_function

# Describe every tool in OpenAI function-calling format.
functions = [convert_to_openai_function(t) for t in tool_belt]

# Build the bound agent model from a fresh ChatOpenAI instead of rebinding the existing
# global (`model = model.bind_functions(...)`). The old form derived `model` from its own
# previous value, so re-running this cell would bind onto an already-bound model; this
# form gives the same result on every run.
model = ChatOpenAI(temperature=0).bind_functions(functions)

In [15]:
# This code block imports necessary types and classes from the typing module and the langchain_core.messages module.
# It defines a custom type named AgentState using the TypedDict class from the typing module.
# The AgentState type has a field named messages of type Annotated[Sequence[BaseMessage], operator.add].
# Sequence[BaseMessage] indicates that messages is a sequence of BaseMessage objects.
# Annotated is used to attach metadata to the type, in this case, specifying that the messages field should be treated as a concatenable sequence using the operator.add function.
from typing import TypedDict, Annotated, Sequence
import operator
from langchain_core.messages import BaseMessage

class AgentState(TypedDict):
  """State carried through the graph: the running list of conversation messages."""
  # Annotated with operator.add as the combiner for this field; nodes in this notebook
  # return {"messages": [new_message]}.
  messages: Annotated[Sequence[BaseMessage], operator.add]

In [16]:
# This code block imports necessary classes and functions from the langgraph.prebuilt, json, and langchain_core.messages modules.
# It defines two functions: call_model and call_tool.
# The call_model function takes the state as input, extracts the messages from the state, invokes the model with the messages, and returns a dictionary containing the response message.
# The call_tool function takes the state as input, extracts the last message from the state, creates a ToolInvocation object using the tool name and arguments from the last message, invokes the tool_executor with the ToolInvocation, creates a FunctionMessage object with the tool response and name, and returns a dictionary containing the FunctionMessage.
from langgraph.prebuilt import ToolInvocation
import json
from langchain_core.messages import FunctionMessage

def call_model(state):
  """Agent node: send the conversation so far to the model.

  Args:
    state: Mapping with a "messages" entry holding the conversation.

  Returns:
    A dict whose "messages" list contains only the model's new reply.
  """
  reply = model.invoke(state["messages"])
  return {"messages": [reply]}

def call_tool(state):
  """Action node: run the tool requested by the latest message.

  Args:
    state: Mapping with a "messages" entry; the last message must carry a
      "function_call" (with "name" and JSON-encoded "arguments") in its
      additional_kwargs.

  Returns:
    A dict whose "messages" list contains one FunctionMessage with the
    stringified tool output, named after the tool.

  Raises:
    KeyError: If the last message carries no "function_call".
    json.JSONDecodeError: If the arguments are not valid JSON.
  """
  function_call = state["messages"][-1].additional_kwargs["function_call"]

  invocation = ToolInvocation(
      tool=function_call["name"],
      # The arguments arrive as a JSON string and are decoded into the tool input.
      tool_input=json.loads(function_call["arguments"]),
  )
  tool_output = tool_executor.invoke(invocation)

  return {
      "messages": [FunctionMessage(content=str(tool_output), name=invocation.tool)]
  }

In [17]:
# This code block imports the StateGraph and END classes from the langgraph.graph module.
# It creates an instance of the StateGraph class named workflow with the AgentState type.
# It adds two nodes to the workflow using the add_node() method:
# The "agent" node is associated with the call_model function.
# The "action" node is associated with the call_tool function.
from langgraph.graph import StateGraph, END

# Graph builder for the plain tool-using agent; state schema is AgentState.
workflow = StateGraph(AgentState)

# "agent" runs the model; "action" runs the tool the model asked for.
workflow.add_node("agent", call_model)
workflow.add_node("action", call_tool)

In [18]:
# This code block sets the entry point of the workflow to the "agent" node using the set_entry_point() method.
# The entry point determines the starting node of the workflow when it is executed.
# Every run starts at the model node.
workflow.set_entry_point("agent")

In [19]:
# This code block defines a function named should_continue that takes the state as input.
# The should_continue function checks the last message in the state to determine whether to continue or end the workflow.
# If the last message does not have a "function_call" in its additional_kwargs, it returns "end".
# Otherwise, it returns "continue".
# It adds conditional edges to the workflow using the add_conditional_edges() method.
# The conditional edges originate from the "agent" node and are based on the should_continue function.
# If the should_continue function returns "continue", the workflow transitions to the "action" node.
# If the should_continue function returns "end", the workflow transitions to the END node, indicating the end of the workflow.
def should_continue(state):
  """Decide where to go after the agent node.

  Args:
    state: Mapping with a "messages" entry; only the last message is inspected.

  Returns:
    "continue" if the last message's additional_kwargs contains a
    "function_call" entry, otherwise "end".
  """
  latest = state["messages"][-1]
  wants_tool = "function_call" in latest.additional_kwargs
  return "continue" if wants_tool else "end"

# After "agent", should_continue's return value selects the next step via this mapping.
workflow.add_conditional_edges(
    "agent",
    should_continue,
    {
        "continue" : "action",  # a function call was requested -> run the tool
        "end" : END             # no function call -> finish
    }
)

In [20]:
# This code block adds an edge to the workflow using the add_edge() method.
# The edge connects the "action" node to the "agent" node, indicating that after executing the "action" node, the workflow should transition back to the "agent" node.
# Tool output always goes back to the model, closing the agent/action loop.
workflow.add_edge("action", "agent")

In [21]:
# This code block compiles the workflow using the compile() method.
# The compile() method validates the graph structure and turns the workflow into a runnable application.
# The compiled workflow is assigned to the variable app, which can be used to invoke the workflow with input data.
# Compiled form of the agent graph; invoked below with {"messages": [...]}.
app = workflow.compile()

In [22]:
# This code block imports the HumanMessage class from the langchain_core.messages module.
# It creates a dictionary named inputs containing a list of messages, which in this case consists of a single HumanMessage with the content "What is the fundamental mechanism behind CRISPR-Cas9 gene editing, and how does it differ from other gene editing techniques?".
# It invokes the compiled workflow app using the invoke() method, passing the inputs dictionary as an argument.
# The workflow processes the input message, executes the defined nodes and edges, and returns the generated response.
from langchain_core.messages import HumanMessage

# Initial state: a single human message carrying the question.
inputs = {
    "messages": [
        HumanMessage(
            content="What is the fundamental mechanism behind CRISPR-Cas9 gene editing, and how does it differ from other gene editing techniques?"
        )
    ]
}

# Run the tool-using agent; the final state is this cell's displayed value.
app.invoke(inputs)

{'messages': [HumanMessage(content='What is the fundamental mechanism behind CRISPR-Cas9 gene editing, and how does it differ from other gene editing techniques?'),
  AIMessage(content='', additional_kwargs={'function_call': {'arguments': '{"query":"CRISPR-Cas9 gene editing mechanism"}', 'name': 'arxiv'}}, response_metadata={'token_usage': {'completion_tokens': 22, 'prompt_tokens': 177, 'total_tokens': 199}, 'model_name': 'gpt-3.5-turbo', 'system_fingerprint': 'fp_3bc1b5746c', 'finish_reason': 'function_call', 'logprobs': None}),
  FunctionMessage(content='Published: 2009-01-20\nTitle: Quantum effects in optomechanical systems\nAuthors: C. Genes, A. Mari, D. Vitali, P. Tombesi\nSummary: The search for experimental demonstrations of the quantum behavior of\nmacroscopic mechanical resonators is a fastly growing field of investigation\nand recent results suggest that the generation of quantum states of resonators\nwith a mass at the microgram scale is within reach. In this chapter we give

In [23]:
# This code block defines two functions: convert_state_to_query and convert_response_to_state.
# The convert_state_to_query function takes a state_object as input, extracts the last message's content from the state, and returns a dictionary with the key "question" and the extracted content as the value.
# The convert_response_to_state function takes a response as input and returns a dictionary with the key "messages" and a list containing the "response" field of the input response.
# It creates a new chain named langgraph_node_rag_chain by composing the convert_state_to_query function, the retrieval_augmented_generation_chain, and the convert_response_to_state function using the | operator.
# The resulting langgraph_node_rag_chain takes a state object as input, converts it to a query, passes it through the retrieval-augmented generation chain, and converts the response back to a state object.
def convert_state_to_query(state_object):
  """Turn graph state into the RAG chain's input.

  Args:
    state_object: Mapping with a "messages" entry; the last message's
      `content` is used as the question.

  Returns:
    {"question": <content of the last message>}
  """
  latest_message = state_object["messages"][-1]
  return dict(question=latest_message.content)

def convert_response_to_state(response):
  """Turn the RAG chain's output back into a graph state update.

  Args:
    response: Mapping with a "response" entry; any other entries are ignored.

  Returns:
    {"messages": [<the "response" value>]}
  """
  rag_answer = response["response"]
  return {"messages": [rag_answer]}

# Adapter pipeline: graph state -> {"question": ...} -> RAG chain -> {"messages": [response]},
# so the RAG chain can be used as a graph node.
langgraph_node_rag_chain = convert_state_to_query | retrieval_augmented_generation_chain | convert_response_to_state

In [24]:
# This code block invokes the langgraph_node_rag_chain using the ainvoke() method, which is an asynchronous version of the invoke() method.
# It passes the inputs dictionary as an argument to the ainvoke() method.
# The langgraph_node_rag_chain processes the input state, converts it to a query, generates a response using the retrieval-augmented generation chain, converts the response back to a state object, and returns the resulting state.
# Smoke test of the adapter chain on the same `inputs` state used for the agent run above.
await langgraph_node_rag_chain.ainvoke(inputs)

{'messages': [AIMessage(content='The fundamental mechanism behind CRISPR-Cas9 gene editing involves using a single, custom guide sequence to re-program the Cas9 protein to make specific genomic cuts for sequence insertions or deletions. This differs from other gene editing techniques in its simplicity and specificity, allowing for precise modifications to be made in both prokaryotic and eukaryotic cells.', response_metadata={'token_usage': {'completion_tokens': 73, 'prompt_tokens': 3510, 'total_tokens': 3583}, 'model_name': 'gpt-3.5-turbo', 'system_fingerprint': 'fp_3bc1b5746c', 'finish_reason': 'stop', 'logprobs': None})]}

In [25]:
# This code block creates a new instance of the StateGraph class named rag_agent with the AgentState type.
# It adds three nodes to the rag_agent graph using the add_node() method:
# The "agent" node is associated with the call_model function.
# The "action" node is associated with the call_tool function.
# The "first_action" node is associated with the langgraph_node_rag_chain chain.
# Second graph: tries RAG first, with the tool-using agent as a fallback.
rag_agent = StateGraph(AgentState)

# Same agent/action nodes as the first graph, plus the RAG adapter chain as "first_action".
rag_agent.add_node("agent", call_model)
rag_agent.add_node("action", call_tool)
rag_agent.add_node("first_action", langgraph_node_rag_chain)

In [26]:
# This code block sets the entry point of the rag_agent graph to the "first_action" node using the set_entry_point() method.
# The entry point determines the starting node of the graph when it is executed.
# Every run starts with the RAG step rather than the agent.
rag_agent.set_entry_point("first_action")

In [27]:
# This code block defines a function named is_fully_answered that takes the state as input.
# The function extracts the question and answer from the state messages.
# It defines a Pydantic model named answered with a single field binary_score of type string, which represents whether the question is fully answered or not.
# It creates an instance of the ChatOpenAI class with the model set to "gpt-4-turbo-preview" and temperature set to 0.
# It converts the answered model to an OpenAI tool using the convert_to_openai_tool function.
# It binds the converted answered_tool to the model using the bind() method, specifying the tool choice as a function with the name "answered".
# It creates a PydanticToolsParser instance with the answered tool.
# It defines a prompt template using the PromptTemplate class, which asks whether the question is fully answered by the response.
# It creates a classic LCEL chain named fully_answered_chain by composing the prompt, model, and parser tool using the | operator.
# It invokes the fully_answered_chain with the question and answer as input.
# If the binary score of the response is "no", it returns "continue"; otherwise, it returns "end".
from langchain.prompts import PromptTemplate
from langchain_core.pydantic_v1 import BaseModel, Field
from langchain.output_parsers.openai_tools import PydanticToolsParser
from langchain_core.utils.function_calling import convert_to_openai_tool

def is_fully_answered(state):
  """Route after the RAG step by asking a grader model whether the answer suffices.

  Args:
    state: Mapping with a "messages" entry. The first message's content is
      taken as the question and the last message's content as the answer.

  Returns:
    "continue" if the grader's verdict is "no" (compared case-insensitively,
    ignoring surrounding whitespace); "end" for any other verdict.
  """
  question = state["messages"][0].content
  answer = state["messages"][-1].content

  # Schema the grader must reply with. Documented with a comment rather than a
  # docstring so the tool definition sent to the model is unchanged.
  class answered(BaseModel):
    binary_score: str = Field(description="Fully answered: 'yes' or 'no'")

  # Named distinctly so it cannot be confused with the notebook-level `model`.
  grader_model = ChatOpenAI(model="gpt-4-turbo-preview", temperature=0)

  answered_tool = convert_to_openai_tool(answered)

  # tool_choice names the `answered` function so the reply comes back through that schema.
  grader_model = grader_model.bind(
      tools=[answered_tool],
      tool_choice={"type" : "function", "function" : {"name" : "answered"}}
  )

  parser_tool = PydanticToolsParser(tools=[answered])

  prompt = PromptTemplate(
      template="""You will determine if the question is fully answered by the response.\n
      Question:
      {question}

      Response:
      {answer}

      You will respond with either 'yes' or 'no'.""",
      input_variables=["question", "answer"])

  fully_answered_chain = prompt | grader_model | parser_tool

  response = fully_answered_chain.invoke({"question" : question, "answer" : answer})

  # binary_score is free-form text produced by the model, so normalise it before
  # comparing; otherwise a verdict such as "No" would be treated as fully answered.
  verdict = response[0].binary_score.strip().lower()
  if verdict == "no":
    return "continue"

  return "end"

In [28]:
# This code block adds conditional edges to the rag_agent graph using the add_conditional_edges() method.
# The conditional edges originate from the "first_action" node and are based on the is_fully_answered function.
# If the is_fully_answered function returns "continue", the graph transitions to the "agent" node.
# If the is_fully_answered function returns "end", the graph transitions to the END node, indicating the end of the graph execution.
# After the RAG step, is_fully_answered's return value selects the next step via this mapping.
rag_agent.add_conditional_edges(
    "first_action",
    is_fully_answered,
    {
        "continue" : "agent",  # not fully answered -> hand over to the tool-using agent
        "end" : END            # otherwise -> finish
    }
)

In [29]:
# This code block defines a function named should_continue that takes the state as input.
# The should_continue function checks the last message in the state to determine whether to continue or end the graph execution.
# If the last message does not have a "function_call" in its additional_kwargs, it returns "end".
# Otherwise, it returns "continue".
# It adds conditional edges to the rag_agent graph using the add_conditional_edges() method.
# The conditional edges originate from the "agent" node and are based on the should_continue function.
# If the should_continue function returns "continue", the graph transitions to the "action" node.
# If the should_continue function returns "end", the graph transitions to the END node, indicating the end of the graph execution.
def should_continue(state):
  """Pick the edge to follow after the agent node.

  NOTE(review): this repeats the `should_continue` defined for the first graph
  earlier in the notebook; consider defining it once.

  Args:
    state: Mapping with a "messages" entry; only the last message is inspected.

  Returns:
    "continue" if the last message's additional_kwargs contains a
    "function_call" entry, otherwise "end".
  """
  next_step = {True: "continue", False: "end"}
  return next_step["function_call" in state["messages"][-1].additional_kwargs]

# After "agent", should_continue's return value selects the next step via this mapping.
rag_agent.add_conditional_edges(
    "agent",
    should_continue,
    {
        "continue" : "action",  # a function call was requested -> run the tool
        "end" : END             # no function call -> finish
    }
)

In [30]:
# This code block adds an edge to the rag_agent graph using the add_edge() method.
# The edge connects the "action" node to the "agent" node, indicating that after executing the "action" node, the graph should transition back to the "agent" node.
# Tool output always goes back to the model, closing the agent/action loop.
rag_agent.add_edge("action", "agent")

In [31]:
# This code block compiles the rag_agent graph using the compile() method.
# The compile() method validates the graph structure and turns the graph into a runnable application.
# The compiled graph is assigned to the variable rag_agent_app, which can be used to invoke the graph with input data.
# Compiled form of the RAG-first graph; invoked below with {"messages": [...]}.
rag_agent_app = rag_agent.compile()

In [32]:
# This code block defines the question string to ask the RAG agent.
# It creates a dictionary named inputs containing a list of messages, which consists of a single HumanMessage with the content set to the defined question.
# It invokes the compiled rag_agent_app using the invoke() method, passing the inputs dictionary as an argument.
# The rag_agent_app processes the input message, executes the defined nodes and edges based on the graph structure, and returns the generated response.
question = "Who won the Nobel Prize for their work with CRISPR?"

# Fresh single-message state for this run.
inputs = dict(messages=[HumanMessage(content=question)])

# The final graph state is this cell's displayed value.
rag_agent_app.invoke(inputs)

{'messages': [HumanMessage(content='Who won the Nobel Prize for their work with CRISPR?'),
  AIMessage(content="I don't know.", response_metadata={'token_usage': {'completion_tokens': 5, 'prompt_tokens': 3080, 'total_tokens': 3085}, 'model_name': 'gpt-3.5-turbo', 'system_fingerprint': 'fp_3bc1b5746c', 'finish_reason': 'stop', 'logprobs': None}),
  AIMessage(content='', additional_kwargs={'function_call': {'arguments': '{"query":"Nobel Prize winner for CRISPR"}', 'name': 'duckduckgo_search'}}, response_metadata={'token_usage': {'completion_tokens': 24, 'prompt_tokens': 173, 'total_tokens': 197}, 'model_name': 'gpt-3.5-turbo', 'system_fingerprint': 'fp_3bc1b5746c', 'finish_reason': 'function_call', 'logprobs': None}),
  FunctionMessage(content='The approval of the first Crispr-based therapy is just the beginning. Getting it to patients is the next hurdle. ... Berkeley, who shared the Nobel Prize in Chemistry in 2020 for her role in the ... 4) The 2020 Nobel For The Hepatitis C virus. Bef

In [34]:
# This code block defines another question string to ask the RAG agent.
# It creates a dictionary named inputs containing a list of messages, which consists of a single HumanMessage with the content set to the defined question.
# It invokes the compiled rag_agent_app using the invoke() method, passing the inputs dictionary as an argument.
# The rag_agent_app processes the input message, executes the defined nodes and edges based on the graph structure, and returns the generated response.
question = "Who won the Nobel Prize for their work with CRISPR and what University did they attend?"

# Fresh single-message state for this run.
inputs = dict(messages=[HumanMessage(content=question)])

# The final graph state is this cell's displayed value.
rag_agent_app.invoke(inputs)

{'messages': [HumanMessage(content='Who won the Nobel Prize for their work with CRISPR and what University did they attend?'),
  AIMessage(content="I don't know.", response_metadata={'token_usage': {'completion_tokens': 5, 'prompt_tokens': 3091, 'total_tokens': 3096}, 'model_name': 'gpt-3.5-turbo', 'system_fingerprint': 'fp_3bc1b5746c', 'finish_reason': 'stop', 'logprobs': None}),
  AIMessage(content='', additional_kwargs={'function_call': {'arguments': '{"query":"Nobel Prize winner for CRISPR and their University"}', 'name': 'duckduckgo_search'}}, response_metadata={'token_usage': {'completion_tokens': 27, 'prompt_tokens': 179, 'total_tokens': 206}, 'model_name': 'gpt-3.5-turbo', 'system_fingerprint': 'fp_3bc1b5746c', 'finish_reason': 'function_call', 'logprobs': None}),
  FunctionMessage(content="This year's Nobel Prize in Physiology or Medicine goes to a transformative medical technology that significantly altered the path of the pandemic and saved millions: the mRNA vaccines agains

CRISPR Questions:
1.	What are the potential therapeutic applications of CRISPR-Cas9, and which genetic disorders are currently being targeted in clinical trials?
2.	How has the discovery of CRISPR-Cas9 revolutionized the field of gene editing, and what are the key advantages of this technology compared to previous gene editing methods?
3.	What are the ethical concerns surrounding the use of CRISPR-Cas9 for germline editing, and how are regulatory bodies and the scientific community addressing these issues?
4.	How does the CRISPR-Cas9 system differ from other CRISPR-Cas variants, such as CRISPR-Cas12a and CRISPR-Cas13, and what are the unique advantages and applications of each system?
5.	What are the challenges and limitations of using CRISPR-Cas9 for gene editing in humans, and what strategies are being developed to overcome these obstacles and improve the safety and efficacy of CRISPR-based therapies?
6.	How has CRISPR-Cas9 been used to develop novel diagnostic tools for infectious diseases, and what are some examples of CRISPR-based diagnostic platforms currently in development or clinical use?
7.	What are the potential agricultural applications of CRISPR-Cas9, and how can this technology be used to improve crop yields, enhance nutrient content, and confer resistance to pests and environmental stressors?
8.	How have researchers combined CRISPR-Cas9 with other technologies, such as single-cell sequencing and high-throughput screening, to advance our understanding of complex biological systems and disease mechanisms?
9.	What are the current limitations and challenges associated with the delivery of CRISPR-Cas9 components into target cells, and what strategies are being explored to improve the efficiency and specificity of CRISPR delivery?
10.	How are researchers and biotech companies working to develop CRISPR-based therapies for cancer, and what are some of the key targets and approaches being investigated in preclinical and clinical studies?


In [35]:
# This code block defines another question string; this one is outside the indexed CRISPR papers, so it exercises the fallback to the tool-using agent.
# It creates a dictionary named inputs containing a list of messages, which consists of a single HumanMessage with the content set to the defined question.
# It invokes the compiled rag_agent_app using the invoke() method, passing the inputs dictionary as an argument.
# The rag_agent_app processes the input message, executes the defined nodes and edges based on the graph structure, and returns the generated response.
question = "What city is the capital of Illinois?"
# Fresh single-message state for this run.
inputs = dict(messages=[HumanMessage(content=question)])

# The final graph state is this cell's displayed value.
rag_agent_app.invoke(inputs)

{'messages': [HumanMessage(content='What city is the capital of Illinois?'),
  AIMessage(content="I don't know.", response_metadata={'token_usage': {'completion_tokens': 5, 'prompt_tokens': 3427, 'total_tokens': 3432}, 'model_name': 'gpt-3.5-turbo', 'system_fingerprint': 'fp_b28b39ffa8', 'finish_reason': 'stop', 'logprobs': None}),
  AIMessage(content='', additional_kwargs={'function_call': {'arguments': '{"query":"capital of Illinois"}', 'name': 'duckduckgo_search'}}, response_metadata={'token_usage': {'completion_tokens': 19, 'prompt_tokens': 168, 'total_tokens': 187}, 'model_name': 'gpt-3.5-turbo', 'system_fingerprint': 'fp_3bc1b5746c', 'finish_reason': 'function_call', 'logprobs': None}),
  FunctionMessage(content="Springfield is the capital city of the U.S. state of Illinois and the county seat of Sangamon County.The city's population was 114,394 at the 2020 census, which makes it the state's seventh most-populous city, the second largest outside of the Chicago metropolitan area (