In [1]:
# Development convenience: load IPython's autoreload extension and use mode 2
# so edited modules are re-imported before each cell runs, without a kernel
# restart. Comments stay on their own lines so they are not passed to the
# magics as arguments.
%load_ext autoreload
%autoreload 2

In [5]:
from pydantic import BaseModel, Field

import nest_asyncio
# Jupyter already runs an asyncio event loop; patch asyncio so nested loops are
# allowed. NOTE(review): presumably required by async calls made inside
# llama_index / plotreader further down - confirm.
nest_asyncio.apply()

from llama_index.core.prompts import PromptTemplate
from llama_index.core.output_parsers.pydantic import PydanticOutputParser
from llama_index.core import Settings
from llama_index.agent.introspective import SelfReflectionAgentWorker
from llama_index.agent.introspective import IntrospectiveAgentWorker
from llama_index.core.agent import ReActAgent, StructuredPlannerAgent, FunctionCallingAgentWorker
from pydantic import Field


from llama_index.core.memory import (
    VectorMemory,
    SimpleComposableMemory,
    ChatMemoryBuffer,
)
from llama_index.core.llms import ChatMessage

import plotreader
from plotreader.utils.document import ScientificPaperHandler

In [3]:
import os

# Handler for the paper analysed in this notebook.
#
# The PDF location is machine-specific, so it can be overridden with the
# PLOTREADER_PAPER_PATH environment variable; the default is the path used when
# the notebook was originally written, so existing setups keep working.
# NOTE(review): `document_id` and `name` presumably identify this particular
# paper, so they need to be changed together with the PDF - confirm.
handler = ScientificPaperHandler(
    filepath=os.environ.get(
        "PLOTREADER_PAPER_PATH",
        "/Users/loyalshababo/dev/plotreader/sandbox/storage/tmp/nihms-1538039.pdf",
    ),
    document_id="3de6841c-fba6-4124-a28a-2c53124202af",
    name="nihms-1538039",
    desc="A scientific paper",
    storage_dir="./storage",
    # NOTE(review): presumably forces the paper to be re-processed instead of
    # being loaded from `storage_dir` - confirm against ScientificPaperHandler.
    use_cache=False,
)

In [4]:
# Expose the paper's query engine as a tool that agents can call.
# NOTE(review): top_k=10 presumably caps the number of retrieved chunks per
# query - confirm against ScientificPaperHandler.query_engine_tool.
query_tool = handler.query_engine_tool(top_k=10)

In [6]:
# Secondary memory: messages are fetched back by similarity search (3 hits).
# Passing vector_store=None selects the default in-memory vector store.
vector_memory = VectorMemory.from_defaults(
    retriever_kwargs={"similarity_top_k": 3},
    vector_store=None,
)

# Seed the vector memory with the assistant's role description.
msgs = [
    ChatMessage.from_str(
        content="You are an assistant who can look at text and figures from academic papers and answer quantitative questions about the data in the figures.",
        role="system",
    )
]
vector_memory.set(msgs)

# Primary memory: a plain chat-history buffer with default settings.
chat_memory_buffer = ChatMemoryBuffer.from_defaults()

# Combine both: the buffer is primary, the vector memory is a secondary source.
composable_memory = SimpleComposableMemory.from_defaults(
    secondary_memory_sources=[vector_memory],
    primary_memory=chat_memory_buffer,
)

In [7]:
# Tool-calling agent over the paper query tool, backed by the composable memory.
# Despite its name, `query_worker` holds the agent returned by `.as_agent()`,
# which is why `.chat()` can be called on it in the following cells.
query_worker = (
    FunctionCallingAgentWorker
    .from_tools(tools=[query_tool], verbose=True)
    .as_agent(memory=composable_memory)
)

In [8]:
# First question for the tool-calling agent. The agent was created with
# verbose=True, so the tool call is logged; the returned AgentChatResponse is
# displayed as the cell output.
query_worker.chat("How many figures are in this paper?")

Added user message to memory: How many figures are in this paper?
=== LLM Response ===
To answer your question about how many figures are in the paper, I'll need to query the document using the available tool. Let me do that for you.
=== Calling Function ===
Calling function: nihms-1538039_multimodal_vector_tool with args: {"input": "How many figures are in this paper?"}
=== Function Output ===
Based on the information provided in the excerpts, we can infer that there are at least 4 main figures in this paper. This conclusion is drawn from the following references:

1. "Figure 1" is mentioned multiple times in different excerpts.
2. "Figure 2" is explicitly mentioned in one excerpt.
3. "Figure 3" is explicitly mentioned in one excerpt.
4. "Figure 4" is explicitly mentioned in one excerpt.

Additionally, there are mentions of Supplemental Figures (e.g., Supplemental Figure 1a, 1b, 1c, 1d), which are likely not counted as main figures but are part of the supplementary materials.

It's im

AgentChatResponse(response="Based on the information retrieved from the document, I can answer your question about the number of figures in this paper.\n\nThe paper contains at least 4 main figures. This conclusion is drawn from explicit mentions of Figure 1, Figure 2, Figure 3, and Figure 4 in various parts of the document.\n\nIt's worth noting that the paper also includes Supplemental Figures (such as Supplemental Figure 1a, 1b, 1c, 1d), which are typically not counted as main figures but are part of the supplementary materials.\n\nHowever, it's important to keep in mind that there could potentially be more than 4 figures in the full paper. The information provided is based on the excerpts available, which may not cover the entire document. Therefore, while we can confidently say there are at least 4 main figures, there could be additional figures not mentioned in the excerpts we have access to.", sources=[ToolOutput(content='Based on the information provided in the excerpts, we can 

In [10]:
# Per-figure summary request (Figure 1), sent through the same agent so the
# exchange is added to its memory.
# NOTE(review): in the recorded run the tool reported that Figure 1 was not
# among the retrieved images, so the summary was inferred from text.
query_worker.chat("Summarize Figure 1.")

Added user message to memory: Summarize Figure 1.
=== LLM Response ===
Certainly! I'll use the available tool to query the document for information about Figure 1 and provide you with a summary.
=== Calling Function ===
Calling function: nihms-1538039_multimodal_vector_tool with args: {"input": "Summarize Figure 1 and its contents"}
=== Function Output ===
Figure 1 is not directly shown in the provided images. However, based on the information given, I can summarize what Figure 1 likely contains and represents in the context of the study:

Figure 1 appears to illustrate the machine learning-guided optimization process for channelrhodopsins (ChRs). The figure likely includes the following key components:

1. Photocurrent properties: It would show metrics for evaluating ChR performance, including peak and steady-state current, off-kinetics (current decay rate and time to 50% current), and wavelength sensitivity.

2. Classification models: This section would demonstrate how machine learni

AgentChatResponse(response="Based on the information retrieved, I can provide you with a summary of Figure 1. Although the actual image of Figure 1 is not directly available, the content and purpose of the figure can be inferred from the context provided. Here's a summary of Figure 1:\n\nFigure 1 illustrates the machine learning-guided optimization process for channelrhodopsins (ChRs). The figure likely contains several key components:\n\n1. Photocurrent Properties: It shows metrics used to evaluate ChR performance, including peak and steady-state current, off-kinetics (current decay rate and time to 50% current), and wavelength sensitivity.\n\n2. Classification Models: This section demonstrates how machine learning models were trained to predict ChR localization and function. A threshold of 0.4 for the product of predicted probabilities was used for classification.\n\n3. Regression Models: It illustrates how models were trained to approximate the fitness landscape for different ChR pr

In [11]:
# Per-figure summary request (Figure 2), continuing the same conversation.
query_worker.chat("Summarize Figure 2.")

Added user message to memory: Summarize Figure 2.
=== LLM Response ===
Certainly! I'll use the available tool to query the document for information about Figure 2 and provide you with a summary.
=== Calling Function ===
Calling function: nihms-1538039_multimodal_vector_tool with args: {"input": "Summarize Figure 2 and its contents"}
=== Function Output ===
Figure 2 demonstrates the wide range of functional properties exhibited by model-predicted channelrhodopsins (ChRs), often surpassing those of their parent variants. The figure is divided into several panels (a-e) that illustrate different aspects of ChR performance:

a) This panel shows representative current traces for various ChR variants, including parental and designer ChRs. Each trace represents the photocurrent response to a 0.5-second light exposure. The vertical scale bar represents 500 pA, and the horizontal scale bar represents 250 ms. Different colors are used to distinguish between variants, and these colors are consiste

AgentChatResponse(response='Based on the information retrieved, I can provide you with a summary of Figure 2. This figure demonstrates the wide range of functional properties exhibited by model-predicted channelrhodopsins (ChRs), often surpassing those of their parent variants. The figure is divided into several panels (a-f), each illustrating different aspects of ChR performance:\n\na) Representative Current Traces: This panel shows photocurrent responses of various ChR variants (including parental and designer ChRs) to a 0.5-second light exposure. The vertical scale bar represents 500 pA, and the horizontal scale bar represents 250 ms. Different colors are used to distinguish between variants, and these colors are consistent throughout the figure.\n\nb) Peak and Steady-State Photocurrents: This section displays measured peak and steady-state photocurrents for different wavelengths of light (397 nm, 481 nm, 546 nm, and 640 nm) in HEK cells. It shows how different ChR variants respond 

In [12]:
# Per-figure summary request (Figure 3), continuing the same conversation.
query_worker.chat("Summarize Figure 3.")

Added user message to memory: Summarize Figure 3.
=== LLM Response ===
Certainly! I'll use the available tool to query the document for information about Figure 3 and provide you with a summary.
=== Calling Function ===
Calling function: nihms-1538039_multimodal_vector_tool with args: {"input": "Summarize Figure 3 and its contents"}
=== Function Output ===
Figure 3 demonstrates the performance of ChRger variants compared to commonly used channelrhodopsins (ChRs) like ChR2(H134R) and CoChR in cultured neurons and acute brain slices. The figure contains multiple panels (a-n) that illustrate various aspects of the ChRgers' functionality:

a) Shows the AAV vector design used for expressing the ChRs.

b) Displays images of cultured neurons expressing ChR2, ChRger1, ChRger2, and ChRger3 under the hSyn promoter.

c) Compares peak and steady-state photocurrents at low (8x10^-3 mW/mm^2) and moderate (0.8 mW/mm^2) light intensities for different ChRs in cultured neurons.

d) Shows spike fidelity

AgentChatResponse(response="Based on the information retrieved, I can provide you with a summary of Figure 3. This figure demonstrates the performance of ChRger variants compared to commonly used channelrhodopsins (ChRs) like ChR2(H134R) and CoChR in cultured neurons and acute brain slices. The figure is divided into multiple panels (a-n), each illustrating different aspects of the ChRgers' functionality:\n\na) AAV Vector Design: Shows the design used for expressing the ChRs.\n\nb) Expression in Cultured Neurons: Displays images of cultured neurons expressing ChR2, ChRger1, ChRger2, and ChRger3 under the hSyn promoter.\n\nc) Photocurrent Comparison: Compares peak and steady-state photocurrents at low (8x10^-3 mW/mm^2) and moderate (0.8 mW/mm^2) light intensities for different ChRs in cultured neurons.\n\nd) Spike Fidelity vs. Light Intensity: Shows spike fidelity with varying light intensities for 5 ms light pulses at 2 Hz stimulation.\n\ne) Spike Fidelity vs. Stimulation Frequency: Il

In [13]:
# Per-figure summary request (Figure 4), continuing the same conversation.
# NOTE(review): in the recorded run the tool answered with Figures 1 and 2
# instead of Figure 4, and the agent's reply was speculative - retrieval for
# this figure needs checking.
query_worker.chat("Summarize Figure 4.")

Added user message to memory: Summarize Figure 4.
=== LLM Response ===
Certainly! I'll use the available tool to query the document for information about Figure 4 and provide you with a summary.
=== Calling Function ===
Calling function: nihms-1538039_multimodal_vector_tool with args: {"input": "Summarize Figure 4 and its contents"}
=== Function Output ===
I apologize, but there is no Figure 4 shown in the provided images. The images contain Figure 1 and Figure 2. I'll summarize the contents of these figures instead:

Figure 1 (second image):
This figure illustrates the machine learning-guided optimization process for channelrhodopsins (ChRs). It includes:

a) Photocurrent properties: Describes metrics for photocurrent strength, off-kinetics, and wavelength sensitivity.

b) Classification model training: Shows how models were trained to predict ChR localization and function.

c) Regression model training: Demonstrates training of models to predict current strength, off-kinetics, and sp

AgentChatResponse(response="I apologize for the confusion in my previous response. It appears that Figure 4 is not present in the provided images. The available information only covers Figures 1 and 2. Since you specifically asked about Figure 4, and we don't have direct information about it, I cannot provide a summary of Figure 4.\n\nHowever, I can offer some context based on the information we have:\n\n1. Figure 1 focused on the machine learning-guided optimization process for channelrhodopsins (ChRs).\n2. Figure 2 demonstrated the functional properties of model-predicted ChRs compared to parent variants.\n\nGiven this progression, Figure 4 might potentially cover:\n\n1. Further in vivo applications or experiments using the optimized ChRs.\n2. Comparisons of the new ChRs with other optogenetic tools in specific neuroscience experiments.\n3. Detailed analysis of specific properties or applications of the most promising ChR variants.\n4. Potential limitations or future directions for t

In [14]:
# Inspect which messages the agent's memory returns for a prospective query.
# Only the memory is queried here; no chat call is made. The resulting list of
# ChatMessage objects is displayed as the cell output.
query_worker.memory.get("Provide a detailed description of the first Panel of Figure 2.")

[ChatMessage(role=<MessageRole.USER: 'user'>, content='Summarize Figure 2.', additional_kwargs={}),
 ChatMessage(role=<MessageRole.ASSISTANT: 'assistant'>, content="Certainly! I'll use the available tool to query the document for information about Figure 2 and provide you with a summary.", additional_kwargs={'tool_calls': [{'id': 'toolu_01ACG1XQd5YxpGrqHkxcbckT', 'input': {'input': 'Summarize Figure 2 and its contents'}, 'name': 'nihms-1538039_multimodal_vector_tool', 'type': 'tool_use'}]}),
 ChatMessage(role=<MessageRole.TOOL: 'tool'>, content='Figure 2 demonstrates the wide range of functional properties exhibited by model-predicted channelrhodopsins (ChRs), often surpassing those of their parent variants. The figure is divided into several panels (a-e) that illustrate different aspects of ChR performance:\n\na) This panel shows representative current traces for various ChR variants, including parental and designer ChRs. Each trace represents the photocurrent response to a 0.5-second

In [15]:
# Ask the agent to check which retrieved image actually corresponds to Figure 2.
# NOTE(review): in the recorded run the tool answered that the first image is
# Figure 3, which contradicts the earlier Figure 2 summary - treat the figure
# labelling from the query tool as unreliable until verified.
query_worker.chat("Figure out which image in the query tool is Figure 2.")

Added user message to memory: Figure out which image in the query tool is Figure 2.
=== LLM Response ===
I apologize for the confusion in my previous responses. You're right, I should verify which image is actually Figure 2. I'll use the query tool to check this information.
=== Calling Function ===
Calling function: nihms-1538039_multimodal_vector_tool with args: {"input": "Which image is Figure 2? Describe its contents briefly."}
=== Function Output ===
The first image in the set is Figure 3, not Figure 2. However, I can describe the contents of this image briefly:

This image (Figure 3) is a complex multi-panel figure that shows various experimental results related to the performance of ChRger variants compared to other channelrhodopsins (ChRs) like ChR2(H134R) and CoChR in both cultured neurons and acute brain slices.

The figure includes:

a) A schematic of the genetic construct used.
b) Microscopy images of cultured neurons expressing different ChR variants.
c) Bar graphs compari

AgentChatResponse(response="Thank you for prompting me to double-check. I apologize for the earlier confusion. Based on the query results, I can now provide accurate information:\n\nThe image that was previously thought to be Figure 2 is actually Figure 3. This explains why we couldn't find Figure 4 earlier, as Figure 3 was the last figure in the set of images provided.\n\nFigure 3 is a complex, multi-panel figure that demonstrates the performance of ChRger variants compared to commonly used channelrhodopsins (ChRs) like ChR2(H134R) and CoChR in both cultured neurons and acute brain slices. Here's a brief summary of its contents:\n\na) A schematic of the genetic construct used for expressing the ChRs.\nb) Microscopy images showing cultured neurons expressing different ChR variants.\nc) Bar graphs comparing photocurrent strengths under different light intensities.\nd-e) Graphs illustrating spike fidelity under various conditions.\nf) Voltage traces demonstrating neuronal firing patterns

In [16]:
# Build a fresh memory stack for the planner agent: new VectorMemory and
# ChatMemoryBuffer objects are created here, so the planner does not start with
# the chat history accumulated by the tool-calling agent above.
vector_memory = VectorMemory.from_defaults(
    vector_store=None,  # leave as None to use default in-memory vector store
    retriever_kwargs={"similarity_top_k": 3},
)

# Seed the vector memory with the assistant's role description.
msgs = [
    ChatMessage.from_str("You are an assistant who can look at text and figures from academic papers and answer quantitative questions about the data in the figures.", "system"),
]
vector_memory.set(msgs)

chat_memory_buffer = ChatMemoryBuffer.from_defaults()

composable_memory = SimpleComposableMemory.from_defaults(
    primary_memory=chat_memory_buffer,
    secondary_memory_sources=[vector_memory],
)

# The bare worker gets its own name. `query_worker` already refers to the agent
# built earlier in the notebook; rebinding it to a worker (which is not an
# agent) would break re-running the earlier `query_worker.chat(...)` cells.
planner_worker = FunctionCallingAgentWorker.from_tools(
    [query_tool],
    verbose=True,
)

# Planner agent that executes its steps through `planner_worker`. No custom
# planning prompts are passed, so the library defaults are used.
agent = StructuredPlannerAgent(
    planner_worker,
    tools=[query_tool],
    verbose=True,
    memory=composable_memory,
)

In [18]:
# Run the planner agent on a constrained summary task. With verbose=True it
# prints the initial plan (sub-tasks and their dependencies) before executing;
# the final AgentChatResponse is displayed as the cell output.
agent.chat("Summarize Figure 1 using only information in the main body of the paper.")

=== Initial plan ===
Locate_Figure_1:
Find Figure 1 in the scientific paper. -> Confirmation that Figure 1 has been located in the paper.
deps: []


Identify_Figure_1_References:
Identify all references to Figure 1 in the main body of the paper. -> A list of sentences or paragraphs that mention or describe Figure 1.
deps: ['Locate_Figure_1']


Extract_Figure_1_Information:
Extract all relevant information about Figure 1 from the main body of the paper, excluding captions or legends. -> A compilation of all information related to Figure 1 found in the main text.
deps: ['Identify_Figure_1_References']


Organize_Information:
Organize the extracted information about Figure 1 in a logical order. -> A structured outline of the information about Figure 1.
deps: ['Extract_Figure_1_Information']


Summarize_Figure_1:
Create a concise summary of Figure 1 using only the organized information from the main body of the paper. -> A comprehensive summary of Figure 1 based solely on information from 

AgentChatResponse(response="Certainly! I'll create a summary of Figure 1 based on the limited information available, clearly stating the limitations and any inferences made. Here's the summary:\n\nSummary of Figure 1:\n\nTitle: Model-predicted Channelrhodopsins (ChRs) with Diverse Functional Properties\n\nContent:\n1. Machine Learning Approach:\n   - Likely illustrates the Gaussian process (GP) classification and regression models used.\n   - May show inputs (ChR sequence/structure data) and outputs (predicted properties).\n\n2. Designer ChR Variants:\n   - Showcases selected variants: ChR_21_10, ChR_25_9, ChR_11_10, ChR_15_10.\n   - Compares to parental ChRs: CheRiff, CsChrimR, C1C2.\n\n3. Functional Properties Displayed:\n   a) Representative current traces\n   b) Expression and localization in HEK cells\n   c) Peak and steady-state photocurrent under different light wavelengths\n   d) Off-kinetics decay rate (τoff)\n   e) Photocurrent strength at varying light irradiances\n   f) Wav

In [19]:
# Inspect which messages the planner agent's memory returns for a follow-up
# query. Only the memory is queried here; no chat call is made.
agent.memory.get("Tell me about one panel in Figure 1.")

[ChatMessage(role=<MessageRole.USER: 'user'>, content='Compile all available information about Figure 1 from the accessible parts of the paper, including any mentions in figure lists or references.', additional_kwargs={}),
 ChatMessage(role=<MessageRole.ASSISTANT: 'assistant'>, content="Certainly! I'll use the tool to gather all available information about Figure 1 from the accessible parts of the paper, including any mentions in figure lists or references.", additional_kwargs={'tool_calls': [{'id': 'toolu_018zhPXz5iVkvwiGmK3c1UyV', 'input': {'input': 'Compile all available information about Figure 1 from the accessible parts of the paper, including any mentions in figure lists or references'}, 'name': 'nihms-1538039_multimodal_vector_tool', 'type': 'tool_use'}]}),
 ChatMessage(role=<MessageRole.TOOL: 'tool'>, content="Based on the provided information, I can compile the following details about Figure 1:\n\n1. Figure 1 is mentioned multiple times in the document, specifically on pages 

In [None]:
import os

from llama_index.core.tools import QueryEngineTool, ToolMetadata
from llama_index.agent.anthropic import AnthropicAgent
from llama_index.llms.anthropic import Anthropic
from llama_index.postprocessor.cohere_rerank import CohereRerank

# Assuming 'handler' is an instance of ScientificPaperHandler
# Get the query engine from the handler
# NOTE(review): elsewhere the handler is used through the method
# `query_engine_tool(...)`; confirm that `query_engine` is an attribute (not a
# method) and that the engine exposes a `node_postprocessors` list.
query_engine = handler.query_engine

# Create a Cohere Reranker. The key is read from the environment instead of
# being written into the notebook; a missing COHERE_API_KEY raises KeyError
# here rather than failing later at request time.
cohere_rerank = CohereRerank(api_key=os.environ["COHERE_API_KEY"], top_n=3)

# Add the Cohere Reranker to the query engine's node postprocessors.
# The engine belongs to `handler` and is mutated in place, so only append when
# no reranker is attached yet; otherwise every re-run of this cell would stack
# another reranker onto the same engine.
if not any(isinstance(p, CohereRerank) for p in query_engine.node_postprocessors):
    query_engine.node_postprocessors.append(cohere_rerank)

# Create a QueryEngineTool using the query engine from the handler
rag_tool = QueryEngineTool(
    query_engine=query_engine,
    metadata=ToolMetadata(
        name="scientific_paper_rag",
        description="Use this tool to perform retrieval-augmented generation (RAG) over the loaded scientific paper."
    )
)

# Create an AnthropicAgent with the RAG tool using Claude 3 Sonnet
llm = Anthropic(model="claude-3-sonnet-20240229")
rag_agent = AnthropicAgent.from_tools(
    [rag_tool],
    llm=llm,
    verbose=True
)

# Example usage:
# response = rag_agent.chat("What does the paper say about Figure 1?")
# print(response)





