In [1]:
%load_ext autoreload
%autoreload 2

In [19]:
from pydantic import BaseModel, Field

import nest_asyncio
nest_asyncio.apply()

from llama_index.core.prompts import PromptTemplate
from llama_index.core.output_parsers.pydantic import PydanticOutputParser
from llama_index.core import Settings
from llama_index.agent.introspective import SelfReflectionAgentWorker
from llama_index.agent.introspective import IntrospectiveAgentWorker
from llama_index.core.agent import ReActAgent, StructuredPlannerAgent, FunctionCallingAgentWorker
from llama_index.core.vector_stores.types import MetadataFilters, MetadataFilter, FilterOperator, FilterCondition

from pydantic import Field


from llama_index.core.memory import (
    VectorMemory,
    SimpleComposableMemory,
    ChatMemoryBuffer,
)
from llama_index.core.llms import ChatMessage

import plotreader
from plotreader.utils.document import ScientificPaperHandler

In [3]:
# Open the paper through plotreader's ScientificPaperHandler. The keyword
# arguments are gathered first so the configuration can be read as one table.
# NOTE(review): `filepath` is an absolute, machine-specific path and will not
# resolve on other machines — consider making it configurable.
_handler_config = dict(
    filepath="/Users/loyalshababo/dev/plotreader/sandbox/storage/tmp/nihms-1538039.pdf",
    document_id="3de6841c-fba6-4124-a28a-2c53124202af",
    name="nihms-1538039",
    desc="A scientific paper",
    storage_dir="./storage",
    use_cache=False,
)
handler = ScientificPaperHandler(**_handler_config)

In [20]:
# Number of the figure whose chunks should be retrieved.
fig_num = 2

# One metadata filter on the `fig_refs` key, looking for "Figure <fig_num>".
figure_ref_filter = MetadataFilter(
    key="fig_refs",
    value=f"Figure {fig_num}",
    operator=FilterOperator.TEXT_MATCH_LIST,
)

# The OR condition is kept so further filters can be added to the list later.
filters = MetadataFilters(
    filters=[figure_ref_filter],
    condition=FilterCondition.OR,
)

# Query tool over the paper, restricted by the filter above and asking the
# handler for the top 10 results.
query_tool = handler.query_engine_tool(top_k=10, metadata_filters=filters)

In [21]:
# Message used to seed the vector memory; it is stored with the "system" role.
msgs = [
    ChatMessage.from_str(
        "You are an assistant who can look at text and figures from academic papers and answer quantitative questions about the data in the figures.",
        role="system",
    )
]

# Secondary memory backed by the default in-memory vector store
# (vector_store=None); its retriever is configured with similarity_top_k=3.
vector_memory = VectorMemory.from_defaults(
    vector_store=None,
    retriever_kwargs={"similarity_top_k": 3},
)
vector_memory.set(msgs)

# Primary memory: a chat buffer with default settings.
chat_memory_buffer = ChatMemoryBuffer.from_defaults()

# Combine the chat buffer (primary) with the vector memory (secondary source).
composable_memory = SimpleComposableMemory.from_defaults(
    primary_memory=chat_memory_buffer,
    secondary_memory_sources=[vector_memory],
)

In [22]:
# Step 1: wrap the paper query tool in a function-calling worker.
_function_worker = FunctionCallingAgentWorker.from_tools(
    [query_tool],
    verbose=True,
)

# Step 2: turn the worker into an agent that uses the composable memory.
# Despite its name, `query_worker` holds the object returned by `as_agent`.
query_worker = _function_worker.as_agent(memory=composable_memory)

In [6]:
# Ask the agent a first question; as the cell's last expression the response is displayed.
query_worker.chat("How many figures are in this paper?")

Added user message to memory: How many figures are in this paper?
=== LLM Response ===
To answer your question about how many figures are in the paper, I'll need to query the document using the available tool. Let me do that for you.
=== Calling Function ===
Calling function: nihms-1538039_multimodal_vector_tool with args: {"input": "How many figures are in this paper?"}
=== Function Output ===
Based on the provided information, I cannot determine the exact total number of figures in this paper. The excerpts mention Figure 1 and Figure 2, as well as some supplemental figures (e.g., Supplemental Figure 1c and 1d). However, there is no comprehensive list or explicit statement about the total number of figures in the paper.

The excerpts provide details about Figure 1 and briefly mention Figure 2, but they do not give information about any figures beyond that. There may be additional main figures or supplemental figures that are not mentioned in these excerpts.

Without more complete info

AgentChatResponse(response="I apologize, but I cannot provide an exact count of the figures in this paper based on the information available. The tool's response indicates that the document mentions Figure 1 and Figure 2, as well as some supplemental figures (like Supplemental Figure 1c and 1d). However, there isn't a comprehensive list or explicit statement about the total number of figures in the paper.\n\nThe information provided is incomplete, and we can't be certain if there are additional figures beyond those mentioned. Without access to the full paper or a table of contents, it's not possible to give an accurate count of all the figures.\n\nWhat we do know is that there are at least:\n1. Two main figures (Figure 1 and Figure 2)\n2. Some supplemental figures (at least up to Supplemental Figure 1d)\n\nBut this is likely not the complete count. If you need the exact number of figures, you might want to look at the full paper, particularly the table of contents or list of figures if

In [7]:
# Reset the agent's memory before asking the next question.
query_worker.memory.reset()

In [10]:
# Ask for a summary of Figure 2, fixing the vocabulary ("panel" / "plot") the answer should use.
query_worker.chat('Summarize Figure 2. Use term "panel" to reference subsections of the figure, often denoted by letters. Use the word "plot" to refer to a particular graph or subplot in a panel.')

Added user message to memory: Summarize Figure 2. Use term "panel" to reference subsections of the figure, often denoted by letters. Use the word "plot" to refer to a particular graph or subplot in a panel.
=== LLM Response ===
Certainly! I'll use the multimodal vector tool to analyze Figure 2 and provide a summary. Let me retrieve the information for you.
=== Calling Function ===
Calling function: nihms-1538039_multimodal_vector_tool with args: {"input": "Describe Figure 2 in detail, including all panels and plots. Use the terms \"panel\" and \"plot\" in your description."}
=== Function Output ===
Figure 2 illustrates the functional properties of model-predicted channelrhodopsins (ChRs) compared to parental ChRs. The figure is composed of multiple panels (a-f) that showcase different aspects of ChR performance.

Panel a: This panel displays representative current traces for select designer ChR variants and parental ChRs. Each trace shows the photocurrent response to a 0.5-second light

AgentChatResponse(response="Based on the detailed description provided by the tool, I'll summarize Figure 2 for you:\n\nFigure 2 presents a comprehensive comparison of functional properties between model-predicted channelrhodopsins (ChRs) and their parental counterparts. The figure is divided into six panels (a-f), each focusing on different aspects of ChR performance.\n\nPanel a showcases representative current traces for selected designer ChR variants and parental ChRs. These traces illustrate the photocurrent response to a 0.5-second light exposure, providing a visual representation of how different ChRs react to light stimulation.\n\nPanel b contains four plots that display measured peak and steady-state photocurrents at various wavelengths (397 nm, 481 nm, 546 nm, and 640 nm) in HEK cells. These plots demonstrate that some designer ChRs exhibit significantly higher photocurrents than the parental ChRs, particularly at 481 nm.\n\nPanel c features a plot of the off-kinetics decay ra

In [11]:
# Keep the response object so its source nodes can be inspected in the next cell.
response = query_worker.chat("Summarize Figure 1.")


Added user message to memory: Summarize Figure 1.
=== LLM Response ===
Certainly! I'll use the multimodal vector tool to analyze Figure 1 and provide a summary for you.
=== Calling Function ===
Calling function: nihms-1538039_multimodal_vector_tool with args: {"input": "Describe Figure 1 in detail, including all panels and plots. Use the terms \"panel\" and \"plot\" in your description."}
=== Function Output ===
Figure 1 illustrates the machine learning-guided optimization process for channelrhodopsins (ChRs). The figure is composed of multiple panels labeled from a to f.

Panel a shows the key photocurrent properties used to evaluate ChRs. It includes schematic plots of current over time, illustrating the peak inward current upon light exposure, the lower steady-state current after desensitization, and the current decay rate (τoff) after light removal.

Panel b displays a classification model plot. The x-axis represents the predicted probability of localizing, while the y-axis shows t

In [12]:
# Display the nodes attached to the last response, to see what was retrieved.
response.source_nodes

[NodeWithScore(node=TextNode(id_='7f893c0d-4e37-4687-b641-143e121a64ac', embedding=None, metadata={'images': [], 'page_num': 22, 'parsed_section_title': 'References>Figure 1.', 'is_aux_text': False, 'summary': 'The model-predicted ChRs exhibit a large range of functional properties often far exceeding the parents, with detailed measurements of current traces, photocurrent, off-kinetics decay rate, and wavelength sensitivity.', 'page_numbers': [22], 'fig_refs': ['Figure 1'], 'contains_fig_caption': True}, excluded_embed_metadata_keys=[], excluded_llm_metadata_keys=[], relationships={}, text='The model-predicted ChRs exhibit a large range of functional properties often far exceeding the parents.\n(a) Representative current traces after 0.5 s light exposure for select designer ChR variants with corresponding expression and localization in HEK cells.\nVertical colored scale bar for each ChR current trace represents 500 pA, and horizontal scale bar represents 250 ms. The variant color prese

In [23]:
# One variable of an experiment.
#   name:        label of the variable
#   categorical: later cells use this flag to choose between the categorical
#                and the numeric value-extraction prompt
class Variable(BaseModel):
    name: str
    categorical: bool

# An experiment described by its independent and dependent variables.
# Both fields are lists of `Variable`; the extraction prompt asks for empty
# lists when a plot or panel contains no data.
class Experiment(BaseModel):
    independent_variables: list[Variable]
    dependent_variables: list[Variable]

# A single plot inside a panel: its name plus the experiments it shows.
class Plot(BaseModel):
    name: str
    experiments: list[Experiment]

# A panel of a figure: its name plus the plots it contains.
class Panel(BaseModel):
    name: str
    plots: list[Plot]

# Top-level structure for one figure: its name plus its panels.
# This is the output class handed to PydanticOutputParser in the next cell.
class Figure(BaseModel):
    name: str
    panels: list[Panel]

In [24]:
# Parser for replies that follow the nested `Figure` model.
output_parser = PydanticOutputParser(output_cls=Figure)

# The figure number comes from `fig_num` (the same variable the retrieval
# filter uses) so the question and the retrieved chunks cannot drift apart.
# With fig_num == 2 the text is identical to the previously hard-coded prompt.
figure_prompt_text = (
    f"For each plot in each panel of Figure {fig_num}, determine the experiment in terms of "
    "Independent variables (IVs) and dependent variable (DV). "
    "If the plot or panel does not contain data, return empty lists for IVs and DVs. \n"
)

# Formatting with the output parser attached appends the parser's JSON-schema
# instructions to the question.
prompt = PromptTemplate(figure_prompt_text, output_parser=output_parser).format(llm=plotreader._MM_LLM)

In [25]:
# Send the schema-augmented prompt to the agent; the reply is parsed in the next cell.
response = query_worker.chat(prompt)

Added user message to memory: For each plot in each panel of Figure 2, determine the experiment in terms of Independent variables (IVs) and dependent variable (DV). If the plot or panel does not contain data, return empty lists for IVs and DVs. 



Here's a JSON schema to follow:
{{"$defs": {{"Experiment": {{"properties": {{"independent_variables": {{"items": {{"$ref": "#/$defs/Variable"}}, "title": "Independent Variables", "type": "array"}}, "dependent_variables": {{"items": {{"$ref": "#/$defs/Variable"}}, "title": "Dependent Variables", "type": "array"}}}}, "required": ["independent_variables", "dependent_variables"], "title": "Experiment", "type": "object"}}, "Panel": {{"properties": {{"name": {{"title": "Name", "type": "string"}}, "plots": {{"items": {{"$ref": "#/$defs/Plot"}}, "title": "Plots", "type": "array"}}}}, "required": ["name", "plots"], "title": "Panel", "type": "object"}}, "Plot": {{"properties": {{"name": {{"title": "Name", "type": "string"}}, "experiments": {{"items": 

In [26]:
# Parse the agent's reply text with `output_parser`; the result is used as a `Figure` below.
figure_struct = output_parser.parse(response.response)

In [29]:
def _describe_figure(figure) -> str:
    """Render a figure structure as an indented, tab-nested text outline.

    The outline starts with the figure name, then lists every panel, each
    plot of the panel, and the repr of each experiment of the plot.
    """
    pieces = [f"{figure.name}"]
    for panel_item in figure.panels:
        pieces.append(f"\n\tPanel: {panel_item.name}\n")
        for plot_item in panel_item.plots:
            pieces.append(f"\n\t\tPlot: {plot_item.name}")
            pieces.extend(f"\n\t\t\t{exp_item}" for exp_item in plot_item.experiments)
    return "".join(pieces)


fig_description = _describe_figure(figure_struct)

print(fig_description)

Figure 2
	Panel: a

		Plot: Current traces
			independent_variables=[Variable(name='ChR variant', categorical=True), Variable(name='Time', categorical=False)] dependent_variables=[Variable(name='Current', categorical=False)]
		Plot: Expression in HEK cells
			independent_variables=[Variable(name='ChR variant', categorical=True)] dependent_variables=[Variable(name='Expression level', categorical=False)]
	Panel: b

		Plot: Peak and steady-state photocurrents
			independent_variables=[Variable(name='ChR variant', categorical=True), Variable(name='Wavelength', categorical=True), Variable(name='Current type (peak/steady-state)', categorical=True)] dependent_variables=[Variable(name='Photocurrent', categorical=False)]
	Panel: c

		Plot: Off-kinetics decay rate
			independent_variables=[Variable(name='ChR variant', categorical=True)] dependent_variables=[Variable(name='τoff', categorical=False)]
		Plot: Inset: Representative current traces
			independent_variables=[Variable(name='ChR variant'

In [28]:
from typing import Union
# Figure number interpolated into the prompts below (repeats the value assigned in the filter cell).
fig_num = 2

In [17]:
# Pick the plot to analyse: the fourth panel (index 3), its first plot and
# that plot's first experiment.
# NOTE(review): the index is hard-coded; presumably this is panel "d" — confirm
# against `figure_struct` before relying on it.
panel = figure_struct.panels[3]
plot = panel.plots[0]
exp = plot.experiments[0]

# Structured answer requested from the LLM for a categorical variable.
#   name:       name of the variable
#   label_type: the kind of visual signifier that labels the categories
#   values:     maps each category to its label; a label is either a string or
#               a list of floats (the description asks for RGB when the label is a color)
class CategoricalVariable(BaseModel):
    name: str
    label_type: str = Field(
        ...,
        description="What type of visual signifier is used to label this class in the plot (e.g. color, markertype, etc.)"
    )
    values: dict[str, Union[str, list[float]]] = Field(
        ...,
        description="A map from Category > Label that can act as a legend for this variable. If the label is color, give the RGB value as a list of float."
    )

# Structured answer requested from the LLM for a numeric variable.
#   name:   name of the variable
#   values: every value the variable takes, as floats
#   unit:   unit of the values, as free text
class NumericVariable(BaseModel):
    name: str
    values: list[float] = Field(
        ...,
        description="All values for this variable."
    )
    unit: str

# Text outline of the selected plot and its experiments.
# NOTE: this string is built but not interpolated into any prompt below.
plot_struct_str = f"Plot: {plot.name}"
for experiment in plot.experiments:
    plot_struct_str += f"\n\t{experiment}"

# One parsed result (CategoricalVariable or NumericVariable) per independent
# variable of the selected experiment, in the order the variables are listed.
ivs_with_values = []

for iv in exp.independent_variables:

    print(iv)

    if iv.categorical:
        var_cls = CategoricalVariable
        prompt = f"""
YOUR OVERALL JOB:
    You are an expert at extracting categorical labels from plots in scientific papers.
    You will always primarily look to the image of the figure.
    If the information is not available or ambiguous in the image, you can look to the text.
    You will always dummy check your answer by counting the number of expected labels in the image.

YOUR CURRENT TASK:
        In Figure {fig_num}{panel.name}, plot title: {plot.name}, what values are taken by the independent variable {iv.name}?
    Return your answer as structured data.
        """
    else:
        var_cls = NumericVariable
        # Ask about the variable currently being processed. The question used
        # to hard-code "Wavelength (nm)", so every numeric variable was asked
        # about wavelength regardless of its name.
        prompt = f"""
In Figure {fig_num}{panel.name} (plot name/desc: {plot.name}), what values are taken by the independent variable {iv.name}?
IMPORTANT: 
    BE SURE TO EXAMINE THE TEXT THAT REFERENCES THIS PANEL AND NEARBY PANELS. ESPECIALLY THE METHODS AND RESULTS SECTIONS.
    DO NOT JUST READOUT AXIS TICK VALUES, REPORT THE VALUES OF THE PLOTTED POINTS!!!
    INCLUDE ALL VALUES EVEN IF THEY ARE NOT MEASURED FOR ALL OTHER INDEPENDENT OR DEPENDENT VARIABLES!!
    THERE MAY BE TYPE-OS IN THE TEXT, SO TRY TO RESOLVE DISCREPANCIES IN A PARSIMONIOUS WAY!!
Return your answer as structured data.
        """

    # Use a loop-local parser name so the notebook-level `output_parser`
    # (the Figure parser) is not overwritten by this cell.
    var_parser = PydanticOutputParser(output_cls=var_cls)
    prompt = PromptTemplate(prompt, output_parser=var_parser).format(llm=plotreader._MM_LLM)

    # Call the query tool directly (not the chat agent) and read the reply text.
    response = query_tool(prompt)
    text = response.content

    try:
        parsed_iv = var_parser.parse(text)
    except ValueError as err:
        # The model sometimes answers in prose instead of JSON; re-raise with
        # the variable name so the failing question is easy to identify.
        raise ValueError(
            f"Could not parse the values of independent variable {iv.name!r} "
            f"for Figure {fig_num}{panel.name}: {err}"
        ) from err

    ivs_with_values.append(parsed_iv)


name='Wavelength' categorical=False
name='ChR variant' categorical=True


ValueError: Could not extract json string from output: I apologize, but I don't see a Figure 2d in the provided information that matches the description "Normalized photocurrent vs wavelength". The closest match I can find is Figure 2e, which is described as showing spectral properties of some designer ChRs. However, without being able to see the actual figure, I cannot provide the specific categorical labels for ChR variants or their visual representations.

If you have access to the correct figure and can provide it, I would be happy to analyze it and give you the structured data as requested. Without the actual image, I cannot confidently extract the categorical labels or their visual representations from the plot.

In [None]:
# Display the values parsed for the independent variables.
ivs_with_values

In [24]:
# The agent's previous structured answer followed the `Figure` schema, so the
# confirmation request must carry that schema too. Build the parser here
# rather than relying on whatever `output_parser` was last bound to in an
# earlier cell.
output_parser = PydanticOutputParser(output_cls=Figure)

prompt = PromptTemplate(
    "Confirm your previous response by looking at the figure itself and ensuring all information is correct. "
    "Return a new answer or the previous one if it is correct.",
    output_parser=output_parser,
).format(llm=plotreader._MM_LLM)

# Ask the agent to double-check its earlier answer.
response = query_worker.chat(prompt)

Added user message to memory: Confirm your previous response by looking athe figure itself and ensuring all information is correct. Return a new answer or the previous one if it is correct.


Here's a JSON schema to follow:
{{"$defs": {{"Experiment": {{"properties": {{"independent_variables": {{"items": {{"$ref": "#/$defs/Variable"}}, "title": "Independent Variables", "type": "array"}}, "dependent_variables": {{"items": {{"$ref": "#/$defs/Variable"}}, "title": "Dependent Variables", "type": "array"}}}}, "required": ["independent_variables", "dependent_variables"], "title": "Experiment", "type": "object"}}, "Panel": {{"properties": {{"name": {{"title": "Name", "type": "string"}}, "plots": {{"items": {{"$ref": "#/$defs/Plot"}}, "title": "Plots", "type": "array"}}}}, "required": ["name", "plots"], "title": "Panel", "type": "object"}}, "Plot": {{"properties": {{"name": {{"title": "Name", "type": "string"}}, "experiments": {{"items": {{"$ref": "#/$defs/Experiment"}}, "title": "Experiments",

KeyboardInterrupt: 

In [27]:
# Query the agent's memory using the last prompt as input and display the returned messages.
query_worker.memory.get(prompt)

 ChatMessage(role=<MessageRole.USER: 'user'>, content='For each plot in each panel of Figure 2, determine the experiment in terms of Independent variables (IVs) and dependent variable (DV). If the plot or panel does not contain data, return empty lists for IVs and DVs. \n\n\n\nHere\'s a JSON schema to follow:\n{{"$defs": {{"Experiment": {{"properties": {{"independent_variables": {{"items": {{"$ref": "#/$defs/Variable"}}, "title": "Independent Variables", "type": "array"}}, "dependent_variables": {{"items": {{"$ref": "#/$defs/Variable"}}, "title": "Dependent Variables", "type": "array"}}}}, "required": ["independent_variables", "dependent_variables"], "title": "Experiment", "type": "object"}}, "Panel": {{"properties": {{"name": {{"title": "Name", "type": "string"}}, "plots": {{"items": {{"$ref": "#/$defs/Plot"}}, "title": "Plots", "type": "array"}}}}, "required": ["name", "plots"], "title": "Panel", "type": "object"}}, "Plot": {{"properties": {{"name": {{"title": "Name", "type": "string

In [16]:
# --- Fresh memory for the planner agent ------------------------------------
# Seed message for the vector memory, stored with the "system" role.
msgs = [
    ChatMessage.from_str(
        "You are an assistant who can look at text and figures from academic papers and answer quantitative questions about the data in the figures.",
        role="system",
    )
]

# Default in-memory vector store (vector_store=None); retriever uses similarity_top_k=3.
vector_memory = VectorMemory.from_defaults(
    vector_store=None,
    retriever_kwargs={"similarity_top_k": 3},
)
vector_memory.set(msgs)

chat_memory_buffer = ChatMemoryBuffer.from_defaults()

# Chat buffer is the primary memory, the vector memory a secondary source.
composable_memory = SimpleComposableMemory.from_defaults(
    primary_memory=chat_memory_buffer,
    secondary_memory_sources=[vector_memory],
)

# --- Planner agent ----------------------------------------------------------
# Here `query_worker` is rebound to a bare worker (no `as_agent` call); the
# StructuredPlannerAgent below wraps it.
query_worker = FunctionCallingAgentWorker.from_tools([query_tool], verbose=True)

agent = StructuredPlannerAgent(
    query_worker,
    tools=[query_tool],
    verbose=True,
    memory=composable_memory,
)

In [18]:
# Run the planner agent; with verbose=True it prints its plan before the final answer.
agent.chat("Summarize Figure 1 using only information in the main body of the paper.")

=== Initial plan ===
Locate_Figure_1:
Find Figure 1 in the scientific paper. -> Confirmation that Figure 1 has been located in the paper.
deps: []


Identify_Figure_1_References:
Identify all references to Figure 1 in the main body of the paper. -> A list of sentences or paragraphs that mention or describe Figure 1.
deps: ['Locate_Figure_1']


Extract_Figure_1_Information:
Extract all relevant information about Figure 1 from the main body of the paper, excluding captions or legends. -> A compilation of all information related to Figure 1 found in the main text.
deps: ['Identify_Figure_1_References']


Organize_Information:
Organize the extracted information about Figure 1 in a logical order. -> A structured outline of the information about Figure 1.
deps: ['Extract_Figure_1_Information']


Summarize_Figure_1:
Create a concise summary of Figure 1 using only the organized information from the main body of the paper. -> A comprehensive summary of Figure 1 based solely on information from 

AgentChatResponse(response="Certainly! I'll create a summary of Figure 1 based on the limited information available, clearly stating the limitations and any inferences made. Here's the summary:\n\nSummary of Figure 1:\n\nTitle: Model-predicted Channelrhodopsins (ChRs) with Diverse Functional Properties\n\nContent:\n1. Machine Learning Approach:\n   - Likely illustrates the Gaussian process (GP) classification and regression models used.\n   - May show inputs (ChR sequence/structure data) and outputs (predicted properties).\n\n2. Designer ChR Variants:\n   - Showcases selected variants: ChR_21_10, ChR_25_9, ChR_11_10, ChR_15_10.\n   - Compares to parental ChRs: CheRiff, CsChrimR, C1C2.\n\n3. Functional Properties Displayed:\n   a) Representative current traces\n   b) Expression and localization in HEK cells\n   c) Peak and steady-state photocurrent under different light wavelengths\n   d) Off-kinetics decay rate (τoff)\n   e) Photocurrent strength at varying light irradiances\n   f) Wav

In [19]:
# Query the planner agent's memory with a new input string and display the returned chat messages.
agent.memory.get("Tell me about one panel in Figure 1.")

[ChatMessage(role=<MessageRole.USER: 'user'>, content='Compile all available information about Figure 1 from the accessible parts of the paper, including any mentions in figure lists or references.', additional_kwargs={}),
 ChatMessage(role=<MessageRole.ASSISTANT: 'assistant'>, content="Certainly! I'll use the tool to gather all available information about Figure 1 from the accessible parts of the paper, including any mentions in figure lists or references.", additional_kwargs={'tool_calls': [{'id': 'toolu_018zhPXz5iVkvwiGmK3c1UyV', 'input': {'input': 'Compile all available information about Figure 1 from the accessible parts of the paper, including any mentions in figure lists or references'}, 'name': 'nihms-1538039_multimodal_vector_tool', 'type': 'tool_use'}]}),
 ChatMessage(role=<MessageRole.TOOL: 'tool'>, content="Based on the provided information, I can compile the following details about Figure 1:\n\n1. Figure 1 is mentioned multiple times in the document, specifically on pages 

In [None]:
import os

from llama_index.core.tools import QueryEngineTool, ToolMetadata
from llama_index.agent.anthropic import AnthropicAgent
from llama_index.llms.anthropic import Anthropic
from llama_index.postprocessor.cohere_rerank import CohereRerank

# Assuming 'handler' is an instance of ScientificPaperHandler
# Get the query engine from the handler
# NOTE(review): assumes `handler.query_engine` is an attribute exposing a
# mutable `node_postprocessors` list — confirm against ScientificPaperHandler.
query_engine = handler.query_engine

# Read the Cohere key from the environment instead of hard-coding it in the
# notebook, and fail early with a clear message when it is missing.
cohere_api_key = os.environ.get("COHERE_API_KEY")
if not cohere_api_key:
    raise RuntimeError(
        "Set the COHERE_API_KEY environment variable before creating the Cohere reranker."
    )

# Create a Cohere Reranker that keeps the 3 best nodes
cohere_rerank = CohereRerank(api_key=cohere_api_key, top_n=3)

# Add the reranker to the query engine's node postprocessors only once, so
# re-running this cell does not stack several rerankers on the same engine.
if not any(isinstance(post, CohereRerank) for post in query_engine.node_postprocessors):
    query_engine.node_postprocessors.append(cohere_rerank)

# Create a QueryEngineTool using the query engine from the handler
rag_tool = QueryEngineTool(
    query_engine=query_engine,
    metadata=ToolMetadata(
        name="scientific_paper_rag",
        description="Use this tool to perform retrieval-augmented generation (RAG) over the loaded scientific paper."
    )
)

# Create an AnthropicAgent with the RAG tool using Claude 3 Sonnet
llm = Anthropic(model="claude-3-sonnet-20240229")
rag_agent = AnthropicAgent.from_tools(
    [rag_tool],
    llm=llm,
    verbose=True
)

# Example usage:
# response = rag_agent.chat("What does the paper say about Figure 1?")
# print(response)





