In [1]:
%load_ext autoreload
%autoreload 2

In [39]:
from datasets import load_dataset
from PIL import Image
from numpy.random import permutation
import os
import glob
import nest_asyncio
nest_asyncio.apply()

from llama_index.core import Settings
from llama_index.core.agent import StructuredPlannerAgent, FunctionCallingAgentWorker, ReActAgent, MultimodalReActAgentWorker, AgentRunner, Task
from llama_index.tools.code_interpreter import CodeInterpreterToolSpec
from plotreader.document import DirectoryHandler, MultimodalDirectoryHandler
from llama_index.multi_modal_llms.anthropic import AnthropicMultiModal
from llama_index.core.schema import ImageDocument

print(Settings.llm.model)


claude-3-5-sonnet-20240620


In [3]:
# Load the "FigQA" configuration of the "futurehouse/lab-bench" dataset and display its summary.
ds = load_dataset("futurehouse/lab-bench", "FigQA")
ds

DatasetDict({
    train: Dataset({
        features: ['id', 'question', 'ideal', 'distractors', 'canary', 'subtask', 'figure', 'figure-path'],
        num_rows: 181
    })
})

In [4]:
def get_randomized_options(sample):
    """Return all answer options of ``sample`` in random order.

    The options are the entries of ``sample['distractors']`` plus
    ``sample['ideal']``. They are copied into a new list before shuffling, so
    ``sample`` itself is never modified. Appending to ``sample['distractors']``
    directly would add the ideal answer again on every call made with the same
    sample.

    Args:
        sample: Mapping with a ``'distractors'`` sequence and an ``'ideal'`` entry.

    Returns:
        numpy.ndarray: Every option exactly once, in a random order.
    """
    # Build a fresh list instead of aliasing (and mutating) the caller's list.
    options = [*sample['distractors'], sample['ideal']]
    return permutation(options)

# Multiple-choice question template; filled with str.format using the
# `question` and `answer_options` fields.
figqa_prompt_template = "Question: {question}\nChoose from the following answer options: {answer_options}"

In [73]:
# Handler for the figure directory './storage/tmp' (the same path the sample figure
# is saved to elsewhere in this notebook).
# NOTE(review): presumably the handler parses/indexes the files under `dirpath` and
# persists under `storage_dir` — confirm against plotreader.document.
fig_handler = MultimodalDirectoryHandler(
    name = 'figure',
    dirpath = './storage/tmp',
    desc = 'The figure to use to answer the question.',
    storage_dir = './storage',
    use_cache=False,
    parsing_instructions = None 
                # Disabled alternative parsing instructions (None is passed instead):
                # """
                # Extract as much information and describe them so someone could potentially simulate new data and plot similar figures.
                # Attempt to extract all of the quantitative information from these figures including the values used to generate lines and other visual information. 
                # Attempt to estimate the values at each plotted point (not interpolated points) along with any associated labels. Return tables of the values only.
                # """
)

In [74]:
def _clear_files(dirpath):
    """Ensure `dirpath` exists and delete every regular file directly inside it.

    Subdirectories are left alone (os.remove cannot delete them and would raise).
    Like the "*" glob pattern itself, this does not touch dot-files.
    """
    os.makedirs(dirpath, exist_ok=True)
    for path in glob.glob(os.path.join(dirpath, "*")):
        if os.path.isfile(path):
            os.remove(path)


# Start from empty image directories so leftovers from a previous sample
# cannot be picked up by later cells.
dl_imagedir = "./data_images"
_clear_files(dl_imagedir)

tmp_imagedir = "./storage/tmp"
_clear_files(tmp_imagedir)

# Pick the sample under study (index 180 of the train split) and write its
# figure into the directory that was just cleared.
sample = ds['train'][180]
sample['figure'].save(os.path.join(tmp_imagedir, 'tmp.pdf'))

# Tool set for the agent: the figure query-engine tool followed by the
# code-interpreter tools.
# NOTE(review): the code-interpreter tools presumably let the agent execute code on
# this machine — confirm the environment is suitably isolated.
tools = [
    fig_handler.query_engine_tool(),
    *CodeInterpreterToolSpec().to_tool_list(),
]

# Multimodal model that drives the ReAct worker.
multimodal_llm = AnthropicMultiModal(
    model='claude-3-5-sonnet-20240620',
    max_tokens=2048,
)

tool_agent_worker = MultimodalReActAgentWorker.from_tools(
    tools,
    verbose=True,
    # max_function_calls=5,
    multi_modal_llm=multimodal_llm,
)

# Wrap the worker in an agent so tasks can be created and stepped.
agent = tool_agent_worker.as_agent()

# agent = StructuredPlannerAgent(
#     tool_agent_worker, 
#     tools=tools, 
#     verbose=True, 
# )

Started parsing the file under job_id b357ef6b-83a7-49e0-8a73-ddea049e0594
> Image for page 1: [{'name': 'page_1.jpg', 'height': 0, 'width': 0, 'x': 0, 'y': 0, 'type': 'full_page_screenshot'}]


In [56]:
# FIG_READER_PROMPT = """
# Your first main task is to describe the meaning of each visual signifier for each panel. 
# First, aggregate all signifiers and their various meanings across the full figure.
# Be careful, some signifiers may be used multiple times and in different ways across panels.
# If a signifier in a particular panel is ambiguous, make an educated guess based on other information in the figure 
# and other uses of that signifier.
# Make sure to save your final description of what all of the signifiers mean for each panel.
# After your first task, your second main task is to answer the following multiple choice question. 
# First, decide which panels are relevant.
# Then retrieve the signifier descriptions for those panels as a reference to help you answer the question.
# If something is ambiguous, assume you are incorrect about an assumption, not that the question/answers have an issue.
# Look to other parts of the figure or other information in the relevant panels to help correct your assumption.
# {figqa_input}
# """

In [57]:
# FIG_READER_PROMPT = """
# Your first main task is to answer the question at the end of this prompt.
# Follow more or less this plan to do so:
# 1. Summarize the figure.
# 2. Determine which panels are needed to answer the question.
# 3. Determine which auxiliary panels might contain similar or relevant information or have similar graph styles.
# 4. Explain what might be ambiguous in this plot and use the information selected in (2) and (3) to resolve it.
# 5. Explain what every signifier means in the plot.
# 6. Answer the question and explain which visual features enabled you to answer it.
# DO NOT ANSWER A DIFFERENT QUESTION IF YOU THINK INFORMATION IS MISSING OR THE QUESTION IS NOT WELL SPECIFIED. TRY TO FIGURE IT OUT!
# {figqa_input}
# """

In [75]:
# Instruction wrapper for the agent query; `{figqa_input}` is filled via
# str.format with the formatted question and answer options.
FIG_READER_PROMPT = """
Your first main task is to answer the question at the end of this prompt.
After each subtask or step, see if you are ready to answer the question.
If you need more information, look at the full figure and determine the best part of the figure from which to obtain information.
DO NOT ANSWER A DIFFERENT QUESTION IF YOU THINK INFORMATION IS MISSING OR THE QUESTION IS NOT WELL SPECIFIED. TRY TO FIGURE IT OUT!
{figqa_input}
"""

In [76]:

# Fill the question template for the current sample, then wrap the result in
# the reader prompt to obtain the final agent query.
question_text = sample['question']
shuffled_options = get_randomized_options(sample)
figqa_prompt = figqa_prompt_template.format(
    question=question_text,
    answer_options=shuffled_options,
)
query = FIG_READER_PROMPT.format(figqa_input=figqa_prompt)

print(query)


Your first main task is to answer the question at the end of this prompt.
After each subtask or step, see if you are ready to answer the question.
If you need more inforamtion, look at the full figure and determine the best part of the figure from which to obtain information.
DO NOT ANSWER A DIFFERENT QUESTION IF YOU THINK INFORMATION IS MISSING OR THE QUESTION IS NOT WELL SPECIFIED. TRY TO FIGURE IT OUT!
Question: Wld S protection of the cerebral cortex and hippocampus following traumatic brain injury occurs from which of the following?
Choose from the following answer options: ['Increased tau acetylation and decreased AIS degradation'
 'Increased tau acetylation and decreased tau mislocalization'
 'Decreased tau acetylation and increased tau mislocalization'
 'Decreased tau acetylation and decreased AIS degradation']



In [83]:
def execute_step(agent: AgentRunner, task: Task):
    """Advance `task` by one agent step.

    Calls ``agent.run_step`` for the task. If that step is flagged as the last
    one, the response is finalized, printed, and returned; otherwise ``None``
    is returned to signal that more steps are needed.
    """
    if not agent.run_step(task.task_id).is_last:
        return None

    final_response = agent.finalize_response(task.task_id)
    print(f"> Agent finished: {final_response!s}")
    return final_response
    
def execute_steps(agent: AgentRunner, task: Task, max_steps=None):
    """Step `task` repeatedly until the agent produces its final response.

    Args:
        agent: Runner passed through to `execute_step`.
        task: Task passed through to `execute_step`.
        max_steps: Optional upper bound on the number of steps. ``None`` (the
            default) keeps stepping until a response is returned, which never
            terminates if the agent never reports a last step.

    Returns:
        The first non-None value returned by `execute_step`.

    Raises:
        RuntimeError: If `max_steps` steps ran without producing a response.
    """
    steps_run = 0
    while max_steps is None or steps_run < max_steps:
        response = execute_step(agent, task)
        steps_run += 1
        if response is not None:
            return response
    # Only reachable when a cap was given and exhausted.
    raise RuntimeError(f"Agent did not finish within {max_steps} steps.")

In [87]:
image_root = "data_images"

# glob returns matches in arbitrary order, so sort to make the pick reproducible,
# and fail with a clear message instead of a bare IndexError when nothing matches.
image_paths = sorted(glob.glob(f"{image_root}/*.jpg"))
if not image_paths:
    raise FileNotFoundError(f"No .jpg images found in {image_root!r}.")
image_document = ImageDocument(image_path=image_paths[0])

# The image is handed to the agent through the task's extra state under "image_docs".
task = agent.create_task(
    query, extra_state={"image_docs": [image_document]}
)

In [88]:
# Step the agent on the prepared task until it returns a final response.
# NOTE(review): the recorded output of this cell is
# "TypeError: AnthropicMultiModal.chat() missing 1 required positional argument: 'image_documents'",
# so this call did not complete in the saved run.
response = execute_steps(agent, task)

TypeError: AnthropicMultiModal.chat() missing 1 required positional argument: 'image_documents'

In [None]:
# NOTE(review): this call supplies no image documents; the recorded output ends in
# "ValueError: Image docs not found in task extra state." Presumably the task created
# with `extra_state={"image_docs": ...}` is the intended path — confirm before relying on this cell.
agent.chat(query)

=== Initial plan ===
Analyze_Figure:
Use the figure_multimodal_vector_tool to examine the figure related to Wld S protection, traumatic brain injury, tau acetylation, tau mislocalization, and AIS degradation. -> A description of the relevant information from the figure, including any data on Wld S protection, tau acetylation, tau mislocalization, and AIS degradation.
deps: []


Interpret_Data:
Based on the information gathered from the figure, interpret the effects of Wld S on tau acetylation, tau mislocalization, and AIS degradation in the context of traumatic brain injury. -> A clear interpretation of how Wld S affects tau acetylation, tau mislocalization, and AIS degradation following traumatic brain injury.
deps: ['Analyze_Figure']


Answer_Question:
Using the interpreted data, determine which of the given answer options correctly describes the Wld S protection of the cerebral cortex and hippocampus following traumatic brain injury. -> The correct answer selected from the given opt

ValueError: Image docs not found in task extra state.

In [None]:
# Display the `ideal` entry of the current sample.
sample['ideal']

'Decreased tau acetylation and decreased AIS degradation'