In [None]:
import asyncio
from dataclasses import dataclass
from dotenv import load_dotenv
from agents import Agent, function_tool
from openai import OpenAI
from typing import Optional, List, Dict
from pydantic import BaseModel
from typing_extensions import TypedDict
import os
import base64

# Load environment variables from .env
# NOTE(review): presumably this has to run before the client below is created so that
# the API credentials are already in the environment — confirm.
load_dotenv()

# Initialize the module-level OpenAI client; the image_analysis tool in a later cell
# calls client.responses.create on this object.
client = OpenAI()

In [5]:
# Prompt text that image_analysis sends as the first input_text part of every request.
# It lays out the analysis protocol and the six-section response format the model is
# asked to follow. The string is sent verbatim, so edits here change model behavior.
IMAGE_DESCRIPTION_PROMPT = """
# Scientific Image Analysis Agent System Prompt

You are an expert Scientific Image Analysis Agent designed to analyze and describe images in scientific papers. Your primary function is to help verify the quality and integrity of figures, charts, graphs, and other visual elements in academic manuscripts. 

## Core Responsibilities

1. Provide detailed, objective descriptions of scientific images
2. Identify key visual elements and their relationships
3. Assess technical quality of images (resolution, clarity, color quality)
4. Detect potential issues with image integrity or representation
5. Analyze graphical data presentations for accuracy and clarity

## Image Analysis Protocol

For each image you analyze, follow this structured approach:

1. **Initial Classification**: Identify the image type (e.g., microscopy image, graph, chart, diagram, photograph)

2. **Detailed Description**:
   - Describe all visible elements in the image
   - Note color schemes and visual attributes
   - Identify labels, scales, axes, and legends
   - Document any visible annotations or markers

3. **Technical Assessment**:
   - Evaluate resolution and clarity
   - Assess appropriateness of contrast and brightness
   - Check if scale bars/measurements are present when needed
   - Note any visual artifacts or quality issues

4. **Scientific Integrity Checks**:
   - Look for signs of inappropriate manipulation (unusual edges, inconsistent noise patterns)
   - Check for duplicated elements or regions
   - Verify that visual representations match any cited data
   - Assess if error bars or statistical indicators are appropriately displayed

5. **Communication Effectiveness**:
   - Evaluate if the visual clearly communicates its intended information
   - Assess if colorblind-friendly palettes are used where appropriate
   - Check if the visual is self-explanatory or requires extensive caption explanation

## Response Format

Structure your analysis as follows:

1. **Image Classification**: Brief statement identifying the image type
2. **Visual Description**: Comprehensive, objective description of visual elements
3. **Technical Quality**: Assessment of image quality parameters
4. **Integrity Analysis**: Observations related to scientific integrity
5. **Communication Assessment**: Evaluation of clarity and effectiveness
6. **Recommendations**: Suggestions for improvement if applicable

## Guidelines for Interaction

- Maintain scientific objectivity in all descriptions
- Use precise technical terminology appropriate to the field
- Flag concerns without making definitive accusations about misconduct
- Be thorough but concise in your analysis
- When uncertain about an element, clearly indicate this rather than speculating
- Focus on visual elements only, not judging the scientific merit of the research itself

You are a critical component in maintaining scientific publishing standards. Your analysis helps ensure that visual elements in scientific papers accurately and clearly represent the research findings.
"""

In [None]:
# NOTE(review): no other visible cell references this model; presumably it is meant
# as a structured output schema for the agent — confirm before relying on it.
class FigureAnalysisResult(BaseModel):
    """Result of figure analysis."""
    # Free-form string naming the kind of figure; no enum or validation is applied here.
    figure_type: str
    # presumably the single takeaway the figure is meant to convey — TODO confirm
    main_message: str
    # List of strings; presumably the notable components of the figure — TODO confirm
    key_elements: List[str]

In [4]:
%pip install agents

Collecting agents
  Downloading agents-1.4.0.tar.gz (37 kB)
  Preparing metadata (setup.py) ... [?25ldone
Collecting gym (from agents)
  Downloading gym-0.26.2.tar.gz (721 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m721.7/721.7 kB[0m [31m7.9 MB/s[0m eta [36m0:00:00[0m
[?25h  Installing build dependencies ... [?25ldone
[?25h  Getting requirements to build wheel ... [?25ldone
[?25h  Preparing metadata (pyproject.toml) ... [?25ldone
[?25hCollecting ruamel.yaml (from agents)
  Downloading ruamel.yaml-0.18.10-py3-none-any.whl.metadata (23 kB)
Collecting cloudpickle>=1.2.0 (from gym->agents)
  Downloading cloudpickle-3.1.1-py3-none-any.whl.metadata (7.1 kB)
Collecting gym_notices>=0.0.4 (from gym->agents)
  Downloading gym_notices-0.0.8-py3-none-any.whl.metadata (1.0 kB)
Collecting ruamel.yaml.clib>=0.2.7 (from ruamel.yaml->agents)
  Downloading ruamel.yaml.clib-0.2.12.tar.gz (225 kB)
  Installing build dependencies ... [?25ldone
[?25h  Getting requireme

In [None]:
class FigureExtractorContext(TypedDict):
    """Context for figure extraction."""
    # Despite the plural name this is ONE path string: image_analysis passes it
    # directly to encode_image(), which open()s it as a single file.
    image_paths: str
    # Extra text that image_analysis forwards to the model as a second input_text
    # part, alongside the prompt and the image.
    additional_context: str


In [None]:
def encode_image(image_path):
    """Read the file at ``image_path`` in binary mode and return it base64-encoded.

    Args:
        image_path: Path of the file to read; handed to ``open`` unchanged.

    Returns:
        The base64 representation of the file's bytes, decoded to a ``str``.
    """
    with open(image_path, mode="rb") as handle:
        raw_bytes = handle.read()
    encoded_bytes = base64.b64encode(raw_bytes)
    return encoded_bytes.decode("utf-8")

In [None]:

@function_tool
def image_analysis(figure_extractor_context: FigureExtractorContext) -> str:
    """Analyze one scientific figure with a vision model and return the written analysis.

    Args:
        figure_extractor_context: Mapping with ``image_paths`` (path of the single
            image file to analyze) and ``additional_context`` (extra text sent to
            the model together with the image).

    Returns:
        The model's textual analysis (``response.output_text``).
    """
    import mimetypes  # local import: only needed to label the data URL below

    image_path = figure_extractor_context["image_paths"]
    encoded_image = encode_image(image_path)

    # Label the data URL with the media type implied by the file extension so that
    # PNG/GIF/WebP figures are not mislabeled; keep JPEG as the fallback.
    mime_type, _ = mimetypes.guess_type(image_path)
    if mime_type is None or not mime_type.startswith("image/"):
        mime_type = "image/jpeg"

    response = client.responses.create(
        model="gpt-4o-mini",
        input=[
            # Each input item is a message dict; the content parts are sent in order:
            # analysis prompt, caller-supplied context, then the image itself.
            {
                "role": "user",
                "content": [
                    {"type": "input_text", "text": IMAGE_DESCRIPTION_PROMPT},
                    {"type": "input_text", "text": figure_extractor_context["additional_context"]},
                    {
                        "type": "input_image",
                        "image_url": f"data:{mime_type};base64,{encoded_image}",
                    },
                ],
            }
        ],
    )
    analysis_text = response.output_text
    print(analysis_text)
    # Return the text so the agent actually receives the tool result; printing
    # alone would hand it None.
    return analysis_text

In [None]:
# Agent wired to the image_analysis tool defined in this notebook.
# NOTE(review): the instructions below talk about relationships *between* figures,
# while the only tool available here analyzes a single image — confirm the wording.
agent = Agent(
    name="Image analysis agent",
    instructions= """You are a scientific figure analyst. 
    Analyze relationships between figures in an academic 
    paper and explain how they complement or contrast with 
    each other.""",
    model="gpt-4o-mini",
    # Must reference a tool that exists in this notebook; `detect_figure_relationships`
    # is neither defined nor imported here and raises NameError on a fresh kernel.
    tools=[image_analysis],
)