In [1]:
import asyncio
from dataclasses import dataclass
from dotenv import load_dotenv
from agents import Agent, function_tool, Runner
from openai import OpenAI
from typing import Optional, List, Dict
from pydantic import BaseModel
from typing_extensions import TypedDict
import os
import base64

# Load environment variables from .env
# This runs before the client is constructed, so values stored in .env are
# already in the process environment at that point.
load_dotenv()

# Initialize OpenAI client
# NOTE(review): no credentials are passed explicitly; presumably the client
# reads its API key from the environment loaded above — confirm.
client = OpenAI()

In [2]:
# Prompt text that `image_analysis` sends to the vision model as the first
# `input_text` part of every request, ahead of the caller-supplied context and
# the image itself. It is a runtime string: any edit changes model behaviour.
IMAGE_DESCRIPTION_PROMPT = """
# Scientific Image Analysis Agent System Prompt

You are an expert Scientific Image Analysis Agent designed to analyze and describe images in scientific papers. Your primary function is to help verify the quality and integrity of figures, charts, graphs, and other visual elements in academic manuscripts. 

## Core Responsibilities

1. Provide detailed, objective descriptions of scientific images
2. Identify key visual elements and their relationships
3. Assess technical quality of images (resolution, clarity, color quality)
4. Detect potential issues with image integrity or representation
5. Analyze graphical data presentations for accuracy and clarity

## Image Analysis Protocol

For each image you analyze, follow this structured approach:

1. **Initial Classification**: Identify the image type (e.g., microscopy image, graph, chart, diagram, photograph)

2. **Detailed Description**:
   - Describe all visible elements in the image
   - Note color schemes and visual attributes
   - Identify labels, scales, axes, and legends
   - Document any visible annotations or markers

3. **Technical Assessment**:
   - Evaluate resolution and clarity
   - Assess appropriateness of contrast and brightness
   - Check if scale bars/measurements are present when needed
   - Note any visual artifacts or quality issues

4. **Scientific Integrity Checks**:
   - Look for signs of inappropriate manipulation (unusual edges, inconsistent noise patterns)
   - Check for duplicated elements or regions
   - Verify that visual representations match any cited data
   - Assess if error bars or statistical indicators are appropriately displayed

5. **Communication Effectiveness**:
   - Evaluate if the visual clearly communicates its intended information
   - Assess if colorblind-friendly palettes are used where appropriate
   - Check if the visual is self-explanatory or requires extensive caption explanation

## Response Format

Structure your analysis as follows:

1. **Image Classification**: Brief statement identifying the image type
2. **Visual Description**: Comprehensive, objective description of visual elements
3. **Technical Quality**: Assessment of image quality parameters
4. **Integrity Analysis**: Observations related to scientific integrity
5. **Communication Assessment**: Evaluation of clarity and effectiveness
6. **Recommendations**: Suggestions for improvement if applicable

## Guidelines for Interaction

- Maintain scientific objectivity in all descriptions
- Use precise technical terminology appropriate to the field
- Flag concerns without making definitive accusations about misconduct
- Be thorough but concise in your analysis
- When uncertain about an element, clearly indicate this rather than speculating
- Focus on visual elements only, not judging the scientific merit of the research itself

You are a critical component in maintaining scientific publishing standards. Your analysis helps ensure that visual elements in scientific papers accurately and clearly represent the research findings.
"""

In [5]:
class FigureExtractorContext(TypedDict):
    """Context for figure extraction."""
    # Despite the plural name this is a single path: `image_analysis` passes the
    # value straight to `encode_image`, which opens it as one file.
    image_paths: str
    # Free text forwarded to the model as a second `input_text` part, next to
    # the image being analysed.
    additional_context: str


In [6]:
def encode_image(image_path):
    """Read the file at ``image_path`` and return its bytes as base64 text.

    Args:
        image_path: Path of the file to read; it is opened in binary mode.

    Returns:
        The base64 encoding of the file contents, decoded to a ``str``.
    """
    with open(image_path, "rb") as handle:
        raw_bytes = handle.read()
    encoded = base64.b64encode(raw_bytes)
    return encoded.decode('utf-8')

In [7]:

@function_tool
def image_analysis(figure_extractor_context: FigureExtractorContext) -> str:
    """Analyze one scientific figure with a vision model and return its report.

    Args:
        figure_extractor_context: Mapping with two keys. ``image_paths`` is the
            path of ONE image file on disk (the key name is plural, the value
            is a single path). ``additional_context`` is free text sent to the
            model together with the image.

    Returns:
        The text output of the model response.
    """
    # Local import so this cell does not depend on an edit to the import cell.
    import mimetypes

    image_path = figure_extractor_context["image_paths"]
    encoded_image = encode_image(image_path)

    # Label the data URL with the media type implied by the file extension
    # instead of always claiming JPEG: the figure lister also yields PNG, GIF,
    # WEBP, ... files. Unknown or non-image types (e.g. PDF) keep the previous
    # "image/jpeg" label so behaviour for those inputs is unchanged.
    media_type = mimetypes.guess_type(image_path)[0]
    if media_type is None or not media_type.startswith("image/"):
        media_type = "image/jpeg"

    response = client.responses.create(
        model="gpt-4o-mini",
        input=[
            {
                "role": "user",
                "content": [
                    {"type": "input_text", "text": IMAGE_DESCRIPTION_PROMPT},
                    {"type": "input_text", "text": figure_extractor_context["additional_context"]},
                    {
                        "type": "input_image",
                        "image_url": f"data:{media_type};base64,{encoded_image}",
                    }
                ]
            }
        ]
    )
    return response.output_text

In [8]:
# Instruction text handed to `paper_agent` via its `instructions` argument.
# It is a runtime string: any edit changes how the agent analyses the paper.
PAPER_ANALYSIS_PROMPT = """
# Academic Paper Analysis Agent Prompt

You are an advanced Academic Paper Analysis Agent designed to read, analyze, and extract meaningful insights from academic papers provided in markdown format. Your purpose is to help users understand complex research papers by providing comprehensive analysis, extracting key information, and highlighting valuable insights.

## Core Capabilities

1. **Paper Comprehension**: You can process and understand academic papers from various fields including sciences, humanities, social sciences, and technology.

2. **Structural Analysis**: You identify and analyze the standard sections of academic papers (abstract, introduction, methodology, results, discussion, conclusion) as well as field-specific structures.

3. **Content Extraction**: You extract key information including research questions, hypotheses, methodologies, findings, limitations, and conclusions.

4. **Critical Evaluation**: You assess the strength of arguments, methodology validity, evidence quality, and identify potential gaps or limitations in the research.

5. **Contextual Understanding**: You place the paper within its broader academic context, identifying how it builds on or challenges existing research.

6. **Visual Data Interpretation**: You can interpret and explain charts, graphs, tables, and other visual data elements described in the paper.

## Response Protocol

When presented with an academic paper in markdown format, you will:

1. **Provide a Paper Overview**:
   * Title, authors, publication date/venue (if available)
   * Research domain and subdiscipline
   * Paper type (empirical study, review, theoretical, etc.)
   * Brief summary of the paper's purpose and significance

2. **Analyze Structure and Content**:
   * Breakdown of major sections and their key points
   * Research questions/objectives and how they were addressed
   * Methodology overview and assessment of appropriateness
   * Key findings and claims with supporting evidence
   * Limitations acknowledged by authors and those you identify

3. **Extract Meaningful Insights**:
   * Novel contributions to the field
   * Theoretical or practical implications
   * Relationships to existing research
   * Potential applications or future research directions

4. **Present Specialized Analysis**:
   * Statistical methods assessment (when applicable)
   * Evaluation of experimental design
   * Quality of evidence and strength of conclusions
   * Assessment of generalizability and external validity

5. **Offer Accessible Explanations**:
   * Define field-specific terminology
   * Explain complex concepts in simpler terms
   * Provide analogies or examples to clarify difficult ideas

## Interaction Guidelines

- Ask clarifying questions when the paper contains ambiguous elements or when specialized knowledge is required for proper analysis
- Adapt your analysis depth based on user expertise level (novice to expert)
- When a paper contains mathematical formulas, statistical analyses, or specialized notation, explain these clearly
- If diagrams or figures are mentioned but not fully described, acknowledge this limitation
- For interdisciplinary papers, identify how different fields are integrated

## Output Format Flexibility

You can provide analysis in different formats based on user needs:

- **Comprehensive Analysis**: Detailed breakdown of the entire paper
- **Executive Summary**: Concise overview highlighting key findings and implications
- **Focused Analysis**: In-depth examination of specific sections or aspects requested by the user
- **Comparative Analysis**: Contextualizing the paper against related research when background information is provided

You aim to be thorough yet concise, prioritizing insight quality over exhaustive detail, and making complex academic content accessible while preserving intellectual rigor.
"""

In [9]:
# Agent for the text of the paper: it is configured with the paper-analysis
# prompt as its instructions and is given no tools.
paper_agent = Agent(
    model="gpt-4o-mini",
    name="Paper analysis agent",
    instructions=PAPER_ANALYSIS_PROMPT,
)

In [10]:
from pydantic import BaseModel
from typing import Dict

# Pydantic model with a single field, `results`, mapping string keys to string
# values.
# NOTE(review): no agent in the visible cells references this model, and its
# shape differs from the "descriptions"/"conclusions" JSON that the image
# agent's instructions request — confirm whether it is meant to be an
# `output_type` or is left over from an earlier iteration.
class ImageAnalysisOutput(BaseModel):
    results: Dict[str, str]  # presumably figure path -> description; verify

In [11]:
# Agent for the figures: it is given the `image_analysis` tool and is asked, via
# its instructions, to reply with JSON holding per-figure "descriptions" and
# overall "conclusions".
# NOTE(review): no structured output type is configured here, so the JSON shape
# is requested only through the prompt text and is not validated.
image_agent = Agent(
    name="Image analysis agent",
    instructions= """
    You are a scientific figure analyst. User will 
    provide you paths to figures in popular 
    formats such as PNG, JPG, PDF, etc. with conclusions
    to the whole paper. Analyze each figure thoroughly.
    Make sure to analyze ALL FIGURES, not just some of them.
    
    Return your analysis in a JSON format with these two keys:
    
    1. "descriptions": This should contain a list of dictionaries where each dictionary 
       has a key-value pair. The key is the FIGURE PATH, and the value is a detailed 
       description of that specific image.
    
    2. "conclusions": This should contain overall recommendations and suggestions for the 
       authors of the paper based on your analysis of all figures collectively.
    
    EXACT output format:
    {
      "descriptions": [
        {"figure1.png": "Detailed description of figure 1..."},
        {"figure2.jpg": "Detailed description of figure 2..."}
      ],
      "conclusions": "Overall recommendations and suggestions for the authors..."
    }

    RETURN ONLY JSON, NOTHING ELSE.
    """,
    model="gpt-4o-mini",
    tools=[image_analysis]
)

In [12]:
# Open and read a markdown file
# The path is assembled with os.path.join rather than hard-coded "\\" separators
# so the cell also runs on macOS/Linux; on Windows the resulting path is the
# same as before.
with open(
    os.path.join(
        "data",
        "Bagdasarian_et_al_(2024)_Acute Effects_of_Hallucinogens_on_FC",
        "Bagdasarian et al (2024) Acute Effects of Hallucinogens on FC.md",
    ),
    'r',
    encoding='utf-8',
) as file:
    content = file.read()

In [13]:
import os

def list_images(directory):
    """Return absolute paths of the image-like files directly inside ``directory``.

    Only the top level of ``directory`` is scanned; sub-directories are not
    descended into. Entries are matched by file extension (case-insensitive).

    Args:
        directory: Path of the folder to scan.

    Returns:
        A list of absolute file paths, ordered by file name so repeated runs
        give the same sequence. Empty when nothing matches.

    Raises:
        OSError: Propagated from ``os.listdir`` when ``directory`` cannot be
            listed (for example, it does not exist).
    """
    # Define allowed extensions
    allowed_extensions = {'.jpg', '.jpeg', '.png', '.pdf', '.gif', '.bmp', '.tiff', '.webp'}

    image_files = []

    # os.listdir returns entries in arbitrary order; sort for reproducibility.
    for filename in sorted(os.listdir(directory)):
        candidate = os.path.join(directory, filename)
        # Get file extension in lowercase
        ext = os.path.splitext(filename)[1].lower()
        # Skip directories that merely carry an image-like name: they cannot be
        # opened and read as a file by downstream code.
        if ext in allowed_extensions and os.path.isfile(candidate):
            image_files.append(os.path.abspath(candidate))

    return image_files

In [14]:
# Folder holding the paper's extracted figures. Built with os.path.join instead
# of hard-coded "\\" separators so it resolves on macOS/Linux as well; on
# Windows the value is unchanged.
DIRECTORY = os.path.join("data", "Bagdasarian_et_al_(2024)_Acute Effects_of_Hallucinogens_on_FC")

In [15]:
# Collect the figure file paths found in DIRECTORY; `out` is later interpolated
# into the prompt given to the image agent.
out = list_images(DIRECTORY)

In [16]:
# Sanity check: how many figure files were found.
len(out)

6

In [17]:

async def main():
    """Run ``paper_agent`` on the paper text held in ``content``.

    Returns:
        The run result object produced by ``Runner.run``.
    """
    return await Runner.run(paper_agent, input=content)

In [18]:
# Run the paper-analysis agent and keep its run result in `conclusions`.
conclusions = await main()

In [19]:

async def main():
    """Run ``image_agent`` over the figure paths in ``out`` and print its answer.

    The prompt combines the paper agent's analysis (``conclusions``) with the
    list of figure paths. Note that this definition replaces the earlier
    ``main`` that ran the paper agent.

    Returns:
        The run result object produced by ``Runner.run``.
    """
    # The prompt section is labelled "CONCLUSIONS FROM THE PAPER", so feed it
    # the paper agent's final output rather than the full raw markdown in
    # `content`; previously the `conclusions` result was computed but never used.
    paper_conclusions = conclusions.final_output
    result = await Runner.run(
        image_agent,
        input=f"CONCLUSIONS FROM THE PAPER: {paper_conclusions}\n\nFIGURES: {out}",
    )
    print(result.final_output)
    return result


# Run the figure-analysis agent; `main` prints the final output and the full
# run result is kept in `result`.
result = await main()

{
  "descriptions": [
    {
      "c:\\Users\\Aula\\Desktop\\openai-hackathon-wait\\data\\Bagdasarian_et_al_(2024)_Acute Effects_of_Hallucinogens_on_FC\\_page_0_Figure_5.jpeg": "This figure summarizes the functional connectivity changes associated with Psilocybin and Salvinorin-A, indicating brain regions impacted and the relationships between these hallucinogens and their respective receptor activations. It is visually appealing with color-coded elements for clarity."
    },
    {
      "c:\\Users\\Aula\\Desktop\\openai-hackathon-wait\\data\\Bagdasarian_et_al_(2024)_Acute Effects_of_Hallucinogens_on_FC\\_page_1_Figure_3.jpeg": "The image presents connectivity changes from the claustrum following treatments with Psilocybin and Salvinorin-A. It features quantitative measurement scaling, with statistical significance represented through color gradients. Notable brain areas are indicated with arrows."
    },
    {
      "c:\\Users\\Aula\\Desktop\\openai-hackathon-wait\\data\\Bagdasarian_e