# ü§ñ Automated Research Paper Analysis System
**Multi-Agent AI System for Academic Research Analysis**

---

**Author**: Mohamed Outahajala  
 
**Framework**: Google Agent Development Kit (ADK)  
---

## Quick Start

1. Upload your PDF research paper as `document.pdf`
2. Set your API keys in `.env`:

## Problem Statement
Analyzing research papers manually is time-consuming and requires:
- Reading lengthy PDFs
- Summarizing key findings
- Researching latest trends
- Synthesizing information from multiple sources

This process can take 2-3 hours per paper.

## Solution
An automated Multi-Agent Research System that:
- Extracts PDF content automatically
- Generates comprehensive summaries
- Performs real-time market research
- Produces structured reports in under 5 minutes

**Value**: Reduces research time by 95%, from 3 hours to 5 minutes per paper.

<!-- if numpy or pandas .. not installed -->
!pip install numpy
!pip install pandas
!pip install pypdf

In [1]:
# Load environment variables from a .env file
from dotenv import load_dotenv
load_dotenv()
import os
import requests
import json

# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Retrieve API keys from environment variables
GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")
GOOGLE_SEARCH_API_KEY = os.getenv("GOOGLE_SEARCH_API_KEY")
MODEL_NAME = os.getenv("MODEL_NAME", "gemini-2.5-flash")  # Default to "gemini-2.5-flash" if not set   


In [27]:

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

In [28]:
print("Google API Key:", GOOGLE_API_KEY)
print("Google Search API Key:", GOOGLE_SEARCH_API_KEY)
print("Model Name:", MODEL_NAME)

Google API Key: AIzaSyC8Fgp6KRj9VpFwJvxQBXvlBXpJmX-1ROs
Google Search API Key: AIzaSyArz9Ulr4GNaJOWyTCHs7aaDW7xWMj20a0
Model Name: gemini-2.5-flash


!pip install google-adk

In [29]:
from google.adk.agents import Agent, SequentialAgent, ParallelAgent, LoopAgent
from google.adk.models.google_llm import Gemini
from google.adk.runners import InMemoryRunner
from google.adk.tools import AgentTool, FunctionTool, google_search
from google.genai import types

print("‚úÖ ADK components imported successfully.")

‚úÖ ADK components imported successfully.


In [30]:
retry_config=types.HttpRetryOptions(
    attempts=5,  # Maximum retry attempts
    exp_base=7,  # Delay multiplier
    initial_delay=1,
    http_status_codes=[429, 500, 503, 504], # Retry on these HTTP errors
)

In [31]:
# Diagnostic: Check if document.pdf is accessible
import os

print("Current working directory:", os.getcwd())
print("Files in current directory:")
for item in os.listdir('.'):
    if item.endswith('.pdf'):
        print(f"  üìÑ {item}")

# Check if document.pdf exists
pdf_path = "document.pdf"
if os.path.exists(pdf_path):
    print(f"‚úÖ '{pdf_path}' found and accessible")
    # Quick test read
    from pypdf import PdfReader
    reader = PdfReader(pdf_path)
    print(f"   Pages: {len(reader.pages)}")
    print(f"   First 100 chars: {reader.pages[0].extract_text()[:100]}...")
else:
    print(f"‚ùå '{pdf_path}' NOT found in current directory")
    print(f"   You may need to specify the full path or move the file")

Current working directory: /Users/admin/HF/Agents5D
Files in current directory:
  üìÑ document.pdf
‚úÖ 'document.pdf' found and accessible
   Pages: 12
   First 100 chars: TransVar ‚Äì the Corpus for Variation and Change Study of the
Historical Transcarpathian lects
Anonymo...


In [32]:
# PDF Search Tool
from pypdf import PdfReader

def search_pdf_tool(file_path: str, query: str) -> str:
    """
    Searches for keywords within a PDF file and returns relevant text snippets.
    If the file is not found, returns mock data for demonstration.
    """
    print(f"    üîé [Tool] Searching PDF '{file_path}' for: '{query}'")
    
    # 1. Try to read the actual file
    if os.path.exists(file_path):
        try:
            reader = PdfReader(file_path)
            text = ""
            for page in reader.pages:
                text += page.extract_text() + "\n"
            
            # Simple keyword search (in a real app, use vector search/RAG)
            paragraphs = text.split('\n\n')
            results = [p for p in paragraphs if query.lower() in p.lower()]
            
            if results:
                return "\n---\n".join(results[:3]) # Return top 3 matches
            return "No specific matches found in the document."
        except Exception as e:
            return f"Error reading PDF: {e}"

    # 2. Fallback Mock Data (for testing without a file)
    else:
        print(f"    ‚ö†Ô∏è [Tool] File not found. Using MOCK data for demonstration.")
        mock_content = {
            "quantum": "Quantum computing uses qubits to perform calculations exponentially faster than classical bits.",
            "ai": "Artificial Intelligence agents can perceive their environment and take actions to achieve goals.",
            "climate": "Climate change mitigation requires a transition to renewable energy sources."
        }
        for key, value in mock_content.items():
            if key in query.lower():
                return f"Found in mock PDF: {value}"
        return "No information found in the mock document."

print("‚úÖ PDF Search Tool initialized.")

‚úÖ PDF Search Tool initialized.


In [33]:
# 1. PDF Reader Agent 
pdf_reader_agent = Agent(
    name="PDFReader",
    model=Gemini(model=MODEL_NAME, retry_options=retry_config),
    instruction="""You are an expert document researcher. 
    Your job is to use the `search_pdf_tool` to find specific information in a document based on the user's request.
    Always cite the specific text segments you found.""",
    tools=[FunctionTool(search_pdf_tool)],
    output_key="pdf_findings"
)



In [34]:
# 2. Summarizer Agent
summarizer_agent = Agent(
    name="Summarizer",
    model=Gemini(model=MODEL_NAME, retry_options=retry_config),
    instruction="""You are an expert scientific paper analyst. 
    Read the research paper content provided: {pdf_findings}
    
    Create a comprehensive summary that includes:
    1. **Main Topic**: What is the paper about?
    2. **Key Contributions**: What are the novel contributions and innovations?
    3. **Methodology**: What approaches or methods were used?
    4. **Results/Findings**: What were the main outcomes?
    
    Keep the summary clear, structured, and under 200 words.
    If the findings are empty, state that no information was found.""",
    output_key="final_summary"
)

In [35]:
# 3. Tech Researcher
tech_researcher = Agent(
    name="Tech_Researcher",
    model=Gemini(model=MODEL_NAME, retry_options=retry_config),

    instruction="""
You are a senior research analyst.

Input: {pdf_findings}

1. Extract the paper‚Äôs **main technical focus**, research problem, and method.
2. Evaluate the paper technically:
   - What is innovative?
   - What is weak or missing?
   - What assumptions does it make?
   - Possible real-world applications?
3. Perform a web search using the search tool:
   - Find the latest (2024‚Äì2025) work, breakthroughs, or criticisms related to the same topic.
   - Prefer scholarly or technical sources.
4. Produce a concise synthesis (max 100 words):
   - Technical evaluation of the paper
   - How the latest research trends compare or validate/challenge it
   - Missing gaps or future directions

Your output must be factual, technical, and short.
""",

    tools=[google_search],
    output_key="tech_research"
)


In [36]:
# The ParallelAgent runs all its sub-agents simultaneously
# other agents can be added here later
parallel_research_team = ParallelAgent(
    name="ParallelResearchTeam",
    sub_agents=[summarizer_agent, tech_researcher],
)

In [37]:
# Agregate results from parallel agents
research_aggregator = Agent(
    name="ResearchAggregator",
    model=Gemini(model=MODEL_NAME, retry_options=retry_config),
    instruction="""
You are a research synthesis expert.
Input:
1. Summary from Summarizer Agent: {final_summary}
2. Technical research from Tech Researcher Agent: {tech_research}
Your task is to combine these inputs into a single, coherent research report that addresses the user's original question. Ensure the report is clear, concise, and well-structured.
""",
    output_key="research_report"
)

In [38]:
# Create the Sequential Agent to agregate read PDF, then run workflow
Research_workflow_Agent = SequentialAgent(
    name="ResearchWorkflowAgent",
    sub_agents=[pdf_reader_agent, parallel_research_team, research_aggregator],
)

In [39]:
# show the workflow structure
print("üöÄ Research Workflow Agent Structure:")
print("="*80)
print("1. PDF Content Extraction")
print("2. Paper Summary")
print("3. Technical Evaluation & Trends")
print("4. Comprehensive Research Report")
print("="*80)


üöÄ Research Workflow Agent Structure:
1. PDF Content Extraction
2. Paper Summary
3. Technical Evaluation & Trends
4. Comprehensive Research Report


In [46]:
# ========================================
# üöÄ EXECUTE THE WORKFLOW - MAIN EXECUTION
# ========================================

import uuid
from datetime import datetime
from google.genai import types

# Generate unique IDs
user_id = f"user_{uuid.uuid4().hex[:8]}"
session_id = f"session_{datetime.now().strftime('%Y%m%d_%H%M%S')}"

runner = InMemoryRunner(
    agent=Research_workflow_Agent,
    app_name="agents"
)

print(f"üöÄ Starting AI Research Paper Analysis...")
print(f"User ID: {user_id}")
print(f"Session ID: {session_id}")
print("="*80 + "\n")

# Consume the generator properly
final_result = None
step_count = 0

# ‚úÖ MUST use types.Content object, NOT a string
message = types.Content(
    role="user",
    parts=[types.Part(text="Analyze the content of document.pdf and provide a comprehensive research report with technical evaluation and latest trends.")]
)

try:
    for step in runner.run(
        user_id=user_id,
        session_id=session_id,
        new_message=message  # ‚úÖ Message object (not string)
    ):
        step_count += 1
        print(f"‚úì Step {step_count}: Processing...")
        final_result = step
except Exception as e:
    print(f"‚ùå Error during execution: {e}")
    import traceback
    traceback.print_exc()

# ========================================
# üìä DISPLAY RESULTS  
# ========================================

print("\n" + "="*80)
print("üìä FINAL RESEARCH REPORT")
print("="*80 + "\n")

if final_result and hasattr(final_result, 'data'):
    data = final_result.data
    
    for key, value in data.items():
        print(f"\n{'='*80}")
        print(f"üìÑ {key.upper()}")
        print(f"{'='*80}")
        print(value)
else:
    print("‚ùå No result available")
    print(f"Steps processed: {step_count}")

print("\n‚úÖ Analysis Complete!")

Exception in thread Thread-16 (_asyncio_thread_main):
Traceback (most recent call last):
  File "/usr/local/Cellar/python@3.10/3.10.14_1/Frameworks/Python.framework/Versions/3.10/lib/python3.10/threading.py", line 1016, in _bootstrap_inner
    self.run()
  File "/usr/local/Cellar/python@3.10/3.10.14_1/Frameworks/Python.framework/Versions/3.10/lib/python3.10/threading.py", line 953, in run
    self._target(*self._args, **self._kwargs)
  File "/Users/admin/HF/Agents5D/.venv/lib/python3.10/site-packages/google/adk/runners.py", line 329, in _asyncio_thread_main
    asyncio.run(_invoke_run_async())
  File "/usr/local/Cellar/python@3.10/3.10.14_1/Frameworks/Python.framework/Versions/3.10/lib/python3.10/asyncio/runners.py", line 44, in run
    return loop.run_until_complete(main)
  File "/usr/local/Cellar/python@3.10/3.10.14_1/Frameworks/Python.framework/Versions/3.10/lib/python3.10/asyncio/base_events.py", line 649, in run_until_complete
    return future.result()
  File "/Users/admin/HF/Age

üöÄ Starting AI Research Paper Analysis...
User ID: user_b40addaf
Session ID: session_20251128_235739


üìä FINAL RESEARCH REPORT

‚ùå No result available
Steps processed: 0

‚úÖ Analysis Complete!
