In [1]:
import os
from getpass import getpass

os.environ["OPENAI_API_KEY"] = getpass("Введите ваш OpenAI API ключ: ")

Введите ваш OpenAI API ключ:  ········


In [67]:
import re

import arxiv
import requests
from langchain.agents import AgentExecutor, Tool, create_react_agent
from langchain.chains import RetrievalQA
from langchain.chains.summarize import load_summarize_chain
from langchain.document_loaders import PyPDFLoader
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import FAISS
from langchain_core.tools.structured import StructuredTool
from langchain_openai import ChatOpenAI as OpenAI
from pydantic import BaseModel, Field

# Определяем вспомогательные функции для инструментов

In [81]:
class ArxivArticle(BaseModel):
    title: str = Field(description="Заголовок статьи")
    pdf_url: str = Field(description="Ссылка на PDF-файл статьи")


def search_arxiv(query: str) -> ArxivArticle:
    search = arxiv.Search(query=query, max_results=1)
    result = next(search.results())

    # Проверяем, есть ли pdf_url
    pdf_url = getattr(result, "pdf_url", None)
    if not pdf_url:
        # Формируем pdf_url вручную из entry_id, если не найден
        entry_id = result.entry_id  # например https://arxiv.org/abs/2301.12345
        # извлечь id из entry_id
        arxiv_id = entry_id.split("/")[-1]
        pdf_url = f"https://arxiv.org/pdf/{arxiv_id}.pdf"

    return ArxivArticle.model_validate(
        {"title": result.title, "summary": result.summary, "pdf_url": pdf_url}
    )


def download_pdf(pdf_url: str) -> str:
    """Скачивает pdf, сохраняет в temp.pdf."""
    r = requests.get(pdf_url)
    with open("temp.pdf", "wb") as f:
        f.write(r.content)
    return "temp.pdf"


def load_pdf(pdf_path: str):
    """Загружает PDF и разбивает на страницы."""
    loader = PyPDFLoader(pdf_path)
    return loader.load_and_split()


class PDFQA:
    """Класс для индексации PDF и ответа на вопросы по ней."""

    def __init__(self):
        self.qa_chain = None

    def index_pdf(self, pages):
        embeddings = OpenAIEmbeddings()
        vectorstore = FAISS.from_documents(pages, embeddings)
        retriever = vectorstore.as_retriever(search_kwargs={"k": 3})
        self.qa_chain = RetrievalQA.from_chain_type(llm=OpenAI(), retriever=retriever)

    def ask(self, question: str) -> str:
        if not self.qa_chain:
            return "PDF не загружен и не проиндексирован."
        return self.qa_chain.run(question)


pdf_qa = PDFQA()


def summarize_pdf(pdf_path: str) -> str:
    pages = load_pdf(pdf_path)
    llm = OpenAI(model_name="o3-mini")
    chain = load_summarize_chain(llm, chain_type="map_reduce")
    summary = chain.run(pages)
    return summary

# Определяем инструменты

In [82]:
def tool_search_arxiv(query: str) -> str:
    return search_arxiv(query)


def tool_load_and_index(pdf_url: str) -> str:
    pdf_path = download_pdf(pdf_url)
    pages = load_pdf(pdf_path)
    pdf_qa.index_pdf(pages)
    return f"PDF загружен и проиндексирован: {len(pages)} страниц"


def tool_ask_pdf(question: str) -> str:
    return pdf_qa.ask(question)


def tool_summarize(pdf_url: str) -> str:
    pdf_path = download_pdf(pdf_url)
    return summarize_pdf(pdf_path)


tools = [
    StructuredTool.from_function(
        name="SearchArxiv",
        func=tool_search_arxiv,
        description=(
            "Используется для поиска научных статей на arXiv по ключевым словам или тематике. "
            "На вход принимает текстовый поисковый запрос (например: 'diffusion models in computer vision'). "
            "Возвращает заголовок статьи, краткое описание (summary) и ссылку на PDF."
        ),
    ),
    Tool(
        name="LoadAndIndexPDF",
        func=tool_load_and_index,
        description=(
            "Используется для загрузки и подготовки PDF-документа для дальнейшего анализа. "
            "На вход принимает ссылку на PDF (обычно полученную из SearchArxiv). "
            "После выполнения позволяет задавать вопросы к содержимому PDF через инструмент AskPDF."
        ),
    ),
    Tool(
        name="AskPDF",
        func=tool_ask_pdf,
        description=(
            "Используется для получения ответов на вопросы по ранее загруженному PDF-файлу. "
            "Перед использованием необходимо сначала вызвать LoadAndIndexPDF. "
            "На вход принимает произвольный вопрос, связанный с содержимым PDF."
        ),
    ),
    Tool(
        name="SummarizePDF",
        func=tool_summarize,
        description=(
            "Используется для создания краткого резюме (summary) статьи на основе PDF-документа. "
            "На вход принимает ссылку на PDF-файл (полученную через SearchArxiv). "
            "Позволяет быстро понять суть статьи без чтения всего текста."
        ),
    ),
]

# Сам агент

In [83]:
prompt = hub.pull("hwchase17/react")
print(prompt.template)



Answer the following questions as best you can. You have access to the following tools:

{tools}

Use the following format:

Question: the input question you must answer
Thought: you should always think about what to do
Action: the action to take, should be one of [{tool_names}]
Action Input: the input to the action
Observation: the result of the action
... (this Thought/Action/Action Input/Observation can repeat N times)
Thought: I now know the final answer
Final Answer: the final answer to the original input question

Begin!

Question: {input}
Thought:{agent_scratchpad}


In [84]:
llm = OpenAI(model_name="o3-mini")

react_agent = create_react_agent(llm, tools, prompt)

In [85]:
agent_executor = AgentExecutor(
    agent=react_agent,
    tools=tools,
    verbose=True,
    handle_parsing_errors=True,
    max_iterations=5,
)

# Примеры

In [86]:
query = "diffusion models 2024"
search_result = agent_executor.invoke({"input": query})
print(search_result)



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mQuestion: diffusion models 2024
Thought: The query likely refers to recent research and developments in diffusion models around the year 2024. I want to search for relevant arXiv articles or summaries about diffusion models in 2024.
Action: SearchArxiv
Action Input: "diffusion models 2024"[0m

  result = next(search.results())


[36;1m[1;3mtitle="Rebuttal of Morris' criticism of the diffusive compressible Euler model" pdf_url='http://arxiv.org/pdf/2406.18241v1'[0m[32;1m[1;3mQuestion: diffusion models 2024  
Thought: The query “diffusion models 2024” likely refers to recent work or developments related to diffusion models (which can refer to either generative models in machine learning or other diffusion phenomena) in the year 2024. A search on arXiv returned one paper titled “Rebuttal of Morris' criticism of the diffusive compressible Euler model” (pdf link: http://arxiv.org/pdf/2406.18241v1). I will now summarize the content of this paper to see if it relates to diffusion models in the context expected for 2024, and then provide an answer based on that summary.  
Action: SummarizePDF  
Action Input: "http://arxiv.org/pdf/2406.18241v1"  [0m[36;1m[1;3mMagnus Svärd’s note defends the diffusive compressible Euler (dcE) model against Morris’s criticisms. The dcE model modifies the standard Euler equations 

In [89]:
query = "find Rebuttal of Morris' criticism of the diffusive compressible Euler model paper and give its summary"
search_result = agent_executor.invoke({"input": query})
print(search_result)



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mThought: I need to locate the paper that contains the rebuttal of Morris' criticism of the diffusive compressible Euler model and then provide its summary. I will start by searching for relevant articles on arXiv.

Action: SearchArxiv
Action Input: "Rebuttal of Morris' criticism diffusive compressible Euler model"[0m

  result = next(search.results())


[36;1m[1;3mtitle="Rebuttal of Morris' criticism of the diffusive compressible Euler model" pdf_url='http://arxiv.org/pdf/2406.18241v1'[0m[32;1m[1;3mQuestion: find Rebuttal of Morris' criticism of the diffusive compressible Euler model paper and give its summary
Thought: I found a paper on arXiv titled "Rebuttal of Morris' criticism of the diffusive compressible Euler model" with the PDF link http://arxiv.org/pdf/2406.18241v1. Now I'll retrieve a summary of this paper using the SummarizePDF tool.
Action: SummarizePDF
Action Input: http://arxiv.org/pdf/2406.18241v1[0m[36;1m[1;3mConcise Summary:

Magnus Svärd’s note defends the diffusive compressible Euler (dcE) model against M. Morris’s criticisms regarding its prediction of heat diffusion and sound attenuation. The dcE model modifies the classical Euler equations by adding diffusion terms—including a recently reinstated heat term (κT)—based on constitutive laws that require experimental calibration, rather than fundamental const

  result = next(search.results())


[36;1m[1;3mtitle="Rebuttal of Morris' criticism of the diffusive compressible Euler model" pdf_url='http://arxiv.org/pdf/2406.18241v1'[0m[32;1m[1;3mFinal Answer: The paper “Rebuttal of Morris' criticism of the diffusive compressible Euler model” by Magnus Svärd offers a defense of the diffusive compressible Euler (dcE) model against criticisms raised by M. Morris. The note explains that the dcE model—unlike the classical Euler equations—is modified to include diffusion terms (notably, a recently reinstated heat diffusion term, κT) based on experimentally calibrated constitutive laws rather than on fundamental constants. Svärd argues that Morris’s critique is based on an outdated version of the model (lacking the κT term) and that his interpretation of bulk viscosity’s role is misguided. In the dcE formulation, proper treatment of viscosity-related parameters is essential for adequately capturing non-ideal energy exchanges in polyatomic gases. Moreover, while both the dcE and tradi

In [88]:
query = "What is the research methodology used in the article Rebuttal of Morris’ criticism of the diffusive compressible Euler model"
search_result = agent_executor.invoke({"input": query})
print(search_result)



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mQuestion: What is the research methodology used in the article Rebuttal of Morris’ criticism of the diffusive compressible Euler model

Thought: I need to identify and describe the research methodology employed in the specified article. To do so, I will search for the article using the SearchArxiv tool as it is likely available on arXiv, and then extract details – especially parts of the summary or methodology section – that explain the research methods used.

Action: SearchArxiv
Action Input: "Rebuttal of Morris’ criticism of the diffusive compressible Euler model"[0m

  result = next(search.results())


[36;1m[1;3mtitle="Rebuttal of Morris' criticism of the diffusive compressible Euler model" pdf_url='http://arxiv.org/pdf/2406.18241v1'[0m[32;1m[1;3mThought: I have the article link. Now I will load the PDF and ask for a summary of the research methodology described in the article.

Action: LoadAndIndexPDF
Action Input: http://arxiv.org/pdf/2406.18241v1[0m

  embeddings = OpenAIEmbeddings()


[33;1m[1;3mPDF загружен и проиндексирован: 8 страниц[0m[32;1m[1;3mThought: I will now ask a question about the paper's research methodology using AskPDF.
Action: AskPDF
Action Input: "What is the research methodology used in the article 'Rebuttal of Morris’ criticism of the diffusive compressible Euler model'? Please include details on the approach used to rebut Morris’ criticism."[0m[38;5;200m[1;3mThe research methodology used in the article "Rebuttal of Morris’ criticism of the diffusive compressible Euler model" involves addressing the criticism put forward by Morris regarding the diffusive compressible Euler model. The author, Magnus Svärd, uses a theoretical approach to address Morris’ criticisms. Specifically, Svärd examines the points of criticism related to heat diffusion, sound attenuation, and material frame indifference in the diffusive compressible Euler model.

To rebut Morris’ criticisms, Svärd discusses the model itself, provides a detailed explanation of the dif