In [None]:
pip install arxiv

894.08s - pydevd: Sending message related to process being replaced timed-out after 5 seconds


Looking in indexes: https://mirrors.aliyun.com/pypi/simple/

[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m24.2[0m[39;49m -> [0m[32;49m25.1.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpython3.10 -m pip install --upgrade pip[0m
Note: you may need to restart the kernel to use updated packages.


In [8]:
pip install openai

Looking in indexes: https://mirrors.aliyun.com/pypi/simple/
Collecting openai
  Downloading https://mirrors.aliyun.com/pypi/packages/81/d2/e3992bb7c6641b765c1008e3c96e076e0b50381be2cce344e6ff177bad80/openai-1.79.0-py3-none-any.whl (683 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m683.3/683.3 kB[0m [31m3.4 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting anyio<5,>=3.5.0 (from openai)
  Downloading https://mirrors.aliyun.com/pypi/packages/a1/ee/48ca1a7c89ffec8b6a0c5d02b89c305671d5ffd8d3c94acf8b8c408575bb/anyio-4.9.0-py3-none-any.whl (100 kB)
Collecting distro<2,>=1.7.0 (from openai)
  Downloading https://mirrors.aliyun.com/pypi/packages/12/b3/231ffd4ab1fc9d679809f356cebee130ac7daa00d6d6f3206dd4fd137e9e/distro-1.9.0-py3-none-any.whl (20 kB)
Collecting httpx<1,>=0.23.0 (from openai)
  Downloading https://mirrors.aliyun.com/pypi/packages/2a/39/e50c7c3a983047577ee07d2a9e53faf5a69493943ec3f6a384bdc792deb2/httpx-0.28.1-py3-none-any.whl (73 kB)
Collecting jiter<1,>=0.4.0 (

In [7]:
import arxiv
import json
import os
from typing import List

In [9]:
PAPER_DIR = "papers"

In [10]:
def search_papers(topic: str, max_results: int = 5) -> List[str]:
  """
    Search for papers on arXiv based on a topic and store their information.

    Args:
        topic: The topic to search for
        max_results: Maximum number of results to retrieve (default: 5)

    Returns:
        List of paper IDs found in the search
  """

  client = arxiv.Client()
  search = arxiv.Search(
      query = topic,
      max_results = max_results,
      sort_by = arxiv.SortCriterion.Relevance
  )
  papers = client.results(search)

  path = os.path.join(PAPER_DIR, topic.lower().replace(" ", "_"))
  os.makedirs(path, exist_ok=True)
  file_path = os.path.join(path, "papers_info.json")
  # Try to load existing papers info
  try:
      with open(file_path, "r") as json_file:
          papers_info = json.load(json_file)
  except (FileNotFoundError, json.JSONDecodeError):
      papers_info = {}

  paper_ids = []
  for paper in papers:
    paper_ids.append(paper.get_short_id())
    paper_info = {
        'title': paper.title,
        'authors': [author.name for author in paper.authors],
        'summary': paper.summary,
        'pdf_url': paper.pdf_url,
        'published': str(paper.published.date())
    }
    papers_info[paper.get_short_id()] = paper_info

  with open(file_path, "w") as json_file:
        json.dump(papers_info, json_file, indent=2)

  print(f"Results are saved in: {file_path}")

  return paper_ids

In [11]:
search_papers("computers")

Results are saved in: papers/computers/papers_info.json


['1310.7911v2',
 'math/9711204v1',
 '2208.00733v1',
 '2504.07020v1',
 '2403.03925v1']

In [12]:
def extract_info(paper_id: str) -> str:
    """
    Search for information about a specific paper across all topic directories.

    Args:
        paper_id: The ID of the paper to look for

    Returns:
        JSON string with paper information if found, error message if not found
    """

    for item in os.listdir(PAPER_DIR):
        item_path = os.path.join(PAPER_DIR, item)
        if os.path.isdir(item_path):
            file_path = os.path.join(item_path, "papers_info.json")
            if os.path.isfile(file_path):
                try:
                    with open(file_path, "r") as json_file:
                        papers_info = json.load(json_file)
                        if paper_id in papers_info:
                            return json.dumps(papers_info[paper_id], indent=2)
                except (FileNotFoundError, json.JSONDecodeError) as e:
                    print(f"Error reading {file_path}: {str(e)}")
                    continue

    return f"There's no saved information related to paper {paper_id}."

In [13]:
extract_info('1310.7911v2')

'{\n  "title": "Compact manifolds with computable boundaries",\n  "authors": [\n    "Zvonko Iljazovic"\n  ],\n  "summary": "We investigate conditions under which a co-computably enumerable closed set\\nin a computable metric space is computable and prove that in each locally\\ncomputable computable metric space each co-computably enumerable compact\\nmanifold with computable boundary is computable. In fact, we examine the notion\\nof a semi-computable compact set and we prove a more general result: in any\\ncomputable metric space each semi-computable compact manifold with computable\\nboundary is computable. In particular, each semi-computable compact\\n(boundaryless) manifold is computable.",\n  "pdf_url": "http://arxiv.org/pdf/1310.7911v2",\n  "published": "2013-10-29"\n}'

In [7]:
tools = [
    {   
        "type": "function",
        "function": {
            "name": "search_papers",
            "description": "Search for papers on arXiv based on a topic and store their information.",
            "parameters": {
                "topic": {
                    "type": "string",
                    "description": "The topic to search for"
                },
                "max_results": {
                    "type": "integer"
                }
            }
        }
    },
    {   
        "type": "function",
        "function": {
            "name": "extract_info",
            "description": "Search for information about a specific paper across all topic directories.",
            "parameters": {
                "paper_id": {
                    "type": "string",
                    "description": "The ID of the paper to look for"
                }
            }
        }
    }
]

In [15]:
mapping_tool_function = {
    "search_papers": search_papers,
    "extract_info": extract_info
}


def execute_tool(tool_name, tool_args):
  result = mapping_tool_function[tool_name](**tool_args)

  if result in None:
    result = "The operation completed but didn't return any results."
  elif isinstance(result, list):
    result = ", ".join(result)
  elif isinstance(result, dict):
    result = json.dumps(result, indent = 2)
  else:
    result = str(result)

  return result


In [2]:
from openai import OpenAI
client = OpenAI(api_key="sk-lnvdfaibxybnznhrmbmugwvlnrjmvquaketukuymhzvdyvzo", base_url="https://api.siliconflow.cn/v1")

In [3]:
response = client.chat.completions.create(  
    model="Qwen/Qwen2.5-Coder-32B-Instruct",  
    messages=[{  
        "role": "user",  
        "content": "编写Python异步爬虫教程，包含代码示例和注意事项"  
    }],  
    temperature=0.7,  
    max_tokens=4096  
)  
response

ChatCompletion(id='0196e2a4a570bc6a91645a5a3ad562b0', choices=[Choice(finish_reason='stop', index=0, logprobs=None, message=ChatCompletionMessage(content=' 编写一个Python异步爬虫可以大大提高爬取数据的效率，尤其是在需要处理大量请求的情况下。Python的`asyncio`库和`aiohttp`库是实现异步爬虫的关键工具。下面是一个详细的教程，包括代码示例和注意事项。\n\n### 环境准备\n\n首先，确保你已经安装了`asyncio`和`aiohttp`库。你可以使用`pip`来安装：\n\n```bash\npip install aiohttp\n```\n\n### 基本概念\n\n- **异步编程**：异步编程允许程序在等待某些操作（如网络请求）完成时执行其他任务，从而提高效率。\n- **事件循环**：`asyncio`库的核心是事件循环，它负责调度和执行异步任务。\n- **协程**：协程是一种使用`async def`定义的函数，可以暂停和恢复执行。\n\n### 代码示例\n\n下面是一个简单的异步爬虫示例，它从多个URL获取网页内容并打印出来。\n\n```python\nimport aiohttp\nimport asyncio\n\nasync def fetch(session, url):\n    async with session.get(url) as response:\n        return await response.text()\n\nasync def main(urls):\n    async with aiohttp.ClientSession() as session:\n        tasks = []\n        for url in urls:\n            task = asyncio.create_task(fetch(session, url))\n            tasks.append(task)\n        results = await asyncio.gather(*tasks)\n 

In [10]:
def process_query(query):
    
    messages = [{'role': 'user', 'content': query}]
    
    response = client.chat.completions.create(max_tokens = 2024,
                                  model = 'deepseek-ai/DeepSeek-V2.5', 
                                  tools = tools,
                                  messages = messages)
    print(response.content)
    process_query = True
    while process_query:
        assistant_content = []

        for content in response.choices:
            if content.type == 'text':
                
                print(content.text)
                assistant_content.append(content)
                
                if len(response.content) == 1:
                    process_query = False
            
            elif content.type == 'tool_use':
                
                assistant_content.append(content)
                messages.append({'role': 'assistant', 'content': assistant_content})
                tool_id = content.id
                tool_args = content.input
                tool_name = content.name
                print(f"Calling tool {tool_name} with args {tool_args}")
                
                result = execute_tool(tool_name, tool_args)
                messages.append({"role": "user", 
                                  "content": [
                                      {
                                          "type": "tool_result",
                                          "tool_use_id": tool_id,
                                          "content": result
                                      }
                                  ]
                                })
                response = client.messages.create(max_tokens = 2024,
                                  model = 'claude-3-7-sonnet-20250219', 
                                  tools = tools,
                                  messages = messages) 
                
                if len(response.content) == 1 and response.content[0].type == "text":
                    print(response.content[0].text)
                    process_query = False

In [18]:
def chat_loop():
    print("Type your queries or 'quit' to exit.")
    while True:
        try:
            query = input("\nQuery: ").strip()
            if query.lower() == 'quit':
                break
    
            process_query(query)
            print("\n")
        except Exception as e:
            print(f"\nError: {str(e)}")

In [11]:
process_query("LLM interpretability")

ChatCompletion(id='0196e2a65b60fb09b201808beb7fd0b8', choices=[Choice(finish_reason='tool_calls', index=0, logprobs=None, message=ChatCompletionMessage(content='', refusal=None, role='assistant', annotations=None, audio=None, function_call=None, tool_calls=[ChatCompletionMessageToolCall(id='0196e2a66103579041d77958c8296162', function=Function(arguments='{"topic":"LLM interpretability","max_results":5}', name='search_papers'), type='function')]))], created=1747559275, model='deepseek-ai/DeepSeek-V2.5', object='chat.completion', service_tier=None, system_fingerprint='', usage=CompletionUsage(completion_tokens=29, prompt_tokens=161, total_tokens=190, completion_tokens_details=None, prompt_tokens_details=None))


AttributeError: 'ChatCompletion' object has no attribute 'content'