In [1]:
import arxiv
import datetime
import openai
import logging
from typing import List, Dict

# Set up logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')

# OpenAI API key (replace with your actual key)
openai.api_key = "your-openai-api-key"

def fetch_papers() -> List[arxiv.Result]:
    """Fetch papers from arXiv for the cs.CL category published today."""
    today = datetime.date.today()
    query = f"cat:cs.CL AND submittedDate:[{today}T00:00:00Z TO {today}T23:59:59Z]"
    
    client = arxiv.Client(
        page_size=100,
        delay_seconds=3.0,
        num_retries=3
    )
    client.results(search=arxiv.Search(query=query, max_results=100))

    search = arxiv.Search(
        query=query,
        max_results=100,
        sort_by=arxiv.SortCriterion.SubmittedDate
    )
    
    papers = list(c.results())
    logging.info(f"Fetched {len(papers)} papers from arXiv")
    return papers

In [2]:
a = fetch_papers()

  papers = list(search.results())
2024-07-01 14:52:35,241 - INFO - Requesting page (first: True, try: 0): https://export.arxiv.org/api/query?search_query=cat%3Acs.CL+AND+submittedDate%3A%5B2024-07-01T00%3A00%3A00Z+TO+2024-07-01T23%3A59%3A59Z%5D&id_list=&sortBy=submittedDate&sortOrder=descending&start=0&max_results=100
2024-07-01 14:52:36,465 - INFO - Got empty first page; stopping generation
2024-07-01 14:52:36,466 - INFO - Fetched 0 papers from arXiv


In [2]:
client = arxiv.Client(
    page_size=10
)

In [9]:
aa = client.results(
    search=arxiv.Search(
        query="cat:cs.CL",
        max_results=5,
        sort_by=arxiv.SortCriterion.SubmittedDate,
        sort_order=arxiv.SortOrder.Descending
    )
    
)

In [10]:
for result in aa:
    print(f"Title: {result.title}")
    print(f"Authors: {', '.join(author.name for author in result.authors)}")
    print(f"Published: {result.published}")
    print(f"DOI URL: {result.pdf_url}")
    # print(f"Summary: {result.summary}\n")


2024-07-09 00:49:25,053 - INFO - Requesting page (first: True, try: 0): https://export.arxiv.org/api/query?search_query=cat%3Acs.CL&id_list=&sortBy=submittedDate&sortOrder=descending&start=0&max_results=10
2024-07-09 00:49:26,384 - INFO - Got first page: 10 of 65762 total results


Title: Me, Myself, and AI: The Situational Awareness Dataset (SAD) for LLMs
Authors: Rudolf Laine, Bilal Chughtai, Jan Betley, Kaivalya Hariharan, Jeremy Scheurer, Mikita Balesni, Marius Hobbhahn, Alexander Meinke, Owain Evans
Published: 2024-07-05 17:57:02+00:00
DOI URL: http://arxiv.org/pdf/2407.04694v1
Title: ANAH-v2: Scaling Analytical Hallucination Annotation of Large Language Models
Authors: Yuzhe Gu, Ziwei Ji, Wenwei Zhang, Chengqi Lyu, Dahua Lin, Kai Chen
Published: 2024-07-05 17:56:38+00:00
DOI URL: http://arxiv.org/pdf/2407.04693v1
Title: Missed Causes and Ambiguous Effects: Counterfactuals Pose Challenges for Interpreting Neural Networks
Authors: Aaron Mueller
Published: 2024-07-05 17:53:03+00:00
DOI URL: http://arxiv.org/pdf/2407.04690v1
Title: Rethinking Visual Prompting for Multimodal Large Language Models with External Knowledge
Authors: Yuanze Lin, Yunsheng Li, Dongdong Chen, Weijian Xu, Ronald Clark, Philip Torr, Lu Yuan
Published: 2024-07-05 17:43:30+00:00
DOI URL: ht

In [1]:
import arxiv
from datetime import datetime, timedelta

# Define search parameters
search_query = "cat:cs.CL"  # Computation and Language category
today = datetime.now()
start_date = today.strftime("%Y%m%d%H%M%S")
end_date = (today + timedelta(days=1)).strftime("%Y%m%d%H%M%S")

# Search for papers
search = arxiv.Search(
    query=search_query,
    max_results=10,
    sort_by=arxiv.SortCriterion.SubmittedDate,
)

# Fetch and display results
for result in search.results():
    print(f"Title: {result.title}")
    print(f"Authors: {', '.join(author.name for author in result.authors)}")
    print(f"Published: {result.published}")
    print(f"Summary: {result.summary}\n")


  for result in search.results():


Title: Planetarium: A Rigorous Benchmark for Translating Text to Structured Planning Languages
Authors: Max Zuo, Francisco Piedrahita Velez, Xiaochen Li, Michael L. Littman, Stephen H. Bach
Published: 2024-07-03 17:59:53+00:00
Summary: Many recent works have explored using language models for planning problems.
One line of research focuses on translating natural language descriptions of
planning tasks into structured planning languages, such as the planning domain
definition language (PDDL). While this approach is promising, accurately
measuring the quality of generated PDDL code continues to pose significant
challenges. First, generated PDDL code is typically evaluated using planning
validators that check whether the problem can be solved with a planner. This
method is insufficient because a language model might generate valid PDDL code
that does not align with the natural language description of the task. Second,
existing evaluation sets often have natural language descriptions of th

In [3]:
list(search.results())

  list(search.results())


[arxiv.Result(entry_id='http://arxiv.org/abs/2407.03321v1', updated=datetime.datetime(2024, 7, 3, 17, 59, 53, tzinfo=datetime.timezone.utc), published=datetime.datetime(2024, 7, 3, 17, 59, 53, tzinfo=datetime.timezone.utc), title='Planetarium: A Rigorous Benchmark for Translating Text to Structured Planning Languages', authors=[arxiv.Result.Author('Max Zuo'), arxiv.Result.Author('Francisco Piedrahita Velez'), arxiv.Result.Author('Xiaochen Li'), arxiv.Result.Author('Michael L. Littman'), arxiv.Result.Author('Stephen H. Bach')], summary="Many recent works have explored using language models for planning problems.\nOne line of research focuses on translating natural language descriptions of\nplanning tasks into structured planning languages, such as the planning domain\ndefinition language (PDDL). While this approach is promising, accurately\nmeasuring the quality of generated PDDL code continues to pose significant\nchallenges. First, generated PDDL code is typically evaluated using plan