In [1]:
import arxiv
import pandas as pd

# Ask arXiv for the most recently submitted entries whose comment field
# contains "acl". The query itself cannot be narrowed with "co:acl+2023".
search = arxiv.Search(
    query="co:acl",
    max_results=1000,
    sort_by=arxiv.SortCriterion.SubmittedDate,
)

# One list per result; the attribute order here is the column order used
# when the rows are turned into a DataFrame.
papers = [
    [
        result.entry_id,
        result.updated,
        result.published,
        result.title,
        result.summary,
        result.comment,
        result.primary_category,
    ]
    for result in search.results()
]

In [2]:
# Column labels, in the same order as the values collected for each paper.
cols = [
    "entry_id",
    "updated",
    "published",
    "title",
    "summary",
    "comment",
    "primary_category",
]

# Build the table in one go from the accumulated rows.
df = pd.DataFrame(data=papers, columns=cols)

In [3]:
# Rows whose comment contains the substring "23" anywhere.
df.loc[df["comment"].str.contains("23")]

Unnamed: 0,entry_id,updated,published,title,summary,comment,primary_category
0,http://arxiv.org/abs/2305.15403v1,2023-05-24 17:59:03+00:00,2023-05-24 17:59:03+00:00,AV-TranSpeech: Audio-Visual Robust Speech-to-S...,Direct speech-to-speech translation (S2ST) aim...,Accepted to ACL 2023,cs.CL
1,http://arxiv.org/abs/2305.15387v1,2023-05-24 17:48:40+00:00,2023-05-24 17:48:40+00:00,Peek Across: Improving Multi-Document Modeling...,The integration of multi-document pre-training...,Accepted at ACL 2023; camera-ready version,cs.CL
2,http://arxiv.org/abs/2305.15358v1,2023-05-24 17:10:45+00:00,2023-05-24 17:10:45+00:00,Context-Aware Transformer Pre-Training for Ans...,Answer Sentence Selection (AS2) is a core comp...,Accepted at ACL 2023,cs.CL
3,http://arxiv.org/abs/2305.15344v1,2023-05-24 16:57:04+00:00,2023-05-24 16:57:04+00:00,Learning Answer Generation using Supervision f...,Recent studies show that sentence-level extrac...,Accepted at ACL 2023,cs.CL
4,http://arxiv.org/abs/2305.15212v1,2023-05-24 14:51:01+00:00,2023-05-24 14:51:01+00:00,Towards Adaptive Prefix Tuning for Parameter-E...,Fine-tuning large pre-trained language models ...,Accepted to ACL 2023 (Main conference),cs.CL
...,...,...,...,...,...,...,...
728,http://arxiv.org/abs/2203.09590v5,2023-05-04 15:49:54+00:00,2022-03-17 20:08:25+00:00,ECOLA: Enhanced Temporal Knowledge Embeddings ...,Since conventional knowledge embedding models ...,accepted to Findings of the ACL 2023,cs.CL
834,http://arxiv.org/abs/2203.05386v2,2023-05-15 18:09:50+00:00,2022-03-10 14:24:19+00:00,Faking Fake News for Real Fake News Detection:...,Despite recent advances in detecting fake news...,Accepted by ACL 2023,cs.CL
899,http://arxiv.org/abs/2202.13047v3,2023-05-18 05:25:49+00:00,2022-02-26 03:17:08+00:00,AugESC: Dialogue Augmentation with Large Langu...,Crowdsourced dialogue corpora are usually limi...,Findings of ACL 2023,cs.CL
914,http://arxiv.org/abs/2202.07255v2,2023-05-24 16:03:28+00:00,2022-02-15 09:04:14+00:00,Enhancing Cross-lingual Prompting with Dual Pr...,Prompting shows promising results in few-shot ...,ACL 2023 Findings,cs.CL


In [4]:
# Rows whose comment contains "23" but never the full year "2023"
# (i.e. short-year spellings and any incidental "23").
df.loc[
    lambda frame: frame["comment"].str.contains("23")
    & ~frame["comment"].str.contains("2023")
]

Unnamed: 0,entry_id,updated,published,title,summary,comment,primary_category
7,http://arxiv.org/abs/2305.15182v1,2023-05-24 14:14:08+00:00,2023-05-24 14:14:08+00:00,HiTIN: Hierarchy-aware Tree Isomorphism Networ...,Hierarchical text classification (HTC) is a ch...,Accepted by ACL'23,cs.CL
26,http://arxiv.org/abs/2305.14618v1,2023-05-24 01:35:10+00:00,2023-05-24 01:35:10+00:00,Abductive Commonsense Reasoning Exploiting Mut...,Abductive reasoning aims to find plausible exp...,accepted at ACL'23,cs.CL
37,http://arxiv.org/abs/2305.13833v1,2023-05-23 08:53:33+00:00,2023-05-23 08:53:33+00:00,Reducing Sensitivity on Speaker Names for Text...,Changing speaker names consistently throughout...,findings of ACL'23,cs.CL
215,http://arxiv.org/abs/2305.06407v1,2023-05-10 18:32:32+00:00,2023-05-10 18:32:32+00:00,Combo of Thinking and Observing for Outside-Kn...,Outside-knowledge visual question answering is...,"ACL-23, Main Conference",cs.CV
222,http://arxiv.org/abs/2305.05964v1,2023-05-10 08:16:36+00:00,2023-05-10 08:16:36+00:00,Interpretable Multimodal Misinformation Detect...,Multimodal misinformation on online social pla...,"Accepted by Findings of ACL 23. 9 pages, 6 fig...",cs.MM
266,http://arxiv.org/abs/2305.03336v1,2023-05-05 07:40:41+00:00,2023-05-05 07:40:41+00:00,"QCRI at SemEval-2023 Task 3: News Genre, Frami...",Misinformation spreading in mainstream and soc...,"Accepted at SemEval-23 (ACL-23, propaganda, di...",cs.CL
367,http://arxiv.org/abs/2212.03813v4,2023-05-23 13:39:39+00:00,2022-12-07 17:54:59+00:00,Robustness of Learning from Task Instructions,Traditional supervised learning mostly works o...,ACL'23 Finding Accepted,cs.CL
376,http://arxiv.org/abs/2211.11297v2,2023-05-23 08:40:14+00:00,2022-11-21 09:38:59+00:00,In-sample Curriculum Learning by Sequence Comp...,Curriculum learning has shown promising improv...,ACL'23,cs.CL
380,http://arxiv.org/abs/2211.08073v4,2023-05-22 11:55:52+00:00,2022-11-15 11:53:55+00:00,GLUE-X: Evaluating Natural Language Understand...,Pre-trained language models (PLMs) are known t...,Accepted to ACL-23 Findings,cs.CL
705,http://arxiv.org/abs/2203.10316v4,2022-09-15 13:43:33+00:00,2022-03-19 12:37:16+00:00,Learning to Reason Deductively: Math Word Prob...,Solving math word problems requires deductive ...,"12 pages, 7 figures, ACL-2022, additional expe...",cs.CL


In [5]:
# Keep only the papers whose comment points at ACL 2023.
# A bare substring match on "23" over-selects: it also keeps comments where
# "23" is an unrelated number (the "23 but not 2023" check surfaces an
# "ACL-2022" paper that way). Require either the full year "2023" or "acl"
# followed, after at most three separator characters, by a standalone "23"
# (ACL'23, ACL-23, ACL 23). Matching is case-insensitive.
# na=False makes a missing comment count as "no match" instead of putting NaN
# into the boolean mask.
df_acl_23 = df[
    df["comment"].str.contains(r"2023|\bacl\W{0,3}23\b", case=False, na=False)
].reset_index(drop=True)

In [6]:
# Persist the filtered table. The row index is written too (pandas default),
# so the file gets an unnamed first column.
df_acl_23.to_csv("arxiv_acl_2023.csv", index=True)