In [1]:
import arxiv
from datetime import datetime, timedelta, timezone, time, date
from zoneinfo import ZoneInfo
from google import genai
import os

In [None]:
def last_arxiv_new_window(now_et=None):
    """Return the submission window behind the most recent arXiv announcement.

    The schedule encoded here treats announcements as happening at 20:00
    US/Eastern on Sunday through Thursday, with submission windows that open
    and close at 14:00 US/Eastern.

    Args:
        now_et: Reference "current" time. ``None`` means the real current
            time. An aware datetime in any time zone is converted to
            US/Eastern first; a naive datetime is taken to already be
            US/Eastern wall-clock time.

    Returns:
        A tuple ``(start_utc, end_utc, announcement_date)``: the half-open
        window bounds as aware UTC datetimes, and the ``date`` of the
        announcement the window belongs to.
    """
    ET = ZoneInfo("America/New_York")
    if now_et is None:
        now_et = datetime.now(ET)
    elif now_et.tzinfo is not None:
        # An aware datetime from another zone (e.g. UTC) must be expressed in
        # Eastern wall-clock time before its date/time fields are inspected,
        # otherwise the 20:00 cutoff below is applied to the wrong clock.
        now_et = now_et.astimezone(ET)

    # announcement candidate: today if after 20:00 ET, else yesterday
    candidate = now_et.date() if now_et.time() >= time(20, 0) else (now_et.date() - timedelta(days=1))
    # if candidate falls on Fri/Sat, back up to the last announcement day
    while candidate.weekday() in (4, 5):  # Fri=4, Sat=5
        candidate -= timedelta(days=1)

    wd = candidate.weekday()
    if wd == 0:
        # Monday announcement -> window: Fri 14:00 -> Mon 14:00 (3 days)
        start_back, end_back = 3, 0
    elif wd == 6:
        # Sunday announcement -> window: Thu 14:00 -> Fri 14:00
        start_back, end_back = 3, 2
    else:
        # Tue/Wed/Thu announcements -> window: previous day 14:00 -> candidate 14:00
        start_back, end_back = 1, 0

    cutoff = time(14, 0)
    start_et = datetime.combine(candidate - timedelta(days=start_back), cutoff, tzinfo=ET)
    end_et = datetime.combine(candidate - timedelta(days=end_back), cutoff, tzinfo=ET)

    # convert to UTC for comparing against API datetimes
    utc = ZoneInfo("UTC")
    return start_et.astimezone(utc), end_et.astimezone(utc), candidate

# Compute the submission window of the latest announcement, then list the
# quant-ph entries whose first-submission time falls inside it.
start, end, announcement_date = last_arxiv_new_window()
print("mailing window (UTC):", start, "→", end, " (announcement date:", announcement_date, ")")

# Query arXiv API (may still lag; increase max_results)
search = arxiv.Search(
    query="cat:quant-ph",
    max_results=2000,
    sort_by=arxiv.SortCriterion.SubmittedDate,
    sort_order=arxiv.SortOrder.Descending,
)
client = arxiv.Client()
new = []
for r in client.results(search):
    if r.published < start:
        # Results are requested newest-first, so once an entry predates the
        # window every remaining one does too; stop instead of paging
        # through the rest of the 2000 results.
        break
    if r.published < end:
        new.append(r)

print("Found", len(new), "entries via API for that window (expected: match site/RSS).")
for r in new:
    print(r.entry_id, r.published, r.title)

In [None]:
# Arxiv's daily cutoff is 00:00 UTC
now = datetime.now(timezone.utc)
today_utc = datetime(now.year, now.month, now.day, tzinfo=timezone.utc)

# One full UTC day: [today - 2 days, today - 1 day).
# NOTE(review): this is the day *before* yesterday in UTC; if yesterday is
# what is wanted, the offsets should be days=1 and days=0 — confirm intent.
start = today_utc - timedelta(days=2)
end = today_utc - timedelta(days=1)

print("Collecting new submissions from", start, "to", end)

search = arxiv.Search(
    query="cat:quant-ph",
    max_results=200,
    sort_by=arxiv.SortCriterion.SubmittedDate,
    sort_order=arxiv.SortOrder.Descending,
)

client = arxiv.Client()

new_submissions = []
for result in client.results(search):
    # Important: check the first submission time (published)
    if result.published < start:
        # Newest-first ordering: everything from here on is older than the
        # window, so stop iterating (and stop requesting further pages).
        break
    if result.published < end:
        new_submissions.append(result)

# Print them out
for res in new_submissions:
    print(res.entry_id, res.published, res.title)

In [None]:
# pip install feedparser
import feedparser
from datetime import datetime, timezone

# Print id, publication time (UTC) and title for every entry in the quant-ph RSS feed.
feed = feedparser.parse("https://rss.arxiv.org/rss/quant-ph")
for e in feed.entries:
    # e.link looks like "https://arxiv.org/abs/2510.00056"
    arxiv_id = e.link.rsplit("/", 1)[-1]
    # published_parsed -> time.struct_time (UTC). The key can be missing, or
    # present but None when the date string could not be parsed, so test the
    # value rather than just the attribute's existence.
    parsed = e.get("published_parsed")
    pub = datetime(*parsed[:6], tzinfo=timezone.utc) if parsed else None
    print(arxiv_id, pub, e.title)


## Test Gemini

In [14]:
import feedparser
# Fetch the quant-ph RSS feed again for interactive inspection.
feed = feedparser.parse("https://rss.arxiv.org/rss/quant-ph")

# Display the last entry to see which fields feedparser exposes per item.
feed.entries[-1]

{'title': 'Probing the Critical Point (CritPt) of AI Reasoning: a Frontier Physics Research Benchmark',
 'title_detail': {'type': 'text/plain',
  'language': None,
  'base': 'https://rss.arxiv.org/rss/quant-ph',
  'value': 'Probing the Critical Point (CritPt) of AI Reasoning: a Frontier Physics Research Benchmark'},
 'links': [{'rel': 'alternate',
   'type': 'text/html',
   'href': 'https://arxiv.org/abs/2509.26574'}],
 'link': 'https://arxiv.org/abs/2509.26574',
 'summary': 'arXiv:2509.26574v2 Announce Type: replace-cross \nAbstract: While large language models (LLMs) with reasoning capabilities are progressing rapidly on high-school math competitions and coding, can they reason effectively through complex, open-ended challenges found in frontier physics research? And crucially, what kinds of reasoning tasks do physicists want LLMs to assist with? To address these questions, we present the CritPt (Complex Research using Integrated Thinking - Physics Test, pronounced "critical point"),

In [2]:
def fetch_current_arxiv_postings_rss(category: str) -> list:
    """
    Returns the entries currently in a category's arXiv RSS feed, excluding
    replacements.

    Entries whose announce type contains "replace" (e.g. "replace",
    "replace-cross") are skipped; new submissions and new cross-lists are
    kept. Entries that carry no announce type at all are kept as well.

    Args:
        category: arXiv category identifier used in the feed URL, e.g. "quant-ph".

    Returns:
        A list of dicts with keys "title", "authors", "summary", "url" and
        "published" ("published" is None when the entry has no such field).
    """
    import feedparser

    url = f"https://rss.arxiv.org/rss/{category}"
    feed = feedparser.parse(url)

    results = []
    for e in feed.entries:
        # The announce type is an arXiv-specific extension field; fall back to
        # an empty string when it is absent so .lower() cannot hit None.
        announce_type = getattr(e, "arxiv_announce_type", None) or ""
        if "replace" in announce_type.lower():
            continue  # skip replaced articles but keeps new cross-lists

        results.append({
            "title": e.title,
            "authors": e.get("authors", []),
            "summary": e.summary,
            "url": e.link,
            "published": str(e.published) if hasattr(e, "published") else None,
        })

    return results

In [5]:
# Fetch the quant-ph feed once and keep the result in `papers_today`.
papers_today = fetch_current_arxiv_postings_rss("quant-ph")

In [None]:
import os, json, re, html
from google import genai
from google.genai import types

def clean(text: str) -> str:
    """Decode HTML entities, drop anything that looks like a tag, and trim whitespace.

    Falsy input (``None`` or an empty string) yields an empty string.
    """
    decoded = html.unescape(text if text else "")
    tagless = re.sub(r"<[^>]+>", "", decoded)
    return tagless.strip()

def _format_paper_block(paper) -> str:
    """Render one paper dict as the plain-text block embedded in the prompt."""
    # Normalize authors (feedparser often gives dicts with 'name').
    # `or []` also covers an explicit None stored under the key.
    authors = []
    for a in paper.get("authors") or []:
        if isinstance(a, dict) and "name" in a:
            authors.append(a["name"])
        elif hasattr(a, "name"):  # object exposing a .name attribute
            authors.append(a.name)
        else:
            authors.append(str(a))

    title = (paper.get("title") or "").strip()
    abstract = clean(paper.get("summary", ""))
    url = paper.get("url") or ""

    return (
        f"Title: {title}\n"
        f"Authors: {', '.join(authors)}\n"
        f"Abstract: {abstract}\n"
        f"URL: {url}\n"
        "----"
    )


def filter_papers_with_gemini(papers, research_focus: str):
    """
    Takes output of fetch_current_arxiv_postings_rss and returns
    a list of dicts: [{title, url, reason}]

    Args:
        papers: Iterable of paper dicts with "title", "authors", "summary"
            and "url" keys (missing or None values are tolerated).
        research_focus: Free-text description of the research areas to look
            for; it is inserted verbatim into the prompt.

    Returns:
        The JSON value decoded from the model's reply. An empty ``papers``
        input returns ``[]`` without contacting the API.

    Raises:
        KeyError: if the GEMINI_API_KEY environment variable is not set.
        ValueError: if the model returns no text, or text that is not valid
            JSON (json.JSONDecodeError is a ValueError subclass).
    """
    blocks = [_format_paper_block(p) for p in papers]
    if not blocks:
        # Nothing to rank: skip the API call entirely.
        return []
    bundle = "\n".join(blocks)

    client = genai.Client(api_key=os.environ["GEMINI_API_KEY"])

    user_prompt = f"""
        Research areas to look for:
        {research_focus}

        Here are today's arXiv papers (each separated by ----):
        {bundle}

        Select only the relevant papers and return them as a JSON array. 
        For selecting relevant papers, make sure it would be of interest for
        a research lab that works on circuit QED and experimental quantum computing 
        (physics and engineering) with superconducting qubits. Err on the side of 
        exclusion, i.e., return only the most 3-4 relevant papers. Only if there 
        are more that are EXTREMELY relevant should you include more.

        Each returned object must have: title (str), url (str), reason (str).
    """

    response = client.models.generate_content(
        model="gemini-2.5-flash",   # fast + free tier friendly
        contents=user_prompt,
        config=types.GenerateContentConfig(
            temperature=0,
            response_mime_type="application/json",  # force valid JSON
        ),
    )

    raw = response.text
    if not raw:
        # Surface an empty reply as a clear error instead of the opaque
        # TypeError that json.loads(None) would raise.
        raise ValueError("Gemini returned an empty response; nothing to parse")
    return json.loads(raw)



In [21]:
# Rank the fetched feed entries against a sample research focus and display the parsed JSON result.
filter_papers_with_gemini(papers_today, "superconducting qubits")

[{'title': 'Passive detection of Schwinger boson dynamics via a qubit',
  'url': 'https://arxiv.org/abs/2510.00108',
  'reason': "This paper is highly relevant as it proposes an integrated photonic device where a transmon qubit capacitively couples to a microwave cross-resonator, explicitly mentioning 'superconducting circuits' and 'transmon qubit' for quantum sensing. This directly aligns with experimental physics and engineering of superconducting qubits."},
 {'title': 'Exploiting Translational Symmetry for Quantum Computing with Squeezed Cat Qubits',
  'url': 'https://arxiv.org/abs/2510.00497',
  'reason': "This work focuses on 'squeezed cat quantum error correction (QEC) codes' and their implementation for logical operations. Cat qubits are a type of bosonic qubit frequently realized in superconducting circuits, making this paper directly relevant to the physics and engineering of advanced superconducting qubit architectures."},
 {'title': 'Pre-Distillation of Magic States via Comp

In [11]:
# Display the raw list of paper dicts held in `papers_today`.
papers_today

[{'title': 'Evaluating noises of boson sampling with statistical benchmark methods',
  'authors': [{'name': 'Yang Ji, Yongjin Ye, Qiao Wang, Shi Wang, Jie Hou, Yongzheng Wu, Zijian Wang, Bo Jiang'}],
  'summary': 'arXiv:2510.00056v1 Announce Type: new \nAbstract: The lack of self-correcting codes hiders the development of boson sampling to be large-scale and robust. Therefore, it is important to know the noise levels in order to cautiously demonstrate the quantum computational advantage or realize certain tasks. Based on those statistical benchmark methods such as the correlators and the clouds, which are initially proposed to discriminate boson sampling and other mockups, we quantificationally evaluate noises of photon partial distinguishability and photon loss compensated by dark counts. This is feasible owing to the fact that the output distribution unbalances are suppressed by noises, which are actually results of multi-photon interferences. This is why the evaluation performance i