# arXiv-W | lean

Produces a reasonably formatted list from arXiv for weekly viewing.

Known issues: PDF export is not implemented yet; the output is a Markdown file.

## Parameters

In [1]:
# Reporting window as YYYYMMDD strings; both are spliced into the arXiv query.
startday = "20231225"
endday = "20231231"

# When true, each entry's abstract is written inside a collapsible section.
abstracts = True

## Initialise and Fetch from arXiv

In [2]:
import requests

from dateutil import parser
from IPython.display import display, Markdown
from datetime import datetime, timedelta #likely don't need this one

from time import time
from xml.etree import ElementTree as ET

# # from markdown2pdf import convertToPDF
# import pypandoc

# The arXiv category you want to fetch; it is substituted into the query below.
a = "quant-ph"

# `abstracts` is intentionally NOT reassigned here: it is set in the
# Parameters cell, and re-setting it in this cell silently overrode that value.

# Query the arXiv export API for entries in category `a` whose
# lastUpdatedDate lies in [startday 00:00, endday 00:00], oldest first,
# capped at 1000 results. `%5B` / `%5D` are the URL-encoded `[` / `]`.
# To filter on submission date instead, swap `lastUpdatedDate:` in the
# search_query for `submittedDate:`.
# NOTE(review): the window closes at 00:00 of `endday`, so entries updated
# during `endday` itself look excluded even though the report title says
# "to <endday>" - confirm whether the upper bound should be "2359".
url = (
    "https://export.arxiv.org/api/query"
    f"?search_query=cat:{a}+AND+lastUpdatedDate:%5B{startday}0000+TO+{endday}0000%5D"
    "&max_results=1000&sortBy=lastUpdatedDate&sortOrder=ascending"
)

# Send a GET request to the URL. The timeout keeps the notebook from hanging
# forever on a stalled connection; raise_for_status() surfaces HTTP errors
# here rather than as a confusing XML parse failure below.
response = requests.get(url, timeout=60)
response.raise_for_status()

# Parse the Atom XML response
root = ET.fromstring(response.content)

# Namespace dictionary used to find elements in the Atom feed
namespaces = {
    'atom': 'http://www.w3.org/2005/Atom',
    'arxiv': 'http://arxiv.org/schemas/atom',
}



## Create the Markdown

In [3]:
# Write the fetched feed (`root`) to a Markdown report: one heading per day
# (by each entry's `updated` date), then one item per paper with title, dates,
# arXiv link, authors and - when `abstracts` is true - a collapsible abstract.

# Date beautifications: parse the YYYYMMDD parameters and get weekday names.
startdate = parser.parse(startday)
startdayname = startdate.strftime("%A")

enddate = parser.parse(endday)
enddayname = enddate.strftime("%A")

# Creation time of this report; only used to tag the output file name.
refdate = datetime.now().replace(tzinfo=None)

# Timestamp of the entry that produced the most recent day heading.
# Starts as None so the very first entry always gets a heading; seeding it
# with `refdate` dropped the heading whenever the first entry was updated today.
lastdate = None

# Open the output file with UTF-8 encoding
with open(f"arxiv_{startdate.date()}_to_{enddate.date()}_created_{refdate.date()}.md", "w", encoding='utf-8') as file:
    file.write(f"## arXiv:quant-ph—from {startdayname}, {startdate.date()} to {enddayname}, {enddate.date()}\n\n")

    # Iterate over each entry in the XML data
    for entry in root.findall('atom:entry', namespaces):
        # Raw timestamp strings as delivered by the feed
        updated = entry.find('atom:updated', namespaces).text
        published = entry.find('atom:published', namespaces).text

        # Timezone info is dropped so only the calendar date as written in
        # the feed is compared and printed.
        date_object = parser.parse(updated).replace(tzinfo=None)
        published_day = parser.parse(published).date()

        # Start a new day section whenever the update date changes
        # (the query returns entries sorted by lastUpdatedDate, ascending).
        if lastdate is None or lastdate.date() != date_object.date():
            dayName = date_object.strftime("%A")
            file.write(f"## {dayName} | {date_object.date()}\n\n")
            lastdate = date_object

        # Extract and write the title
        title = entry.find('atom:title', namespaces).text
        title = ' '.join(title.split())  # Collapse newlines and runs of whitespace
        if abstracts:
            # The title line doubles as the clickable <summary> of a <details> block.
            file.write(f"<details> <summary> <b>{title}</b>—")
        else:
            file.write(f"<b>{title}</b>—")

        # Publication date, plus the update date when the raw timestamps differ
        if published == updated:
            file.write(f"{published_day}")
        else:
            file.write(f"{published_day} (updated: {date_object.date()})")

        # Extract and write the link to the paper (the Atom id is the abs URL)
        entry_id = entry.find('atom:id', namespaces).text
        file.write(f"\n\n [[arXiv]({entry_id})] ")

        # Extract and write the authors as a comma-separated list
        author_names = [
            author.find('atom:name', namespaces).text
            for author in entry.findall('atom:author', namespaces)
        ]
        file.write(", ".join(author_names))
        file.write("\n")

        # Extract and write the summary, closing the <summary>/<details> tags
        if abstracts:
            summary = entry.find('atom:summary', namespaces).text
            file.write(f"\n\n </summary> \n\n **Abstract** \n{summary} </details>\n\n")
        else:
            file.write("\n\n")

# convert(f"arxiv_{refdate}.md",f"arxiv_{refdate}.pdf")

## Preview

In [None]:
# Read the content of the Markdown file.
# The name must match the one used when the report was written
# (arxiv_<start>_to_<end>_created_<date>.md); the previous
# f"arxiv_{refdate}.md" pointed at a file that is never created.
# UTF-8 is given explicitly because the report is written as UTF-8.
with open(f"arxiv_{startdate.date()}_to_{enddate.date()}_created_{refdate.date()}.md", "r", encoding='utf-8') as file:
    markdown_content = file.read()

# Display the content as Markdown
display(Markdown(markdown_content))