# MPIA Arxiv on Deck 2

Contains the steps to produce the paper extractions.

In [1]:
# Imports
import os
from IPython.display import Markdown, display
from tqdm.notebook import tqdm
import warnings
from PIL import Image 
import re

# requires arxiv_on_deck_2

from arxiv_on_deck_2.arxiv2 import (get_new_papers, 
                                    get_paper_from_identifier,
                                    retrieve_document_source, 
                                    get_markdown_badge)
from arxiv_on_deck_2 import (latex,
                             latex_bib,
                             mpia,
                             highlight_authors_in_list)

# Sometimes images are really big
Image.MAX_IMAGE_PIXELS = 1000000000 

In [2]:
# Some useful definitions.

class AffiliationWarning(UserWarning):
    pass

class AffiliationError(RuntimeError):
    pass

def validation(source: str):
    """Raises error paper during parsing of source file
    
    Allows checks before parsing TeX code.
    
    Raises AffiliationWarning
    """
    check = mpia.affiliation_verifications(source, verbose=True)
    if check is not True:
        raise AffiliationError("mpia.affiliation_verifications: " + check)

        
warnings.simplefilter('always', AffiliationWarning)


def get_markdown_qrcode(paper_id: str):
    """ Generate a qrcode to the arxiv page using qrserver.com
    
    :param paper: Arxiv paper
    :returns: markdown text
    """
    url = r"https://api.qrserver.com/v1/create-qr-code/?size=100x100&data="
    txt = f"""<img src={url}"https://arxiv.org/abs/{paper_id}">"""
    txt = '<div id="qrcode">' + txt + '</div>'
    return txt


def clean_non_western_encoded_characters_commands(text: str) -> str:
    """ Remove non-western encoded characters from a string
    List may need to grow.
    
    :param text: the text to clean
    :return: the cleaned text
    """
    text = re.sub(r"(\\begin{CJK}{UTF8}{gbsn})(.*?)(\\end{CJK})", r"\2", text)
    return text


def get_initials(name: str) -> str:
    """ Get the short name, e.g., A.-B. FamName
    :param name: full name
    :returns: initials
    """
    initials = []
    # account for non western names often in ()
    if '(' in name:
        name = clean_non_western_encoded_characters_commands(name)
        suffix = re.findall(r"\((.*?)\)", name)[0]
        name = name.replace(f"({suffix})", '')
    else:
        suffix = ''
    split = name.split()
    for token in split[:-1]:
        if '-' in token:
            current = '-'.join([k[0] + '.' for k in token.split('-')])
        else:
            current = token[0] + '.'
        initials.append(current)
    initials.append(split[-1].strip())
    if suffix:
        initials.append(f"({suffix})")
    return ' '.join(initials)

## get list of arxiv paper candidates

We use the MPIA mitarbeiter list webpage from mpia.de to get author names
We then get all new papers from Arxiv and match authors

In [3]:
# deal with the author list and edge cases of people that cannot be consistent on their name  

def filter_non_scientists(name: str) -> bool:
    """ Loose filter on expected authorships

    removing IT, administration, technical staff
    :param name: name
    :returns: False if name is not a scientist
    """
    remove_list = ['Licht', 'Binroth', 'Witzel', 'Jordan',
                   'Zähringer', 'Scheerer', 'Hoffmann', 'Düe',
                   'Hellmich', 'Enkler-Scharpegge', 'Witte-Nguy',
                   'Dehen', 'Beckmann', 'Jager', 'Jäger'
                  ]

    for k in remove_list:
        if k in name:
            return False
    return True

def add_author_to_list(author_list: list) -> list:
    """ Add author to list if not already in list
    
    :param author: author name
    :param author_list: list of authors
    :returns: updated list of authors
    """
    add_list = ['T. Henning']

    for author in add_list:
        if author not in author_list:
            author_list.append(author)
    return author_list

# get list from MPIA website
# filter for non-scientists (mpia.get_mpia_mitarbeiter_list() does some filtering)
mpia_authors = [k[1] for k in mpia.get_mpia_mitarbeiter_list() if filter_non_scientists(k[1])]
# add some missing author because of inconsistencies in their MPIA name and author name on papers
mpia_authors = add_author_to_list(mpia_authors)

In [4]:
new_papers = get_new_papers()
# add manual references
add_paper_refs = []
new_papers.extend([get_paper_from_identifier(k) for k in add_paper_refs])

def robust_call(fn, value, *args, **kwargs):
    try:
        return fn(value, *args, **kwargs)
    except Exception:
        return value

candidates = []
for paperk in new_papers:
    # Check author list with their initials
    normed_author_list = [robust_call(mpia.get_initials, k) for k in paperk['authors']]
    hl_authors = highlight_authors_in_list(normed_author_list, mpia_authors, verbose=True)
    matches = [(hl, orig) for hl, orig in zip(hl_authors, paperk['authors']) if 'mark' in hl]
    paperk['authors'] = hl_authors
    if matches:
        # only select paper if an author matched our list
        candidates.append(paperk)
print("""Arxiv has {0:,d} new papers today""".format(len(new_papers)))        
print("""          {0:,d} with possible author matches""".format(len(candidates)))

J. Olofsson  ->  J. Olofsson  |  ['J. Olofsson']
G. Chauvin  ->  G. Chauvin  |  ['G. Chauvin']
T. Henning  ->  T. Henning  |  ['T. Henning']
M. Samland  ->  M. Samland  |  ['M. Samland']
M. Feldt  ->  M. Feldt  |  ['M. Feldt']
J. Li  ->  J. Li  |  ['J. Li']
A. d. Graaff  ->  A. D. Graaff  |  ['A. D. Graaff']
H.-W. Rix  ->  H.-W. Rix  |  ['H.-W. Rix']
F. Walter  ->  F. Walter  |  ['F. Walter']
F. Xu  ->  F. Xu  |  ['F. Xu']


Y. Wu  ->  Y. Wu  |  ['Y. Wu']
T. Henning  ->  T. Henning  |  ['T. Henning']
N. Hoyer  ->  N. Hoyer  |  ['N. Hoyer']
N. Neumayer  ->  N. Neumayer  |  ['N. Neumayer']
E. Schinnerer  ->  E. Schinnerer  |  ['E. Schinnerer']
Y. Wang  ->  Y. Wang  |  ['Y. Wang']
Arxiv has 90 new papers today
          9 with possible author matches


# Parse sources and generate relevant outputs

From the candidates, we do the following steps:
* get their tarball from ArXiv (and extract data)
* find the main .tex file: find one with \documentclass{...} (sometimes it's non trivial)
* Check affiliations with :func:`validation`, which uses :func:`mpia.affiliation_verifications`
* If passing the affiliations: we parse the .tex source
   * inject sub-documents into the main (flatten the main document)
   * parse structure, extract information (title, abstract, authors, figures...)
   * handles `\graphicspath` if provided
* Generate the .md document.

In [5]:
documents = []
failed = []
for paper in tqdm(candidates):
    # debug crap
    paper['identifier'] = paper['identifier'].lower().replace('arxiv:', '').replace(r'\n', '').strip()
    paper_id = paper['identifier']
    
    folder = f'tmp_{paper_id}'

    try:
        if not os.path.isdir(folder):
            folder = retrieve_document_source(f"{paper_id}", f'tmp_{paper_id}')
        
        try:
            doc = latex.LatexDocument(folder, validation=validation)    
        except AffiliationError as affilerror:
            msg = f"ArXiv:{paper_id:s} is not an MPIA paper... " + str(affilerror)
            failed.append((paper, "affiliation error: " + str(affilerror) ))
            continue
        
        # Hack because sometimes author parsing does not work well
        if (len(doc.authors) != len(paper['authors'])):
            doc._authors = paper['authors']
        else:
            # highlight authors (FIXME: doc.highlight_authors)
            # done on arxiv paper already
            doc._authors = highlight_authors_in_list(
                [get_initials(k) for k in doc.authors], 
                mpia_authors, verbose=True)
        if (doc.abstract) in (None, ''):
            doc._abstract = paper['abstract']
            
        doc.comment = (get_markdown_badge(paper_id) + 
                       "<mark>Appeared on: " + paper['date'] + "</mark> - ")
        if paper['comments']:
            doc.comment += " _" + paper['comments'] + "_"
        
        full_md = doc.generate_markdown_text()
        
        full_md += get_markdown_qrcode(paper_id)
        
        # replace citations
        try:
            bibdata = latex_bib.LatexBib.from_doc(doc)
            full_md = latex_bib.replace_citations(full_md, bibdata)
        except Exception as e:
            print("Issues with the citations")
            print(e)
        
        documents.append((paper_id, full_md))
    except Exception as e:
        warnings.warn(latex.LatexWarning(f"{paper_id:s} did not run properly\n" +
                                         str(e)
                                        ))
        failed.append((paper, "latex error " + str(e)))

  0%|          | 0/9 [00:00<?, ?it/s]

Retrieving document from  https://arxiv.org/e-print/2512.03128
Retrieving document from  https://arxiv.org/e-print/2512.03132
extracting tarball to tmp_2512.03132...

not a gzip file


 done.
Retrieving document from  https://arxiv.org/e-print/2512.03134
extracting tarball to tmp_2512.03134... done.



  exec(code_obj, self.user_global_ns, self.user_ns)

  exec(code_obj, self.user_global_ns, self.user_ns)


A. d. Graaff  ->  A. D. Graaff  |  ['A. D. Graaff']
Retrieving document from  https://arxiv.org/e-print/2512.03143
extracting tarball to tmp_2512.03143...

bad escape \i at position 38


 done.
Retrieving document from  https://arxiv.org/e-print/2512.03433


extracting tarball to tmp_2512.03433... done.
Retrieving document from  https://arxiv.org/e-print/2512.03456
extracting tarball to tmp_2512.03456...

 done.


T. Henning  ->  T. Henning  |  ['T. Henning']


Retrieving document from  https://arxiv.org/e-print/2512.03692
extracting tarball to tmp_2512.03692... done.
Retrieving document from  https://arxiv.org/e-print/2512.03999



  exec(code_obj, self.user_global_ns, self.user_ns)

  exec(code_obj, self.user_global_ns, self.user_ns)


extracting tarball to tmp_2512.03999... done.



  exec(code_obj, self.user_global_ns, self.user_ns)

  exec(code_obj, self.user_global_ns, self.user_ns)


N. Hoyer  ->  N. Hoyer  |  ['N. Hoyer']
N. Neumayer  ->  N. Neumayer  |  ['N. Neumayer']
E. Schinnerer  ->  E. Schinnerer  |  ['E. Schinnerer']


Retrieving document from  https://arxiv.org/e-print/2512.04031
extracting tarball to tmp_2512.04031... done.


### Export the logs

Throughout, we also keep track of the logs per paper. see `logs-{today date}.md` 

In [6]:
import datetime
today = str(datetime.date.today())
logfile = f"_build/html/logs/log-{today}.md"


with open(logfile, 'w') as logs:
    # Success
    logs.write(f'# Arxiv on Deck 2: Logs - {today}\n\n')
    logs.write("""* Arxiv had {0:,d} new papers\n""".format(len(new_papers)))
    logs.write("""    * {0:,d} with possible author matches\n\n""".format(len(candidates)))
    logs.write("## Sucessful papers\n\n")
    display(Markdown("## Successful papers"))
    success = [k[0] for k in documents]
    for candid in candidates:
        if candid['identifier'].split(':')[-1] in success:
            display(candid)
            logs.write(candid.generate_markdown_text() + '\n\n')

    ## failed
    logs.write("## Failed papers\n\n")
    display(Markdown("## Failed papers"))
    failed = sorted(failed, key=lambda x: x[1])
    current_reason = ""
    for paper, reason in failed:
        if 'affiliation' in reason:
            color = 'green'
        else:
            color = 'red'
        data = Markdown(
                paper.generate_markdown_text() + 
                f'\n|<p style="color:{color:s}"> **ERROR** </p>| <p style="color:{color:s}">{reason:s}</p> |'
               )
        if reason != current_reason:
            logs.write(f'### {reason:s} \n\n')
            current_reason = reason
        logs.write(data.data + '\n\n')
        
        # only display here the important errors (all in logs)
        # if color in ('red',):
        display(data)

## Successful papers


|||
|---:|:---|
| [![arXiv](https://img.shields.io/badge/arXiv-2512.03456-b31b1b.svg)](https://arxiv.org/abs/2512.03456) | **Improving Accretion Diagnostics for Young Stellar Objects with Mid-infrared Hydrogen lines from JWST/MIRI**  |
|| B. Shridharan, et al. -- incl., <mark>T. Henning</mark> |
|*Appeared on*| *2025-12-04*|
|*Comments*| *Accepted for publication in A&A. 17 pages, 9 figures, 1 Table (4 figures, 2 Tables in Appendix) The large tables will be made available in CDS and/or on request to the corresponding author*|
|**Abstract**|            We present a comprehensive study of mid-infrared neutral hydrogen (H~\textsc{i}) emission lines in 79 nearby (d $<$ 200 $pc$) young stars using JWST/MIRI. We aim to identify mid-infrared H~\textsc{i} transitions that can serve as reliable accretion diagnostics in young stars, and evaluate their utility in deriving physical conditions of the accreting gas. We identify and measure 22 H~\textsc{i} transitions in the MIRI wavelength regime (5-28 $\mu m$) and perform LTE slab modelling to remove the H\textsubscript{2}O contribution from selected H~\textsc{i} transitions. We find that mid-IR H~\textsc{i} line emission is spatially compact, even for sources with spatially extended [Ne~\textsc{ii}] and [Fe~\textsc{ii}] jets, suggesting minimal contamination from extended jet. Although Pfund~$\alpha$ (H~\textsc{i}~6--5) and Humphreys~$\alpha$ (H~\textsc{i}~7--6) are the strongest lines, they are blended with H$_2$O transitions. This blending necessitates additional processing to remove molecular contamination, thereby limiting their use as accretion diagnostics. Instead, we identify the H~\textsc{i}~(8--6) at 7.502 $\mu m$ and H~\textsc{i}~(10--7) at 8.760 $\mu m$ transitions as better alternatives, as they are largely unaffected by molecular contamination and offer a more reliable means of measuring accretion rates from MIRI spectra. We provide updated empirical relations for converting mid-IR H~\textsc{i} line luminosities into accretion luminosity for 6 different H~\textsc{i} lines in the MIRI wavelength range. Moreover, comparison of observed line ratios with theoretical models shows that MIR H~\textsc{i} lines offer robust constraints on the hydrogen gas density in accretion columns, $n_\mathrm{H} = $10$^{10.6}$ to 10$^{11.2}$ cm$^{-3}$ in most stars, with some stars exhibiting lower densities ($<10^{10}$~cm$^{-3}$), approaching the optically thin regime.         |


|||
|---:|:---|
| [![arXiv](https://img.shields.io/badge/arXiv-2512.03999-b31b1b.svg)](https://arxiv.org/abs/2512.03999) | **The Nuclear Star Cluster of M 74: a fossil record of the very early stages of a star-forming galaxy**  |
|| F. Pinna, et al. -- incl., <mark>N. Hoyer</mark>, <mark>N. Neumayer</mark>, <mark>E. Schinnerer</mark> |
|*Appeared on*| *2025-12-04*|
|*Comments*| *13 pages, 8 figures. Currently under the revision process in A&A after positive referee report*|
|**Abstract**|            Nuclear star clusters (NSC) are dense and compact stellar systems, of sizes of few parsecs, located at galactic centers. Their properties and formation mechanisms seem to be tightly linked to the evolution of the host galaxy, with potentially different formation channels for late- and early-type galaxies (respectively, LTGs and ETGs). While most observations target ETGs, here we focus on the NSC in M 74 (NGC 628), a relatively massive, gas-rich and star-forming spiral galaxy, part of the PHANGS survey. We analyzed the central arcmin of the PHANGS-MUSE mosaic, in which the NSC is not spatially resolved. We performed a two-dimensional spectro-photometric decomposition of the MUSE cube, employing a modified version of the C2D code, to disentangle the NSC from the host galaxy. Here we used three components: a bulge, a disk and a NSC approximated to the point spread function (PSF), obtaining three data cubes, one for each component. This allowed us to extract separately the age, metallicity and [Mg/Fe] abundance for the NSC and the host galaxy. Our results show a very old and metal-poor NSC, in contrast to the surrounding regions. While similar properties were found in NSCs hosted by galaxies of different masses and/or morphological types from M 74, they are somewhat unexpected for a relatively massive star-forming spiral galaxy. The spatially resolved stellar populations of the host galaxy display much younger (light-weighted) ages and higher metallicities, especially in the central region (${\sim}500$ pc) surrounding the NSC. This suggests that this NSC formed a long time ago, and evolved passively until today, without any further growth. Our results show that the NSC was not involved in the active recent star-formation history of its host galaxy.         |

## Failed papers


|||
|---:|:---|
| [![arXiv](https://img.shields.io/badge/arXiv-2512.03143-b31b1b.svg)](https://arxiv.org/abs/2512.03143) | **Infrared emission from $z \sim 6.5$ quasar host galaxies: a direct estimate of dust physical properties**  |
|| M. Costa, et al. -- incl., <mark>F. Walter</mark>, <mark>F. Xu</mark> |
|*Appeared on*| *2025-12-04*|
|*Comments*| *18 pages, 10 figures, accepted for publication on A&A*|
|**Abstract**|            Quasars at the dawn of Cosmic Time ($z>6$) are fundamental probes to investigate the early co-evolution of supermassive black holes and their host galaxy. Nevertheless, their infrared spectral energy distribution remains at the present time poorly constrained, due to the limited photometric coverage probing the far-infrared wavelength range where the dust modified black-body is expected to peak ($\sim80$ $\mathrm{\mu m}$). Here we present a study of the high-frequency dust emission via a dedicated ALMA Band 8 ($\sim$400 GHz) campaign targeting 11 quasar host galaxies at $6<z<7$. Combined with archival observations in other ALMA bands, this program enables a detailed characterization of their infrared emission, allowing for the derivation of dust masses ($M_{d}$), dust emissivity indexes ($\beta$), dust temperatures ($T_{d}$), infrared luminosities ($L_{IR}$), and associated star formation rates (SFRs). Our analysis confirms that dust temperature is on average higher in this sample (34-65 K) if compared to local main-sequence galaxies' values, and that this finding can be linked to the increased star formation efficiency we derive in our work, as also suggested by the [CII]$_{158\mu m}$ deficit. Most remarkably, we note that the average value of $T_d$ of this sample doesn't differ from the one that is observed in luminous, ultra-luminous and hyper-luminous infrared galaxies at different redshifts that show no signs of hosting a quasar. Finally, our findings suggest that the presence of a bright AGN does not significantly bias the derived infrared properties, although further high-frequency, high-spatial resolution observations might reveal more subtle impacts on sub-kiloparsec scales.         |
|<p style="color:green"> **ERROR** </p>| <p style="color:green">affiliation error: mpia.affiliation_verifications: '69117' keyword not found.</p> |


|||
|---:|:---|
| [![arXiv](https://img.shields.io/badge/arXiv-2512.03132-b31b1b.svg)](https://arxiv.org/abs/2512.03132) | **The DREAMS Project: Disentangling the Impact of Halo-to-Halo Variance and Baryonic Feedback on Milky Way Dark Matter Density Profiles**  |
|| A. M. Garcia, et al. -- incl., <mark>J. Li</mark> |
|*Appeared on*| *2025-12-04*|
|*Comments*| *19 pages, 9 figures, 1 table, 4 appendices. Comments Welcome! See also arxiv.2512.00148*|
|**Abstract**|            Astrophysical searches for dark matter in the Milky Way require a reliable model for its density distribution, which in turn depends on the influence of baryonic feedback on the Galaxy. In this work, we utilize a new suite of Milky Way-mass halos from the DREAMS Project, simulated with Cold Dark Matter (CDM),to quantify the influence of baryon feedback and intrinsic halo-to-halo variance on dark matter density profiles. Our suite of 1024 halos varies over supernova and black hole feedback parameters from the IllustrisTNG model, as well as variations in two cosmological parameters. We find that Milky Way-mass dark matter density profiles in the IllustrisTNG model are largely insensitive to astrophysics and cosmology variations, with the dominant source of scatter instead arising from halo-to-halo variance. However, most of the (comparatively minor) feedback-driven variations come from the changes to supernova prescriptions. By comparing to dark matter-only simulations, we find that the strongest supernova wind energies are so effective at preventing galaxy formation that the halos are nearly entirely collisionless dark matter. Finally, regardless of physics variation, all the DREAMS halos are roughly consistent with a halo contracting adiabatically from the presence of baryons, unlike models that have bursty stellar feedback. This work represents a step toward assessing the robustness of Milky Way dark matter profiles, with direct implications for dark matter searches where systematic uncertainty in the density profile remains a major challenge.         |
|<p style="color:green"> **ERROR** </p>| <p style="color:green">affiliation error: mpia.affiliation_verifications: 'Heidelberg' keyword not found.</p> |


|||
|---:|:---|
| [![arXiv](https://img.shields.io/badge/arXiv-2512.03433-b31b1b.svg)](https://arxiv.org/abs/2512.03433) | **A Deep Chandra X-ray Survey of a Luminous Quasar Sample at $z\sim$ 7**  |
|| X. Jin, et al. -- incl., <mark>Y. Wu</mark> |
|*Appeared on*| *2025-12-04*|
|*Comments*| *17 pages, 8 figures, 3 tables, accepted for publication in ApJ*|
|**Abstract**|            We present new Chandra observations of seven luminous quasars at $z>6.5$. Combined with archival Chandra observations of all other known quasars, they form nearly complete X-ray observations of all currently known $z\sim7$ quasars with $M_{1450}<-26.5$, except for J0313$-$1806 at $z=7.642$ and J0910$-$0414 at $z=6.636$. Together with existing ground-based NIR spectroscopy and ALMA observations, we investigate the correlations between X-ray emission (the X-ray luminosity $L_{\rm X}$ and the optical/UV-to-X-ray spectral slope $\alpha_{\rm OX}$) and various quasar properties (rest-UV luminosity $L_{\mathrm{2500\ \mathring{A}}}$, bolometric luminosity $L_{\rm bol}$, C IV blueshift, and infrared luminosity $L_{\rm IR}$). We find most $z>6.5$ quasars follow a similar $\alpha_{\rm OX}-L_{\mathrm{2500\ \mathring{A}}}$ relation as $z\sim1-6$ quasars, but also display a large scatter. We find a potential correlation between $\alpha_{\rm OX}$ and the C IV blueshift, suggesting a soft optical/UV-to-X-ray SED shape is frequently associated with fast disk winds. Furthermore, we analyze the X-ray spectrum of 11 quasars at $z>6.5$ with Chandra detection, and find the best-fit photon index $\Gamma$ is $2.41\pm0.27$, which is likely driven by high accretion rates of $z>6.5$ quasars. In addition, we find there are no significant correlations between either $L_{\rm X}$ and $L_{\rm IR}$, nor $L_{\rm bol}$ and $L_{\rm IR}$, suggesting no strong correlations between quasar luminosity and star formation luminosity for the most luminous quasars at $z>6.5$.         |
|<p style="color:green"> **ERROR** </p>| <p style="color:green">affiliation error: mpia.affiliation_verifications: 'Heidelberg' keyword not found.</p> |


|||
|---:|:---|
| [![arXiv](https://img.shields.io/badge/arXiv-2512.03692-b31b1b.svg)](https://arxiv.org/abs/2512.03692) | **Long-term calibration and validation of stability of the Auger Engineering Radio Array using the diffuse Galactic radio emission**  |
|| P. A. Collaboration, et al. |
|*Appeared on*| *2025-12-04*|
|*Comments*| *20 pages, 9 figures, 2 tables*|
|**Abstract**|            The Auger Engineering Radio Array (AERA) measures radio emission from high-energy extensive air showers. Consisting of 153 autonomous radio-detector stations spread over $17$\,km$^2$, it detects radio waves in the frequency range of $30$ to $80$\,MHz. Accurate characterization of the detector response is crucial for proper interpretation of the collected data. Previously, this was achieved through laboratory measurements of the analog chain and simulations and measurements of the antenna's directional response. In this paper, we perform an absolute calibration using the continuously monitored sidereal modulation of the diffuse Galactic radio emission. Calibration is done by comparing the average frequency spectra recorded by the stations with predictions from seven different models of the full radio sky, accounting for the system response, which includes the antenna, filters, and amplifiers. The analysis of the calibration constants over a period of seven years shows no relevant and no significant ageing effect in the AERA antennas. This result confirms the long-term stability of the detector stations and demonstrates the possibility for a radio detector to effectively monitor ageing effects of other detectors operating over extended periods.         |
|<p style="color:green"> **ERROR** </p>| <p style="color:green">affiliation error: mpia.affiliation_verifications: 'Heidelberg' keyword not found.</p> |


|||
|---:|:---|
| [![arXiv](https://img.shields.io/badge/arXiv-2512.04031-b31b1b.svg)](https://arxiv.org/abs/2512.04031) | **Large Language Models for Limited Noisy Data: A Gravitational Wave Identification Study**  |
|| Y. Li, et al. -- incl., <mark>Y. Wang</mark> |
|*Appeared on*| *2025-12-04*|
|*Comments*| *10 pages, 5 figures*|
|**Abstract**|            This work investigates whether large language models (LLMs) offer advantages over traditional neural networks for astronomical data processing, in regimes with non-Gaussian, non-stationary noise and limited labeled samples. Gravitational wave observations provide an suitable test case, using only 90 LIGO events, finetuned LLMs achieve 97.4\% accuracy for identifying signals. Further experiments show that, in contrast to traditional networks that rely on large simulated datasets, additional simulated samples do not improve LLM performance, while scaling studies reveal predictable gains with increasing model size and dataset size. These results indicate that LLMs can extract discriminative structure directly from observational data and provide an efficient assessment for gravitational wave identification. The same strategy may extend to other astronomical domains with similar noise properties, such as radio or pulsar observations.         |
|<p style="color:green"> **ERROR** </p>| <p style="color:green">affiliation error: mpia.affiliation_verifications: 'Heidelberg' keyword not found.</p> |


|||
|---:|:---|
| [![arXiv](https://img.shields.io/badge/arXiv-2512.03134-b31b1b.svg)](https://arxiv.org/abs/2512.03134) | **The Mass-Metallicity Relation and its Observational Effects at z~3-6**  |
|| Z. Lewis, et al. -- incl., <mark>A. d. Graaff</mark>, <mark>H.-W. Rix</mark> |
|*Appeared on*| *2025-12-04*|
|*Comments*| *16 pages, 8 figures, submitted to ApJ*|
|**Abstract**|            The correlation between galaxy stellar mass and gas-phase metallicity, known as the mass-metallicity relation (MZR), gives key insights into the processes that govern galaxy evolution. However, unquantified observational and selection biases can result in systematic errors in attempts to recover the intrinsic MZR, particularly at higher redshifts. We characterize the MZR at z~3-6 within a fully Bayesian framework using JWST NIRSpec spectra of 193 galaxies from the RUBIES survey. We forward model the observed mass-metallicity surface using prospector-generated spectra to account for two selection biases: the survey selection function and success in observing high signal-to-noise emission lines. We demonstrate that the RUBIES selection function, based on F444W magnitude and F150W-F444W color, has a negligible effect on our measured MZR. A correct treatment of the non-Gaussian metallicity uncertainties from strong-line calibrations lowers the derived MZR normalization by 0.2 dex and flattens the slope by ~20%; forward-modeling the effect of emission line observability steepens the slope by ~15%. Both of these biases must be taken into account in order to properly measure the intrinsic MZR. This novel forward modeling process motivates careful consideration of selection functions in future surveys, and paves the way for robust, high-redshift chemical enrichment studies that trace the evolution of the mass-metallicity relation across cosmic time.         |
|<p style="color:red"> **ERROR** </p>| <p style="color:red">latex error bad escape \i at position 38</p> |


|||
|---:|:---|
| [![arXiv](https://img.shields.io/badge/arXiv-2512.03128-b31b1b.svg)](https://arxiv.org/abs/2512.03128) | **Characterization of debris disks observed with SPHERE**  |
|| N. Engler, et al. -- incl., <mark>J. Olofsson</mark>, <mark>G. Chauvin</mark>, <mark>T. Henning</mark>, <mark>M. Samland</mark>, <mark>M. Feldt</mark> |
|*Appeared on*| *2025-12-04*|
|*Comments*| *57 pages, 33 figures, 13 tables*|
|**Abstract**|            This study aims to characterize debris disks observed with SPHERE across multiple programs, with the goal of identifying systematic trends in disk morphology, dust mass, and grain properties as a function of stellar parameters. We analyzed a sample of 161 young stars using SPHERE observations at optical and near-IR wavelengths. Disk geometries were derived from ellipse fitting and model grids, while dust mass and properties were constrained by modified blackbody (MBB) and size distribution (SD) modeling of SEDs. The dynamical modeling was performed to assess whether the observed disk structures can be explained by the presence of unseen planets. We resolved 51 debris disks, including four new detections: HD 36968, BD-20 951, and the inner belts of HR 8799 and HD 36546. In addition, we found a second transiting giant planet in the HD 114082 system, with a radius of 1.29 $R_{\rm Jup}$ and an orbital distance of ~1 au. We identified nine multi-belt systems, with outer-to-inner belt radius ratios of $1.5-2$, and found close agreement between scattered-light and millimeter-continuum belt radii. They scale weakly with stellar luminosity ($R_{\rm belt} \propto L_{\star}^{0.11}$), but show steeper dependencies when separated by CO and CO$_2$ freeze-out regimes. Disk fractional luminosities follow collisional decay trends, declining as $t_{\rm age}^{-1.18}$ for A and $t_{\rm age}^{-0.81}$ for F stars. The inferred dust masses span $10^{-5}-1\,M_\oplus$ from MBB and $0.01-1\,M_\oplus$ from SD modeling. These masses scale as $R_{\rm belt}^n$ with $n>2$ in belt radius and super-linearly with stellar mass, consistent with trends seen in protoplanetary disks. Analysing correlation between disk polarized flux and IR excess, we found an offset of ~1 dex between total-intensity (HST) and polarized fluxes. A new parametric approach to estimate dust albedo and maximum polarization fraction is introduced.         |
|<p style="color:red"> **ERROR** </p>| <p style="color:red">latex error not a gzip file</p> |

## Export documents

We now write the .md files and export relevant images

In [7]:
def export_markdown_summary(md: str, md_fname:str, directory: str):
    """Export MD document and associated relevant images"""
    import os
    import shutil
    import re

    if (os.path.exists(directory) and not os.path.isdir(directory)):
        raise RuntimeError(f"a non-directory file exists with name {directory:s}")

    if (not os.path.exists(directory)):
        print(f"creating directory {directory:s}")
        os.mkdir(directory)

    fig_fnames = (re.compile(r'\[Fig.*\]\((.*)\)').findall(md) + 
                  re.compile(r'\<img src="([^>\s]*)"[^>]*/>').findall(md))
    print("found figures", fig_fnames)
    for fname in fig_fnames:
        if 'http' in fname:
            # No need to copy online figures
            continue
        if not os.path.exists(fname):
            print("file not found", fname)
            continue
        print("copying ", fname, "to", directory)
        destdir = os.path.join(directory, os.path.dirname(fname))
        destfname = os.path.join(destdir, os.path.basename(fname))
        try:
            os.makedirs(destdir)
        except FileExistsError:
            pass
        shutil.copy(fname, destfname)
    with open(os.path.join(directory, md_fname), 'w') as fout:
        fout.write(md)
    print("exported in ", os.path.join(directory, md_fname))
    [print("    + " + os.path.join(directory,fk)) for fk in fig_fnames]

In [8]:
for paper_id, md in documents:
    export_markdown_summary(md, f"{paper_id:s}.md", '_build/html/')

found figures ['tmp_2512.03456/./plots_v4/v4_accretion_relations_swapped.png', 'tmp_2512.03456/./plots_v4/v4_water_contribution_to_HI.png', 'tmp_2512.03456/./plots_v4/slope_wavelength.png']
copying  tmp_2512.03456/./plots_v4/v4_accretion_relations_swapped.png to _build/html/
copying  tmp_2512.03456/./plots_v4/v4_water_contribution_to_HI.png to _build/html/
copying  tmp_2512.03456/./plots_v4/slope_wavelength.png to _build/html/
exported in  _build/html/2512.03456.md
    + _build/html/tmp_2512.03456/./plots_v4/v4_accretion_relations_swapped.png
    + _build/html/tmp_2512.03456/./plots_v4/v4_water_contribution_to_HI.png
    + _build/html/tmp_2512.03456/./plots_v4/slope_wavelength.png
found figures ['tmp_2512.03999/./figures/NGC0628phangs_spectra_psfc2d_compare_bdnucl_paper.png', 'tmp_2512.03999/./figures/NGC0628_hostgal_Hbmask_SN100_SNthresh3_smiles_popmaps_vert.png', 'tmp_2512.03999/./figures/NGC0628_original_Hbmask_SN100.0_SNthresh3.0_smiles_popmaps_vert.png']
copying  tmp_2512.03999/./

## Display the papers

Not necessary but allows for a quick check.

In [9]:
[display(Markdown(k[1])) for k in documents];

<div class="macros" style="visibility:hidden;">
$\newcommand{\ensuremath}{}$
$\newcommand{\xspace}{}$
$\newcommand{\object}[1]{\texttt{#1}}$
$\newcommand{\farcs}{{.}''}$
$\newcommand{\farcm}{{.}'}$
$\newcommand{\arcsec}{''}$
$\newcommand{\arcmin}{'}$
$\newcommand{\ion}[2]{#1#2}$
$\newcommand{\textsc}[1]{\textrm{#1}}$
$\newcommand{\hl}[1]{\textrm{#1}}$
$\newcommand{\footnote}[1]{}$</div>



<div id="title">

# Improving accretion diagnostics for young stellar objects with mid-infrared hydrogen lines from JWST/MIRI

</div>
<div id="comments">

[![arXiv](https://img.shields.io/badge/arXiv-2512.03456-b31b1b.svg)](https://arxiv.org/abs/2512.03456)<mark>Appeared on: 2025-12-04</mark> -  _Accepted for publication in A&A. 17 pages, 9 figures, 1 Table (4 figures, 2 Tables in Appendix) The large tables will be made available in CDS and/or on request to the corresponding author_

</div>
<div id="authors">

B. Shridharan, et al. -- incl., <mark>T. Henning</mark>

</div>
<div id="abstract">

**Abstract:** We present a comprehensive study of mid-infrared neutral hydrogen (H i ) emission lines in 79 nearby (d $<$ 200 $pc$ ) young stars using the _James Webb Space Telescope_ ( _JWST_ ) Mid-Infrared Instrument (MIRI). This work extends accretion diagnostics to mid-infrared H i transitions, which are less affected by extinction and outflow emission compared to optical and near-infrared H i lines. We aim to identify mid-infrared H i transitions that can serve as reliable accretion diagnostics in young stars, and evaluate their utility in deriving physical conditions of the accreting gas. We identify and measure 22 H i transitions in the MIRI wavelength regime (5–28 $\mu m$ ) and perform LTE slab modelling to remove the H \textsubscript{2} O contribution from selected H i transitions. We examine the spatial extent of MIR H i emission and assess contamination from molecular and jet-related emission. We find that mid-IR H i line emission is spatially compact, even for sources with spatially extended [ Ne ii ] and [ Fe ii ] jets, suggesting minimal contamination from extended jet. Although Pfund $\alpha$ (H i 6--5) and Humphreys $\alpha$ (H i 7--6) are the strongest lines in the mid-infrared, they are blended with $H_2$ O transitions. This blending necessitates additional processing to remove molecular contamination, thereby limiting their use as accretion diagnostics. Instead, we identify the H i (8--6) at 7.502 $\mu m$ and H i (10--7) at 8.760 $\mu m$ transitions as better alternatives, as they are largely unaffected by molecular contamination and offer a more reliable means of measuring accretion rates from MIRI spectra. We provide updated empirical relations for converting mid-IR H i line luminosities into accretion luminosity for 6 different H i lines in the MIRI wavelength range. Moreover, comparison of observed line ratios with theoretical models shows that MIR H i lines offer robust constraints on the hydrogen gas density in accretion columns, $n_\mathrm{H} = $ 10 $^{10.6}$ to 10 $^{11.2}$ cm $^{-3}$ in most stars, with some stars exhibiting lower densities ( $<10^{10}$ cm $^{-3}$ ), approaching the optically thin regime.

</div>

<div id="div_fig1">

<img src="tmp_2512.03456/./plots_v4/v4_accretion_relations_swapped.png" alt="Fig6" width="100%"/>

**Figure 6. -** New and updated H i empirical relations. Empirical correlations between accretion luminosity (log($L_{\rm acc}/L_{\odot}$)) and MIR H i line luminosities (log($L_{\rm line}/L_{\odot}$)) for six transitions analysed in this work. Each panel displays the linear regression best-fit (black dashed line) with 1$\sigma$ and 3$\sigma$ confidence intervals (shaded regions). The best-fit slope and intercept are shown in the bottom-right corner of each panel and listed in Table \ref{tab:2}. Blue open squares represent sources without correction for $H_2$O contamination, while green open circles correspond to sources where the H i line fluxes have been corrected using LTE $H_2$O models. The pink circles represent the newly measured log(L$_{acc}$) values for sources that lack literature values, while the red arrows denote 3$\sigma$ upper limits for non-detections. The dash-dotted lines in the middle panels indicate the empirical relations reported by [Tofflemire, et. al (2025)](https://ui.adsabs.harvard.edu/abs/2025ApJ...985..224T) for reference. (*fig:5*)

</div>
<div id="div_fig2">

<img src="tmp_2512.03456/./plots_v4/v4_water_contribution_to_HI.png" alt="Fig5" width="100%"/>

**Figure 5. -** Contribution of H\textsubscript{2}O to 3 H i lines. Left panels: Estimated percentage contribution of $H_2$O emission to the observed H i transitions—H i(12--7), H i(7--6), and H i(6--5)—across the Class II disk sample. Blue stars indicate the fractional $H_2$O contribution for each source. Green upward arrows denote sources where the entire flux is attributable to $H_2$O, while red downward arrows mark sources for which $H_2$O modelling was not applied due to either dominant H i flux or negligible $H_2$O emission. Right panels: Violin plots showing the distributions of best-fit physical parameters from LTE $H_2$O models for each wavelength region. The temperature and effective emitting radius exhibit an anti-correlation, consistent with compact hot inner disk emission dominating at shorter wavelengths. The $H_2$O column densities remain relatively constant across the different spectral regions.
 (*fig:4*)

</div>
<div id="div_fig3">

<img src="tmp_2512.03456/./plots_v4/slope_wavelength.png" alt="Fig7" width="100%"/>

**Figure 7. -** {Comparison of different H i empirical relations. (Left) Estimated empirical slopes of optical/NIR H i lines from the literature  ([Alcal\'a, et. al 2017](https://ui.adsabs.harvard.edu/abs/2017A&A...600A..20A), [Rogers, de Marchi and Brandl 2024](https://ui.adsabs.harvard.edu/abs/2024A&A...684L...8R), [Salyk, et. al 2013](https://ui.adsabs.harvard.edu/abs/2013ApJ...769...21S))  and mid-infrared H i lines from this work. The colour bar and marker colours indicate the wavelength of each H i transition. The median slope of the distribution is $\sim$1.1, shown with a dashed line.} The slope of the Pa$\alpha$ empirical relation  ([Rogers, de Marchi and Brandl 2024](https://ui.adsabs.harvard.edu/abs/2024A&A...684L...8R))  is slightly offset from other H i lines, as it was calibrated using _JWST_ spectra of young stars in the Large Magellanic Cloud (LMC), which has sub-solar metallicity. This lower metallicity likely affects the slope. {(Right) Comparison of empirical relations between accretion luminosity ($\log L_{\mathrm{acc}}$) and line luminosity ($\log L_{\mathrm{line}}$) for representative H i transitions from optical to mid-infrared wavelengths. The slopes are consistent across this wide wavelength range.} (*fig:6*)

</div><div id="qrcode"><img src=https://api.qrserver.com/v1/create-qr-code/?size=100x100&data="https://arxiv.org/abs/2512.03456"></div>

<div class="macros" style="visibility:hidden;">
$\newcommand{\ensuremath}{}$
$\newcommand{\xspace}{}$
$\newcommand{\object}[1]{\texttt{#1}}$
$\newcommand{\farcs}{{.}''}$
$\newcommand{\farcm}{{.}'}$
$\newcommand{\arcsec}{''}$
$\newcommand{\arcmin}{'}$
$\newcommand{\ion}[2]{#1#2}$
$\newcommand{\textsc}[1]{\textrm{#1}}$
$\newcommand{\hl}[1]{\textrm{#1}}$
$\newcommand{\footnote}[1]{}$
$\newcommand{\pymusepipe}{\textsc{pymusepipe}}$
$\newcommand{\pyneb}{\textsc{pyneb}}$
$\newcommand{\ppxf}{\textsc{pPXF}}$
$\newcommand{\powerlaw}{\textsc{powerlaw}}$
$\newcommand{\DrSFMS}{\Delta rSFMS}$
$\newcommand{\DrKS}{\Delta rKS}$
$\newcommand{\DrMGMS}{\Delta rMGMS}$
$\newcommand{\rSFMS}{rSFMS}$
$\newcommand{\rKS}{rKS}$
$\newcommand{\rMGMS}{rMGMS}$
$\newcommand{\Htwo}{\ensuremath{\mathrm{H_{2}}}}$
$\newcommand{\OIII}{\textup{[O \textsc{iii}]}}$
$\newcommand{\OII}{\textup{[O \textsc{ii}]}}$
$\newcommand{\OI}{\textup{[O \textsc{i}]}}$
$\newcommand{\NII}{\textup{[N \textsc{ii}]}}$
$\newcommand{\SII}{\textup{[S \textsc{ii}]}}$
$\newcommand{\SIII}{\textup{[S \textsc{iii}]}}$
$\newcommand{\HII}{\textup{H \textsc{ii}}}$
$\newcommand{\Ha}{\textup{H}\ensuremath{\alpha}}$
$\newcommand{\Hb}{\textup{H}\ensuremath{\beta}}$
$\newcommand{\re}{R_\mathrm{e}}$
$\newcommand{\Te}{T_\mathrm{e}}$
$\newcommand{\Rtwentyfive}{R_{25}}$
$\newcommand{\SHa}{\Sigma_{\mathrm{H\alpha}}}$
$\newcommand{\SHtwo}{\Sigma_{\mathrm{H_{2}}}}$
$\newcommand{\SMstar}{\Sigma_{\mathrm{M_{*}}}}$
$\newcommand{\Ssfr}{\ensuremath{\Sigma_{\mathrm{SFR}}}}$
$\newcommand{\HIIphot}{\textsc{\HIIphot}}$
$\newcommand{\Lmin}{L_\mathrm{min}}$
$\newcommand{\msun}{ M_{\sun}}$
$\newcommand{\MPIA}{\label{MPIA}Max-Planck-Institut für Astronomie, Königstuhl 17, D-69117, Heidelberg, Germany}$
$\newcommand{\IAC}{\label{IAC}Instituto de Astrofísica de Canarias, calle Vía Láctea s/n, E-38205 La Laguna, Tenerife, Spain}$
$\newcommand{\ULL}{\label{ULL}Departamento de Astrofísica, Universidad de La Laguna, Avenida Astrofísico Francisco Sánchez s/n, E-38206 La Laguna, Spain}$
$\newcommand{\UCA}{\label{UCA}Université C\^ote d'Azur, Observatoire de la C\^ote d'Azur, CNRS, Laboratoire Lagrange, 06000, Nice, France}$
$\newcommand{\UWyoming}{\label{UWyoming}Department of Physics and Astronomy, University of Wyoming, Laramie, WY 82071, USA}$
$\newcommand{\stromlo}{\label{stromlo}Research School of Astronomy and Astrophysics, Australian National University, Mt Stromlo Observatory, Weston Creek, ACT 2611, Australia}$
$\newcommand{\ITA}{\label{ITA}Universität Heidelberg, Zentrum für Astronomie, Institut für Theoretische Astrophysik, Albert-Ueberle-Str 2, D-69120 Heidelberg, Germany}$
$\newcommand{\IWR}{\label{IWR}Universität Heidelberg, Interdisziplinäres Zentrum für Wissenschaftliches Rechnen, Im Neuenheimer Feld 205, D-69120 Heidelberg, Germany}$
$\newcommand{\OAN}{\label{OAN}Observatorio Astronómico Nacional (IGN), C/Alfonso XII, 3, E-28014 Madrid, Spain}$
$\newcommand{\JBCfA}{\label{JBCfA}UK ALMA Regional Centre Node, Jodrell Bank Centre for Astrophysics, Department of Physics and Astronomy, The University of Manchester, Oxford Road, Manchester M13 9PL, UK}$
$\newcommand{\ADOhio}{\label{ADOhio}Astronomy Department, The Ohio State University, Columbus, Ohio, USA$
$}$
$\newcommand{ÇOhio}{\label{CCOhio}Center for Cosmology and Astro-Particle Physics, The Ohio State University, Columbus, Ohio, USA$
$}$
$\newcommand{\deg}{\mbox{^{\circ}}}$
$\newcommand\fp{#1}$
$\newcommand\fpcut{#1}$</div>



<div id="title">

# The Nuclear Star Cluster of M 74: a fossil record of the very early stages of a star-forming galaxy

</div>
<div id="comments">

[![arXiv](https://img.shields.io/badge/arXiv-2512.03999-b31b1b.svg)](https://arxiv.org/abs/2512.03999)<mark>Appeared on: 2025-12-04</mark> -  _13 pages, 8 figures. Currently under the revision process in A&A after positive referee report_

</div>
<div id="authors">

F. Pinna, et al. -- incl., <mark>N. Hoyer</mark>, <mark>N. Neumayer</mark>, <mark>E. Schinnerer</mark>

</div>
<div id="abstract">

**Abstract:** Nuclear star clusters (NSC) are dense and compact stellar systems, of sizes of few parsecs, located at galactic centers.Their properties and formation mechanisms seem to be tightly linked to the evolution of the host galaxy, with potentially different formation channels for late- and early-type galaxies (respectively, LTGs and ETGs). While most observations target ETGs, here we focus on the NSC in M 74 (NGC 628), a relatively massive, gas-rich and star-forming spiral galaxy, part of the PHANGS survey. We analyzed the central arcmin of the PHANGS-MUSE mosaic, in which the NSC is not spatially resolved.We performed a two-dimensional spectro-photometric decomposition of the MUSE cube, employing a modified version of the C2D code, to disentangle the NSC from the host galaxy.Here we used three components: a bulge, a disk and a NSC approximated to the point spread function (PSF), obtaining three data cubes, one for each component. This allowed us to extract separately the age, metallicity and [ Mg/Fe ] abundance for the NSC and the host galaxy.Our results show a very old and metal-poor NSC, in contrast to the surrounding regions. While similar properties were found in NSCs hosted by galaxies of different masses and/or morphological types from M 74, they are somewhat unexpected for a relatively massive star-forming spiral galaxy. The spatially resolved stellar populations of the host galaxy display much younger (light-weighted) ages and higher metallicities, especially in the central region ( ${\sim}500$ pc) surrounding the NSC. This suggests that this NSC formed a long time ago, and evolved passively until today, without any further growth. Our results show that the NSC was not involved in the active recent star-formation history of its host galaxy.

</div>

<div id="div_fig1">

<img src="tmp_2512.03999/./figures/NGC0628phangs_spectra_psfc2d_compare_bdnucl_paper.png" alt="Fig2" width="100%"/>

**Figure 2. -** Spectra obtained by integrating, within an aperture of $1\sigma$ of the PSF, the prepared (_ original_) data cube before the spectrophotometric decomposition (_ blue solid_), the _ NSC_ data cube (_ red dashed_), and the _ host-galaxy_ data cube (_ green dashed_).
 (*fig:spectra*)

</div>
<div id="div_fig2">

<img src="tmp_2512.03999/./figures/NGC0628_hostgal_Hbmask_SN100_SNthresh3_smiles_popmaps_vert.png" alt="Fig3" width="100%"/>

**Figure 3. -** Stellar population maps of the host-galaxy component of M 74, after subtracting the NSC. From top to bottom: light-weighted mean age, total metallicity [M/H] and [Mg/Fe] abundance. Masked regions are depicted in white and isophotal contours in black. The physical scale is given as a reference in the top $X$ axis.
 (*fig:host_pop_maps*)

</div>
<div id="div_fig3">

<img src="tmp_2512.03999/./figures/NGC0628_original_Hbmask_SN100.0_SNthresh3.0_smiles_popmaps_vert.png" alt="Fig6" width="100%"/>

**Figure 6. -** Stellar population maps of the central arcmin of M 74, extracted from the integrated data cube. From top to bottom: mean age, total metallicity [M/H] and [Mg/Fe] abundance. Masked regions are depicted in white and isophotal contours in black. The physical scale is given as a reference in the top $X$ axes.
 (*fig:orig_pop_maps*)

</div><div id="qrcode"><img src=https://api.qrserver.com/v1/create-qr-code/?size=100x100&data="https://arxiv.org/abs/2512.03999"></div>

# Create HTML index

In [10]:
from datetime import datetime, timedelta, timezone
from glob import glob
import os

files = glob('_build/html/*.md')
days = 7
now = datetime.today()
res = []
for fk in files:
    stat_result = os.stat(fk).st_ctime
    modified = datetime.fromtimestamp(stat_result, tz=timezone.utc).replace(tzinfo=None)
    delta = now.today() - modified
    if delta <= timedelta(days=days):
        res.append((delta.seconds, fk))
res = [k[1] for k in reversed(sorted(res, key=lambda x:x[1]))]
npub = len(res)
print(len(res), f" publications files modified in the last {days:d} days.")
# [ print('\t', k) for k in res ];

138  publications files modified in the last 7 days.


In [11]:
import datetime
from glob import glob

def get_last_n_days(lst, days=1):
    """ Get the documents from the last n days """
    sorted_lst = sorted(lst, key=lambda x: x[1], reverse=True)
    for fname, date in sorted_lst:
        if date >= str(datetime.date.today() - datetime.timedelta(days=days)):
            yield fname

def extract_appearance_dates(lst_file):
    dates = []

    def get_date(line):
        return line\
            .split('Appeared on:')[-1]\
            .split('</mark>')[0].strip()

    for fname in lst:
        with open(fname, 'r') as f:
            found_date = False
            for line in f:
                if not found_date:
                    if "Appeared on" in line:
                        found_date = True
                        dates.append((fname, get_date(line)))
                else:
                    break
    return dates

from glob import glob
lst = glob('_build/html/*md')
days = 7
dates = extract_appearance_dates(lst)
res = list(get_last_n_days(dates, days))
npub = len(res)
print(len(res), f" publications in the last {days:d} days.")

15  publications in the last 7 days.


In [12]:
def create_carousel(npub=4):
    """ Generate the HTML code for a carousel with `npub` slides """
    carousel = ["""  <div class="carousel" """,
                """       data-flickity='{ "autoPlay": 10000, "adaptiveHeight": true, "resize": true, "wrapAround": true, "pauseAutoPlayOnHover": true, "groupCells": 1 }' id="asyncTypeset">"""
                ]
    
    item_str = """    <div class="carousel-cell"> <div id="slide{k}" class="md_view">Content {k}</div> </div>"""
    for k in range(1, npub + 1):
        carousel.append(item_str.format(k=k))
    carousel.append("  </div>")
    return '\n'.join(carousel)

def create_grid(npub=4):
    """ Generate the HTML code for a flat grid with `npub` slides """
    grid = ["""  <div class="grid"> """,
                ]
    
    item_str = """    <div class="grid-item"> <div id="slide{k}" class="md_view">Content {k}</div> </div>"""
    for k in range(1, npub + 1):
        grid.append(item_str.format(k=k))
    grid.append("  </div>")
    return '\n'.join(grid)

In [13]:
carousel = create_carousel(npub)
docs = ', '.join(['"{0:s}"'.format(k.split('/')[-1]) for k in res])
slides = ', '.join([f'"slide{k}"' for k in range(1, npub + 1)])

with open("daily_template.html", "r") as tpl:
    page = tpl.read()
    page = page.replace("{%-- carousel:s --%}", carousel)\
               .replace("{%-- suptitle:s --%}",  "7-day archives" )\
               .replace("{%-- docs:s --%}", docs)\
               .replace("{%-- slides:s --%}", slides)
    
with open("_build/html/index_7days.html", 'w') as fout:
    fout.write(page)

In [14]:
# redo for today
days = 1
res = list(get_last_n_days(dates, days))
npub = len(res)
print(len(res), f" publications in the last day.")

carousel = create_carousel(npub)
docs = ', '.join(['"{0:s}"'.format(k.split('/')[-1]) for k in res])
slides = ', '.join([f'"slide{k}"' for k in range(1, npub + 1)])

with open("daily_template.html", "r") as tpl:
    page = tpl.read()
    page = page.replace("{%-- carousel:s --%}", carousel)\
               .replace("{%-- suptitle:s --%}",  "Daily" )\
               .replace("{%-- docs:s --%}", docs)\
               .replace("{%-- slides:s --%}", slides)
    
# print(carousel, docs, slides)
# print(page)
with open("_build/html/index_daily.html", 'w') as fout:
    fout.write(page)

4  publications in the last day.


In [15]:
# Create the flat grid of the last N papers (fixed number regardless of dates)
from itertools import islice 

npub = 6
res = [k[0] for k in (islice(reversed(sorted(dates, key=lambda x: x[1])), 6))]
print(len(res), f" {npub} publications selected.")

grid = create_grid(npub)
docs = ', '.join(['"{0:s}"'.format(k.split('/')[-1]) for k in res])
slides = ', '.join([f'"slide{k}"' for k in range(1, npub + 1)])

with open("grid_template.html", "r") as tpl:
    page = tpl.read()
    page = page.replace("{%-- grid-content:s --%}", grid)\
               .replace("{%-- suptitle:s --%}",  f"Last {npub:,d} papers" )\
               .replace("{%-- docs:s --%}", docs)\
               .replace("{%-- slides:s --%}", slides)
    
# print(grid, docs, slides)
# print(page)
with open("_build/html/index_npub_grid.html", 'w') as fout:
    fout.write(page)

6  6 publications selected.
