# MPIA Arxiv on Deck 2

Contains the steps to produce the paper extractions.

In [1]:
# Imports
import os
from IPython.display import Markdown, display
from tqdm.notebook import tqdm
import warnings
from PIL import Image 
import re

# requires arxiv_on_deck_2

from arxiv_on_deck_2.arxiv2 import (get_new_papers, 
                                    get_paper_from_identifier,
                                    retrieve_document_source, 
                                    get_markdown_badge)
from arxiv_on_deck_2 import (latex,
                             latex_bib,
                             mpia,
                             highlight_authors_in_list)

# Sometimes images are really big
Image.MAX_IMAGE_PIXELS = 1000000000 

In [2]:
# Some useful definitions.

class AffiliationWarning(UserWarning):
    pass

class AffiliationError(RuntimeError):
    pass

def validation(source: str):
    """Raises error paper during parsing of source file
    
    Allows checks before parsing TeX code.
    
    Raises AffiliationWarning
    """
    check = mpia.affiliation_verifications(source, verbose=True)
    if check is not True:
        raise AffiliationError("mpia.affiliation_verifications: " + check)

        
warnings.simplefilter('always', AffiliationWarning)


def get_markdown_qrcode(paper_id: str):
    """ Generate a qrcode to the arxiv page using qrserver.com
    
    :param paper: Arxiv paper
    :returns: markdown text
    """
    url = r"https://api.qrserver.com/v1/create-qr-code/?size=100x100&data="
    txt = f"""<img src={url}"https://arxiv.org/abs/{paper_id}">"""
    txt = '<div id="qrcode">' + txt + '</div>'
    return txt


def clean_non_western_encoded_characters_commands(text: str) -> str:
    """ Remove non-western encoded characters from a string
    List may need to grow.
    
    :param text: the text to clean
    :return: the cleaned text
    """
    text = re.sub(r"(\\begin{CJK}{UTF8}{gbsn})(.*?)(\\end{CJK})", r"\2", text)
    return text


def get_initials(name: str) -> str:
    """ Get the short name, e.g., A.-B. FamName
    :param name: full name
    :returns: initials
    """
    initials = []
    # account for non western names often in ()
    if '(' in name:
        name = clean_non_western_encoded_characters_commands(name)
        suffix = re.findall(r"\((.*?)\)", name)[0]
        name = name.replace(f"({suffix})", '')
    else:
        suffix = ''
    split = name.split()
    for token in split[:-1]:
        if '-' in token:
            current = '-'.join([k[0] + '.' for k in token.split('-')])
        else:
            current = token[0] + '.'
        initials.append(current)
    initials.append(split[-1].strip())
    if suffix:
        initials.append(f"({suffix})")
    return ' '.join(initials)

## get list of arxiv paper candidates

We use the MPIA mitarbeiter list webpage from mpia.de to get author names
We then get all new papers from Arxiv and match authors

In [3]:
# deal with the author list and edge cases of people that cannot be consistent on their name  

def filter_non_scientists(name: str) -> bool:
    """ Loose filter on expected authorships

    removing IT, administration, technical staff
    :param name: name
    :returns: False if name is not a scientist
    """
    remove_list = ['Licht', 'Binroth', 'Witzel', 'Jordan',
                   'Zähringer', 'Scheerer', 'Hoffmann', 'Düe',
                   'Hellmich', 'Enkler-Scharpegge', 'Witte-Nguy',
                   'Dehen', 'Beckmann', 'Jager', 'Jäger'
                  ]

    for k in remove_list:
        if k in name:
            return False
    return True

def add_author_to_list(author_list: list) -> list:
    """ Add author to list if not already in list
    
    :param author: author name
    :param author_list: list of authors
    :returns: updated list of authors
    """
    add_list = ['T. Henning', 'R. E. Hviding']

    for author in add_list:
        if author not in author_list:
            author_list.append(author)
    return author_list

# get list from MPIA website
# filter for non-scientists (mpia.get_mpia_mitarbeiter_list() does some filtering)
mpia_authors = [k[1] for k in mpia.get_mpia_mitarbeiter_list() if filter_non_scientists(k[1])]
# add some missing author because of inconsistencies in their MPIA name and author name on papers
mpia_authors = add_author_to_list(mpia_authors)

In [4]:
new_papers = get_new_papers()
# add manual references
add_paper_refs = []
new_papers.extend([get_paper_from_identifier(k) for k in add_paper_refs])

def robust_call(fn, value, *args, **kwargs):
    try:
        return fn(value, *args, **kwargs)
    except Exception:
        return value

candidates = []
for paperk in new_papers:
    # Check author list with their initials
    normed_author_list = [robust_call(mpia.get_initials, k) for k in paperk['authors']]
    hl_authors = highlight_authors_in_list(normed_author_list, mpia_authors, verbose=True)
    matches = [(hl, orig) for hl, orig in zip(hl_authors, paperk['authors']) if 'mark' in hl]
    paperk['authors'] = hl_authors
    if matches:
        # only select paper if an author matched our list
        candidates.append(paperk)
print("""Arxiv has {0:,d} new papers today""".format(len(new_papers)))        
print("""          {0:,d} with possible author matches""".format(len(candidates)))

Y. Wang  ->  Y. Wang  |  ['Y. Wang']
F. Xu  ->  F. Xu  |  ['F. Xu']
F. Walter  ->  F. Walter  |  ['F. Walter']
X. Zhang  ->  X. Zhang  |  ['X. Zhang']
S. Kumar  ->  S. Kumar  |  ['S. Kumar']
Y. Wu  ->  Y. Wu  |  ['Y. Wu']
Arxiv has 70 new papers today
          6 with possible author matches


# Parse sources and generate relevant outputs

From the candidates, we do the following steps:
* get their tarball from ArXiv (and extract data)
* find the main .tex file: find one with \documentclass{...} (sometimes it's non trivial)
* Check affiliations with :func:`validation`, which uses :func:`mpia.affiliation_verifications`
* If passing the affiliations: we parse the .tex source
   * inject sub-documents into the main (flatten the main document)
   * parse structure, extract information (title, abstract, authors, figures...)
   * handles `\graphicspath` if provided
* Generate the .md document.

In [5]:
documents = []
failed = []
for paper in tqdm(candidates):
    # debug crap
    paper['identifier'] = paper['identifier'].lower().replace('arxiv:', '').replace(r'\n', '').strip()
    paper_id = paper['identifier']
    
    folder = f'tmp_{paper_id}'

    try:
        if not os.path.isdir(folder):
            folder = retrieve_document_source(f"{paper_id}", f'tmp_{paper_id}')
        
        try:
            doc = latex.LatexDocument(folder, validation=validation)    
        except AffiliationError as affilerror:
            msg = f"ArXiv:{paper_id:s} is not an MPIA paper... " + str(affilerror)
            failed.append((paper, "affiliation error: " + str(affilerror) ))
            continue
        
        # Hack because sometimes author parsing does not work well
        if (len(doc.authors) != len(paper['authors'])):
            doc._authors = paper['authors']
        else:
            # highlight authors (FIXME: doc.highlight_authors)
            # done on arxiv paper already
            doc._authors = highlight_authors_in_list(
                [get_initials(k) for k in doc.authors], 
                mpia_authors, verbose=True)
        if (doc.abstract) in (None, ''):
            doc._abstract = paper['abstract']
            
        doc.comment = (get_markdown_badge(paper_id) + 
                       "<mark>Appeared on: " + paper['date'] + "</mark> - ")
        if paper['comments']:
            doc.comment += " _" + paper['comments'] + "_"
        
        full_md = doc.generate_markdown_text()
        
        full_md += get_markdown_qrcode(paper_id)
        
        # replace citations
        try:
            bibdata = latex_bib.LatexBib.from_doc(doc)
            full_md = latex_bib.replace_citations(full_md, bibdata)
        except Exception as e:
            print("Issues with the citations")
            print(e)
        
        documents.append((paper_id, full_md))
    except Exception as e:
        warnings.warn(latex.LatexWarning(f"{paper_id:s} did not run properly\n" +
                                         str(e)
                                        ))
        failed.append((paper, "latex error " + str(e)))

  0%|          | 0/6 [00:00<?, ?it/s]

Retrieving document from  https://arxiv.org/e-print/2602.11512
extracting tarball to tmp_2602.11512... done.
Retrieving document from  https://arxiv.org/e-print/2602.11617


extracting tarball to tmp_2602.11617...

 done.


list index out of range


Retrieving document from  https://arxiv.org/e-print/2602.11783


extracting tarball to tmp_2602.11783... done.


F. Walter  ->  F. Walter  |  ['F. Walter']


Retrieving document from  https://arxiv.org/e-print/2602.11864
extracting tarball to tmp_2602.11864...

 done.
Retrieving document from  https://arxiv.org/e-print/2602.11936


extracting tarball to tmp_2602.11936...

 done.
Retrieving document from  https://arxiv.org/e-print/2602.12051


extracting tarball to tmp_2602.12051...

 done.


### Export the logs

Throughout, we also keep track of the logs per paper. see `logs-{today date}.md` 

In [6]:
import datetime
today = str(datetime.date.today())
logfile = f"_build/html/logs/log-{today}.md"


with open(logfile, 'w') as logs:
    # Success
    logs.write(f'# Arxiv on Deck 2: Logs - {today}\n\n')
    logs.write("""* Arxiv had {0:,d} new papers\n""".format(len(new_papers)))
    logs.write("""    * {0:,d} with possible author matches\n\n""".format(len(candidates)))
    logs.write("## Sucessful papers\n\n")
    display(Markdown("## Successful papers"))
    success = [k[0] for k in documents]
    for candid in candidates:
        if candid['identifier'].split(':')[-1] in success:
            display(candid)
            logs.write(candid.generate_markdown_text() + '\n\n')

    ## failed
    logs.write("## Failed papers\n\n")
    display(Markdown("## Failed papers"))
    failed = sorted(failed, key=lambda x: x[1])
    current_reason = ""
    for paper, reason in failed:
        if 'affiliation' in reason:
            color = 'green'
        else:
            color = 'red'
        data = Markdown(
                paper.generate_markdown_text() + 
                f'\n|<p style="color:{color:s}"> **ERROR** </p>| <p style="color:{color:s}">{reason:s}</p> |'
               )
        if reason != current_reason:
            logs.write(f'### {reason:s} \n\n')
            current_reason = reason
        logs.write(data.data + '\n\n')
        
        # only display here the important errors (all in logs)
        # if color in ('red',):
        display(data)

## Successful papers


|||
|---:|:---|
| [![arXiv](https://img.shields.io/badge/arXiv-2602.11783-b31b1b.svg)](https://arxiv.org/abs/2602.11783) | **Characterising Ly$α$ damping wings at the onset of reionisation: Evidence for highly efficient star formation driven by dense, neutral gas in UV-bright galaxies at $z>9$**  |
|| C. L. Pollock, et al. -- incl., <mark>F. Walter</mark> |
|*Appeared on*| *2026-02-13*|
|*Comments*| *Submitted to A&A*|
|**Abstract**|            One of the major conundrums in contemporary extragalactic astrophysics is the apparent overabundance of a remarkable population of UV-bright galaxies at redshifts $z\gtrsim 9$. We analyse galaxies spectroscopically observed by JWST/NIRSpec Prism and confirmed to lie at $z>9$, with sufficient signal-to-noise to carefully model their rest-frame UV to optical continua and line emission. In particular, we model the damped Lyman-$\alpha$ (Ly$\alpha$) absorption (DLA) features of each galaxy to place observational constraints on the gas assembly of neutral atomic hydrogen (HI) onto the galaxy halos at the onset of cosmic reionisation. Based on the derived HI column densities and star-formation rate (SFR) surface densities, we show that all galaxies are highly efficient at forming stars on rapid $\sim 10-100\,$Myr depletion timescales, greatly in excess compared to the canonical local universe Kennicutt-Schmidt relation and predictions from state-of-the-art galaxy formation simulations. The dense HI gas appears to also drive the offset from the fundamental-metallicity relation of these galaxies though its dust-to-gas ratio is seemingly consistent with values derived for local galaxies except for the lowest metallicity sight-lines. Our results provide the first robust observational constraints on the impact of pristine HI gas on early galaxy assembly, and imply that a combination of highly efficient star formation and low dust obscuration can likely explain the UV-brightness of galaxies at cosmic dawn.         |

## Failed papers


|||
|---:|:---|
| [![arXiv](https://img.shields.io/badge/arXiv-2602.11512-b31b1b.svg)](https://arxiv.org/abs/2602.11512) | **Stone Skipping Black Holes in Ultralight Dark Matter Solitons**  |
|| A. Zhang, <mark>Y. Wang</mark>, J. L. Zagorac, R. Easther |
|*Appeared on*| *2026-02-13*|
|*Comments*| *19 pages, 10 figures*|
|**Abstract**|            The orbit of a black hole moving within an ultralight dark matter (ULDM) soliton is naively expected to decay due to dynamical friction. However, single black holes can undergo ``stone skipping'', with their orbital radius varying quasi-periodically. We show that stone skipping is induced by the dipole excitation of the soliton. We model it as resonance in a forced, damped harmonic oscillator, demonstrating that the coherent response of the soliton can significantly modify the dynamics of objects orbiting within it. This suggests that a dipole perturbation of a soliton can modify inspiral timescales if the black holes masses are significantly less than the soliton mass, with implications for supermassive black hole dynamics, the final parsec problem and gravitational wave observations in a ULDM cosmology.         |
|<p style="color:green"> **ERROR** </p>| <p style="color:green">affiliation error: mpia.affiliation_verifications: 'Heidelberg' keyword not found.</p> |


|||
|---:|:---|
| [![arXiv](https://img.shields.io/badge/arXiv-2602.11864-b31b1b.svg)](https://arxiv.org/abs/2602.11864) | **Selecting Optimal Stellar Calibration Fields for the CSST Imaging Survey**  |
|| C. Ling, et al. -- incl., <mark>X. Zhang</mark> |
|*Appeared on*| *2026-02-13*|
|*Comments*| *20 pages, 7 figures, submitted to RAA*|
|**Abstract**|            The Chinese Space Station Survey Telescope (CSST) will perform a decade-long high-precision wide-field imaging survey that relies on rigorous on-orbit calibration. This necessitates stable celestial benchmark fields to maintain photometric and astrometric consistency throughout the mission lifetime. We establish comprehensive selection criteria including observational visibility, stellar number density, bright-star contamination, and interstellar dust extinction. Using the CSST Observation Strategy Analysis Tool (COSAT) and all-sky dust maps from Planck and SFD, we constrain eligible regions to the ranges of ecliptic latitude $ |\beta| > 50^\circ$ and galactic latitude $|b| > 15^\circ$. From an initial sample of 29 candidate clusters meeting these spatial constraints, six globular clusters (M13, M92, NGC 104, NGC 362, NGC 1261, and NGC 1851) are identified as optimal calibration fields, fulfilling all the critical criteria. These selected clusters are recommended as optimal calibration field candidates for CSST's on-orbit calibration program, and are fundamental to achieving unprecedented photometric precision in CSST's space-based survey.         |
|<p style="color:green"> **ERROR** </p>| <p style="color:green">affiliation error: mpia.affiliation_verifications: 'Heidelberg' keyword not found.</p> |


|||
|---:|:---|
| [![arXiv](https://img.shields.io/badge/arXiv-2602.11936-b31b1b.svg)](https://arxiv.org/abs/2602.11936) | **Probing Dynamical Dark Energy with Late-Time Data: Evidence, Tensions, and the Limits of the $w_0w_a$CDM Framework**  |
|| T. Xu, et al. -- incl., <mark>S. Kumar</mark> |
|*Appeared on*| *2026-02-13*|
|*Comments*| *18 pages, 9 figures*|
|**Abstract**|            We test the dynamical dark-energy $w_0w_a$CDM (CPL) framework against $\Lambda$CDM using CMB anisotropies and lensing together with late-time distance probes: DESI DR2 BAO, the completed SDSS-IV BAO consensus compilation, a transverse/angular BAO compilation (BAOtr), and the Cepheid-calibrated PantheonPlus SN~Ia likelihood (PP\&SH0ES). We find that CPL inferences are strongly dataset-dependent. With CMB data alone, the broad geometric degeneracy in $(H_0,\Omega_{\rm m},w_0,w_a)$ admits an extrapolation tail that can extend to $q_0<-1$ (super-acceleration), whereas adding DESI DR2 BAO pulls the reconstruction toward a weakly accelerating or nearly coasting present-day Universe ($q_0\simeq 0$). In contrast, combining CMB with PP\&SH0ES and BAOtr yields a conventional moderately accelerating expansion ($-1<q_0\lesssim 0$) and substantially reduces the Hubble tension. Across all combinations, $w(z\to\infty)=w_0+w_a<-1$, while at post-recombination redshifts the expansion remains matter dominated ($q\to1/2$). The origin of this behavior can be traced to low-redshift distance information: BAOtr and DESI prefer different BAO distance ratios at $z\lesssim 0.5$, which propagates into divergent expansion histories in CPL. In all cases, $r_{\rm d}$ stays nearly unchanged, indicating that shifts in $H_0$ arise from late-time expansion freedom rather than early-Universe physics. Bayesian evidence mirrors this contingency: it is strong for CPL mainly when PP\&SH0ES and/or BAOtr are included, while it is inconclusive for CMB-only and CMB+DESI and moderately favors $\Lambda$CDM for CMB+SDSS. Overall, our results show that the apparent support for CPL and its ability to ease the Hubble tension are not universal but depend sensitively on the adopted low-redshift distance data, motivating either more flexible late-time models or closer scrutiny of residual systematics in current BAO determinations.         |
|<p style="color:green"> **ERROR** </p>| <p style="color:green">affiliation error: mpia.affiliation_verifications: 'Heidelberg' keyword not found.</p> |


|||
|---:|:---|
| [![arXiv](https://img.shields.io/badge/arXiv-2602.12051-b31b1b.svg)](https://arxiv.org/abs/2602.12051) | **The Interstellar Scintillation of the Radio-Loud Magnetar XTE J1810-197**  |
|| R. Wang, et al. -- incl., <mark>Y. Wu</mark> |
|*Appeared on*| *2026-02-13*|
|*Comments*| *7 pages, 5 figures, accepted by Chinese Physics Letters*|
|**Abstract**|            We present a comprehensive interstellar scintillation (ISS) study of the radio-loud magnetar XTE~J1810$-$197, based on six years of multi-frequency monitoring (2018$-$2024) with the Shanghai Tian Ma Radio Telescope (TMRT) at 7.0, 8.6, and 14.0~GHz. The scintillation parameters--decorrelation bandwidth $\Delta\nu_{\rm d}$, decorrelation time $\Delta\tau_{\rm d}$, and drift rate $dt/d\nu$--are fully characterized. Our measured $\Delta\tau_{\rm d}$ implies $\Delta\tau_{\rm d} < 4$~s at 575-725~MHz under a Kolmogorov spectrum, which is shorter than the magnetar's 5.54~s spin period. This result naturally explains the previously reported absence of pulse-to-pulse coherence at these frequencies. Kinematic modeling locates the dominant scattering screen at $1.6\pm0.1$~kpc away from the Earth, within the Sagittarius Arm. The screen coincides with the HII region JCMTSE~J180921.2$-$201932 and is unrelated to the magnetar's 2018 outburst suggested by earlier studies. A scintillation arc detected at 14.0~GHz represents the highest-frequency arc observed to date. The asymmetry of arcs is linearly correlated with a dispersion-measure gradient across the screen ($r = 0.959$, $p < 10^{-8}$). We also measure its refractive scintillation timescale, which is only $1.21\pm0.19$~d. Clear DISS at 14~GHz effectively resolves the debate over a possible strong-to-weak scattering transition at this frequency. These results extend the ISS characterization of magnetars to previously unexplored frequencies and provide a precise probe of the ionized interstellar medium in the Sagittarius Arm.         |
|<p style="color:green"> **ERROR** </p>| <p style="color:green">affiliation error: mpia.affiliation_verifications: 'Heidelberg' keyword not found.</p> |


|||
|---:|:---|
| [![arXiv](https://img.shields.io/badge/arXiv-2602.11617-b31b1b.svg)](https://arxiv.org/abs/2602.11617) | **The ALMA-QUARKS Survey: Discovery of Dusty Fibrils inside Massive Star-forming Clumps**  |
|| Y.-K. Zhang, et al. -- incl., <mark>F. Xu</mark> |
|*Appeared on*| *2026-02-13*|
|*Comments*| *Submitted to ApJL*|
|**Abstract**|            We report the discovery of more than 323 superfine dusty filamentary structures (fibrils) inside 121 massive star forming clumps that are located in widely different Galactic environments (Galactocentric distances of $\sim$0.5-12.7 kpc). These fibrils are identified from the 1.3~mm continuum emission in the ALMA-QUARKS survey, which has a linear resolution of $\sim900$ AU for a source at $\sim$3 kpc, using the \textit{FilFinder} software. Using \textit{RadFil} software, we find that the typical width of these fibrils is $\sim$0.01 pc, which is about ten times narrower than that of dusty filaments in nearby clouds identified by the \textit{Herschel} Space Observatory. The mass ($M$) versus length ($L$) relation for these fibrils follows $M\propto L^{2}$, similar to that of Galactic filaments identified in space (e.g., \textit{Herschel}) and ground-based single-dish (e.g., \textit{APEX}) surveys. However, these fibrils are significantly denser ($\mathrm{N_{H_2} = 10^{23}-10^{24}\ cm^{-2}}$) than the filaments found in previous \textit{Herschel} surveys ($\mathrm{N_{H_2} = 10^{20}-10^{23}\ cm^{-2}}$). This work contributes a large sample of superfine fibrils in massive clumps, following the identification of large 0.1-pc wide filaments and associated internal velocity coherent fibers in nearby molecular clouds, further emphasizing the crucial role played by filamentary structures in star formation at various physical scales.         |
|<p style="color:red"> **ERROR** </p>| <p style="color:red">latex error list index out of range</p> |

## Export documents

We now write the .md files and export relevant images

In [7]:
def export_markdown_summary(md: str, md_fname:str, directory: str):
    """Export MD document and associated relevant images"""
    import os
    import shutil
    import re

    if (os.path.exists(directory) and not os.path.isdir(directory)):
        raise RuntimeError(f"a non-directory file exists with name {directory:s}")

    if (not os.path.exists(directory)):
        print(f"creating directory {directory:s}")
        os.mkdir(directory)

    fig_fnames = (re.compile(r'\[Fig.*\]\((.*)\)').findall(md) + 
                  re.compile(r'\<img src="([^>\s]*)"[^>]*/>').findall(md))
    print("found figures", fig_fnames)
    for fname in fig_fnames:
        if 'http' in fname:
            # No need to copy online figures
            continue
        if not os.path.exists(fname):
            print("file not found", fname)
            continue
        print("copying ", fname, "to", directory)
        destdir = os.path.join(directory, os.path.dirname(fname))
        destfname = os.path.join(destdir, os.path.basename(fname))
        try:
            os.makedirs(destdir)
        except FileExistsError:
            pass
        shutil.copy(fname, destfname)
    with open(os.path.join(directory, md_fname), 'w') as fout:
        fout.write(md)
    print("exported in ", os.path.join(directory, md_fname))
    [print("    + " + os.path.join(directory,fk)) for fk in fig_fnames]

In [8]:
for paper_id, md in documents:
    export_markdown_summary(md, f"{paper_id:s}.md", '_build/html/')

found figures ['tmp_2602.11783/./Figs/Example_and_corner.png', 'tmp_2602.11783/./Figs/NHI_MUV_sims.png', 'tmp_2602.11783/./Figs/NHI_beta.png']
copying  tmp_2602.11783/./Figs/Example_and_corner.png to _build/html/
copying  tmp_2602.11783/./Figs/NHI_MUV_sims.png to _build/html/
copying  tmp_2602.11783/./Figs/NHI_beta.png to _build/html/
exported in  _build/html/2602.11783.md
    + _build/html/tmp_2602.11783/./Figs/Example_and_corner.png
    + _build/html/tmp_2602.11783/./Figs/NHI_MUV_sims.png
    + _build/html/tmp_2602.11783/./Figs/NHI_beta.png


## Display the papers

Not necessary but allows for a quick check.

In [9]:
[display(Markdown(k[1])) for k in documents];

<div class="macros" style="visibility:hidden;">
$\newcommand{\ensuremath}{}$
$\newcommand{\xspace}{}$
$\newcommand{\object}[1]{\texttt{#1}}$
$\newcommand{\farcs}{{.}''}$
$\newcommand{\farcm}{{.}'}$
$\newcommand{\arcsec}{''}$
$\newcommand{\arcmin}{'}$
$\newcommand{\ion}[2]{#1#2}$
$\newcommand{\textsc}[1]{\textrm{#1}}$
$\newcommand{\hl}[1]{\textrm{#1}}$
$\newcommand{\footnote}[1]{}$
$\newcommand{\hi}{H {\textsc{i}}}$
$\newcommand{\hii}{H {\textsc{ii}}}$
$\newcommand{\nii}{N {\textsc{ii}}}$
$\newcommand{\oiii}{O {\textsc{iii}}}$
$\newcommand{\oii}{O {\textsc{ii}}}$
$\newcommand{\cii}{C {\textsc{ii}}}$
$\newcommand{\neiii}{Ne {\textsc{iii}}}$
$\newcommand{\niv}{N {\textsc{iv}}}$
$\newcommand{\civ}{C {\textsc{iv}}}$
$\newcommand{\heii}{He {\textsc{ii}}}$
$\newcommand{\niii}{N {\textsc{iii}}}$
$\newcommand{\ciii}{C {\textsc{iii}}}$
$\newcommand{\lya}{Ly\alpha}$
$\newcommand{\jwst}{{\em JWST}}$
$\newcommand{\alma}{{\em ALMA}}$</div>



<div id="title">

# Characterising Ly$\alpha$ damping wings at the onset of reionisation: Evidence for highly efficient star formation driven by dense, neutral gas in UV-bright galaxies at $z>9$

</div>
<div id="comments">

[![arXiv](https://img.shields.io/badge/arXiv-2602.11783-b31b1b.svg)](https://arxiv.org/abs/2602.11783)<mark>Appeared on: 2026-02-13</mark> -  _Submitted to A&A_

</div>
<div id="authors">

C. L. Pollock, et al. -- incl., <mark>F. Walter</mark>

</div>
<div id="abstract">

**Abstract:** One of the major conundrums in contemporary extragalactic astrophysics is the apparent overabundance of a remarkable population of UV-bright galaxies at redshifts $z\gtrsim 9$ .  We analyse  galaxies spectroscopically observed by $\jwst$ /NIRSpec Prism and confirmed to lie at $z>9$ , with sufficient signal-to-noise to carefully model their rest-frame UV to optical continua and line emission. In particular, we model the damped Lyman- $\alpha$ (Ly $\alpha$ ) absorption (DLA) features of each galaxy to place observational constraints on the gas assembly of neutral atomic hydrogen ( $\hi$ ) onto the galaxy halos at the onset of cosmic reionisation.  Based on the derived $\hi$ column densities and star-formation rate (SFR) surface densities, we show that all galaxies are highly efficient at forming stars on rapid $\sim 10-100 $ Myr depletion timescales, greatly in excess compared to the canonical local universe Kennicutt-Schmidt relation and predictions from state-of-the-art galaxy formation simulations. The dense $\hi$ gas appears to also drive the offset from the fundamental-metallicity relation of these galaxies though its dust-to-gas ratio is seemingly consistent with values derived for local galaxies except for the lowest metallicity sight-lines.  Our results provide the first robust observational constraints on the impact of pristine $\hi$ gas on early galaxy assembly, and imply that a combination of highly efficient star formation and low dust obscuration can likely explain the UV-brightness of galaxies at cosmic dawn.

</div>

<div id="div_fig1">

<img src="tmp_2602.11783/./Figs/Example_and_corner.png" alt="Fig5" width="100%"/>

**Figure 5. -** _Left:_ Example of UV emission line and DLA fitting for JADES-GS-z13-0 (DJA ID 3215\_20128771) at $z=12.85$. The NIRSpec/Prism spectrum and associated error are shown in green. The marked UV emission lines were modelled and then superimposed on the intrinsic spectrum before modelling the DLA (solid black line). In the inset, we show a zoom on the Ly$\alpha$ region, with the DLA+IGM model as a solid line, and with 100\% neutral IGM only ($\mathrm{x_{\rm HI}}=1.0$) as a dashed line. _Right:_ Corner plot of the posterior distributions for the DLA+IGM model, with median, 16th and 84th percentiles marked. (*fig:DLAex*)

</div>
<div id="div_fig2">

<img src="tmp_2602.11783/./Figs/NHI_MUV_sims.png" alt="Fig3" width="100%"/>

**Figure 3. -** Measured UV magnitude and column density. Halo masses for our objects are derived from UV magnitudes \citep[][no dust model]{Mason23}. The well-constrained column densities of our sample are consistent with results from _SERRA_ simulations  ([Gelli, et. al 2025](https://ui.adsabs.harvard.edu/abs/2025arXiv251001315G)) . The grey contours represent the average column density of 100 galaxies over random sight-lines, with the 1$\sigma$ error region also shown, representing the scatter across sightlines in simulations. (*fig:Mhalo*)

</div>
<div id="div_fig3">

<img src="tmp_2602.11783/./Figs/NHI_beta.png" alt="Fig1" width="100%"/>

**Figure 1. -** Measured $\beta_{UV}$ slope and column density $\mathrm{log}(N_{\rm HI}/ \rm cm^{-2})$ for the galaxy sample. The green squares and triangles again represent the well-constrained and upper limits for column density respectively, with the grey shaded region representing possible high nebular continuum where two-photon emission may be masquerading as a DLA  ([Katz, et. al 2025](https://ui.adsabs.harvard.edu/abs/2025OJAp....8E.104K)) . (*fig:beta_NHI*)

</div><div id="qrcode"><img src=https://api.qrserver.com/v1/create-qr-code/?size=100x100&data="https://arxiv.org/abs/2602.11783"></div>

# Create HTML index

In [10]:
from datetime import datetime, timedelta, timezone
from glob import glob
import os

files = glob('_build/html/*.md')
days = 7
now = datetime.today()
res = []
for fk in files:
    stat_result = os.stat(fk).st_ctime
    modified = datetime.fromtimestamp(stat_result, tz=timezone.utc).replace(tzinfo=None)
    delta = now.today() - modified
    if delta <= timedelta(days=days):
        res.append((delta.seconds, fk))
res = [k[1] for k in reversed(sorted(res, key=lambda x:x[1]))]
npub = len(res)
print(len(res), f" publications files modified in the last {days:d} days.")
# [ print('\t', k) for k in res ];

123  publications files modified in the last 7 days.


In [11]:
import datetime
from glob import glob

def get_last_n_days(lst, days=1):
    """ Get the documents from the last n days """
    sorted_lst = sorted(lst, key=lambda x: x[1], reverse=True)
    for fname, date in sorted_lst:
        if date >= str(datetime.date.today() - datetime.timedelta(days=days)):
            yield fname

def extract_appearance_dates(lst_file):
    dates = []

    def get_date(line):
        return line\
            .split('Appeared on:')[-1]\
            .split('</mark>')[0].strip()

    for fname in lst:
        with open(fname, 'r') as f:
            found_date = False
            for line in f:
                if not found_date:
                    if "Appeared on" in line:
                        found_date = True
                        dates.append((fname, get_date(line)))
                else:
                    break
    return dates

from glob import glob
lst = glob('_build/html/*md')
days = 7
dates = extract_appearance_dates(lst)
res = list(get_last_n_days(dates, days))
npub = len(res)
print(len(res), f" publications in the last {days:d} days.")

12  publications in the last 7 days.


In [12]:
def create_carousel(npub=4):
    """ Generate the HTML code for a carousel with `npub` slides """
    carousel = ["""  <div class="carousel" """,
                """       data-flickity='{ "autoPlay": 10000, "adaptiveHeight": true, "resize": true, "wrapAround": true, "pauseAutoPlayOnHover": true, "groupCells": 1 }' id="asyncTypeset">"""
                ]
    
    item_str = """    <div class="carousel-cell"> <div id="slide{k}" class="md_view">Content {k}</div> </div>"""
    for k in range(1, npub + 1):
        carousel.append(item_str.format(k=k))
    carousel.append("  </div>")
    return '\n'.join(carousel)

def create_grid(npub=4):
    """ Generate the HTML code for a flat grid with `npub` slides """
    grid = ["""  <div class="grid"> """,
                ]
    
    item_str = """    <div class="grid-item"> <div id="slide{k}" class="md_view">Content {k}</div> </div>"""
    for k in range(1, npub + 1):
        grid.append(item_str.format(k=k))
    grid.append("  </div>")
    return '\n'.join(grid)

In [13]:
carousel = create_carousel(npub)
docs = ', '.join(['"{0:s}"'.format(k.split('/')[-1]) for k in res])
slides = ', '.join([f'"slide{k}"' for k in range(1, npub + 1)])

with open("daily_template.html", "r") as tpl:
    page = tpl.read()
    page = page.replace("{%-- carousel:s --%}", carousel)\
               .replace("{%-- suptitle:s --%}",  "7-day archives" )\
               .replace("{%-- docs:s --%}", docs)\
               .replace("{%-- slides:s --%}", slides)
    
with open("_build/html/index_7days.html", 'w') as fout:
    fout.write(page)

In [14]:
# redo for today
days = 1
res = list(get_last_n_days(dates, days))
npub = len(res)
print(len(res), f" publications in the last day.")

carousel = create_carousel(npub)
docs = ', '.join(['"{0:s}"'.format(k.split('/')[-1]) for k in res])
slides = ', '.join([f'"slide{k}"' for k in range(1, npub + 1)])

with open("daily_template.html", "r") as tpl:
    page = tpl.read()
    page = page.replace("{%-- carousel:s --%}", carousel)\
               .replace("{%-- suptitle:s --%}",  "Daily" )\
               .replace("{%-- docs:s --%}", docs)\
               .replace("{%-- slides:s --%}", slides)
    
# print(carousel, docs, slides)
# print(page)
with open("_build/html/index_daily.html", 'w') as fout:
    fout.write(page)

5  publications in the last day.


In [15]:
# Create the flat grid of the last N papers (fixed number regardless of dates)
from itertools import islice 

npub = 6
res = [k[0] for k in (islice(reversed(sorted(dates, key=lambda x: x[1])), 6))]
print(len(res), f" {npub} publications selected.")

grid = create_grid(npub)
docs = ', '.join(['"{0:s}"'.format(k.split('/')[-1]) for k in res])
slides = ', '.join([f'"slide{k}"' for k in range(1, npub + 1)])

with open("grid_template.html", "r") as tpl:
    page = tpl.read()
    page = page.replace("{%-- grid-content:s --%}", grid)\
               .replace("{%-- suptitle:s --%}",  f"Last {npub:,d} papers" )\
               .replace("{%-- docs:s --%}", docs)\
               .replace("{%-- slides:s --%}", slides)
    
# print(grid, docs, slides)
# print(page)
with open("_build/html/index_npub_grid.html", 'w') as fout:
    fout.write(page)

6  6 publications selected.
