# MPIA Arxiv on Deck 2

Contains the steps to produce the paper extractions.

In [1]:
# Imports
import os
from IPython.display import Markdown, display
from tqdm.notebook import tqdm
import warnings
from PIL import Image 
import re

# requires arxiv_on_deck_2

from arxiv_on_deck_2.arxiv2 import (get_new_papers, 
                                    get_paper_from_identifier,
                                    retrieve_document_source, 
                                    get_markdown_badge)
from arxiv_on_deck_2 import (latex,
                             latex_bib,
                             mpia,
                             highlight_authors_in_list)

# Sometimes images are really big
Image.MAX_IMAGE_PIXELS = 1000000000 

In [2]:
# Some useful definitions.

class AffiliationWarning(UserWarning):
    pass

class AffiliationError(RuntimeError):
    pass

def validation(source: str):
    """Raises error paper during parsing of source file
    
    Allows checks before parsing TeX code.
    
    Raises AffiliationWarning
    """
    check = mpia.affiliation_verifications(source, verbose=True)
    if check is not True:
        raise AffiliationError("mpia.affiliation_verifications: " + check)

        
warnings.simplefilter('always', AffiliationWarning)


def get_markdown_qrcode(paper_id: str):
    """ Generate a qrcode to the arxiv page using qrserver.com
    
    :param paper: Arxiv paper
    :returns: markdown text
    """
    url = r"https://api.qrserver.com/v1/create-qr-code/?size=100x100&data="
    txt = f"""<img src={url}"https://arxiv.org/abs/{paper_id}">"""
    txt = '<div id="qrcode">' + txt + '</div>'
    return txt


def clean_non_western_encoded_characters_commands(text: str) -> str:
    """ Remove non-western encoded characters from a string
    List may need to grow.
    
    :param text: the text to clean
    :return: the cleaned text
    """
    text = re.sub(r"(\\begin{CJK}{UTF8}{gbsn})(.*?)(\\end{CJK})", r"\2", text)
    return text


def get_initials(name: str) -> str:
    """ Get the short name, e.g., A.-B. FamName
    :param name: full name
    :returns: initials
    """
    initials = []
    # account for non western names often in ()
    if '(' in name:
        name = clean_non_western_encoded_characters_commands(name)
        suffix = re.findall(r"\((.*?)\)", name)[0]
        name = name.replace(f"({suffix})", '')
    else:
        suffix = ''
    split = name.split()
    for token in split[:-1]:
        if '-' in token:
            current = '-'.join([k[0] + '.' for k in token.split('-')])
        else:
            current = token[0] + '.'
        initials.append(current)
    initials.append(split[-1].strip())
    if suffix:
        initials.append(f"({suffix})")
    return ' '.join(initials)

## get list of arxiv paper candidates

We use the MPIA mitarbeiter list webpage from mpia.de to get author names
We then get all new papers from Arxiv and match authors

In [3]:
# deal with the author list and edge cases of people that cannot be consistent on their name  

def filter_non_scientists(name: str) -> bool:
    """ Loose filter on expected authorships

    removing IT, administration, technical staff
    :param name: name
    :returns: False if name is not a scientist
    """
    remove_list = ['Licht', 'Binroth', 'Witzel', 'Jordan',
                   'Zähringer', 'Scheerer', 'Hoffmann', 'Düe',
                   'Hellmich', 'Enkler-Scharpegge', 'Witte-Nguy',
                   'Dehen', 'Beckmann', 'Jager', 'Jäger'
                  ]

    for k in remove_list:
        if k in name:
            return False
    return True

def add_author_to_list(author_list: list) -> list:
    """ Add author to list if not already in list
    
    :param author: author name
    :param author_list: list of authors
    :returns: updated list of authors
    """
    add_list = ['T. Henning', 'R. E. Hviding']

    for author in add_list:
        if author not in author_list:
            author_list.append(author)
    return author_list

# get list from MPIA website
# filter for non-scientists (mpia.get_mpia_mitarbeiter_list() does some filtering)
mpia_authors = [k[1] for k in mpia.get_mpia_mitarbeiter_list() if filter_non_scientists(k[1])]
# add some missing author because of inconsistencies in their MPIA name and author name on papers
mpia_authors = add_author_to_list(mpia_authors)

In [4]:
new_papers = get_new_papers()
# add manual references
add_paper_refs = []
new_papers.extend([get_paper_from_identifier(k) for k in add_paper_refs])

def robust_call(fn, value, *args, **kwargs):
    try:
        return fn(value, *args, **kwargs)
    except Exception:
        return value

candidates = []
for paperk in new_papers:
    # Check author list with their initials
    normed_author_list = [robust_call(mpia.get_initials, k) for k in paperk['authors']]
    hl_authors = highlight_authors_in_list(normed_author_list, mpia_authors, verbose=True)
    matches = [(hl, orig) for hl, orig in zip(hl_authors, paperk['authors']) if 'mark' in hl]
    paperk['authors'] = hl_authors
    if matches:
        # only select paper if an author matched our list
        candidates.append(paperk)
print("""Arxiv has {0:,d} new papers today""".format(len(new_papers)))        
print("""          {0:,d} with possible author matches""".format(len(candidates)))

J. Neumann  ->  J. Neumann  |  ['J. Neumann']
A. d. Graaff  ->  A. D. Graaff  |  ['A. D. Graaff']
A. Pillepich  ->  A. Pillepich  |  ['A. Pillepich']
S. Kumar  ->  S. Kumar  |  ['S. Kumar']
A. Pillepich  ->  A. Pillepich  |  ['A. Pillepich']
J. Li  ->  J. Li  |  ['J. Li']
J. Li  ->  J. Li  |  ['J. Li']
H.-W. Rix  ->  H.-W. Rix  |  ['H.-W. Rix']
S. Souza  ->  S. Souza  |  ['S. Souza']
W. Brandner  ->  W. Brandner  |  ['W. Brandner']
Arxiv has 71 new papers today
          7 with possible author matches


# Parse sources and generate relevant outputs

From the candidates, we do the following steps:
* get their tarball from ArXiv (and extract data)
* find the main .tex file: find one with \documentclass{...} (sometimes it's non trivial)
* Check affiliations with :func:`validation`, which uses :func:`mpia.affiliation_verifications`
* If passing the affiliations: we parse the .tex source
   * inject sub-documents into the main (flatten the main document)
   * parse structure, extract information (title, abstract, authors, figures...)
   * handles `\graphicspath` if provided
* Generate the .md document.

In [5]:
documents = []
failed = []
for paper in tqdm(candidates):
    # debug crap
    paper['identifier'] = paper['identifier'].lower().replace('arxiv:', '').replace(r'\n', '').strip()
    paper_id = paper['identifier']
    
    folder = f'tmp_{paper_id}'

    try:
        if not os.path.isdir(folder):
            folder = retrieve_document_source(f"{paper_id}", f'tmp_{paper_id}')
        
        try:
            doc = latex.LatexDocument(folder, validation=validation)    
        except AffiliationError as affilerror:
            msg = f"ArXiv:{paper_id:s} is not an MPIA paper... " + str(affilerror)
            failed.append((paper, "affiliation error: " + str(affilerror) ))
            continue
        
        # Hack because sometimes author parsing does not work well
        if (len(doc.authors) != len(paper['authors'])):
            doc._authors = paper['authors']
        else:
            # highlight authors (FIXME: doc.highlight_authors)
            # done on arxiv paper already
            doc._authors = highlight_authors_in_list(
                [get_initials(k) for k in doc.authors], 
                mpia_authors, verbose=True)
        if (doc.abstract) in (None, ''):
            doc._abstract = paper['abstract']
            
        doc.comment = (get_markdown_badge(paper_id) + 
                       "<mark>Appeared on: " + paper['date'] + "</mark> - ")
        if paper['comments']:
            doc.comment += " _" + paper['comments'] + "_"
        
        full_md = doc.generate_markdown_text()
        
        full_md += get_markdown_qrcode(paper_id)
        
        # replace citations
        try:
            bibdata = latex_bib.LatexBib.from_doc(doc)
            full_md = latex_bib.replace_citations(full_md, bibdata)
        except Exception as e:
            print("Issues with the citations")
            print(e)
        
        documents.append((paper_id, full_md))
    except Exception as e:
        warnings.warn(latex.LatexWarning(f"{paper_id:s} did not run properly\n" +
                                         str(e)
                                        ))
        failed.append((paper, "latex error " + str(e)))

  0%|          | 0/7 [00:00<?, ?it/s]

Retrieving document from  https://arxiv.org/e-print/2601.18871
extracting tarball to tmp_2601.18871... done.


Issues with the citations
plugin pybtex.database.input.suffixes for suffix .tex not found
Retrieving document from  https://arxiv.org/e-print/2601.18890
extracting tarball to tmp_2601.18890...

 done.




Retrieving document from  https://arxiv.org/e-print/2601.19018


extracting tarball to tmp_2601.19018... done.
Retrieving document from  https://arxiv.org/e-print/2601.19148
extracting tarball to tmp_2601.19148... done.


Issues with the citations
repeated bibliography entry: Snyder_2015
Retrieving document from  https://arxiv.org/e-print/2601.19344
extracting tarball to tmp_2601.19344...

 done.
Retrieving document from  https://arxiv.org/e-print/2601.19522


extracting tarball to tmp_2601.19522... done.


J. Li  ->  J. Li  |  ['J. Li']
H.-W. Rix  ->  H.-W. Rix  |  ['H.-W. Rix']


Issues with the citations
plugin pybtex.database.input.suffixes for suffix .tex not found
Retrieving document from  https://arxiv.org/e-print/2601.19531
extracting tarball to tmp_2601.19531... done.


W. Brandner  ->  W. Brandner  |  ['W. Brandner']


### Export the logs

Throughout, we also keep track of the logs per paper. see `logs-{today date}.md` 

In [6]:
import datetime
today = str(datetime.date.today())
logfile = f"_build/html/logs/log-{today}.md"


with open(logfile, 'w') as logs:
    # Success
    logs.write(f'# Arxiv on Deck 2: Logs - {today}\n\n')
    logs.write("""* Arxiv had {0:,d} new papers\n""".format(len(new_papers)))
    logs.write("""    * {0:,d} with possible author matches\n\n""".format(len(candidates)))
    logs.write("## Sucessful papers\n\n")
    display(Markdown("## Successful papers"))
    success = [k[0] for k in documents]
    for candid in candidates:
        if candid['identifier'].split(':')[-1] in success:
            display(candid)
            logs.write(candid.generate_markdown_text() + '\n\n')

    ## failed
    logs.write("## Failed papers\n\n")
    display(Markdown("## Failed papers"))
    failed = sorted(failed, key=lambda x: x[1])
    current_reason = ""
    for paper, reason in failed:
        if 'affiliation' in reason:
            color = 'green'
        else:
            color = 'red'
        data = Markdown(
                paper.generate_markdown_text() + 
                f'\n|<p style="color:{color:s}"> **ERROR** </p>| <p style="color:{color:s}">{reason:s}</p> |'
               )
        if reason != current_reason:
            logs.write(f'### {reason:s} \n\n')
            current_reason = reason
        logs.write(data.data + '\n\n')
        
        # only display here the important errors (all in logs)
        # if color in ('red',):
        display(data)

## Successful papers


|||
|---:|:---|
| [![arXiv](https://img.shields.io/badge/arXiv-2601.18871-b31b1b.svg)](https://arxiv.org/abs/2601.18871) | **A nuclear disc at Cosmic Noon: evidence of early bar-driven galaxy evolution**  |
|| Z. A. L. Conte, et al. -- incl., <mark>J. Neumann</mark> |
|*Appeared on*| *2026-01-28*|
|*Comments*| *6 pages, 3 figures, submitted to MNRAS Letters*|
|**Abstract**|            Recent studies have revealed that bars can form as early as a few billion years after the Big Bang, already displaying similar characteristics of evolved bars in the Local Universe. Bars redistribute angular momentum across the galaxy, regulating star formation, AGN activity, and building new stellar structures such as nuclear discs. However, the effects of bar-driven evolution on young galaxies are not yet known, as no evidence of bar-built stellar structures has ever been found beyond $z = 1$, until now. In this work, we show evidence of a bar-built, star-forming nuclear disc, already present at redshift $z = 1.5$. This is the first evidence of a bar-built stellar structure at Cosmic Noon. We find that this nuclear disc is actively forming stars and has the same size as some nuclear discs in nearby galaxies. This evidence solidifies the now emerging picture in which bars are fundamental not only in the late evolution of galaxies, but also in their early evolutionary stages. It changes the current paradigm by urging a revision of our picture of galaxy evolution beyond redshift one, to include new considerations on the role played by bars as early as a few billion years after the Big Bang.         |


|||
|---:|:---|
| [![arXiv](https://img.shields.io/badge/arXiv-2601.18890-b31b1b.svg)](https://arxiv.org/abs/2601.18890) | **Testing the inference of kinematics from mock JWST NIRSpec/MSA observations of TNG50 galaxies at $z\sim2-6$**  |
|| R. Anirudh, et al. -- incl., <mark>A. d. Graaff</mark>, <mark>A. Pillepich</mark> |
|*Appeared on*| *2026-01-28*|
|*Comments*| *Submitted to MNRAS. Key figures: 5, 8, 10, and 13. Comments welcome*|
|**Abstract**|            We use the TNG50 galaxy formation simulation to generate mock JWST NIRCam and NIRSpec microshutter array (MSA) observations of H$\alpha$-emitting gas in $M_*=10^8-10^{11.5}\,M_\odot$ star-forming galaxies at $z=2-6$. We measure morphological properties from the mock imaging through Sersic profile fitting, and gas rotational velocities ($v$) and velocity dispersions ($\sigma$) by fitting the mock spectra as thin, rotating discs. To test the efficacy of such simple parametric models in describing complex ionised gas kinematics, we compare the best-fit quantities to intrinsic simulation measurements. At $z=3$, we find that $v$ and $\sigma$ for aligned and resolved sources generally agree well with intrinsic measurements, within a factor of $\sim$2 and $\sim$1.5, respectively. The recovery of kinematics is robust for smooth, disc-like systems, but $v$ and $\sigma$ can be over- or underestimated by more than a factor of 2, respectively, for intrinsically elongated systems. The scatter in the recovery accuracy is larger at higher redshift, as TNG50 galaxies at $z>3$ deviate more strongly from the thin rotating disc assumption. Despite uncertain measurements for individual galaxies, we find that key population trends, such as the weak redshift evolution of $\sigma$ and $v/\sigma$ as well as the dependence of $\sigma$ on the global star formation rate, are broadly recovered by our kinematic modelling. Our work provides the end-to-end framework needed to compare NIRSpec MSA observations to cosmological simulations and to quantify observational biases in measuring ionised gas kinematics, highlighting the need for the development of dedicated models for high-redshift galaxies.         |


|||
|---:|:---|
| [![arXiv](https://img.shields.io/badge/arXiv-2601.19148-b31b1b.svg)](https://arxiv.org/abs/2601.19148) | **ERGO-ML: The assembly histories of HSC galaxy images via invertible neural networks, contrastive learning, and cosmological simulations**  |
|| L. Eisert, et al. -- incl., <mark>A. Pillepich</mark> |
|*Appeared on*| *2026-01-28*|
|*Comments*| *Submitted to MNRAS, 25 pages, 10 figures*|
|**Abstract**|            In this paper of ERGO-ML (Extracting Reality from Galaxy Observables with Machine Learning), we develop a model that infers the merger/assembly histories of galaxies directly from optical images. We apply the self-supervised contrastive learning framework NNCLR (Nearest-Neighbor Contrastive Learning of visual Representations) on realistic HSC mock images (g,r,i - bands) produced from galaxies simulated within the TNG50 and TNG100 flagship runs of the IllustrisTNG project. The resulting representation is then used as conditional input for a cINN (conditional Invertible Neural Network) to gain posteriors for merger/assembly statistics, particularly the lookback time and stellar mass of the last major merger and the fraction of ex-situ stars. Through validation against the ground truth available for simulated galaxies, we assess the performance of our model, achieving good accuracy in inferring the stellar ex-situ fraction ($\le \pm 10$ per cent for 80 per cent of the test sample) and the mass of the last major merger (within $\pm 0.5 \log \MSUN$ for stellar masses $>10^{9.5} \MSUN$ ). We successfully apply the TNG-trained model to simulated mocks from the EAGLE simulation, demonstrating that our model is applicable outside of the TNG domain. We use our simulation-based model to infer aspects of the history of observed galaxies, in particular for HSC images that are close to the domain of TNG ones. We recover the trend of increasing ex-situ stellar fraction with stellar mass and more spherical morphology, but we also identify a discrepancy between TNG and HSC: on average, observed galaxies generally exhibit lower ex-situ fractions. Despite challenges such as information loss (e.g. projection effects and surface brightness limits) and domain shifts (from simulations to observations), our results demonstrate the feasibility of extracting the merger past of galaxies from their optical images.         |


|||
|---:|:---|
| [![arXiv](https://img.shields.io/badge/arXiv-2601.19522-b31b1b.svg)](https://arxiv.org/abs/2601.19522) | **Variations in the Milky Way's Stellar Mass Function at [Fe/H] < -1**  |
|| <mark>J. Li</mark>, et al. -- incl., <mark>H.-W. Rix</mark>, <mark>S. Souza</mark> |
|*Appeared on*| *2026-01-28*|
|*Comments*| *15 pages, 9 figures, 5 tables, Accepted for publication in ApJL*|
|**Abstract**|            We present the first determination of the Galactic stellar mass function (MF) for low-mass stars (0.2-0.5 M_sun) at metallicities [Fe/H] < -1. A sample of ~53,000 stars was selected as metal-poor on the basis of both their halo-like orbits and their spectroscopic [Fe/H] from Gaia DR3 BP/RP (XP) spectra. These metallicity estimates for low-mass stars were enabled by calibrating Gaia XP spectra with stellar parameters from SDSS-V. For -1.5 < [Fe/H] < -1, we find that the MF below 0.5 M_sun exhibits a "bottom-heavy" power-law slope of alpha ~ -1.6. We tentatively find that at even lower metallicities, the MF becomes very bottom-light, with a near-flat power-law slope of alpha ~ 0 that implies a severe deficit of low-mass stars. This metallicity-dependent variation is insensitive to the adopted stellar evolution model. These results show that the Galactic low-mass MF is not universal, with variations in the metal-poor regime. A further calibration of XP metallicities in the regime of M < 0.5 M_sun and [Fe/H] < -1.5 will be essential to verify these tentative low-metallicity trends.         |


|||
|---:|:---|
| [![arXiv](https://img.shields.io/badge/arXiv-2601.19531-b31b1b.svg)](https://arxiv.org/abs/2601.19531) | **Predicted incidence of Jupiter-like planets around white dwarfs**  |
|| A. Mauch-Soriano, et al. -- incl., <mark>W. Brandner</mark> |
|*Appeared on*| *2026-01-28*|
|*Comments*| *Accepted for publication in A&A*|
|**Abstract**|            Only a handful of gas giant planets orbiting white dwarfs are known. It remains unclear whether this paucity reflects observational challenges or the consequences of stellar evolution. We aim to carry out population synthesis of substellar objects around white dwarfs to predict the fraction and properties of white dwarfs hosting substellar companions. We generated a representative population of white-dwarf progenitors with substellar companion and used the stellar-evolution codes MESA and SSE with standard prescriptions for mass loss and stellar tides to predict the resulting population of white dwarfs and their companions. We find that the predicted fraction of white dwarfs hosting substellar companions in the Milky Way is, independent of uncertainties related to initial distributions, stellar tides, or stellar mass loss during the asymptotic giant branch, below ~3%. The occurrence rate peaks at relatively low-mass (~0.53 Msun to ~0.66 Msun) white dwarfs and relatively young (~1-6 Gyr) systems, where it exceeds 3%. The semimajor axes of the surviving companions range from 3-24 au. We estimate that ~95% of the predicted companions are gas-giant planets. Owing to the strong dependence of companion occurrence on the metallicity of the white dwarf progenitor, the assumed age-metallicity relation strongly affects the predictions. Based on recent estimates of the local age-metallicity relation, we estimate that the fraction of white dwarfs with companions close to the Sun might reach ~8%. If the planetary and brown dwarf companion distributions derived from intermediate-mass giant stars through radial velocity surveys reflect the characteristics of the true population, less than 3% of white dwarfs host substellar companions. This most likely represents an upper limit on possible detections because a significant number of companions might not be detectable with current facilities.         |

## Failed papers


|||
|---:|:---|
| [![arXiv](https://img.shields.io/badge/arXiv-2601.19018-b31b1b.svg)](https://arxiv.org/abs/2601.19018) | **A multiwavelength view of the nearby Calcium-Strong Transient SN 2025coe in the X-Ray, Near-Infrared, and Radio Wavebands**  |
|| <mark>S. Kumar</mark>, et al. |
|*Appeared on*| *2026-01-28*|
|*Comments*| *submitted to ApJ on Jan 26, 2026*|
|**Abstract**|            Calcium-strong transients (CaSTs) are a subclass of faint and rapidly evolving supernovae (SNe) that exhibit strong calcium features and notably weak oxygen features. The small but growing population of CaSTs exhibits some aspects similar to thermonuclear supernovae and others that are similar to massive star core-collapse events, leading to intriguing questions on the physical origins of CaSTs. SN 2025coe is one of the most nearby CaSTs discovered to date, and our coordinated multi-wavelength observations obtained days to weeks post-explosion reveal new insights on these enigmatic transients. With the most robust NIR spectroscopic time-series of a CaST collected to date, SN 2025coe shows spectral signatures characteristic of Type Ib SNe (SNe Ib, i.e. He-rich stripped-envelope SNe). SN~2025coe is the third X-ray detected CaST and our analysis of the \textit{Swift} X-ray data suggest interaction with 0.12 $\pm\,0.11\ M_{\odot}$ of circumstellar material (CSM) extending to at least $2 \times 10^{15} $cm ($\sim 30,000\ R_{\odot}$), while our analysis of the 1-240 GHz radio non-detections gives an outer radius of that CSM of at most $\sim 4\times 10^{15}$ cm. This inferred nearby high-density CSM extending out to $3\pm 1 \times10^{15}$ cm is similar to that seen in the other two X-ray detected CaSTs, and its presence suggests that either intensive mass-loss or some polluting mechanism may be a common feature of this subclass. Our work also expands upon recent studies on the optical properties of SN 2025coe and explores our current understanding of different progenitor systems that could possibly produce CaSTs.         |
|<p style="color:green"> **ERROR** </p>| <p style="color:green">affiliation error: mpia.affiliation_verifications: 'Heidelberg' keyword not found.</p> |


|||
|---:|:---|
| [![arXiv](https://img.shields.io/badge/arXiv-2601.19344-b31b1b.svg)](https://arxiv.org/abs/2601.19344) | **CH3CCH as a thermometer in warm molecular gas**  |
|| Y. Li, et al. -- incl., <mark>J. Li</mark> |
|*Appeared on*| *2026-01-28*|
|*Comments*| *Accepted for publication in ApJ, 27 pages, 15 figures*|
|**Abstract**|            Kinetic temperature is a fundamental parameter in molecular clouds. Symmetric top molecules, such as NH$_3$ and CH$_3$CCH, are often used as thermometers. However, at high temperatures, NH$_3$(2,2) can be collisionally excited to NH$_3$(2,1) and rapidly decay to NH$_3$(1,1), which can lead to an underestimation of the kinetic temperature when using rotation temperatures derived from NH$_3$(1,1) and NH$_3$(2,2). In contrast, CH$_3$CCH is a symmetric top molecule with lower critical densities of its rotational levels than those of NH$_3$, which can be thermalized close to the kinetic temperature at relatively low densities of about 10$^{4}$ cm$^{-3}$. To compare the rotation temperatures derived from NH$_3$(1,1)$\&$(2,2) and CH$_3$CCH rotational levels in warm molecular gas, we used observational data toward 55 massive star-forming regions obtained with Yebes 40m and TMRT 65m. Our results show that rotation temperatures derived from NH$_3$(1,1)$\&$(2,2) are systematically lower than those from CH$_3$CCH 5-4. This suggests that CH$_3$CCH rotational lines with the same $J$+1$\rightarrow$$J$ quantum number may be a more reliable thermometer than NH$_3$(1,1)$\&$(2,2) in warm molecular gas located in the surroundings of massive young stellar objects or, more generally, in massive star-forming regions.         |
|<p style="color:green"> **ERROR** </p>| <p style="color:green">affiliation error: mpia.affiliation_verifications: 'Heidelberg' keyword not found.</p> |

## Export documents

We now write the .md files and export relevant images

In [7]:
def export_markdown_summary(md: str, md_fname:str, directory: str):
    """Export MD document and associated relevant images"""
    import os
    import shutil
    import re

    if (os.path.exists(directory) and not os.path.isdir(directory)):
        raise RuntimeError(f"a non-directory file exists with name {directory:s}")

    if (not os.path.exists(directory)):
        print(f"creating directory {directory:s}")
        os.mkdir(directory)

    fig_fnames = (re.compile(r'\[Fig.*\]\((.*)\)').findall(md) + 
                  re.compile(r'\<img src="([^>\s]*)"[^>]*/>').findall(md))
    print("found figures", fig_fnames)
    for fname in fig_fnames:
        if 'http' in fname:
            # No need to copy online figures
            continue
        if not os.path.exists(fname):
            print("file not found", fname)
            continue
        print("copying ", fname, "to", directory)
        destdir = os.path.join(directory, os.path.dirname(fname))
        destfname = os.path.join(destdir, os.path.basename(fname))
        try:
            os.makedirs(destdir)
        except FileExistsError:
            pass
        shutil.copy(fname, destfname)
    with open(os.path.join(directory, md_fname), 'w') as fout:
        fout.write(md)
    print("exported in ", os.path.join(directory, md_fname))
    [print("    + " + os.path.join(directory,fk)) for fk in fig_fnames]

In [8]:
for paper_id, md in documents:
    export_markdown_summary(md, f"{paper_id:s}.md", '_build/html/')

found figures ['tmp_2601.18871/./NIRCam_filters.png', 'tmp_2601.18871/./results.png', 'tmp_2601.18871/./maps.png']
copying  tmp_2601.18871/./NIRCam_filters.png to _build/html/
copying  tmp_2601.18871/./results.png to _build/html/
copying  tmp_2601.18871/./maps.png to _build/html/
exported in  _build/html/2601.18871.md
    + _build/html/tmp_2601.18871/./NIRCam_filters.png
    + _build/html/tmp_2601.18871/./results.png
    + _build/html/tmp_2601.18871/./maps.png
found figures ['', '', '', '', '', '', '', '', '', '', '']
file not found 
file not found 
file not found 
file not found 
file not found 
file not found 
file not found 
file not found 
file not found 
file not found 
file not found 
exported in  _build/html/2601.18890.md
    + _build/html/
    + _build/html/
    + _build/html/
    + _build/html/
    + _build/html/
    + _build/html/
    + _build/html/
    + _build/html/
    + _build/html/
    + _build/html/
    + _build/html/
found figures ['tmp_2601.19148/./img/results/kde_TNG

## Display the papers

Not necessary but allows for a quick check.

In [9]:
[display(Markdown(k[1])) for k in documents];

<div class="macros" style="visibility:hidden;">
$\newcommand{\ensuremath}{}$
$\newcommand{\xspace}{}$
$\newcommand{\object}[1]{\texttt{#1}}$
$\newcommand{\farcs}{{.}''}$
$\newcommand{\farcm}{{.}'}$
$\newcommand{\arcsec}{''}$
$\newcommand{\arcmin}{'}$
$\newcommand{\ion}[2]{#1#2}$
$\newcommand{\textsc}[1]{\textrm{#1}}$
$\newcommand{\hl}[1]{\textrm{#1}}$
$\newcommand{\footnote}[1]{}$
$\newcommand{\thebibliography}{\DeclareRobustCommand{\VAN}[3]{##3}\VANthebibliography}$</div>



<div id="title">

# A nuclear disc at Cosmic Noon: evidence of early bar-driven galaxy evolution

</div>
<div id="comments">

[![arXiv](https://img.shields.io/badge/arXiv-2601.18871-b31b1b.svg)](https://arxiv.org/abs/2601.18871)<mark>Appeared on: 2026-01-28</mark> -  _6 pages, 3 figures, submitted to MNRAS Letters_

</div>
<div id="authors">

Z. A. L. Conte, et al. -- incl., <mark>J. Neumann</mark>

</div>
<div id="abstract">

**Abstract:** Recent studies have revealed that bars can form as early as a few billion years after the Big Bang, already displaying similar characteristics of evolved bars in the Local Universe. Bars redistribute angular momentum across the galaxy, regulating star formation, AGN activity, and building new stellar structures such as nuclear discs. However, the effects of bar-driven evolution on young galaxies are not yet known, as no evidence of bar-built stellar structures has ever been found beyond $z = 1$ , until now. In this work, we show evidence of a bar-built, star-forming nuclear disc, already present at redshift $z = 1.5$ . This is the first evidence of a bar-built stellar structure at Cosmic Noon. We find that this nuclear disc is actively forming stars and has the same size as some nuclear discs in nearby galaxies. This evidence solidifies the now emerging picture in which bars are fundamental not only in the late evolution of galaxies, but also in their early evolutionary stages. It changes the current paradigm by urging a revision of our picture of galaxy evolution beyond redshift one, to include new considerations on the role played by bars as early as a few billion years after the Big Bang.

</div>

<div id="div_fig1">

<img src="tmp_2601.18871/./NIRCam_filters.png" alt="Fig2" width="100%"/>

**Figure 2. -** The galaxy images from seven NIRCam filters, annotated in the top-left corner of each image with the filter name and rest-frame wavelength for a redshift of $z = 1.461$. A circle depicting 2×FWHM of the PSF is shown in the lower-left corner of each image. The lower-right panel is an RGB image obtained from the filters F115W, F150W and F200W. (*fig1*)

</div>
<div id="div_fig2">

<img src="tmp_2601.18871/./results.png" alt="Fig3" width="100%"/>

**Figure 3. -** Image analysis of the galaxy in the F150W (top row) and F200W (bottom row) NIRCam filters. From left to right: NIRCam image; unsharp masked image; IMFIT residual image for a multi-component fit; isophotal ellipse fitting of the NIRCam image; ellipticity radial profile from ellipse fitting, showing the peak in ellipticity of the nuclear bar in the nuclear disc as a dashed line and of the main bar as a dotted line. The nuclear disc size, measured with visual inspection of the images, is shown as a dot-dashed circle in the unsharp masked and residual images. (*fig2*)

</div>
<div id="div_fig3">

<img src="tmp_2601.18871/./maps.png" alt="Fig1" width="100%"/>

**Figure 1. -** Resolved property maps from NIRCam SED fitting. Left to right: stellar mass density, SFR density, and the strength of the 4000Å break. (*fig3*)

</div><div id="qrcode"><img src=https://api.qrserver.com/v1/create-qr-code/?size=100x100&data="https://arxiv.org/abs/2601.18871"></div>

<div class="macros" style="visibility:hidden;">
$\newcommand{\ensuremath}{}$
$\newcommand{\xspace}{}$
$\newcommand{\object}[1]{\texttt{#1}}$
$\newcommand{\farcs}{{.}''}$
$\newcommand{\farcm}{{.}'}$
$\newcommand{\arcsec}{''}$
$\newcommand{\arcmin}{'}$
$\newcommand{\ion}[2]{#1#2}$
$\newcommand{\textsc}[1]{\textrm{#1}}$
$\newcommand{\hl}[1]{\textrm{#1}}$
$\newcommand{\footnote}[1]{}$
$\newcommand{\Zsun}{{\rm Z}_{\odot}}$
$\newcommand{\Msun}{{\rm M}_{\odot}}$
$\newcommand{\HI}{\ion{H}{I}}$
$\newcommand{\Mstar}{M_*}$
$\newcommand{\Halpha}{H\alpha}$
$\newcommand{\msafit}{{\tt msafit}}$
$\newcommand{\pysersic}{{\tt pysersic}}$
$\newcommand{\Sersic}{S{\' e}rsic}$
$\newcommand{\msavre}{v(r_{\rm e})}$
$\newcommand{\msasigma}{\sigma_0}$
$\newcommand{\TNGvre}{v_{\rm max}(r\leq r_{\rm e})}$
$\newcommand{\TNGsigma}{\sigma_e}$
$\newcommand{\kms}{\rm km s^{-1}}$
$\newcommand{\orcid}[1]{\href{https://orcid.org/#1}{\includegraphics[scale=0.25]{figures/ORCID-iD_icon_32x32.png}}}$
$\newcommand{\thebibliography}{\DeclareRobustCommand{\VAN}[3]{##3}\VANthebibliography}$
$\newcommand\ap{#1}$</div>



<div id="title">

# Testing the inference of kinematics from mock JWST NIRSpec/MSA observations of TNG50 galaxies at $z\sim2-6$

</div>
<div id="comments">

[![arXiv](https://img.shields.io/badge/arXiv-2601.18890-b31b1b.svg)](https://arxiv.org/abs/2601.18890)<mark>Appeared on: 2026-01-28</mark> -  _Submitted to MNRAS. Key figures: 5, 8, 10, and 13. Comments welcome_

</div>
<div id="authors">

R. Anirudh, et al. -- incl., <mark>A. d. Graaff</mark>, <mark>A. Pillepich</mark>

</div>
<div id="abstract">

**Abstract:** We use the TNG50 galaxy formation simulation to generate mock JWST NIRCam and NIRSpec microshutter array (MSA) observations of $\Halpha$ -emitting gas in $M_*=10^8-10^{11.5} \Msun$ star-forming galaxies at $z=2-6$ . We measure morphological properties from the mock imaging through $\Sersic$ profile fitting, and gas rotational velocities ( $v$ ) and velocity dispersions ( $\sigma$ ) by fitting the mock spectra as thin, rotating discs. To test the efficacy of such simple parametric models in describing complex ionised gas kinematics, we compare the best-fit quantities to intrinsic simulation measurements. At $z=3$ , we find that $v$ and $\sigma$ for aligned and resolved sources generally agree well with intrinsic measurements, within a factor of $\sim$ 2 and $\sim$ 1.5, respectively. The recovery of kinematics is robust for smooth, disc-like systems, but $v$ and $\sigma$ can be over- or underestimated by more than a factor of 2, respectively, for intrinsically elongated systems. The scatter in the recovery accuracy is larger at higher redshift, as TNG50 galaxies at $z>3$ deviate more strongly from the thin rotating disc assumption. Despite uncertain measurements for individual galaxies, we find that key population trends, such as the weak redshift evolution of $\sigma$ and $v/\sigma$ as well as the dependence of $\sigma$ on the global star formation rate, are broadly recovered by our kinematic modelling. Our work provides the end-to-end framework needed to compare NIRSpec MSA observations to cosmological simulations and to quantify observational biases in measuring ionised gas kinematics, highlighting the need for the development of dedicated models for high-redshift galaxies.

</div>

<div id="div_fig1">

<img src="" alt="Fig12.1" width="50%"/><img src="" alt="Fig12.2" width="50%"/>

**Figure 12. -** Ratios of best-fit to intrinsic kinematics as a function of global galaxy properties for $z=3$ TNG50 galaxies with stellar masses of $10^8-10^{11.5} $\Msun$$(additional redshifts in Fig. \ref{fig:bestfitbyintrinsic_vs_galprop_altz}). On the left hand side, we focus on rotational velocities, i.e. the ratio of best-fit $v(r_{\rm e})$ to intrinsic $v_{\rm max}(r\leq r_{\rm e})$, whereas on the right hand side we focus on velocity dispersions, i.e. the ratio of best-fit $\sigma_0$ to intrinsic $\sigma_e$. The four sub-panels are (i) galaxy stellar mass, (ii) $\Halpha$ effective radii, (iii) Intrinsic axis ratio $C/A$, \&(iv) Intrinsic $\TNGvre$/$\TNGsigma$. The different colours correspond to subsets as in Fig. \ref{fig:bestfitvsintrinsic}(see legend). The solid lines correspond to running medians for bins in log space. The dashed lines are 16th and 84th percentiles, respectively. Spearman rank coefficients and $p$-values for the aligned$+$resolved subset are indicated in the bottom right of each sub-panel. The dashed grey line at y-axis$=1$ is when the best-fit kinematic properties perfectly match the intrinsic measurements. The ratios of best-fit to intrinsic $v$ and $\sigma$ show nearly no dependence on stellar mass and effective radii, but are strongly (anti-)correlated with the intrinsic rotational support $\TNGvre$/$\TNGsigma$. (*fig:bestfitbyintrinsic_vs_galprop*)

</div>
<div id="div_fig2">

<img src="" alt="Fig18.1" width="16%"/><img src="" alt="Fig18.2" width="16%"/><img src="" alt="Fig18.3" width="16%"/><img src="" alt="Fig18.4" width="16%"/><img src="" alt="Fig18.5" width="16%"/><img src="" alt="Fig18.6" width="16%"/>

**Figure 18. -** Differences in inference fidelity as a function of galaxy property: (Left) Best-fit $\msavre$ by intrinsic TNG $\TNGvre$(Right) Best-fit $\msasigma$ by intrinsic TNG $\TNGsigma$ vs galaxy properties for $z=2,4,6$. The axes, lines, and annotations are as in Fig. \ref{fig:bestfitbyintrinsic_vs_galprop}. (*fig:bestfitbyintrinsic_vs_galprop_altz*)

</div>
<div id="div_fig3">

<img src="" alt="Fig17.1" width="33%"/><img src="" alt="Fig17.2" width="33%"/><img src="" alt="Fig17.3" width="33%"/>

**Figure 17. -** Differences in inference fidelity for $v$, $\sigma$ and $v/\sigma$: Best-fit $\msavre$ vs intrinsic $\TNGvre$, best-fit $\msasigma$ vs intrinsic $\TNGsigma$, and best-fit $\msavre$/$\msasigma$ vs intrinsic $\TNGvre$/$\TNGsigma$ for $z=2,4,6$. The lines, contours, and annotations are as in Fig. \ref{fig:bestfitvsintrinsic}. (*fig:bestfitvsintrinsic_altz*)

</div><div id="qrcode"><img src=https://api.qrserver.com/v1/create-qr-code/?size=100x100&data="https://arxiv.org/abs/2601.18890"></div>

<div class="macros" style="visibility:hidden;">
$\newcommand{\ensuremath}{}$
$\newcommand{\xspace}{}$
$\newcommand{\object}[1]{\texttt{#1}}$
$\newcommand{\farcs}{{.}''}$
$\newcommand{\farcm}{{.}'}$
$\newcommand{\arcsec}{''}$
$\newcommand{\arcmin}{'}$
$\newcommand{\ion}[2]{#1#2}$
$\newcommand{\textsc}[1]{\textrm{#1}}$
$\newcommand{\hl}[1]{\textrm{#1}}$
$\newcommand{\footnote}[1]{}$
$\newcommand{\orcid}[1]{\href{https://orcid.org/#1}{\includegraphics[scale=0.08]{img/data/ORCID-iD_icon-128x128.png}}}$
$\newcommand{\MSUN}{\rm{M}_{\odot}}$
$\newcommand{\thebibliography}{\DeclareRobustCommand{\VAN}[3]{##3}\VANthebibliography}$</div>



<div id="title">

# ERGO-ML: The assembly histories of HSC galaxy images via invertible neural networks, contrastive learning, and cosmological simulations

</div>
<div id="comments">

[![arXiv](https://img.shields.io/badge/arXiv-2601.19148-b31b1b.svg)](https://arxiv.org/abs/2601.19148)<mark>Appeared on: 2026-01-28</mark> -  _Submitted to MNRAS, 25 pages, 10 figures_

</div>
<div id="authors">

L. Eisert, et al. -- incl., <mark>A. Pillepich</mark>

</div>
<div id="abstract">

**Abstract:** In this paper of ERGO-ML (Extracting Reality from Galaxy Observables with Machine Learning), we develop a model that infers the merger/assembly histories of galaxies directly from optical images. We apply the self-supervised contrastive learning framework NNCLR (Nearest-Neighbor Contrastive Learning of visual Representations) on realistic HSC mock images (g,r,i - bands) produced from galaxies simulated within the TNG50 and TNG100 flagship runs of the IllustrisTNG project and with stellar masses of $10^{9-12} \MSUN$ . The resulting representation is then used as conditional input for a cINN (conditional Invertible Neural Network) to gain posteriors for merger/assembly statistics, particularly the lookback time and stellar mass of the last major merger and the fraction of ex-situ stars.Through validation against the ground truth available for simulated galaxies, we assess the performance of our model, achieving good accuracy in inferring the stellar ex-situ fraction ( $\le \pm 10$ per cent for 80 per cent of the test sample) and the mass of the last major merger (within $\pm 0.5 \log \MSUN$ for stellar masses $>10^{9.5} \MSUN$ ). The information content about the lookback time is, instead, limited. We also successfully apply the TNG-trained model to simulated mocks from the EAGLE simulation, demonstrating that our model is applicable outside of the TNG domain. We hence use our simulation-based model to infer aspects of the history of observed galaxies, in particular for HSC images that are close to the domain of TNG ones. We recover the trend of increasing ex-situ stellar fraction with stellar mass and more spherical morphology, but we also identify a discrepancy between TNG and HSC: on average, observed galaxies generally exhibit lower ex-situ fractions. Despite challenges such as information loss (e.g. projection effects and surface brightness limits) and domain shifts (from simulations to observations), our results demonstrate the feasibility of extracting the merger past of galaxies from their optical images.

</div>

<div id="div_fig1">

<img src="tmp_2601.19148/./img/results/kde_TNG100.png" alt="Fig7.1" width="25%"/><img src="tmp_2601.19148/./img/results/kde_TNG50.png" alt="Fig7.2" width="25%"/><img src="tmp_2601.19148/./img/results/kde_EAGLE.png" alt="Fig7.3" width="25%"/><img src="tmp_2601.19148/./img/results/kde_SIMBA.png" alt="Fig7.4" width="25%"/>

**Figure 7. -** ** How well do representations of the observed and simulated galaxy images align to each other?** We compare the distributions of TNG100, TNG50, EAGLE, and SIMBA images to the observed ones from HSC in the corresponding 2D-UMAP mapping of the 256-dimensional representations obtained by training a ResNet model using contrastive learning _ simultaneously_ on all  datasets. In the top-left panel, we show kernel density estimation (KDE) density plots of TNG100 images in blue and of the selection function-matched HSC set in light blue. Analogously, we compare the image density distributions of TNG50 vs. its corresponding matched HSC set (top right) and similar visualizations of the UMAPs for EAGLE (bottom left) and SIMBA (bottom right). Contours indicate isodensity lines derived from KDE in the 2D UMAP space. While there is a significant overlap among the first three sets, slight offsets and differences in point density are also evident. For the SIMBA galaxies, the distributions of images diverge most significantly. Namely, TNG100, TNG50 and EAGLE return galaxy images that are, at the population level, more consistent with observed ones from HSC than SIMBA. (*fig:umaps*)

</div>
<div id="div_fig2">

<img src="tmp_2601.19148/./img/results/similarity_distribution_scaled_before.png" alt="Fig8" width="100%"/>

**Figure 8. -** ** Out-of-Domain (OOD) Scores of the same image representations of Figure \ref{fig:umaps**, to quantitatively assess the realism of simulated galaxies in comparison to HSC images.} We show the OOD score distributions for the observed HSC and simulated datasets (TNG50, TNG100, EAGLE, and SIMBA), following the methodology introduced by \citealt{Eisert_2024} and  using the self-supervised contrastive-learning model of Figure \ref{fig:umaps}. The OOD score is evaluated across multiple scenarios, including comparisons between pairs of datasets and random splits within each dataset, to understand the inherent scatter: the distribution of self-distances for HSC images is provided as reference and is similar to those of the self-distances across the other simulated samples (not shown). The measurement distinguishes between the "sides" of the comparison, where, for example, TNG100-HSC refers to the distances of TNG100 galaxies relative to HSC, while HSC-TNG100 reflects the reverse. Images with high OOD values denote galaxies that do not resemble well observed ones. From the shape of the distributions, we can see that TNG50 and TNG100 return galaxy samples that are overall more realistic, i.e. more aligned, with HSC galaxies than EAGLE and, to a much larger degree, SIMBA. Namely, there are relatively fewer TNG50 and TNG100 galaxies than in SIMBA that appear inconsistent with HSC data. (*fig:similarity_distribution_pre_matched*)

</div>
<div id="div_fig3">

<img src="tmp_2601.19148/./img/results/inference_exsitu_mass.png" alt="Fig12.1" width="33%"/><img src="tmp_2601.19148/./img/results/inference_exsitu_radius.png" alt="Fig12.2" width="33%"/><img src="tmp_2601.19148/./img/results/mass_radius_exsitu_map_new.png" alt="Fig12.3" width="33%"/>

**Figure 12. -** ** The ex-situ stellar mass fraction of HSC galaxies inferred by our SBI model based on the TNG50 and TNG100 simulations.** Top left: 2D histogram of the ex-situ fractions, inferred by our model, plotted against the galaxy stellar masses from the Sloan Digital Sky Survey (SDSS). For each HSC image with available SDSS stellar masses (a total of 9,043 galaxies), we plot 400 posterior samples to account for the uncertainty in predictions as modeled by our approach (color code). The median of all ex-situ fraction posterior samples in bins of stellar mass is shown for both HSC galaxies (in grey) and the combined TNG50/TNG100 dataset (in orange), in addition to dashed curves encompassing the 80 per-cent range of posterior data points in each mass bin. For the TNG galaxies, we use the total stellar mass of the corresponding galaxy. Since stellar mass measurements for HSC galaxies are available only for a subset of the overall HSC sample, we perform an additional mass matching between HSC and TNG galaxies for this plot, assuming that the two different operational definitions of a galaxy mass are consistent. Top right: same as on the left, but for the inferred ex-situ fractions plotted against galaxy stellar sizes (63,403 galaxies in total). We derive the physical radii from the Petrosian 90 per-cent light radii using the spectroscopic redshifts (Table \ref{tab:observable_properties}) and the Petrosian radii are measured exactly in the same way in both observed and simulations maps. Main: galaxy stellar sizes vs. galaxy stellar masses of 9,043 HSC galaxies color coded by the inferred ex-situ mass fraction. Each hex-bin is colored according to the median MAP of the ex-situ posterior inferred by our model. The size of the hex-bin decode the median uncertainty (i.e. the standard deviation of the posterior) of the prediction. Small hexagons relate to a small uncertainty ($\approx 0.04$ per-cent) while large hexagons to a large uncertainty ($\approx 0.12$ per-cent). According to HSC galaxies, more massive galaxies and more extended ones (also at fixed stellar mass) are made of larger fractions of accreted stars. (*fig:inference_exsitu_mass_size*)

</div><div id="qrcode"><img src=https://api.qrserver.com/v1/create-qr-code/?size=100x100&data="https://arxiv.org/abs/2601.19148"></div>

<div class="macros" style="visibility:hidden;">
$\newcommand{\ensuremath}{}$
$\newcommand{\xspace}{}$
$\newcommand{\object}[1]{\texttt{#1}}$
$\newcommand{\farcs}{{.}''}$
$\newcommand{\farcm}{{.}'}$
$\newcommand{\arcsec}{''}$
$\newcommand{\arcmin}{'}$
$\newcommand{\ion}[2]{#1#2}$
$\newcommand{\textsc}[1]{\textrm{#1}}$
$\newcommand{\hl}[1]{\textrm{#1}}$
$\newcommand{\footnote}[1]{}$
$\newcommand{\vdag}{(v)^\dagger}$
$\newcommand$
$\newcommand$
$\newcommand{\gaia}{\textsl{Gaia}}$
$\newcommand{\photmoh}{[M/H]_{\rm phot}}$
$\newcommand{\teff}{T_{\rm eff}}$
$\newcommand{\logg}{\log ~ g}$
$\newcommand{\moh}{[M/H]}$
$\newcommand{\emoh}{\sigma_{\rm{[M/H]}}}$
$\newcommand{\eteff}{\sigma_{T_{\rm eff}}}$
$\newcommand{\elogg}{\sigma_{\log ~ g}}$
$\newcommand{\msun}{{\rm M}_{\odot}}$
$\newcommand{\HWRrevision}[1]{\textcolor{blue}{#1}}$
$\newcommand{\jd}[1]{\textcolor{blue}{#1}}$</div>



<div id="title">

# Variations in the Milky Way's Stellar Mass Function at [Fe/H] < -1

</div>
<div id="comments">

[![arXiv](https://img.shields.io/badge/arXiv-2601.19522-b31b1b.svg)](https://arxiv.org/abs/2601.19522)<mark>Appeared on: 2026-01-28</mark> -  _15 pages, 9 figures, 5 tables, Accepted for publication in ApJL_

</div>
<div id="authors">

<mark>J. Li</mark>, et al. -- incl., <mark>H.-W. Rix</mark>

</div>
<div id="abstract">

**Abstract:** We present the first determination of the Galactic stellar mass function (MF) for low-mass stars ( $0.2-0.5 {\rm M}_\odot$ ) at metallicities [ Fe/H ] $\lesssim -1$ . A sample of $\sim$ 53,000  stars was selected as metal-poor on the basis of both their halo-like orbits and their spectroscopic [ Fe/H ] from Gaia DR3 BP/RP (XP) spectra. These metallicity estimates for low-mass stars were enabled by calibrating Gaia XP spectra with stellar parameters from SDSS-V.For $-1.5<$ [ Fe/H ] $<$ -1, we find that the MF below 0.5 ${\rm M}_\odot$ exhibits a "bottom-heavy" power-law slope of $\alpha \sim -1.6$ . We tentatively find that at even lower metallicities, the MF becomes very bottom-light, with a near-flat power-law slope of $\alpha \sim 0$ that implies a severe deficit of low-mass stars. This metallicity-dependent variation is insensitive to the adopted stellar evolution model. These results show that the Galactic low-mass MF is not universal, with variations in the metal-poor regime.A further calibration of XP metallicities in the regime of $M<0.5 M_{\odot}$ and [ Fe/H ] $<-1.5$ will be essential to verify these tentative low-metallicity trends.

</div>

<div id="div_fig1">

<img src="tmp_2601.19522/./fig/cmd_logprob_diff_v2.png" alt="Fig6.1" width="50%"/><img src="tmp_2601.19522/./fig/cmd_isocut_flowcut_feh.png" alt="Fig6.2" width="50%"/>

**Figure 6. -** Color-Magnitude Diagrams (CMDs) of the Gaia XP sample.
    _Left:_  The CMD of Gaia sources within 1 kpc is color-coded by $\eta$, defined in Eq. \ref{eq:eta}, which is the likelihood that a star's orbit is halo-like rather than disk-like. Blue colors denote stars that are more likely to be on halo-like orbits.
    _Right:_ CMD of the sample with halo-like orbits ($\eta>10$), color-coded by XP-derived metallicity.
    Typical absolute-magnitude error is dominated by distance uncertainties, with $\sigma_{M_G}\approx 0.08 \mathrm{mag}$ at $d=800 \mathrm{pc}$.
    The gray background represents sources within 100 pc.
    The dot--dashed colored lines show loci from the PARSEC stellar evolution models evaluated at a fixed age of 5 Gyr for different metallicities. The black dotted lines indicate iso-mass lines from the PARSEC models.
     (*fig:cmd_eta_feh*)

</div>
<div id="div_fig2">

<img src="tmp_2601.19522/./fig/mf_feh.png" alt="Fig8.1" width="50%"/><img src="tmp_2601.19522/./fig/xi_mr_vs_feh_gc.png" alt="Fig8.2" width="50%"/>

**Figure 8. -** 
    Stellar mass functions for the metal-poor sample.
    The logarithmic mass density, $\log_{10}\Phi$[number kpc$^{-3}$ M$_\odot^{-1}$], is shown as a function of stellar mass $M$ for different metallicity bins (color-coded by [Fe/H], see color bar).
    Stellar masses are derived from PARSEC models (solid lines) and BaSTI models (dotted lines).
    Symbols show the binned densities with shaded bands indicating $1\sigma$ uncertainties.
    The dashed gray line shows the canonical Kroupa IMF, scaled for visual comparison.
    The mass ratio, $\xi_{\mathrm{MR}}$, as a function of [Fe/H].
    The ratio $\xi_{\mathrm{MR}}$ is defined as the stellar mass fraction between $0.2 {\rm M}_\odot$ and $0.5 {\rm M}_\odot$ relative to the mass between $0.2 {\rm M}_\odot$ and $0.7 {\rm M}_\odot$.
    Our results for metal-poor stars are shown as blue symbols for individual metallicity bins (stars).  These are compared against a compilation of literature data (other colored symbols). The horizontal black-dashed and grey-dotted lines represent the values for canonical Kroupa and Salpeter IMFs, respectively. The metallicity-dependent model from \cite{yan2024} is shown as a grey dash-dot curve. The shaded grey area shows the metal-poor regime with [Fe/H]$<-1.1$. (*fig:smf*)

</div>
<div id="div_fig3">

<img src="tmp_2601.19522/./fig/veff_plot.png" alt="Fig1" width="100%"/>

**Figure 1. -** The estimated effective volume $\tilde{V}_{\mathrm{eff}}$ as a function of absolute magnitude $M_{G0}$. The plot shows how the volume accessible to the survey is much larger for brighter (lower $M_{G0}$), more massive stars than for fainter, less massive ones. (*fig:veff*)

</div><div id="qrcode"><img src=https://api.qrserver.com/v1/create-qr-code/?size=100x100&data="https://arxiv.org/abs/2601.19522"></div>

<div class="macros" style="visibility:hidden;">
$\newcommand{\ensuremath}{}$
$\newcommand{\xspace}{}$
$\newcommand{\object}[1]{\texttt{#1}}$
$\newcommand{\farcs}{{.}''}$
$\newcommand{\farcm}{{.}'}$
$\newcommand{\arcsec}{''}$
$\newcommand{\arcmin}{'}$
$\newcommand{\ion}[2]{#1#2}$
$\newcommand{\textsc}[1]{\textrm{#1}}$
$\newcommand{\hl}[1]{\textrm{#1}}$
$\newcommand{\footnote}[1]{}$
$\newcommand{\mrs}[1]{\textbf{\color{forestgreen(traditional)} (MRS: #1)}}$
$\newcommand{\Cata}[1]{\textbf{\color{blue} (Cata: #1)}}$
$\newcommand{\Diego}[1]{\textbf{\color{red} (Diego: #1)}}$
$\newcommand{\Alex}[1]{\textbf{\color{orange} (Alex: #1)}}$
$\newcommand{\Javi}[1]{\textbf{\color{cyan} (Javi: #1)}}$
$\newcommand{\Pau}[1]{\textbf{\color{red} (Pau: #1)}}$
$\newcommand{\specialcell}[2][c]{\begin{tabular}[#1]{@ l@ }#2\end{tabular}}$
$\newcommand{\Lsun}{L_{\odot}}$
$\newcommand{\Msun}{M_{\odot}}$
$\newcommand{\Mjup}{M_{\mathrm{J}}}$
$\newcommand{\Rsun}{R_{\odot}}$
$\newcommand{\Msunyr}{M_{\odot}~yr^{-1}}$
$\newcommand{\gppr}{\stackrel{>}{\scriptstyle \sim}}$
$\newcommand{\gappr}{\raisebox{-0.4ex}{\gppr}}$
$\newcommand{\lppr}{\stackrel{<}{\scriptstyle \sim}}$
$\newcommand{\lappr}{\raisebox{-0.4ex}{\lppr}}$
$\newcommand{\arraystretch}{1.5}$
$\newcommand\hyper{@linkstart##1##2 $
$     }$
$\newcommand\hyper{@linkstart##1##2 $
$     }$
$\newcommand\hyper{@linkstart##1##2 $
$     }$
$\newcommand\hyper{@linkstart##1##2 $
$     }$</div>



<div id="title">

# Predicted incidence of Jupiter-like planets around white dwarfs

</div>
<div id="comments">

[![arXiv](https://img.shields.io/badge/arXiv-2601.19531-b31b1b.svg)](https://arxiv.org/abs/2601.19531)<mark>Appeared on: 2026-01-28</mark> -  _Accepted for publication in A&A_

</div>
<div id="authors">

A. Mauch-Soriano, et al. -- incl., <mark>W. Brandner</mark>

</div>
<div id="abstract">

**Abstract:** Gas-giant planets and brown dwarfs have been discovered in large numbers around main-sequence stars and even evolved stars. In contrast, and despite ongoing imaging surveys using state-of-the-art facilities, only a handful of substellar companions to white dwarfs are known. It remains unclear whether this paucity reflects observational challenges or the consequences of stellar evolution. We aim to carry out population synthesis of substellar objects around white dwarfs to predict the fraction and properties of white dwarfs hosting substellar companions. We generated a representative population of white-dwarf progenitors (up to $4$ $\Msun$ ) with substellar companions, adopting companion distributions derived from radial-velocity surveys of giant stars and a global age-metallicity relation. We then combined the stellar-evolution codes Modules for Experiments in Stellar Astrophysics (MESA) and Single Star Evolution (SSE) with standard prescriptions for mass loss and stellar tides to predict the resulting population of white dwarfs and their substellar companions. We find that the predicted fraction of white dwarfs hosting substellar companions in the Milky Way is, independent of uncertainties related to initial distributions, stellar tides, or stellar mass loss during the asymptotic giant branch, below $\sim3\pm1.5$ \% . The occurrence rate peaks at relatively low-mass ( $\sim 0.53M_\odot$ to $\sim 0.66M_\odot$ ) white dwarfs and relatively young ( $\sim 1$ - $6$ Gyr) systems, where it can reach $\gappr3$ \% .   The semimajor axes of the surviving companions range from $3-24$ au with a median of $11$ au. We estimate that $\sim95$ \% of the predicted companions are gas-giant planets, which translates to a predicted general Jupiter-like planet occurrence rate around white dwarfs below $\sim2.9\pm1.4$ \% . These occurrence rates might slightly increase if multi-planetary systems are considered.   Furthermore, owing to the strong dependence   of companion occurrence on the metallicity of the white dwarf progenitor, the assumed age-metallicity relation strongly affects the predictions. Based on recent estimates of the local age-metallicity relation, we estimate that the fraction of white dwarfs with companions close to the Sun might reach $\lappr8$ \% . If the planetary and brown dwarf companion distributions derived from intermediate-mass giant stars through radial velocity surveys reflect the characteristics of the true population, less than $3 \pm1.5$ \% of white dwarfs host substellar companions. Depending somewhat on the age-metallicity relation, this most likely represents an upper limit on possible detections because a significant number of companions might not be detectable with current facilities.

</div>

<div id="div_fig1">

<img src="tmp_2601.19531/./Figures/WD_corrected_histograms_2x2_NEW.png" alt="Fig4" width="100%"/>

**Figure 4. -** Final and initial properties of systems that host a susbtellar companion. Each row shows a set of histograms comparing two WD samples: one modeled using \citetalias[][]{Villaver_2009}(left) and one based on the \citetalias[][]{rasioetal96-1}(right) approximation for stellar tides. All histograms are normalized to the total number of surviving companions for each simulation. The final semimajor axes are significantly larger than the initial ones for both tidal prescriptions (top left). For weak tides, the eccentricity distribution does not significantly change, while the distribution moves slightly toward smaller eccentricities for strong tides (top right). The bottom panels show the WD mass and age distributions for both approximations of tidal forces which appear to be rather similar. (*fig:final-and-WD*)

</div>
<div id="div_fig2">

<img src="tmp_2601.19531/./Figures/orbital_evolution_combined_corrected.png" alt="Fig2" width="100%"/>

**Figure 2. -** 
    Orbital evolution of a $1$\Mjup gas-giant planet orbiting a $2$\Msun star with $Z=0.0187$, comparing calculations performed with SSE (left), the MESA default test suite (middle), for which the AGB mass-loss efficiency is set to $\eta=0.7$, and MESA assuming a more realistic mass-loss efficiency ($\eta= 0.02$; right).
    The orbits are assumed to be circular, and initial separations range from $2$ to $4.5$ au, with a step size of $0.5$ au. The red filled area corresponds to the stellar radius, while purple and green lines denote the orbital separation of the engulfed and surviving planets, respectively. The insets show zoomed-in views of the thermal pulse phase of the AGB and highlight its critical role in planetary engulfment. SSE does not account for the thermal pulses (left). For a large mass-loss efficiency (middle), most planets survive because the orbit expansion caused by stellar mass loss dominates. In the most realistic scenario assuming a small efficiency (right), the star evolves through more thermal pulses and reaches a larger radius, which causes most planets to be engulfed. (*fig:stellar_evolution*)

</div>
<div id="div_fig3">

<img src="tmp_2601.19531/./Figures/Initial_vs_final_log_mass_new2.png" alt="Fig3" width="100%"/>

**Figure 3. -** Survival of substellar companions as function of companion mass (top), initial semimajor axis (middle), and initial eccentricity (bottom).
        The left panels represent results when considering the tidal force model by \citetalias{Villaver_2009}, while the right panels show results when using the tidal force model by \citetalias{rasioetal96-1}.  All distributions are normalized by
         the total number of stars (1000).
        The number of surviving companions is indicated in the figure for each model (blue histogram). The vertical lines correspond to the median value of each sample. The solid lines correspond to the cumulative distributions.
        In general, the initial distributions of surviving companions (orange) are shifted toward lower masses, larger semimajor axes, and smaller eccentricities compared to the initial values of all companions (blue).
        These effects are slightly more pronounced for stronger tides (right panels).
         (*fig:Initial planets distributions*)

</div><div id="qrcode"><img src=https://api.qrserver.com/v1/create-qr-code/?size=100x100&data="https://arxiv.org/abs/2601.19531"></div>

# Create HTML index

In [10]:
from datetime import datetime, timedelta, timezone
from glob import glob
import os

files = glob('_build/html/*.md')
days = 7
now = datetime.today()
res = []
for fk in files:
    stat_result = os.stat(fk).st_ctime
    modified = datetime.fromtimestamp(stat_result, tz=timezone.utc).replace(tzinfo=None)
    delta = now.today() - modified
    if delta <= timedelta(days=days):
        res.append((delta.seconds, fk))
res = [k[1] for k in reversed(sorted(res, key=lambda x:x[1]))]
npub = len(res)
print(len(res), f" publications files modified in the last {days:d} days.")
# [ print('\t', k) for k in res ];

119  publications files modified in the last 7 days.


In [11]:
import datetime
from glob import glob

def get_last_n_days(lst, days=1):
    """ Get the documents from the last n days """
    sorted_lst = sorted(lst, key=lambda x: x[1], reverse=True)
    for fname, date in sorted_lst:
        if date >= str(datetime.date.today() - datetime.timedelta(days=days)):
            yield fname

def extract_appearance_dates(lst_file):
    dates = []

    def get_date(line):
        return line\
            .split('Appeared on:')[-1]\
            .split('</mark>')[0].strip()

    for fname in lst:
        with open(fname, 'r') as f:
            found_date = False
            for line in f:
                if not found_date:
                    if "Appeared on" in line:
                        found_date = True
                        dates.append((fname, get_date(line)))
                else:
                    break
    return dates

from glob import glob
lst = glob('_build/html/*md')
days = 7
dates = extract_appearance_dates(lst)
res = list(get_last_n_days(dates, days))
npub = len(res)
print(len(res), f" publications in the last {days:d} days.")

15  publications in the last 7 days.


In [12]:
def create_carousel(npub=4):
    """ Generate the HTML code for a carousel with `npub` slides """
    carousel = ["""  <div class="carousel" """,
                """       data-flickity='{ "autoPlay": 10000, "adaptiveHeight": true, "resize": true, "wrapAround": true, "pauseAutoPlayOnHover": true, "groupCells": 1 }' id="asyncTypeset">"""
                ]
    
    item_str = """    <div class="carousel-cell"> <div id="slide{k}" class="md_view">Content {k}</div> </div>"""
    for k in range(1, npub + 1):
        carousel.append(item_str.format(k=k))
    carousel.append("  </div>")
    return '\n'.join(carousel)

def create_grid(npub=4):
    """ Generate the HTML code for a flat grid with `npub` slides """
    grid = ["""  <div class="grid"> """,
                ]
    
    item_str = """    <div class="grid-item"> <div id="slide{k}" class="md_view">Content {k}</div> </div>"""
    for k in range(1, npub + 1):
        grid.append(item_str.format(k=k))
    grid.append("  </div>")
    return '\n'.join(grid)

In [13]:
carousel = create_carousel(npub)
docs = ', '.join(['"{0:s}"'.format(k.split('/')[-1]) for k in res])
slides = ', '.join([f'"slide{k}"' for k in range(1, npub + 1)])

with open("daily_template.html", "r") as tpl:
    page = tpl.read()
    page = page.replace("{%-- carousel:s --%}", carousel)\
               .replace("{%-- suptitle:s --%}",  "7-day archives" )\
               .replace("{%-- docs:s --%}", docs)\
               .replace("{%-- slides:s --%}", slides)
    
with open("_build/html/index_7days.html", 'w') as fout:
    fout.write(page)

In [14]:
# redo for today
days = 1
res = list(get_last_n_days(dates, days))
npub = len(res)
print(len(res), f" publications in the last day.")

carousel = create_carousel(npub)
docs = ', '.join(['"{0:s}"'.format(k.split('/')[-1]) for k in res])
slides = ', '.join([f'"slide{k}"' for k in range(1, npub + 1)])

with open("daily_template.html", "r") as tpl:
    page = tpl.read()
    page = page.replace("{%-- carousel:s --%}", carousel)\
               .replace("{%-- suptitle:s --%}",  "Daily" )\
               .replace("{%-- docs:s --%}", docs)\
               .replace("{%-- slides:s --%}", slides)
    
# print(carousel, docs, slides)
# print(page)
with open("_build/html/index_daily.html", 'w') as fout:
    fout.write(page)

6  publications in the last day.


In [15]:
# Create the flat grid of the last N papers (fixed number regardless of dates)
from itertools import islice 

npub = 6
res = [k[0] for k in (islice(reversed(sorted(dates, key=lambda x: x[1])), 6))]
print(len(res), f" {npub} publications selected.")

grid = create_grid(npub)
docs = ', '.join(['"{0:s}"'.format(k.split('/')[-1]) for k in res])
slides = ', '.join([f'"slide{k}"' for k in range(1, npub + 1)])

with open("grid_template.html", "r") as tpl:
    page = tpl.read()
    page = page.replace("{%-- grid-content:s --%}", grid)\
               .replace("{%-- suptitle:s --%}",  f"Last {npub:,d} papers" )\
               .replace("{%-- docs:s --%}", docs)\
               .replace("{%-- slides:s --%}", slides)
    
# print(grid, docs, slides)
# print(page)
with open("_build/html/index_npub_grid.html", 'w') as fout:
    fout.write(page)

6  6 publications selected.
