# MPIA Arxiv on Deck 2

Contains the steps to produce the paper extractions.

In [1]:
# Imports
import os
from IPython.display import Markdown, display
from tqdm.notebook import tqdm
import warnings
from PIL import Image 
import re

# requires arxiv_on_deck_2

from arxiv_on_deck_2.arxiv2 import (get_new_papers, 
                                    get_paper_from_identifier,
                                    retrieve_document_source, 
                                    get_markdown_badge)
from arxiv_on_deck_2 import (latex,
                             latex_bib,
                             mpia,
                             highlight_authors_in_list)

# Some paper figures are huge: raise the pixel limit PIL applies to images
Image.MAX_IMAGE_PIXELS = 1_000_000_000

In [2]:
# Some useful definitions.

class AffiliationWarning(UserWarning):
    """Warning category for affiliation-related issues."""

class AffiliationError(RuntimeError):
    """Error raised when the affiliation verification of a paper fails."""

def validation(source: str):
    """Check the affiliations of a paper before its TeX code is parsed

    The source is handed unchanged to ``mpia.affiliation_verifications``;
    any result other than ``True`` is treated as the reason of the failure.

    :param source: TeX source to check
    :raises AffiliationError: if the verification does not return ``True``
    """
    check = mpia.affiliation_verifications(source, verbose=True)
    if check is not True:
        # Format instead of concatenating: a non-string result must still end
        # up as an AffiliationError and not as a TypeError.
        raise AffiliationError(f"mpia.affiliation_verifications: {check}")

        
# Never de-duplicate affiliation warnings: show each occurrence
warnings.simplefilter(action='always', category=AffiliationWarning)


def get_markdown_qrcode(paper_id: str):
    """ Generate a qrcode to the arxiv page using qrserver.com

    :param paper_id: Arxiv identifier of the paper (e.g. ``2506.18974``)
    :returns: HTML snippet (usable in markdown) displaying the QR code
    """
    from urllib.parse import quote

    url = r"https://api.qrserver.com/v1/create-qr-code/?size=100x100&data="
    # The link travels as a query parameter: percent-encode it
    target = quote(f"https://arxiv.org/abs/{paper_id}", safe='')
    # The quotes must enclose the whole `src` value; quotes placed around the
    # data part only become part of the text encoded in the QR code.
    txt = f'<img src="{url}{target}">'
    return '<div id="qrcode">' + txt + '</div>'


def clean_non_western_encoded_characters_commands(text: str) -> str:
    """ Drop the LaTeX commands wrapping non-western characters, keep the characters

    Only the ``\\begin{CJK}{UTF8}{gbsn} ... \\end{CJK}`` environment is handled
    for now; the list may need to grow.

    :param text: the text to clean
    :return: the cleaned text
    """
    cjk_environment = r"(\\begin{CJK}{UTF8}{gbsn})(.*?)(\\end{CJK})"
    # group 2 is the content between the \begin and \end commands
    return re.sub(cjk_environment, r"\2", text)


def get_initials(name: str) -> str:
    """ Get the short name, e.g., A.-B. FamName

    Every token but the last one is reduced to its initial(s); hyphenated
    tokens keep one initial per part. A parenthesised part (often used for
    the non western spelling of a name) is moved to the end of the result.

    :param name: full name
    :returns: initials followed by the last token, or an empty string if
              the name contains nothing to shorten
    """
    suffix = ''
    # account for non western names often in ()
    if '(' in name:
        name = clean_non_western_encoded_characters_commands(name)
        in_parentheses = re.findall(r"\((.*?)\)", name)
        # an unmatched "(" yields no match: leave the name untouched then
        if in_parentheses:
            suffix = in_parentheses[0]
            name = name.replace(f"({suffix})", '')

    tokens = name.split()
    if not tokens:
        # empty or whitespace-only name: nothing to abbreviate
        return f"({suffix})" if suffix else ''

    initials = []
    for token in tokens[:-1]:
        # ignore the empty parts produced by leading/trailing/double hyphens
        parts = [part for part in token.split('-') if part]
        if parts:
            initials.append('-'.join(part[0] + '.' for part in parts))
    initials.append(tokens[-1].strip())
    if suffix:
        initials.append(f"({suffix})")
    return ' '.join(initials)

## get list of arxiv paper candidates

We use the staff (Mitarbeiter) list published on the mpia.de website to get the author names.
We then get all new papers from arXiv and match their authors against that list.

In [3]:
# deal with the author list and edge cases of people that cannot be consistent on their name  

def filter_non_scientists(name: str) -> bool:
    """ Loose filter on expected authorships

    Rejects the names containing one of the family names listed below
    (IT, administration, technical staff).

    :param name: name
    :returns: False if name is not a scientist, True otherwise
    """
    excluded_names = (
        'Licht', 'Binroth', 'Witzel', 'Jordan',
        'Zähringer', 'Scheerer', 'Hoffmann', 'Düe',
        'Hellmich', 'Enkler-Scharpegge', 'Witte-Nguy',
        'Dehen', 'Beckmann', 'Jager', 'Jäger',
    )
    # substring test: the name may carry initials or first names as well
    return not any(excluded in name for excluded in excluded_names)

def add_author_to_list(author_list: list) -> list:
    """ Make sure the manually maintained extra authors are in the list

    The list is completed in place and returned as well.

    :param author_list: list of authors
    :returns: updated list of authors
    """
    for extra_author in ('T. Henning',):
        if extra_author in author_list:
            continue
        author_list.append(extra_author)
    return author_list

# Staff list from the MPIA website; the name is the second field of each entry
# (mpia.get_mpia_mitarbeiter_list() already does some filtering)
mitarbeiter = mpia.get_mpia_mitarbeiter_list()
scientist_names = [entry[1] for entry in mitarbeiter if filter_non_scientists(entry[1])]
# Add the authors whose MPIA name and name on papers are inconsistent
mpia_authors = add_author_to_list(scientist_names)

In [4]:
# Identifiers of papers to process on top of today's listing (manual references)
add_paper_refs = []
new_papers = get_new_papers()
manual_papers = [get_paper_from_identifier(ref) for ref in add_paper_refs]
new_papers.extend(manual_papers)

def robust_call(fn, value, *args, **kwargs):
    """ Apply `fn` to `value`, falling back to `value` itself on failure

    :param fn: callable invoked as ``fn(value, *args, **kwargs)``
    :param value: first argument of `fn`, also the fallback result
    :returns: the result of `fn`, or `value` unchanged if `fn` raised
    """
    result = value
    try:
        result = fn(value, *args, **kwargs)
    except Exception:
        # best effort on purpose: keep the input when it cannot be processed
        pass
    return result

candidates = []
for paperk in new_papers:
    # Normalise the author names with mpia.get_initials (a name that cannot be
    # processed is kept as is) and highlight those found in the MPIA list
    normed_author_list = [robust_call(mpia.get_initials, k) for k in paperk['authors']]
    hl_authors = highlight_authors_in_list(normed_author_list, mpia_authors, verbose=True)
    # (highlighted, original) pairs of the authors whose highlighted form contains 'mark'
    matches = [pair for pair in zip(hl_authors, paperk['authors']) if 'mark' in pair[0]]
    paperk['authors'] = hl_authors
    if not matches:
        continue
    # only select paper if an author matched our list
    candidates.append(paperk)

n_new, n_candidates = len(new_papers), len(candidates)
print("""Arxiv has {0:,d} new papers today""".format(n_new))
print("""          {0:,d} with possible author matches""".format(n_candidates))

K. Jahnke  ->  K. Jahnke  |  ['K. Jahnke']
Y. Wang  ->  Y. Wang  |  ['Y. Wang']


A. Frank  ->  A. Frank  |  ['A. Frank']
K. Jahnke  ->  K. Jahnke  |  ['K. Jahnke']
S. Kraus  ->  S. Kraus  |  ['S. Kraus']
Arxiv has 73 new papers today
          5 with possible author matches


# Parse sources and generate relevant outputs

From the candidates, we do the following steps:
* get their tarball from ArXiv (and extract data)
* find the main .tex file: find one with \documentclass{...} (sometimes it's non trivial)
* Check affiliations with :func:`validation`, which uses :func:`mpia.affiliation_verifications`
* If passing the affiliations: we parse the .tex source
   * inject sub-documents into the main (flatten the main document)
   * parse structure, extract information (title, abstract, authors, figures...)
   * handles `\graphicspath` if provided
* Generate the .md document.

In [5]:
# Build the markdown summary of every candidate paper.
#   documents: (paper_id, markdown text) of the papers processed successfully
#   failed:    (paper, reason) of the papers rejected or that raised an error
documents = []
failed = []
for paper in tqdm(candidates):
    # Normalise the identifier: lower case, no 'arxiv:' prefix, no surrounding
    # whitespace. The normalised value is written back into the paper.
    # NOTE(review): r'\n' is the two characters backslash + n, not a newline;
    # confirm this is the intended cleanup.
    paper['identifier'] = paper['identifier'].lower().replace('arxiv:', '').replace(r'\n', '').strip()
    paper_id = paper['identifier']
    
    folder = f'tmp_{paper_id}'

    try:
        # Only retrieve the source when its folder does not exist yet
        if not os.path.isdir(folder):
            folder = retrieve_document_source(f"{paper_id}", f'tmp_{paper_id}')
        
        try:
            # `validation` raises AffiliationError when the affiliation check fails
            doc = latex.LatexDocument(folder, validation=validation)    
        except AffiliationError as affilerror:
            # Record the paper as failed and move on to the next candidate.
            # NOTE(review): `msg` is built but not used anywhere in this cell.
            msg = f"ArXiv:{paper_id:s} is not an MPIA paper... " + str(affilerror)
            failed.append((paper, "affiliation error: " + str(affilerror) ))
            continue
        
        # Hack because sometimes author parsing does not work well:
        # when the number of parsed authors differs from the arXiv listing,
        # use the (already highlighted) arXiv author list instead
        if (len(doc.authors) != len(paper['authors'])):
            doc._authors = paper['authors']
        else:
            # highlight authors (FIXME: doc.highlight_authors)
            # done on arxiv paper already
            doc._authors = highlight_authors_in_list(
                [get_initials(k) for k in doc.authors], 
                mpia_authors, verbose=True)
        # Fall back to the arXiv abstract when none was parsed from the source
        if (doc.abstract) in (None, ''):
            doc._abstract = paper['abstract']
            
        # Comment line: arXiv badge, appearance date and, if any, the arXiv comments
        doc.comment = (get_markdown_badge(paper_id) + 
                       "<mark>Appeared on: " + paper['date'] + "</mark> - ")
        if paper['comments']:
            doc.comment += " _" + paper['comments'] + "_"
        
        full_md = doc.generate_markdown_text()
        
        # Append the QR code linking to the arXiv page
        full_md += get_markdown_qrcode(paper_id)
        
        # replace citations (best effort: a failure here is only reported and
        # the markdown is kept as generated above)
        try:
            bibdata = latex_bib.LatexBib.from_doc(doc)
            full_md = latex_bib.replace_citations(full_md, bibdata)
        except Exception as e:
            print("Issues with the citations")
            print(e)
        
        documents.append((paper_id, full_md))
    except Exception as e:
        # Any other error: warn, record the paper as failed and keep going
        warnings.warn(latex.LatexWarning(f"{paper_id:s} did not run properly\n" +
                                         str(e)
                                        ))
        failed.append((paper, "latex error " + str(e)))

  0%|          | 0/5 [00:00<?, ?it/s]

Retrieving document from  https://arxiv.org/e-print/2506.18974


extracting tarball to tmp_2506.18974...

 done.


Found 92 bibliographic references in tmp_2506.18974/main.bbl.
Retrieving document from  https://arxiv.org/e-print/2506.19206
extracting tarball to tmp_2506.19206... done.
Retrieving document from  https://arxiv.org/e-print/2506.19547


extracting tarball to tmp_2506.19547...

 done.
Retrieving document from  https://arxiv.org/e-print/2506.19566



  exec(code_obj, self.user_global_ns, self.user_ns)

  exec(code_obj, self.user_global_ns, self.user_ns)


extracting tarball to tmp_2506.19566...

 done.


Found 72 bibliographic references in tmp_2506.19566/ICMclustersInQ1.bbl.
Retrieving document from  https://arxiv.org/e-print/2506.19668


extracting tarball to tmp_2506.19668...

 done.


### Export the logs

Throughout, we also keep track of the logs per paper; see `_build/html/logs/log-{today's date}.md`.

In [6]:
import datetime
today = str(datetime.date.today())
logfile = f"_build/html/logs/log-{today}.md"

# Create the log directory if needed, so that the cell also runs when the
# build tree does not exist yet
os.makedirs(os.path.dirname(logfile), exist_ok=True)

with open(logfile, 'w') as logs:
    # Success
    logs.write(f'# Arxiv on Deck 2: Logs - {today}\n\n')
    logs.write("""* Arxiv had {0:,d} new papers\n""".format(len(new_papers)))
    logs.write("""    * {0:,d} with possible author matches\n\n""".format(len(candidates)))
    logs.write("## Successful papers\n\n")
    display(Markdown("## Successful papers"))
    # identifiers of the papers for which a document was produced
    success = [k[0] for k in documents]
    for candid in candidates:
        if candid['identifier'].split(':')[-1] in success:
            display(candid)
            logs.write(candid.generate_markdown_text() + '\n\n')

    ## failed
    logs.write("## Failed papers\n\n")
    display(Markdown("## Failed papers"))
    # sort by reason so that identical reasons share a single heading in the log
    failed = sorted(failed, key=lambda x: x[1])
    current_reason = ""
    for paper, reason in failed:
        # affiliation rejections are shown in green, every other error in red
        if 'affiliation' in reason:
            color = 'green'
        else:
            color = 'red'
        data = Markdown(
                paper.generate_markdown_text() + 
                f'\n|<p style="color:{color:s}"> **ERROR** </p>| <p style="color:{color:s}">{reason:s}</p> |'
               )
        if reason != current_reason:
            logs.write(f'### {reason:s} \n\n')
            current_reason = reason
        logs.write(data.data + '\n\n')
        
        # only display here the important errors (all in logs)
        # if color in ('red',):
        display(data)

## Successful papers


|||
|---:|:---|
| [![arXiv](https://img.shields.io/badge/arXiv-2506.18974-b31b1b.svg)](https://arxiv.org/abs/2506.18974) | **Euclid: An emulator for baryonic effects on the matter bispectrum**  |
|| P. A. Burger, et al. -- incl., <mark>K. Jahnke</mark> |
|*Appeared on*| *2025-06-25*|
|*Comments*| *24 pages, 18 figures, submitted to A&A*|
|**Abstract**|            The Euclid mission and other next-generation large-scale structure surveys will enable high-precision measurements of the cosmic matter distribution. Understanding the impact of baryonic processes such as star formation and AGN feedback on matter clustering is crucial to ensure precise and unbiased cosmological inference. Most theoretical models of baryonic effects to date focus on two-point statistics, neglecting higher-order contributions. This work develops a fast and accurate emulator for baryonic effects on the matter bispectrum, a key non-Gaussian statistic in the nonlinear regime. We employ high-resolution $N$-body simulations from the BACCO suite and apply a combination of cutting-edge techniques such as cosmology scaling and baryonification to efficiently span a large cosmological and astrophysical parameter space. A deep neural network is trained to emulate baryonic effects on the matter bispectrum measured in simulations, capturing modifications across various scales and redshifts relevant to Euclid. We validate the emulator accuracy and robustness using an analysis of \Euclid mock data, employing predictions from the state-of-the-art FLAMINGO hydrodynamical simulations. The emulator reproduces baryonic suppression in the bispectrum to better than 2$\%$ for the $68\%$ percentile across most triangle configurations for $k \in [0.01, 20]\,h^{-1}\mathrm{Mpc}$ and ensures consistency between cosmological posteriors inferred from second- and third-order weak lensing statistics.         |


|||
|---:|:---|
| [![arXiv](https://img.shields.io/badge/arXiv-2506.19566-b31b1b.svg)](https://arxiv.org/abs/2506.19566) | **Euclid: Quick Data Release (Q1) -- Watching ICM-selected galaxy clusters with Euclid eyes -- prospects of Euclid data in the context of large SZ and X-ray based surveys**  |
|| M. Klein, et al. -- incl., <mark>K. Jahnke</mark> |
|*Appeared on*| *2025-06-25*|
|*Comments*| *11 pages, 10 figures*|
|**Abstract**|            Galaxy clusters detected through their X-ray emission or Sunyaev--Zeldovich effect (SZE), both produced by the intra-cluster medium (ICM), are key probes in cosmological and astrophysical studies. To maximise the scientific return of such surveys, complementary data are required for cluster confirmation and redshift estimation. This is typically provided by wide-field optical and infrared surveys, which are increasingly challenged by ongoing and future ICM-selected samples. In particular, at high redshifts ($z>1$) probed by upcoming SZE-selected samples, current large surveys may be insufficient for reliable confirmation. Deep, high-resolution infrared surveys like Euclid will thus be essential for confirming most high-redshift clusters. We present an analysis of the first sizeable Euclid dataset (Q1), overlapping with several ICM-selected cluster samples. We apply an adaptation of the MCMF cluster confirmation tool to estimate key properties, including redshift and richness, and to predict Euclid's capabilities for high-redshift cluster confirmation. We find promising performance, particularly at high redshifts, while richness estimates at low redshifts ($z<0.4$) are currently limited by Q1 data quality but should improve with future releases. Using MCMF runs on random lines of sight, we predict that Euclid will confirm clusters at $1<z<2$ as effectively as current optical surveys at $z<0.6$, significantly enhancing high-redshift confirmation. SZE-selected samples will thus greatly benefit from Euclid overlap. Among five known high-$z$ SZE clusters in Q1, we identify the highest-redshift jellyfish galaxy candidate to date, EUCLJ035330.86$-$504347.6 in SPT-CLJ0353$-$5043 ($z=1.32$), two massive star-forming galaxies near ACT-CLJ0350.0$-$4819 ($z=1.46$), and strong lensing features in SPT-CLJ0353$-$5043 and SPT-CLJ0421$-$4845.         |

## Failed papers


|||
|---:|:---|
| [![arXiv](https://img.shields.io/badge/arXiv-2506.19206-b31b1b.svg)](https://arxiv.org/abs/2506.19206) | **A Fast Bayesian Method for Coherent Gravitational Wave Searches with Relative Astrometry**  |
|| B. Zhang, et al. -- incl., <mark>Y. Wang</mark> |
|*Appeared on*| *2025-06-25*|
|*Comments*| *14 pages, 5 figures*|
|**Abstract**|            Using relative stellar astrometry for the detection of coherent gravitational wave sources is a promising method for the microhertz range, where no dedicated detectors currently exist. Compared to other gravitational wave detection techniques, astrometry operates in an extreme high-baseline-number and low-SNR-per-baseline limit, which leads to computational difficulties when using conventional Bayesian search techniques. We extend a technique for efficiently searching pulsar timing array datasets through the precomputation of inner products in the Bayesian likelihood, showing that it is applicable to astrometric datasets. Using this technique, we are able to reduce the total dataset size by up to a factor of $\mathcal{O}(100)$, while remaining accurate to within 1% over two orders of magnitude in gravitational wave frequency. Applying this technique to simulated astrometric datasets for the Kepler Space Telescope and Nancy Grace Roman Space Telescope missions, we obtain forecasts for the sensitivity of these missions to coherent gravitational waves. Due to the low angular sky coverage of astrometric baselines, we find that coherent gravitational wave sources are poorly localized on the sky. Despite this, from $10^{-8}$ Hz to $10^{-6}$ Hz, we find that Roman is sensitive to coherent gravitational waves with an instantaneous strain above $h_0 \simeq 10^{-11.4}$, and Kepler is sensitive to strains above $h_0 \simeq $ $10^{-12.4}$. At this strain, we can detect a source with a frequency of $10^{-7}$ Hz and a chirp mass of $10^9$ $M_\odot$ at a luminosity distance of 3.6 Mpc for Kepler, and 0.3 Mpc for Roman.         |
|<p style="color:green"> **ERROR** </p>| <p style="color:green">affiliation error: mpia.affiliation_verifications: 'Heidelberg' keyword not found.</p> |


|||
|---:|:---|
| [![arXiv](https://img.shields.io/badge/arXiv-2506.19547-b31b1b.svg)](https://arxiv.org/abs/2506.19547) | **Understanding the Drag Torque in Common Envelope Evolution**  |
|| S. Bhattacharyya, et al. -- incl., <mark>A. Frank</mark> |
|*Appeared on*| *2025-06-25*|
|*Comments*| **|
|**Abstract**|            Common envelope (CE) evolution is largely governed by the drag torque applied on the inspiralling stellar components by the envelope. Previous work has shown that idealized models of the torque based on a single body moving in rectilinear motion through an unperturbed atmosphere can be highly inaccurate. Progress requires new models for the torque that account for binarity. Toward this end we perform a new 3D global hydrodynamic CE simulation with the mass of the companion point particle set equal to the mass of the asymptotic giant branch star core particle to maximize symmetry and facilitate interpretation. First, we find that a region around the particles of a scale comparable to their separation contributes essentially all of the torque. Second, the density pattern of the torque-dominating gas and, to an extent, this gas itself, is roughly in corotation with the binary. Third, approximating the spatial distribution of the torquing gas as a uniform-density prolate spheroid whose major axis resides in the orbital plane and lags the line joining the binary components by a constant phase angle reproduces the torque evolution remarkably well, analogous to studies of binary supermassive black holes. Fourth, we compare the torque measured in the simulation with the predictions of a model that assumes two weak point-mass perturbers undergoing circular motion in a uniform background without gas self-gravity, and find remarkable agreement with our results if the background density is taken to be equal to a fixed fraction ($\approx0.44$) of the density at the spheroid surface. Overall, this work makes progress toward developing simple time-dependent models of the CE phase.         |
|<p style="color:green"> **ERROR** </p>| <p style="color:green">affiliation error: mpia.affiliation_verifications: 'Heidelberg' keyword not found.</p> |


|||
|---:|:---|
| [![arXiv](https://img.shields.io/badge/arXiv-2506.19668-b31b1b.svg)](https://arxiv.org/abs/2506.19668) | **HD 143006: Interferometric Confirmation of Misaligned Protoplanetary Disc with CHARA/MIRCX and VLTI/PIONIER**  |
|| I. Codron, et al. -- incl., <mark>S. Kraus</mark> |
|*Appeared on*| *2025-06-25*|
|*Comments*| **|
|**Abstract**|            The outer regions of the protoplanetary disc surrounding the T Tauri star HD 143006 show rings, dust asymmetries and shadows. Whilst rings and dust asymmetries can arise from companions and other mechanisms, shadows and misaligned discs in particular are typically attributed to the presence of misaligned planets or stellar-mass companions. To understand the mechanisms that drive these traits, the innermost regions of discs need to be studied. Using CHARA/MIRCX and VLTI/PIONIER, we observed the sub-au region of HD 143006. We constrain the orientation of the inner disc of HD 143006 and probe whether a misalignment between the inner and outer disc could be the cause of the shadows. Modelling the visibilities using a geometric model, the inclination and position angle are found to be $i=22^\circ\pm 3^\circ$ and $\mathrm{PA}=158^\circ\pm 8^\circ$ respectively, with an inner dust sublimation radius of $\sim0.04$ au. The inner disc is misaligned by $39^\circ\pm4^\circ$ with respect to the outer disc, with the far side of the inner disc to the east and the far side of the outer disc to the west. We constrain $h/R$ (scattering surface/radius of scattered light) of the outer disc at $18$ au to be about $13\%$ by calculating the offset between the shadow position and the central star. No companion was detected, with a magnitude contrast of $4.4$ in the H-band and placing an upper mass limit of $0.17 M_\odot$ at separations of $0-8$ au. Therefore, we cannot confirm or rule out that a low-mass star or giant planet is responsible for the misalignment and dust sub-structures.         |
|<p style="color:green"> **ERROR** </p>| <p style="color:green">affiliation error: mpia.affiliation_verifications: 'Heidelberg' keyword not found.</p> |

## Export documents

We now write the .md files and export relevant images

In [7]:
def export_markdown_summary(md: str, md_fname:str, directory: str):
    """Export MD document and associated relevant images

    The figures referenced in the markdown text (``[Fig...](path)`` links and
    ``<img src="path" .../>`` tags) are copied below `directory`, keeping
    their relative path; online figures and missing files are skipped.

    :param md: markdown text to export
    :param md_fname: name of the markdown file written in `directory`
    :param directory: output directory, created if it does not exist
    :raises RuntimeError: if `directory` exists but is not a directory
    """
    import os
    import shutil
    import re

    if (os.path.exists(directory) and not os.path.isdir(directory)):
        raise RuntimeError(f"a non-directory file exists with name {directory:s}")

    if (not os.path.exists(directory)):
        print(f"creating directory {directory:s}")
        # makedirs: the missing parent directories are created as well
        os.makedirs(directory, exist_ok=True)

    fig_fnames = (re.compile(r'\[Fig.*\]\((.*)\)').findall(md) + 
                  re.compile(r'\<img src="([^>\s]*)"[^>]*/>').findall(md))
    print("found figures", fig_fnames)
    for fname in fig_fnames:
        if 'http' in fname:
            # No need to copy online figures
            continue
        if not os.path.exists(fname):
            print("file not found", fname)
            continue
        print("copying ", fname, "to", directory)
        # keep the relative path of the figure below the output directory
        destdir = os.path.join(directory, os.path.dirname(fname))
        destfname = os.path.join(destdir, os.path.basename(fname))
        os.makedirs(destdir, exist_ok=True)
        shutil.copy(fname, destfname)
    with open(os.path.join(directory, md_fname), 'w') as fout:
        fout.write(md)
    print("exported in ", os.path.join(directory, md_fname))
    for fk in fig_fnames:
        print("    + " + os.path.join(directory,fk))

In [8]:
# Write one markdown file (and its figures) per successfully processed paper
for paper_id, md in documents:
    export_markdown_summary(md=md, md_fname=f"{paper_id:s}.md", directory='_build/html/')

found figures ['tmp_2506.18974/./figs/Flamingo_variation_cosmology_3pt.png', 'tmp_2506.18974/./figs/powerspec_dependence_folded.png', 'tmp_2506.18974/./figs/bispec_dependence_folded.png', 'tmp_2506.18974/./figs/xikappa_dependence_DMO.png', 'tmp_2506.18974/./figs/Map2_dependence_DMO.png', 'tmp_2506.18974/./figs/Map3_dependence_DMO.png']
copying  tmp_2506.18974/./figs/Flamingo_variation_cosmology_3pt.png to _build/html/
copying  tmp_2506.18974/./figs/powerspec_dependence_folded.png to _build/html/
copying  tmp_2506.18974/./figs/bispec_dependence_folded.png to _build/html/
copying  tmp_2506.18974/./figs/xikappa_dependence_DMO.png to _build/html/
copying  tmp_2506.18974/./figs/Map2_dependence_DMO.png to _build/html/
copying  tmp_2506.18974/./figs/Map3_dependence_DMO.png to _build/html/
exported in  _build/html/2506.18974.md
    + _build/html/tmp_2506.18974/./figs/Flamingo_variation_cosmology_3pt.png
    + _build/html/tmp_2506.18974/./figs/powerspec_dependence_folded.png
    + _build/html/t

## Display the papers

Not necessary but allows for a quick check.

In [9]:
# Render every generated document; each entry is (paper identifier, markdown text)
for shown_id, shown_markdown in documents:
    display(Markdown(shown_markdown))

<div class="macros" style="visibility:hidden;">
$\newcommand{\ensuremath}{}$
$\newcommand{\xspace}{}$
$\newcommand{\object}[1]{\texttt{#1}}$
$\newcommand{\farcs}{{.}''}$
$\newcommand{\farcm}{{.}'}$
$\newcommand{\arcsec}{''}$
$\newcommand{\arcmin}{'}$
$\newcommand{\ion}[2]{#1#2}$
$\newcommand{\textsc}[1]{\textrm{#1}}$
$\newcommand{\hl}[1]{\textrm{#1}}$
$\newcommand{\footnote}[1]{}$
$\newcommand{\Msun}{ h^{-1}{\rm M_{ \odot}}}$
$\newcommand{\hkpc}{ h^{-1}{\rm kpc}}$
$\newcommand{\ihMpc}{ h {\rm Mpc}^{-1}}$
$\newcommand{\raul}[1]{\textcolor{red}{\textbf{Raul: #1}}}$
$\newcommand{\gio}[1]{\textcolor{red}{\textbf{GA: #1}}}$
$\newcommand{\anik}[1]{\textcolor{orange}{\textbf{#1}}}$
$\newcommand{\Mperp}{\mathcal{M}_{\perp}}$
$\newcommand{\MapMapMap}{{\expval{\Map^3}}}$
$\newcommand{\MapMperpMperp}{{\expval{\Map\Mperp^2}}}$
$\newcommand{\Mapperp}{\mathcal{M}_{\mathrm{ap}/\perp}}$
$\newcommand{\diracd}{\delta_\mathrm{D}}$
$\newcommand{\LCDM}{\LambdaCDM}$
$\newcommand{\bkappa}{\ensuremath{B_\kappa}}$
$\newcommand{\pkappa}{\ensuremath{P_\kappa}}$
$\newcommand{\MapEst}{\widehat{\mathcal{M}}_\mathrm{ap}}$
$\newcommand{\MperpEst}{\widehat{\mathcal{M}}_\perp}$
$\newcommand{\dd}{\mathrm{d}}$
$\newcommand{\vtheta}{\pmb{\theta_\mathrm{ap}}}$
$\newcommand{\vell}{\pmb{\ell}}$
$\newcommand{\Npix}{N_\mathrm{pix}}$
$\newcommand{\Ngal}{N_\mathrm{eff}}$
$\newcommand{\expval}[1]{\langle #1 \rangle}$
$\newcommand{\code}{\tt }$
$\newcommand{\mycomment}[1]$
$\newcommand{\corr}[1]{\textcolor{red}{\textbf{#1}}}$
$\newcommand{\laila}[1]{{\color{teal}#1}}$
$\newcommand{\orcid}[1]$
$\newcommand{\arraystretch}{1.2}$
$\newcommand{\arraystretch}{1.2}$
$\newcommand{\arraystretch}{1.3}$
$\newcommand{\arraystretch}{1.2}$
$\newcommand{\arraystretch}{1.3}$
$\newcommand{\arraystretch}{1.2}$
$\newcommand\say{#1}$</div>



<div id="title">

# $\Euclid$\/: An emulator for baryonic effects on the matter bispectrum$\thanks{This paper is published on behalf of the Euclid Consortium.}$

</div>
<div id="comments">

[![arXiv](https://img.shields.io/badge/arXiv-2506.18974-b31b1b.svg)](https://arxiv.org/abs/2506.18974)<mark>Appeared on: 2025-06-25</mark> -  _24 pages, 18 figures, submitted to A&A_

</div>
<div id="authors">

P. A. Burger, et al. -- incl., <mark>K. Jahnke</mark>

</div>
<div id="abstract">

**Abstract:** The $\Euclid$ mission and other next-generation large-scale structure surveys will enable high-precision measurements of the cosmic matter distribution. Understanding the impact of baryonic processes such as star formation and active galactic nuclei (AGN) feedback on matter clustering is crucial to ensure precise and unbiased cosmological inference. Most theoretical models of baryonic effects to date focus on two-point statistics, neglecting higher-order contributions. This work develops a fast and accurate emulator for baryonic effects on the matter bispectrum, a key non-Gaussian statistic in the nonlinear regime. We employ high-resolution $N$ -body simulations from the BACCO suite and apply a combination of cutting-edge techniques such as cosmology scaling and baryonification to efficiently span a large cosmological and astrophysical parameter space. A deep neural network is trained to emulate baryonic effects on the matter bispectrum measured in simulations, capturing modifications across various scales and redshifts relevant to $\Euclid$ . We validate the emulator accuracy and robustness using an analysis of $\Euclid$ mock data, employing predictions from the state-of-the-art FLAMINGO hydrodynamical simulations. The emulator reproduces baryonic suppression in the bispectrum to better than 2 $\%$ for the $68\%$ percentile across most triangle configurations for $k \in [0.01, 20] \ihMpc$ and ensures consistency between cosmological posteriors inferred from second- and third-order weak lensing statistics. These results demonstrate that our emulator meets the high-precision requirements of the $\Euclid$ mission for at least the first data release and provides forecasts of the cosmological and astrophysical information contained in the small-scale matter bispectrum. This underscores the potential of emulation techniques to bridge the gap between complex baryonic physics and observational data, maximising the scientific output of $\Euclid$ . 
Our emulators will becomepublicly available in $\url{https://baccoemu.readthedocs.io/en/latest/}$ .

</div>

<div id="div_fig1">

<img src="tmp_2506.18974/./figs/Flamingo_variation_cosmology_3pt.png" alt="Fig8" width="100%"/>

**Figure 8. -** Estimated mean and $68\%$ credible intervals for all FLAMINGO models described in Sect. \ref{sec:Flamingo_sim}. Since the LS8 and \Planck cosmology is different we plot $\Delta \Omega_\mathrm{m} = \Omega^\mathrm{best}_\mathrm{m} - \Omega^\mathrm{true}_\mathrm{m}$ and in analogy $\Delta S_8$. The different colours show cases where parameters are fixed. The stars indicate the best-fitting parameters. The figure is for $\xi_\kappa + \langle \Map^2 \rangle + \langle \Map^3 \rangle$. The corresponding figure for $\xi_\kappa + \langle \Map^2 \rangle$ is shown in Fig. \ref{fig:Flamingo_variation_cosmology_2nd}. (*fig:Flamingo_variation_cosmology*)

</div>
<div id="div_fig2">

<img src="tmp_2506.18974/./figs/powerspec_dependence_folded.png" alt="Fig12.1" width="50%"/><img src="tmp_2506.18974/./figs/bispec_dependence_folded.png" alt="Fig12.2" width="50%"/>

**Figure 12. -** In this figure, we show the power spectrum's dependence on baryonic feedback effects in the three upper rows. In the bottom three rows, we show the dependence of the bispectrum for $k_1=k_2=k_3$. (*fig:bispec_dependence*)

</div>
<div id="div_fig3">

<img src="tmp_2506.18974/./figs/xikappa_dependence_DMO.png" alt="Fig18.1" width="33%"/><img src="tmp_2506.18974/./figs/Map2_dependence_DMO.png" alt="Fig18.2" width="33%"/><img src="tmp_2506.18974/./figs/Map3_dependence_DMO.png" alt="Fig18.3" width="33%"/>

**Figure 18. -** Dependence of the $\expval{\Map^n}$ and $\xi_\kappa$ to changes of baryonic and cosmological parameters. We scaled the model vectors using the same model just for a GrO case, disentangling the effect of the baryons. The $\theta_\mathrm{ap} = \{\theta_\mathrm{ap,1},\theta_\mathrm{ap,2},\theta_\mathrm{ap,3}\}$ values for the lowest panel are increasing from left to right as in Fig. \ref{fig:Map3_Flamingo}, increasing first $\theta_\mathrm{ap,3}$, then $\theta_\mathrm{ap,2}$ and lastly $\theta_\mathrm{ap,1}$.  (*fig:Map23_dependence_GrO*)

</div><div id="qrcode"><img src=https://api.qrserver.com/v1/create-qr-code/?size=100x100&data="https://arxiv.org/abs/2506.18974"></div>

<div class="macros" style="visibility:hidden;">
$\newcommand{\ensuremath}{}$
$\newcommand{\xspace}{}$
$\newcommand{\object}[1]{\texttt{#1}}$
$\newcommand{\farcs}{{.}''}$
$\newcommand{\farcm}{{.}'}$
$\newcommand{\arcsec}{''}$
$\newcommand{\arcmin}{'}$
$\newcommand{\ion}[2]{#1#2}$
$\newcommand{\textsc}[1]{\textrm{#1}}$
$\newcommand{\hl}[1]{\textrm{#1}}$
$\newcommand{\footnote}[1]{}$
$\newcommand{\orcid}[1]$</div>



<div id="title">

# $\Euclid$\/: Quick Data Release (Q1) -- Watching ICM-selected galaxy clusters with $\Euclid$ eyes - prospects of $\Euclid$ data in the context of large SZ and X-ray based surveys$\thanks{This paper is published on       behalf of the Euclid Consortium}$

</div>
<div id="comments">

[![arXiv](https://img.shields.io/badge/arXiv-2506.19566-b31b1b.svg)](https://arxiv.org/abs/2506.19566)<mark>Appeared on: 2025-06-25</mark> -  _11 pages, 10 figures_

</div>
<div id="authors">

M. Klein, et al. -- incl., <mark>K. Jahnke</mark>

</div>
<div id="abstract">

**Abstract:** Galaxy clusters detected through their X-ray emission or Sunyaev--Zeldovich effect (SZE), both produced by the intra-cluster medium (ICM), have been successfully used in cosmological and astrophysical studies. To maximise the scientific return and robustness of such studies, these surveys require complementary information from other datasets.Systematic cluster confirmation and redshifts of ICM-selected cluster candidates are typically provided by wide-field optical and infrared imaging surveys, which are becoming increasingly challenged by ongoing ICM-selected samples. Particularly at high redshifts ( $z>1$ ) reached by future SZE-selected samples, current large surveys may not be sufficient for this task.Deep, high-resolution infrared surveys, such as those conducted with $\Euclid$ , are therefore essential for confirming the majority of high-redshift clusters in these future samples.In this context, we present an analysis of the first sizeable $\Euclid$ dataset (Q1), which overlaps with several ICM-selected cluster samples. We apply an adaptation of the MCMF cluster redshift and confirmation tool to $\Euclid$ data to estimate key cluster properties, including redshift and richness and to predict its capabilities to confirm high-redshift galaxy clusters.We find promising performance in redshift and richness estimation, particularly at high redshift. The performance in richness estimation at low redshifts ( $z<0.4$ ) is currently impacted by limitations of the Q1 dataset and are likely to improve in future data releases.By comparing MCMF measurements along random lines of sight with similar measurements from the SZE-based ACT-DR5 MCMF catalogue, we predict that the ability to confirm clusters at $1<z<2$ using $\Euclid$ will be comparable to that of current large optical surveys at $z<0.6$ and will significantly enhance the capability of cluster confirmation at high redshifts. 
SZE-selected cluster samples will therefore especially benefit from overlap with $\Euclid$ datasets.Studying the five known high- $z$ SZE-selected clusters in Q1, we identify the highest-redshift jellyfish galaxy candidate found to date in an ICM-selected cluster. This galaxy, EUCL J035330.86 $-$ 504347.6, is located in the massive cluster SPT-CL J0353 $-$ 5043 at $z=1.32$ . We also find two massive star-forming galaxies projected close to the core of ACT-CL J0350.0 $-$ 4819 ( $z\simeq1.46$ ), and evidence of strong lensing features in SPT-CL J0353 $-$ 5043 and SPT-CL J0421 $-$ 4845.

</div>

<div id="div_fig1">

<img src="tmp_2506.19566/./Euc_Q1_photozcomp.png" alt="Fig7.1" width="50%"/><img src="tmp_2506.19566/./Euclid_Q1_lambdamass_style.png" alt="Fig7.2" width="50%"/>

**Figure 7. -** _Left:_ Recovery of published cluster redshifts with \Euclid-MCMF. Redshifts of 1eRASS J040527.1$-$490347 (grey square) and SPT-CL J0421$-$4845 (yellow square) are discussed in greater detail in Sect. \ref{sc:indivclusters}. _Right:_\Euclid-MCMF richness ($\lambda$) versus ICM-based mass estimates ($M_{500}$). (*fig:redshiftRichness*)

</div>
<div id="div_fig2">

<img src="tmp_2506.19566/./SPT050353-5043_40arcRGB.png" alt="Fig9.1" width="50%"/><img src="tmp_2506.19566/./SPT050353-5043_40arcsecVISwZoom.png" alt="Fig9.2" width="50%"/>

**Figure 9. -** _Left:_\Euclid\IE, \YE, \HE-band colour composite image of the central $40$\arcsec$ \times 40$\arcsec$$ region of SPT-CL J0353$-$5043. _Right:_ Grey scale image of the same region in \Euclid\IE. Boxes mark the highest redshift jellyfish candidate to date (1) and two strongly lensed galaxies (2,3). (*fig:SPT0353-50*)

</div>
<div id="div_fig3">

<img src="tmp_2506.19566/./ACThighz_richness_vs_redshift.png" alt="Fig1" width="100%"/>

**Figure 1. -** $\lambda-z$ plot along the line of sight of ACT-CL J0350.0$-$4819, the ICM-selected cluster
with the highest redshift measured in the sample. (*fig:actlamz*)

</div><div id="qrcode"><img src=https://api.qrserver.com/v1/create-qr-code/?size=100x100&data="https://arxiv.org/abs/2506.19566"></div>

# Create HTML index

In [10]:
from datetime import datetime, timedelta, timezone
from glob import glob
import os


def select_recent_files(paths, days, now=None):
    """Return the paths whose content was modified within the last `days` days.

    :param paths: iterable of file paths to inspect
    :param days: maximum age, in days, for a file to be kept
    :param now: timezone-aware reference time; defaults to the current UTC time
    :returns: the selected paths, sorted by name in descending order
    """
    if now is None:
        now = datetime.now(timezone.utc)
    max_age = timedelta(days=days)
    recent = []
    for path in paths:
        # st_mtime is the content-modification time. st_ctime (used before) is
        # the metadata-change time on Unix and the creation time on Windows,
        # which does not match the "modified" wording of the report below.
        modified = datetime.fromtimestamp(os.stat(path).st_mtime, tz=timezone.utc)
        # Both operands are aware UTC datetimes, so the age is not skewed by
        # the local UTC offset (mixing naive local and naive UTC values was).
        if now - modified <= max_age:
            recent.append(path)
    # Descending name order, as before.
    return sorted(recent, reverse=True)


files = glob('_build/html/*.md')
days = 7
now = datetime.now(timezone.utc)
res = select_recent_files(files, days, now=now)
npub = len(res)
print(npub, f" publications files modified in the last {days:d} days.")

540  publications files modified in the last 7 days.


In [11]:
import datetime
from glob import glob

def get_last_n_days(lst, days=1):
    """ Get the documents from the last n days

    :param lst: iterable of ``(fname, date)`` pairs, where ``date`` is a string
                compared lexicographically against an ISO ``YYYY-MM-DD`` cutoff
    :param days: size of the window, counted back from today
    :returns: generator over the ``fname`` of every entry dated on or after
              the cutoff, most recent date first
    """
    # Local import: another cell binds the global name `datetime` to the
    # class (`from datetime import datetime`), which would break
    # `datetime.date` here depending on cell execution order.
    from datetime import date, timedelta

    # The cutoff does not change while iterating, so compute it once.
    cutoff = str(date.today() - timedelta(days=days))
    for fname, appeared in sorted(lst, key=lambda x: x[1], reverse=True):
        if appeared >= cutoff:
            yield fname

def extract_appearance_dates(lst_file):
    """ Extract the "Appeared on" date recorded in each file

    Each file is scanned line by line; only the first line containing
    "Appeared on" is used. Files without such a line are left out.

    :param lst_file: iterable of paths of the files to scan
    :returns: list of ``(fname, date_text)`` tuples, in input order
    """

    def get_date(line):
        # Keep the text between 'Appeared on:' and an optional closing </mark>.
        return line\
            .split('Appeared on:')[-1]\
            .split('</mark>')[0].strip()

    dates = []
    # Iterate over the argument; the previous version ignored it and read the
    # global `lst` instead.
    for fname in lst_file:
        with open(fname, 'r') as f:
            for line in f:
                if "Appeared on" in line:
                    dates.append((fname, get_date(line)))
                    break
    return dates

from glob import glob

# Scan every generated page, read its appearance date, and keep the ones
# that fall inside the reporting window.
days = 7
lst = glob('_build/html/*md')
dates = extract_appearance_dates(lst)
res = [fname for fname in get_last_n_days(dates, days)]
npub = len(res)
print(npub, f" publications in the last {days:d} days.")

12  publications in the last 7 days.


In [12]:
def create_carousel(npub=4):
    """ Build the HTML markup of a carousel holding `npub` slides

    :param npub: number of slide cells to emit; slides are numbered from 1
    :returns: the markup as a single newline-joined string
    """
    opening = [
        """  <div class="carousel" """,
        """       data-flickity='{ "autoPlay": 10000, "adaptiveHeight": true, "resize": true, "wrapAround": true, "pauseAutoPlayOnHover": true, "groupCells": 1 }' id="asyncTypeset">""",
    ]
    cell_template = """    <div class="carousel-cell"> <div id="slide{k}" class="md_view">Content {k}</div> </div>"""
    cells = [cell_template.format(k=index) for index in range(1, npub + 1)]
    closing = ["  </div>"]
    return '\n'.join(opening + cells + closing)

def create_grid(npub=4):
    """ Build the HTML markup of a flat grid holding `npub` slides

    :param npub: number of grid items to emit; slides are numbered from 1
    :returns: the markup as a single newline-joined string
    """
    item_template = """    <div class="grid-item"> <div id="slide{k}" class="md_view">Content {k}</div> </div>"""
    rows = ["""  <div class="grid"> """]
    rows.extend(item_template.format(k=index) for index in range(1, npub + 1))
    rows.append("  </div>")
    return '\n'.join(rows)

In [13]:
# Render the 7-day archive page: one carousel slide per selected document.
carousel = create_carousel(npub)
# Quoted, comma-separated file names (directory part dropped) and slide ids.
docs = ', '.join(f'"{path.split("/")[-1]}"' for path in res)
slides = ', '.join('"slide{}"'.format(number) for number in range(1, npub + 1))

with open("daily_template.html", "r") as tpl:
    page = tpl.read()

# Fill the template placeholders one after the other.
page = (
    page.replace("{%-- carousel:s --%}", carousel)
        .replace("{%-- suptitle:s --%}", "7-day archives")
        .replace("{%-- docs:s --%}", docs)
        .replace("{%-- slides:s --%}", slides)
)

with open("_build/html/index_7days.html", 'w') as fout:
    fout.write(page)

In [14]:
# Same page as the 7-day archive, restricted to the last day.
days = 1
res = [fname for fname in get_last_n_days(dates, days)]
npub = len(res)
print(npub, f" publications in the last day.")

carousel = create_carousel(npub)
# Quoted, comma-separated file names (directory part dropped) and slide ids.
docs = ', '.join(f'"{path.split("/")[-1]}"' for path in res)
slides = ', '.join('"slide{}"'.format(number) for number in range(1, npub + 1))

with open("daily_template.html", "r") as tpl:
    page = tpl.read()

# Fill the template placeholders one after the other.
page = (
    page.replace("{%-- carousel:s --%}", carousel)
        .replace("{%-- suptitle:s --%}", "Daily")
        .replace("{%-- docs:s --%}", docs)
        .replace("{%-- slides:s --%}", slides)
)

with open("_build/html/index_daily.html", 'w') as fout:
    fout.write(page)

5  publications in the last day.


In [15]:
# Create the flat grid of the last N papers (fixed number regardless of dates)
from itertools import islice

# Upper bound on the number of papers shown in the grid.
max_papers = 6

# `dates` holds (fname, date_text) pairs; take the most recent ones first.
# The limit now comes from `max_papers` instead of a second hard-coded 6.
newest_first = reversed(sorted(dates, key=lambda x: x[1]))
res = [entry[0] for entry in islice(newest_first, max_papers)]

# Use the number of papers actually found, so the grid never contains more
# slides than there are documents to load into them.
npub = len(res)
print(f"{npub} publications selected.")

grid = create_grid(npub)
docs = ', '.join(['"{0:s}"'.format(k.split('/')[-1]) for k in res])
slides = ', '.join([f'"slide{k}"' for k in range(1, npub + 1)])

with open("grid_template.html", "r") as tpl:
    page = tpl.read()
    page = page.replace("{%-- grid-content:s --%}", grid)\
               .replace("{%-- suptitle:s --%}",  f"Last {npub:,d} papers" )\
               .replace("{%-- docs:s --%}", docs)\
               .replace("{%-- slides:s --%}", slides)

with open("_build/html/index_npub_grid.html", 'w') as fout:
    fout.write(page)

6  6 publications selected.
