# MPIA Arxiv on Deck 2

Contains the steps to produce the paper extractions.

In [1]:
# Imports
import os
from IPython.display import Markdown, display
from tqdm.notebook import tqdm
import warnings
from PIL import Image 
import re

# requires arxiv_on_deck_2

from arxiv_on_deck_2.arxiv2 import (get_new_papers, 
                                    get_paper_from_identifier,
                                    retrieve_document_source, 
                                    get_markdown_badge)
from arxiv_on_deck_2 import (latex,
                             latex_bib,
                             mpia,
                             highlight_authors_in_list)

# Sometimes images are really big
Image.MAX_IMAGE_PIXELS = 1000000000 

In [2]:
# Some useful definitions.

class AffiliationWarning(UserWarning):
    pass

class AffiliationError(RuntimeError):
    pass

def validation(source: str):
    """Raises error paper during parsing of source file
    
    Allows checks before parsing TeX code.
    
    Raises AffiliationWarning
    """
    check = mpia.affiliation_verifications(source, verbose=True)
    if check is not True:
        raise AffiliationError("mpia.affiliation_verifications: " + check)

        
warnings.simplefilter('always', AffiliationWarning)


def get_markdown_qrcode(paper_id: str):
    """ Generate a qrcode to the arxiv page using qrserver.com
    
    :param paper: Arxiv paper
    :returns: markdown text
    """
    url = r"https://api.qrserver.com/v1/create-qr-code/?size=100x100&data="
    txt = f"""<img src={url}"https://arxiv.org/abs/{paper_id}">"""
    txt = '<div id="qrcode">' + txt + '</div>'
    return txt


def clean_non_western_encoded_characters_commands(text: str) -> str:
    """ Remove non-western encoded characters from a string
    List may need to grow.
    
    :param text: the text to clean
    :return: the cleaned text
    """
    text = re.sub(r"(\\begin{CJK}{UTF8}{gbsn})(.*?)(\\end{CJK})", r"\2", text)
    return text


def get_initials(name: str) -> str:
    """ Get the short name, e.g., A.-B. FamName
    :param name: full name
    :returns: initials
    """
    initials = []
    # account for non western names often in ()
    if '(' in name:
        name = clean_non_western_encoded_characters_commands(name)
        suffix = re.findall(r"\((.*?)\)", name)[0]
        name = name.replace(f"({suffix})", '')
    else:
        suffix = ''
    split = name.split()
    for token in split[:-1]:
        if '-' in token:
            current = '-'.join([k[0] + '.' for k in token.split('-')])
        else:
            current = token[0] + '.'
        initials.append(current)
    initials.append(split[-1].strip())
    if suffix:
        initials.append(f"({suffix})")
    return ' '.join(initials)

## get list of arxiv paper candidates

We use the MPIA mitarbeiter list webpage from mpia.de to get author names
We then get all new papers from Arxiv and match authors

In [3]:
# deal with the author list and edge cases of people that cannot be consistent on their name  

def filter_non_scientists(name: str) -> bool:
    """ Loose filter on expected authorships

    removing IT, administration, technical staff
    :param name: name
    :returns: False if name is not a scientist
    """
    remove_list = ['Licht', 'Binroth', 'Witzel', 'Jordan',
                   'Zähringer', 'Scheerer', 'Hoffmann', 'Düe',
                   'Hellmich', 'Enkler-Scharpegge', 'Witte-Nguy',
                   'Dehen', 'Beckmann', 'Jager', 'Jäger'
                  ]

    for k in remove_list:
        if k in name:
            return False
    return True

def add_author_to_list(author_list: list) -> list:
    """ Add author to list if not already in list
    
    :param author: author name
    :param author_list: list of authors
    :returns: updated list of authors
    """
    add_list = ['T. Henning']

    for author in add_list:
        if author not in author_list:
            author_list.append(author)
    return author_list

# get list from MPIA website
# filter for non-scientists (mpia.get_mpia_mitarbeiter_list() does some filtering)
mpia_authors = [k[1] for k in mpia.get_mpia_mitarbeiter_list() if filter_non_scientists(k[1])]
# add some missing author because of inconsistencies in their MPIA name and author name on papers
mpia_authors = add_author_to_list(mpia_authors)

In [4]:
new_papers = get_new_papers()
# add manual references
add_paper_refs = []
new_papers.extend([get_paper_from_identifier(k) for k in add_paper_refs])

def robust_call(fn, value, *args, **kwargs):
    try:
        return fn(value, *args, **kwargs)
    except Exception:
        return value

candidates = []
for paperk in new_papers:
    # Check author list with their initials
    normed_author_list = [robust_call(mpia.get_initials, k) for k in paperk['authors']]
    hl_authors = highlight_authors_in_list(normed_author_list, mpia_authors, verbose=True)
    matches = [(hl, orig) for hl, orig in zip(hl_authors, paperk['authors']) if 'mark' in hl]
    paperk['authors'] = hl_authors
    if matches:
        # only select paper if an author matched our list
        candidates.append(paperk)
print("""Arxiv has {0:,d} new papers today""".format(len(new_papers)))        
print("""          {0:,d} with possible author matches""".format(len(candidates)))

A. Pillepich  ->  A. Pillepich  |  ['A. Pillepich']
K. Lee  ->  K. Lee  |  ['K. Lee']


X. Zhang  ->  X. Zhang  |  ['X. Zhang']
Y. Wang  ->  Y. Wang  |  ['Y. Wang']
Arxiv has 75 new papers today
          5 with possible author matches


# Parse sources and generate relevant outputs

From the candidates, we do the following steps:
* get their tarball from ArXiv (and extract data)
* find the main .tex file: find one with \documentclass{...} (sometimes it's non trivial)
* Check affiliations with :func:`validation`, which uses :func:`mpia.affiliation_verifications`
* If passing the affiliations: we parse the .tex source
   * inject sub-documents into the main (flatten the main document)
   * parse structure, extract information (title, abstract, authors, figures...)
   * handles `\graphicspath` if provided
* Generate the .md document.

In [5]:
documents = []
failed = []
for paper in tqdm(candidates):
    # debug crap
    paper['identifier'] = paper['identifier'].lower().replace('arxiv:', '').replace(r'\n', '').strip()
    paper_id = paper['identifier']
    
    folder = f'tmp_{paper_id}'

    try:
        if not os.path.isdir(folder):
            folder = retrieve_document_source(f"{paper_id}", f'tmp_{paper_id}')
        
        try:
            doc = latex.LatexDocument(folder, validation=validation)    
        except AffiliationError as affilerror:
            msg = f"ArXiv:{paper_id:s} is not an MPIA paper... " + str(affilerror)
            failed.append((paper, "affiliation error: " + str(affilerror) ))
            continue
        
        # Hack because sometimes author parsing does not work well
        if (len(doc.authors) != len(paper['authors'])):
            doc._authors = paper['authors']
        else:
            # highlight authors (FIXME: doc.highlight_authors)
            # done on arxiv paper already
            doc._authors = highlight_authors_in_list(
                [get_initials(k) for k in doc.authors], 
                mpia_authors, verbose=True)
        if (doc.abstract) in (None, ''):
            doc._abstract = paper['abstract']
            
        doc.comment = (get_markdown_badge(paper_id) + 
                       "<mark>Appeared on: " + paper['date'] + "</mark> - ")
        if paper['comments']:
            doc.comment += " _" + paper['comments'] + "_"
        
        full_md = doc.generate_markdown_text()
        
        full_md += get_markdown_qrcode(paper_id)
        
        # replace citations
        try:
            bibdata = latex_bib.LatexBib.from_doc(doc)
            full_md = latex_bib.replace_citations(full_md, bibdata)
        except Exception as e:
            print("Issues with the citations")
            print(e)
        
        documents.append((paper_id, full_md))
    except Exception as e:
        warnings.warn(latex.LatexWarning(f"{paper_id:s} did not run properly\n" +
                                         str(e)
                                        ))
        failed.append((paper, "latex error " + str(e)))

  0%|          | 0/5 [00:00<?, ?it/s]

Retrieving document from  https://arxiv.org/e-print/2507.02105


extracting tarball to tmp_2507.02105...

 done.






















Found 82 bibliographic references in tmp_2507.02105/mnras_template.bbl.
Retrieving document from  https://arxiv.org/e-print/2507.02355


extracting tarball to tmp_2507.02355...

 done.
Retrieving document from  https://arxiv.org/e-print/2507.02558



  exec(code_obj, self.user_global_ns, self.user_ns)

  exec(code_obj, self.user_global_ns, self.user_ns)


extracting tarball to tmp_2507.02558... done.
Retrieving document from  https://arxiv.org/e-print/2507.02651



  exec(code_obj, self.user_global_ns, self.user_ns)

  exec(code_obj, self.user_global_ns, self.user_ns)


extracting tarball to tmp_2507.02651...

 done.
Retrieving document from  https://arxiv.org/e-print/2507.02806


extracting tarball to tmp_2507.02806...

 done.


### Export the logs

Throughout, we also keep track of the logs per paper. see `logs-{today date}.md` 

In [6]:
import datetime
today = str(datetime.date.today())
logfile = f"_build/html/logs/log-{today}.md"


with open(logfile, 'w') as logs:
    # Success
    logs.write(f'# Arxiv on Deck 2: Logs - {today}\n\n')
    logs.write("""* Arxiv had {0:,d} new papers\n""".format(len(new_papers)))
    logs.write("""    * {0:,d} with possible author matches\n\n""".format(len(candidates)))
    logs.write("## Sucessful papers\n\n")
    display(Markdown("## Successful papers"))
    success = [k[0] for k in documents]
    for candid in candidates:
        if candid['identifier'].split(':')[-1] in success:
            display(candid)
            logs.write(candid.generate_markdown_text() + '\n\n')

    ## failed
    logs.write("## Failed papers\n\n")
    display(Markdown("## Failed papers"))
    failed = sorted(failed, key=lambda x: x[1])
    current_reason = ""
    for paper, reason in failed:
        if 'affiliation' in reason:
            color = 'green'
        else:
            color = 'red'
        data = Markdown(
                paper.generate_markdown_text() + 
                f'\n|<p style="color:{color:s}"> **ERROR** </p>| <p style="color:{color:s}">{reason:s}</p> |'
               )
        if reason != current_reason:
            logs.write(f'### {reason:s} \n\n')
            current_reason = reason
        logs.write(data.data + '\n\n')
        
        # only display here the important errors (all in logs)
        # if color in ('red',):
        display(data)

## Successful papers


|||
|---:|:---|
| [![arXiv](https://img.shields.io/badge/arXiv-2507.02105-b31b1b.svg)](https://arxiv.org/abs/2507.02105) | **Photometric analysis of the intracluster light in the TNG300 simulation and wide-field observations**  |
|| D. Montenegro-Taborda, et al. -- incl., <mark>A. Pillepich</mark> |
|*Appeared on*| *2025-07-07*|
|*Comments*| *Accepted for publication in MNRAS (submitted April 8, 2025)*|
|**Abstract**|            We present a robust, apples-to-apples comparison between the photometric properties of the intracluster light (ICL) in the TNG300 magnetohydrodynamic cosmological simulation and those in Wendelstein Wide Field Imager (WWFI) observations. This is accomplished by generating synthetic $g'$-band images of 40 massive ($\log\left(M_{\rm 200, crit}/{\rm M}_{\odot}\right) > 14.5$) TNG300 clusters at $z \approx 0.06$, closely mimicking WWFI observations, and then performing identical photometric calculations on the synthetic and real images. Importantly, we apply the same observationally motivated satellite-masking procedure to both data-sets, which effectively removes any possible biases introduced by the halo finder. We first analyze the light distribution of the `smooth' stellar component of each cluster, composed of the brightest cluster galaxy (BCG) plus the ICL, and find that it tends to be about twice as extended in TNG300 than in observations, while also being approximately 1 $g'$ mag arcsec$^{-2}$ brighter. We then quantify $f_{\rm ICL}$, the ICL fraction relative to the BCG+ICL, by considering several ICL definitions: (i) the light dimmer than a surface brightness cut at 27 $g'$ mag arcsec$^{-2}$, (ii) the excess light over a de Vaucouleurs profile, (iii) the light beyond twice the half-light radius ($2 r_{\rm half}$), and (iv) the light beyond a fixed circular aperture of 30, 50, or 100 kpc. For most definitions, the median $f_{\rm ICL}$ is consistent between simulation and observations. However, the observations exhibit larger scatter in $f_{\rm ICL}$, which we attribute primarily to observational uncertainties in the total BCG+ICL luminosity rather than `true' cluster-to-cluster variation in the real Universe. We also find that most methods yield median $f_{\rm ICL}$ values near 0.3, which is consistent with a BCG/ICL transition radius around $2 r_{\rm half}$.         |

## Failed papers


|||
|---:|:---|
| [![arXiv](https://img.shields.io/badge/arXiv-2507.02355-b31b1b.svg)](https://arxiv.org/abs/2507.02355) | **Multi-year Polarimetric Monitoring of Four CHIME-Discovered Repeating Fast Radio Bursts with FAST**  |
|| Y. Feng, et al. -- incl., <mark>K. Lee</mark> |
|*Appeared on*| *2025-07-07*|
|*Comments*| *17 pages, 5 figures,accepted by SCIENCE CHINA*|
|**Abstract**|            In this study, we report multi-year polarization measurements of four repeating FRBs initially discovered by CHIME: FRBs~20190117A, 20190208A, 20190303A, and 20190417A. We observed the four repeating FRBs with FAST, detecting a total of 66 bursts. Two bursts from FRB~20190417A exhibit a circular polarization signal-to-noise ratio greater than 7, with the highest circular polarization fraction recorded at 35.7%. While the bursts from FRBs 20190208A and 20190303A are highly linearly polarized, those from FRBs~20190117A and 20190417A show depolarization due to multi-path propagation, with \sigma_{\mathrm{RM}} = 2.78 \pm 0.05 rad m$^{-2}$ and 5.19 \pm 0.09 rad m$^{-2}$, respectively. The linear polarization distributions among five repeating FRB--FRBs~20190208A, 20190303A, 20201124A, 20220912A, and 20240114A--are nearly identical but show distinct differences from those of non-repeating FRBs. FRBs~20190117A, 20190303A, and 20190417A exhibit substantial rotation measure (RM) variations between bursts, joining other repeating FRBs in this behavior. Combining these findings with published results, 64% of repeating FRBs show RM variations greater than 50 rad m$^{-2}$, and 21\% exhibit RM reversals. A significant proportion of repeating FRBs reside in a dynamic magneto-ionic environment. The structure function of RM variations shows a power-law index of $\gamma \sim (0-0.8)$, corresponding to a shallow power spectrum $\alpha = -(\gamma + 2) \sim -(2.0-2.8)$ of turbulence, if the RM variations are attributed to turbulence. This suggests that the variations are dominated by small-scale RM density fluctuations. We perform K-S tests comparing the RMs of repeating and non-repeating FRBs, which reveal a marginal dichotomy in the distribution of their this http URL caution that the observed dichotomy may be due to the small sample size and selection biases.         |
|<p style="color:green"> **ERROR** </p>| <p style="color:green">affiliation error: mpia.affiliation_verifications: 'Heidelberg' keyword not found.</p> |


|||
|---:|:---|
| [![arXiv](https://img.shields.io/badge/arXiv-2507.02558-b31b1b.svg)](https://arxiv.org/abs/2507.02558) | **Measuring the muon content of inclined air showers using AERA and the water-Cherenkov detectors of the Pierre Auger Observatory**  |
|| P. A. Collaboration, et al. |
|*Appeared on*| *2025-07-07*|
|*Comments*| *10 pages, 6 figures*|
|**Abstract**|            We present a novel approach for assessing the muon content of air showers with large zenith angles on a combined analysis of their radio emission and particle footprint. We use the radiation energy reconstructed by the Auger Engineering Radio Array (AERA) as an energy estimator and determine the muon number independently with the water-Cherenkov detector array of the Pierre Auger Observatory, deployed on a 1500 m grid. We focus our analysis on air showers with primary energy above 4 EeV to ensure full detection efficiency. Over approximately ten years of accumulated data, we identify a set of 40 high-quality events that are used in the analysis. The estimated muon contents in data are compatible with those for iron primaries as predicted by current-generation hadronic interaction models. This result can be interpreted as a deficit of muons in simulations as a lighter mass composition has been established from Xmax measurements. This muon deficit was already observed in previous analyses of the Auger Collaboration and is confirmed using hybrid events that include radio measurements for the first time.         |
|<p style="color:green"> **ERROR** </p>| <p style="color:green">affiliation error: mpia.affiliation_verifications: 'Heidelberg' keyword not found.</p> |


|||
|---:|:---|
| [![arXiv](https://img.shields.io/badge/arXiv-2507.02651-b31b1b.svg)](https://arxiv.org/abs/2507.02651) | **Prospects for probing dark matter particles and primordial black holes with the Square Kilometre Array using the 21 cm power spectrum at cosmic dawn**  |
|| M.-L. Zhao, Y. Shao, S. Wang, <mark>X. Zhang</mark> |
|*Appeared on*| *2025-07-07*|
|*Comments*| *26 pages, 11 figures*|
|**Abstract**|            Probing the nature of dark matter (DM) remains an outstanding problem in modern cosmology. The 21 cm signal, as a sensitive tracer of neutral hydrogen during cosmic dawn, provides a unique means to investigate DM nature during this critical epoch. Annihilation and decay of DM particles, as well as Hawking radiation of primordial black holes (PBHs), can modify the thermal and ionization histories of the early universe, leaving distinctive imprints on the 21 cm power spectrum. Therefore, the redshifted 21 cm power spectrum serves as a powerful tool to investigate such DM processes. In this work, we systematically assess the potential of the upcoming Square Kilometre Array (SKA) to constrain DM and PBH parameters using the 21 cm power spectrum. Assuming $10,000$ hours of integration time, the SKA is projected to reach sensitivities of $\langle\sigma v\rangle \leq 10^{-28}\,{\rm cm}^{3}\,{\rm s}^{-1}$ and $\tau\geq 10^{28}\,{\rm seconds}$, for $10\,{\rm GeV}$ DM particles. It can also probe PBHs with masses of $10^{16}\,\mathrm{g}$ and abundances $f_{\mathrm{PBH}} \leq 10^{-6}$. These results indicate that the SKA could place constraints on DM annihilation, decay, and PBH Hawking radiation that are up to two to three orders of magnitude stronger than current limits. Furthermore, the SKA is expected to exceed existing bounds on sub-GeV DM and to probe Hawking radiation from PBHs with masses above $10^{17}\,{\rm g}$, which are otherwise inaccessible by conventional cosmological probes. Overall, the SKA holds great promise for advancing our understanding of both DM particles and PBHs, potentially offering new insights into the fundamental nature of DM.         |
|<p style="color:green"> **ERROR** </p>| <p style="color:green">affiliation error: mpia.affiliation_verifications: 'Heidelberg' keyword not found.</p> |


|||
|---:|:---|
| [![arXiv](https://img.shields.io/badge/arXiv-2507.02806-b31b1b.svg)](https://arxiv.org/abs/2507.02806) | **GRB 240825A: Early Reverse Shock and Its Physical Implications**  |
|| C. Wu, et al. -- incl., <mark>Y. Wang</mark> |
|*Appeared on*| *2025-07-07*|
|*Comments*| *31 pages, 9 Figures, 10 Tables*|
|**Abstract**|            Early multi-wavelength observations offer crucial insights into the nature of the relativistic jets responsible for gamma-ray bursts and their interaction with the surrounding this http URL present data of GRB 240825A from 17 space- and ground-based telescopes/instruments, covering wavelengths from NIR/optical to X-ray and GeV, and spanning from the prompt emission to the afterglow phase triggered by Swift and Fermi. The early afterglow observations were carried out by SVOM/C-GFT, and spectroscopic observations of the afterglow by GTC, VLT, and TNG determined the redshift of the burst ($z = 0.659$) later.A comprehensive analysis of the prompt emission spectrum observed by Swift-BAT and Fermi-GBM/LAT reveals a rare and significant high-energy cutoff at ~76 MeV. Assuming this cutoff is due to $\gamma\gamma$ absorption allows us to place an upper limit on the initial Lorentz factor, $\Gamma_0 < 245$. The optical/NIR and GeV afterglow light curves be described by the standard external shock model, with early-time emission dominated by a reverse shock (RS) and a subsequent transition to forward shock (FS) emission. Our afterglow modelling yields a consistent estimate of the initial Lorentz factor ($\Gamma_{\rm 0} \sim 234$). Furthermore, the RS-to-FS magnetic field ratio ($\mathcal{R}_B \sim 302$) indicates that the reverse shock region is significantly more magnetized than the FS region. An isotropic-equivalent kinetic energy of $E_{\text{k,iso}} = 5.25 \times 10^{54}$ erg is derived, and the corresponding $\gamma$-ray radiation efficiency is estimated to be $\eta_{\gamma}$ = 3.1%. On the other hand, the standard afterglow model can not reproduce the X-ray light curve of GRB 240825A, calling for improved models to characterize all multi-wavelength data.         |
|<p style="color:green"> **ERROR** </p>| <p style="color:green">affiliation error: mpia.affiliation_verifications: 'Heidelberg' keyword not found.</p> |

## Export documents

We now write the .md files and export relevant images

In [7]:
def export_markdown_summary(md: str, md_fname:str, directory: str):
    """Export MD document and associated relevant images"""
    import os
    import shutil
    import re

    if (os.path.exists(directory) and not os.path.isdir(directory)):
        raise RuntimeError(f"a non-directory file exists with name {directory:s}")

    if (not os.path.exists(directory)):
        print(f"creating directory {directory:s}")
        os.mkdir(directory)

    fig_fnames = (re.compile(r'\[Fig.*\]\((.*)\)').findall(md) + 
                  re.compile(r'\<img src="([^>\s]*)"[^>]*/>').findall(md))
    print("found figures", fig_fnames)
    for fname in fig_fnames:
        if 'http' in fname:
            # No need to copy online figures
            continue
        if not os.path.exists(fname):
            print("file not found", fname)
            continue
        print("copying ", fname, "to", directory)
        destdir = os.path.join(directory, os.path.dirname(fname))
        destfname = os.path.join(destdir, os.path.basename(fname))
        try:
            os.makedirs(destdir)
        except FileExistsError:
            pass
        shutil.copy(fname, destfname)
    with open(os.path.join(directory, md_fname), 'w') as fout:
        fout.write(md)
    print("exported in ", os.path.join(directory, md_fname))
    [print("    + " + os.path.join(directory,fk)) for fk in fig_fnames]

In [8]:
for paper_id, md in documents:
    export_markdown_summary(md, f"{paper_id:s}.md", '_build/html/')

found figures ['tmp_2507.02105/./fig/compare_all_subsample_sb_profiles_circ_kpc.png', 'tmp_2507.02105/./fig/compare_all_subsample_sb_profiles_ellip_kpc.png', 'tmp_2507.02105/./fig/f_icl_sblim_27_30_vs_mvir_singlepanel.png', 'tmp_2507.02105/./fig/f_icl_dev_27_30_vs_mvir_singlepanel.png', 'tmp_2507.02105/./fig/barh_plot_f_icl_real_mock_image_plus_subsample_dev_27_multiple_def_5.5_arcsec_1_4_clipping_side.png']
copying  tmp_2507.02105/./fig/compare_all_subsample_sb_profiles_circ_kpc.png to _build/html/
copying  tmp_2507.02105/./fig/compare_all_subsample_sb_profiles_ellip_kpc.png to _build/html/
copying  tmp_2507.02105/./fig/f_icl_sblim_27_30_vs_mvir_singlepanel.png to _build/html/
copying  tmp_2507.02105/./fig/f_icl_dev_27_30_vs_mvir_singlepanel.png to _build/html/
copying  tmp_2507.02105/./fig/barh_plot_f_icl_real_mock_image_plus_subsample_dev_27_multiple_def_5.5_arcsec_1_4_clipping_side.png to _build/html/
exported in  _build/html/2507.02105.md
    + _build/html/tmp_2507.02105/./fig/com

## Display the papers

Not necessary but allows for a quick check.

In [9]:
[display(Markdown(k[1])) for k in documents];

<div class="macros" style="visibility:hidden;">
$\newcommand{\ensuremath}{}$
$\newcommand{\xspace}{}$
$\newcommand{\object}[1]{\texttt{#1}}$
$\newcommand{\farcs}{{.}''}$
$\newcommand{\farcm}{{.}'}$
$\newcommand{\arcsec}{''}$
$\newcommand{\arcmin}{'}$
$\newcommand{\ion}[2]{#1#2}$
$\newcommand{\textsc}[1]{\textrm{#1}}$
$\newcommand{\hl}[1]{\textrm{#1}}$
$\newcommand{\footnote}[1]{}$
$\newcommand{\Msun}{{\rm M}_{\odot}}$
$\newcommand{\Mtwo}{M_{\rm 200}}$
$\newcommand{\Mfive}{M_{\rm 500}}$
$\newcommand{\Rtwo}{R_{\rm 200}}$
$\newcommand{\Rfive}{R_{\rm 500}}$
$\newcommand{\facc}{f_{\rm acc}}$
$\newcommand{\fex}{f_{\rm ex}}$
$\newcommand{\krot}{\kappa_{\rm rot}}$
$\newcommand{\thebibliography}{\DeclareRobustCommand{\VAN}[3]{##3}\VANthebibliography}$</div>



<div id="title">

# Photometric analysis of the intracluster light in the TNG300 simulation and wide-field observations

</div>
<div id="comments">

[![arXiv](https://img.shields.io/badge/arXiv-2507.02105-b31b1b.svg)](https://arxiv.org/abs/2507.02105)<mark>Appeared on: 2025-07-07</mark> -  _Accepted for publication in MNRAS (submitted April 8, 2025)_

</div>
<div id="authors">

D. Montenegro-Taborda, et al. -- incl., <mark>A. Pillepich</mark>

</div>
<div id="abstract">

**Abstract:** We present a robust, apples-to-apples comparison between the photometric properties of the intracluster light (ICL) in the TNG300 magnetohydrodynamic cosmological simulation and those in Wendelstein Wide Field Imager (WWFI) observations. This is accomplished by generating synthetic $g'$ -band images of 40 massive ( $\log\left(M_{\rm 200, crit}/{\rm M}_{\odot}\right) > 14.5$ ) TNG300 clusters at $z \approx 0.06$ , closely mimicking WWFI observations, and then performing identical photometric calculations on the synthetic and real images. Importantly, we apply the same observationally motivated satellite-masking procedure to both data-sets, which effectively removes any possible biases introduced by the halo finder. We first analyze the light distribution of the `smooth' stellar component of each cluster, composed of the brightest cluster galaxy (BCG) plus the ICL, and find that it tends to be about twice as extended in TNG300 than in observations, while also being approximately 1 $g'$ mag arcsec $^{-2}$ brighter. We then quantify $f_{\rm ICL}$ , the ICL fraction relative to the BCG+ICL, by considering several ICL definitions: (i) the light dimmer than a surface brightness cut at 27 $g'$ mag arcsec $^{-2}$ , (ii) the excess light over a de Vaucouleurs profile, (iii) the light beyond twice the half-light radius ( $2 r_{\rm half}$ ), and (iv) the light beyond a fixed circular aperture of 30, 50, or 100 kpc. For most definitions, the median $f_{\rm ICL}$ is consistent between simulation and observations. However, the observations exhibit larger scatter in $f_{\rm ICL}$ , which we attribute primarily to observational uncertainties in the total BCG+ICL luminosity rather than `true' cluster-to-cluster variation in the real Universe. We also find that most methods yield median $f_{\rm ICL}$ values near 0.3, which is consistent with a BCG/ICL transition radius around $2 r_{\rm half}$ .

</div>

<div id="div_fig1">

<img src="tmp_2507.02105/./fig/compare_all_subsample_sb_profiles_circ_kpc.png" alt="Fig7.1" width="50%"/><img src="tmp_2507.02105/./fig/compare_all_subsample_sb_profiles_ellip_kpc.png" alt="Fig7.2" width="50%"/>

**Figure 7. -** Median surface brightness (SB) profiles of the simulated (red) and observed (black and blue) BCG+ICL systems, shown both for circular (left) and elliptical (right) apertures. In the case of the observations, the main (consisting of the 38 observed clusters with mass estimates) and full (including all 170 observed clusters) samples are represented by the solid black and dashed blue lines, respectively. The red and black shaded regions represent the 16th to 84th percentile ranges of the TNG300 and WWFI (main sample) profiles, respectively.  The blue shaded region to the left indicates the resolution limit of the simulation, here defined as $4 \epsilon_{\ast} \approx 5.9$ kpc. All SB values are in the observer's frame. (*fig:profiles_comparison*)

</div>
<div id="div_fig2">

<img src="tmp_2507.02105/./fig/f_icl_sblim_27_30_vs_mvir_singlepanel.png" alt="Fig11.1" width="50%"/><img src="tmp_2507.02105/./fig/f_icl_dev_27_30_vs_mvir_singlepanel.png" alt="Fig11.2" width="50%"/>

**Figure 11. -** The ICL fraction that results from defining the ICL as the light with SB dimmer than 27 $g'$ mag arcsec$^{-2}$(left) and as the excess of light with respect to an outwardly extrapolated 2D de Vaucouleurs profile fitted to the inner region brighter than 27 $g'$ mag arcsec$^{-2}$(right), plotted against cluster mass. Red and black dots represent the simulation and observations, respectively, while the solid lines and shaded regions indicate the corresponding medians and 16th--84th percentile ranges. As before, the main measurements (dots, solid lines, and shaded regions) represent the corrected, `SB-limited' measurements (see Section \ref{subsec:undetected_light}) obtained for the `sigma-clipped' images (see Section \ref{subsec:background_subtraction}), while the dashed red line shows the median $f_{\rm ICL}$ obtained by performing `full' measurements on the `raw' synthetic images. Only observed clusters with gravitational mass estimates (38 out of 170) are shown. (*fig:f_icl_sblim_27_and_dev*)

</div>
<div id="div_fig3">

<img src="tmp_2507.02105/./fig/barh_plot_f_icl_real_mock_image_plus_subsample_dev_27_multiple_def_5.5_arcsec_1_4_clipping_side.png" alt="Fig14" width="100%"/>

**Figure 14. -** ICL fraction distributions for the various methods presented in Sections \ref{subsec:f_icl_sb27}--\ref{subsec:f_icl_fixed_kpc}. The black and red bars correspond to the observational (main WWFI sample, composed of 38 clusters) and simulated (TNG300, 40 clusters) cluster populations, respectively. The dashed blue lines show the distributions for the full WWFI sample, i.e. including all 170 observed clusters. The arrows indicate the medians of the distributions. (*fig:f_icl_barchart*)

</div><div id="qrcode"><img src=https://api.qrserver.com/v1/create-qr-code/?size=100x100&data="https://arxiv.org/abs/2507.02105"></div>

# Create HTML index

In [10]:
from datetime import datetime, timedelta, timezone
from glob import glob
import os

files = glob('_build/html/*.md')
days = 7
now = datetime.today()
res = []
for fk in files:
    stat_result = os.stat(fk).st_ctime
    modified = datetime.fromtimestamp(stat_result, tz=timezone.utc).replace(tzinfo=None)
    delta = now.today() - modified
    if delta <= timedelta(days=days):
        res.append((delta.seconds, fk))
res = [k[1] for k in reversed(sorted(res, key=lambda x:x[1]))]
npub = len(res)
print(len(res), f" publications files modified in the last {days:d} days.")
# [ print('\t', k) for k in res ];

133  publications files modified in the last 7 days.


In [11]:
import datetime
from glob import glob

def get_last_n_days(lst, days=1):
    """ Get the documents from the last n days """
    sorted_lst = sorted(lst, key=lambda x: x[1], reverse=True)
    for fname, date in sorted_lst:
        if date >= str(datetime.date.today() - datetime.timedelta(days=days)):
            yield fname

def extract_appearance_dates(lst_file):
    dates = []

    def get_date(line):
        return line\
            .split('Appeared on:')[-1]\
            .split('</mark>')[0].strip()

    for fname in lst:
        with open(fname, 'r') as f:
            found_date = False
            for line in f:
                if not found_date:
                    if "Appeared on" in line:
                        found_date = True
                        dates.append((fname, get_date(line)))
                else:
                    break
    return dates

from glob import glob
lst = glob('_build/html/*md')
days = 7
dates = extract_appearance_dates(lst)
res = list(get_last_n_days(dates, days))
npub = len(res)
print(len(res), f" publications in the last {days:d} days.")

11  publications in the last 7 days.


In [12]:
def create_carousel(npub=4):
    """ Generate the HTML code for a carousel with `npub` slides """
    carousel = ["""  <div class="carousel" """,
                """       data-flickity='{ "autoPlay": 10000, "adaptiveHeight": true, "resize": true, "wrapAround": true, "pauseAutoPlayOnHover": true, "groupCells": 1 }' id="asyncTypeset">"""
                ]
    
    item_str = """    <div class="carousel-cell"> <div id="slide{k}" class="md_view">Content {k}</div> </div>"""
    for k in range(1, npub + 1):
        carousel.append(item_str.format(k=k))
    carousel.append("  </div>")
    return '\n'.join(carousel)

def create_grid(npub=4):
    """ Generate the HTML code for a flat grid with `npub` slides """
    grid = ["""  <div class="grid"> """,
                ]
    
    item_str = """    <div class="grid-item"> <div id="slide{k}" class="md_view">Content {k}</div> </div>"""
    for k in range(1, npub + 1):
        grid.append(item_str.format(k=k))
    grid.append("  </div>")
    return '\n'.join(grid)

In [13]:
carousel = create_carousel(npub)
docs = ', '.join(['"{0:s}"'.format(k.split('/')[-1]) for k in res])
slides = ', '.join([f'"slide{k}"' for k in range(1, npub + 1)])

with open("daily_template.html", "r") as tpl:
    page = tpl.read()
    page = page.replace("{%-- carousel:s --%}", carousel)\
               .replace("{%-- suptitle:s --%}",  "7-day archives" )\
               .replace("{%-- docs:s --%}", docs)\
               .replace("{%-- slides:s --%}", slides)
    
with open("_build/html/index_7days.html", 'w') as fout:
    fout.write(page)

In [14]:
# redo for today
days = 1
res = list(get_last_n_days(dates, days))
npub = len(res)
print(len(res), f" publications in the last day.")

carousel = create_carousel(npub)
docs = ', '.join(['"{0:s}"'.format(k.split('/')[-1]) for k in res])
slides = ', '.join([f'"slide{k}"' for k in range(1, npub + 1)])

with open("daily_template.html", "r") as tpl:
    page = tpl.read()
    page = page.replace("{%-- carousel:s --%}", carousel)\
               .replace("{%-- suptitle:s --%}",  "Daily" )\
               .replace("{%-- docs:s --%}", docs)\
               .replace("{%-- slides:s --%}", slides)
    
# print(carousel, docs, slides)
# print(page)
with open("_build/html/index_daily.html", 'w') as fout:
    fout.write(page)

1  publications in the last day.


In [15]:
# Create the flat grid of the last N papers (fixed number regardless of dates)
from itertools import islice 

npub = 6
res = [k[0] for k in (islice(reversed(sorted(dates, key=lambda x: x[1])), 6))]
print(len(res), f" {npub} publications selected.")

grid = create_grid(npub)
docs = ', '.join(['"{0:s}"'.format(k.split('/')[-1]) for k in res])
slides = ', '.join([f'"slide{k}"' for k in range(1, npub + 1)])

with open("grid_template.html", "r") as tpl:
    page = tpl.read()
    page = page.replace("{%-- grid-content:s --%}", grid)\
               .replace("{%-- suptitle:s --%}",  f"Last {npub:,d} papers" )\
               .replace("{%-- docs:s --%}", docs)\
               .replace("{%-- slides:s --%}", slides)
    
# print(grid, docs, slides)
# print(page)
with open("_build/html/index_npub_grid.html", 'w') as fout:
    fout.write(page)

6  6 publications selected.
