# MPIA Arxiv on Deck 2

Contains the steps to produce the paper extractions.

In [1]:
# Imports
import os
from IPython.display import Markdown, display
from tqdm.notebook import tqdm
import warnings
from PIL import Image 
import re

# requires arxiv_on_deck_2

from arxiv_on_deck_2.arxiv2 import (get_new_papers, 
                                    get_paper_from_identifier,
                                    retrieve_document_source, 
                                    get_markdown_badge)
from arxiv_on_deck_2 import (latex,
                             latex_bib,
                             mpia,
                             highlight_authors_in_list)

# Sometimes images are really big
Image.MAX_IMAGE_PIXELS = 1000000000 

In [2]:
# Some useful definitions.

class AffiliationWarning(UserWarning):
    """Warning category for paper-affiliation issues."""

class AffiliationError(RuntimeError):
    """Error raised when a paper does not pass the affiliation verification."""

def validation(source: str):
    """Check the affiliations of a source before its TeX code is parsed.

    Used as a pre-parsing hook: it is handed to ``latex.LatexDocument``
    through its ``validation`` argument.

    :param source: source to check, forwarded as-is to
        ``mpia.affiliation_verifications`` (presumably the TeX source
        text -- confirm against the parser)
    :raises AffiliationError: if ``mpia.affiliation_verifications`` returns
        anything other than ``True``; the returned value is part of the message
    """
    check = mpia.affiliation_verifications(source, verbose=True)
    if check is not True:
        # Formatting (rather than concatenating) keeps the message construction
        # from failing with a TypeError should the check return a non-string.
        raise AffiliationError(f"mpia.affiliation_verifications: {check}")

        
# Always report AffiliationWarning, even when the same warning is issued repeatedly
warnings.simplefilter('always', AffiliationWarning)


def get_markdown_qrcode(paper_id: str):
    """ Generate a qrcode to the arxiv page using qrserver.com

    :param paper_id: arXiv identifier of the paper (e.g., ``2505.05578``)
    :returns: HTML snippet (an ``<img>`` wrapped in ``<div id="qrcode">``)
              to append to the markdown text
    """
    from urllib.parse import quote

    url = r"https://api.qrserver.com/v1/create-qr-code/?size=100x100&data="
    # The target address travels as a query parameter, hence the percent-encoding.
    # The whole src value is quoted so that the attribute is well-formed HTML
    # (the quotes used to sit in the middle of an unquoted attribute value).
    target = quote(f"https://arxiv.org/abs/{paper_id}", safe="")
    txt = f'<img src="{url}{target}">'
    return '<div id="qrcode">' + txt + '</div>'


def clean_non_western_encoded_characters_commands(text: str) -> str:
    r""" Remove the LaTeX commands wrapping non-western encoded characters

    Strips ``\begin{CJK}{<encoding>}{<font>} ... \end{CJK}`` (and the starred
    ``CJK*`` form) for any encoding and font family, keeping the wrapped
    characters themselves.
    List may need to grow.

    :param text: the text to clean
    :return: the cleaned text
    """
    pattern = r"\\begin\{CJK\*?\}\{[^{}]*\}\{[^{}]*\}(.*?)\\end\{CJK\*?\}"
    # DOTALL: the wrapped content may span several lines
    return re.sub(pattern, r"\1", text, flags=re.DOTALL)


def get_initials(name: str) -> str:
    """ Get the short name, e.g., A.-B. FamName

    Every token but the last is reduced to its initial(s); hyphenated tokens
    keep their hyphen (``Anna-Barbara`` -> ``A.-B.``). A parenthesized part
    (non western names are often given in parentheses) is removed before the
    abbreviation and appended unchanged at the end.

    Degenerate input (empty name, unmatched parenthesis, dangling hyphen) is
    handled without raising.

    :param name: full name
    :returns: initials followed by the last token and, if any, the
              parenthesized suffix; an empty string for an empty name
    """
    suffix = ''
    # account for non western names often in ()
    if '(' in name:
        name = clean_non_western_encoded_characters_commands(name)
        found = re.search(r"\((.*?)\)", name)
        # an opening parenthesis without its closing one leaves the name as is
        if found:
            suffix = found.group(1)
            name = name.replace(f"({suffix})", '')

    tokens = name.split()
    initials = []
    for token in tokens[:-1]:
        # drop empty parts produced by leading/trailing/double hyphens
        parts = [part for part in token.split('-') if part]
        if parts:
            initials.append('-'.join(part[0] + '.' for part in parts))
    if tokens:
        initials.append(tokens[-1])
    if suffix:
        initials.append(f"({suffix})")
    return ' '.join(initials)

## get list of arxiv paper candidates

We use the MPIA staff (Mitarbeiter) list webpage from mpia.de to get the author names.
We then get all the new papers from arXiv and match their authors against that list.

In [3]:
# deal with the author list and edge cases of people that cannot be consistent on their name  

def filter_non_scientists(name: str) -> bool:
    """ Loose filter on expected authorships

    Rejects the names of IT, administration and technical staff, listed
    below by family name. The test is a plain substring match on the name.

    :param name: name
    :returns: False if name is not a scientist, True otherwise
    """
    excluded = (
        'Licht', 'Binroth', 'Witzel', 'Jordan',
        'Zähringer', 'Scheerer', 'Hoffmann', 'Düe',
        'Hellmich', 'Enkler-Scharpegge', 'Witte-Nguy',
        'Dehen', 'Beckmann', 'Jager', 'Jäger',
    )
    return not any(entry in name for entry in excluded)

def add_author_to_list(author_list: list) -> list:
    """ Append the hard-coded extra authors that are not in the list yet

    The list is modified in place and also returned.

    :param author_list: list of authors
    :returns: updated list of authors (the same object as ``author_list``)
    """
    extra_authors = ('T. Henning',)

    for extra in extra_authors:
        if extra in author_list:
            continue
        author_list.append(extra)
    return author_list

# Author names come from the MPIA website staff list: the second field of each
# entry is kept, unless filter_non_scientists rejects it
# (mpia.get_mpia_mitarbeiter_list() does some filtering on its own).
# Some authors are then added by hand because their MPIA name and the name
# they use on papers are inconsistent.
mpia_authors = add_author_to_list(
    [entry[1]
     for entry in mpia.get_mpia_mitarbeiter_list()
     if filter_non_scientists(entry[1])]
)

In [4]:
# Papers returned by get_new_papers(), completed with the papers referenced
# by hand in add_paper_refs (identifiers resolved one by one).
new_papers = get_new_papers()
add_paper_refs = []
new_papers.extend(list(map(get_paper_from_identifier, add_paper_refs)))

def robust_call(fn, value, *args, **kwargs):
    """Apply ``fn`` to ``value``, falling back to ``value`` on any failure.

    :param fn: callable invoked as ``fn(value, *args, **kwargs)``
    :param value: first argument of ``fn``, also the fallback result
    :returns: the result of the call, or ``value`` unchanged if the call
              raised an ``Exception``
    """
    try:
        outcome = fn(value, *args, **kwargs)
    except Exception:
        # best effort on purpose: the untouched input is the fallback
        outcome = value
    return outcome

candidates = []
for paperk in new_papers:
    # Author names are reduced to their initials before the comparison with
    # the MPIA list; a name that cannot be converted is kept unchanged.
    normed_author_list = [robust_call(mpia.get_initials, author)
                          for author in paperk['authors']]
    hl_authors = highlight_authors_in_list(normed_author_list, mpia_authors, verbose=True)
    # matched authors are the highlighted entries containing 'mark'
    matches = [pair for pair in zip(hl_authors, paperk['authors']) if 'mark' in pair[0]]
    paperk['authors'] = hl_authors
    if not matches:
        continue
    # only select paper if an author matched our list
    candidates.append(paperk)
print("""Arxiv has {0:,d} new papers today""".format(len(new_papers)))
print("""          {0:,d} with possible author matches""".format(len(candidates)))

A. Somigliana  ->  A. Somigliana  |  ['A. Somigliana']
X. Zhang  ->  X. Zhang  |  ['X. Zhang']
K. Lee  ->  K. Lee  |  ['K. Lee']
A. Winter  ->  A. Winter  |  ['A. Winter']
T. Henning  ->  T. Henning  |  ['T. Henning']
J. Liu  ->  J. Liu  |  ['J. Liu']
Y. Wang  ->  Y. Wang  |  ['Y. Wang']
J. Liu  ->  J. Liu  |  ['J. Liu']
Y. Wang  ->  Y. Wang  |  ['Y. Wang']
Arxiv has 46 new papers today
          6 with possible author matches


# Parse sources and generate relevant outputs

From the candidates, we do the following steps:
* get their tarball from ArXiv (and extract data)
* find the main .tex file: find one with \documentclass{...} (sometimes it's non trivial)
* Check affiliations with :func:`validation`, which uses :func:`mpia.affiliation_verifications`
* If passing the affiliations: we parse the .tex source
   * inject sub-documents into the main (flatten the main document)
   * parse structure, extract information (title, abstract, authors, figures...)
   * handles `\graphicspath` if provided
* Generate the .md document.

In [5]:
# Process every candidate paper: retrieve its source, check the affiliations,
# parse the TeX and generate the markdown text.
# * documents collects (paper_id, markdown text) for the papers that went through
# * failed collects (paper, reason) for the others
documents = []
failed = []
for paper in tqdm(candidates):
    # Normalize the identifier (lowercase, no 'arxiv:' prefix, no surrounding blanks).
    # NOTE(review): r'\n' is the two-character sequence backslash + n, not a
    # newline character -- confirm this is what is meant to be removed.
    paper['identifier'] = paper['identifier'].lower().replace('arxiv:', '').replace(r'\n', '').strip()
    paper_id = paper['identifier']

    folder = f'tmp_{paper_id}'

    try:
        # the source is only retrieved when its folder does not exist yet
        if not os.path.isdir(folder):
            folder = retrieve_document_source(f"{paper_id}", f'tmp_{paper_id}')

        try:
            doc = latex.LatexDocument(folder, validation=validation)
        except AffiliationError as affilerror:
            # NOTE(review): msg is built but never used (not printed, logged or raised)
            msg = f"ArXiv:{paper_id:s} is not an MPIA paper... " + str(affilerror)
            failed.append((paper, "affiliation error: " + str(affilerror) ))
            continue

        # Hack because sometimes author parsing does not work well:
        # when the number of parsed authors differs from the arXiv listing,
        # the (already highlighted) arXiv author list is used instead
        if (len(doc.authors) != len(paper['authors'])):
            doc._authors = paper['authors']
        else:
            # highlight authors (FIXME: doc.highlight_authors)
            # done on arxiv paper already
            doc._authors = highlight_authors_in_list(
                [get_initials(k) for k in doc.authors],
                mpia_authors, verbose=True)
        # fall back on the arXiv abstract when none was parsed from the source
        if (doc.abstract) in (None, ''):
            doc._abstract = paper['abstract']

        # comment line: arXiv badge, date, and the arXiv comments if any
        doc.comment = (get_markdown_badge(paper_id) +
                       "<mark>Appeared on: " + paper['date'] + "</mark> - ")
        if paper['comments']:
            doc.comment += " _" + paper['comments'] + "_"

        full_md = doc.generate_markdown_text()

        full_md += get_markdown_qrcode(paper_id)

        # replace citations (best effort: on failure the text is kept as is
        # and the problem is only printed)
        try:
            bibdata = latex_bib.LatexBib.from_doc(doc)
            full_md = latex_bib.replace_citations(full_md, bibdata)
        except Exception as e:
            print("Issues with the citations")
            print(e)

        documents.append((paper_id, full_md))
    except Exception as e:
        # any other failure is reported as a warning and recorded; the loop goes on
        warnings.warn(latex.LatexWarning(f"{paper_id:s} did not run properly\n" +
                                         str(e)
                                        ))
        failed.append((paper, "latex error " + str(e)))

  0%|          | 0/6 [00:00<?, ?it/s]

Retrieving document from  https://arxiv.org/e-print/2505.05578


extracting tarball to tmp_2505.05578...

 done.


A. Somigliana  ->  A. Somigliana  |  ['A. Somigliana']


Found 87 bibliographic references in tmp_2505.05578/aa54771-25.bbl.
Retrieving document from  https://arxiv.org/e-print/2505.05945


extracting tarball to tmp_2505.05945...

 done.
Retrieving document from  https://arxiv.org/e-print/2505.06006


extracting tarball to tmp_2505.06006...

 done.
Retrieving document from  https://arxiv.org/e-print/2505.06093


extracting tarball to tmp_2505.06093...

 done.


Found 146 bibliographic references in tmp_2505.06093/XUE_sample_submit.bbl.
Issues with the citations
syntax error in line 373: '}' expected
Retrieving document from  https://arxiv.org/e-print/2505.06167


extracting tarball to tmp_2505.06167...

 done.
Retrieving document from  https://arxiv.org/e-print/2505.06192


extracting tarball to tmp_2505.06192...

 done.


### Export the logs

Throughout, we also keep track of the logs per paper. See `_build/html/logs/log-{today's date}.md`.

In [6]:
import datetime

# The log of the day is written to _build/html/logs/log-<date>.md:
# a short summary, the successful papers, then the failed ones grouped by reason.
today = str(datetime.date.today())
logfile = f"_build/html/logs/log-{today}.md"

# the log directory may not exist yet (e.g., on a fresh build)
os.makedirs(os.path.dirname(logfile), exist_ok=True)

# explicit encoding: titles, author names and abstracts are not pure ASCII
with open(logfile, 'w', encoding='utf-8') as logs:
    # Success
    logs.write(f'# Arxiv on Deck 2: Logs - {today}\n\n')
    logs.write("""* Arxiv had {0:,d} new papers\n""".format(len(new_papers)))
    logs.write("""    * {0:,d} with possible author matches\n\n""".format(len(candidates)))
    logs.write("## Successful papers\n\n")
    display(Markdown("## Successful papers"))
    success = [k[0] for k in documents]
    for candid in candidates:
        if candid['identifier'].split(':')[-1] in success:
            display(candid)
            logs.write(candid.generate_markdown_text() + '\n\n')

    ## failed
    logs.write("## Failed papers\n\n")
    display(Markdown("## Failed papers"))
    # sorted by reason so that each reason gets a single heading in the log
    failed = sorted(failed, key=lambda x: x[1])
    current_reason = ""
    for paper, reason in failed:
        # affiliation mismatches are expected rejections (green),
        # anything else is a processing error (red)
        if 'affiliation' in reason:
            color = 'green'
        else:
            color = 'red'
        data = Markdown(
                paper.generate_markdown_text() +
                f'\n|<p style="color:{color:s}"> **ERROR** </p>| <p style="color:{color:s}">{reason:s}</p> |'
               )
        if reason != current_reason:
            logs.write(f'### {reason:s} \n\n')
            current_reason = reason
        logs.write(data.data + '\n\n')

        # every failure is displayed here, whatever its color (and all are in the logs)
        display(data)

## Successful papers


|||
|---:|:---|
| [![arXiv](https://img.shields.io/badge/arXiv-2505.05578-b31b1b.svg)](https://arxiv.org/abs/2505.05578) | **The survivorship bias of protoplanetary disc populations**  |
|| L. A. Malanga, et al. -- incl., <mark>A. Somigliana</mark> |
|*Appeared on*| *2025-05-12*|
|*Comments*| *14 pages, 15 figures*|
|**Abstract**|            The evolution of protoplanetary discs has a substantial impact on theories of planet formation. To date, neither of the two main competing evolutionary models, namely the viscous-photoevaporative paradigm and the MHD winds model, has been ruled out by observations. Due to the high number of sources observed by large surveys, population synthesis is a powerful tool to distinguish the evolution mechanism in observations. We explore the evolution of the mass distribution of synthetic populations under the assumptions of turbulence-driven accretion and dispersal caused by internal photoevaporation. We find that the rapid removal of light discs often results in an apparent increase of the median mass of the survived disc population. This occurs both when the disc properties are independent of each other, and when typical correlations between these quantities and the stellar mass are assumed. Furthermore, as MHD wind-driven accretion rarely manifests the same feature, this serves as a signature of the viscous-photoevaporative evolution when dispersal proceeds from inside-out. Therefore, we propose the evolution of the median mass as a new method to distinguish this model in observed populations. This survivorship bias is not shown by the median accretion rate, which, instead, decreases with time. Moreover, we introduce a new criterion that estimates the disc lifetime as a function of initial conditions and an analytical relation to predict whether internal photoevaporation triggers an inside-out or an outside-in dispersal. We verify both analytical relations with numerical simulations.         |


|||
|---:|:---|
| [![arXiv](https://img.shields.io/badge/arXiv-2505.06093-b31b1b.svg)](https://arxiv.org/abs/2505.06093) | **XUE. JWST spectroscopy of externally irradiated disks around young intermediate-mass stars**  |
|| M. C. Ramírez-Tannus, et al. -- incl., <mark>A. Winter</mark>, <mark>T. Henning</mark> |
|*Appeared on*| *2025-05-12*|
|*Comments*| *12 pages, 8 appendix, 16 figures, 2 tables. Submitted to A&A*|
|**Abstract**|            Most young stars and therefore planetary systems form in high-mass star forming regions and are exposed to ultraviolet radiation, affecting the protoplanetary disk. These regions are located at large distances and only now with JWST become accessible to study the inner disks surrounding young stars. We present the eXtreme UV Environments (XUE) program, which provides the first detailed characterization of the physical and chemical properties of the inner disks around young intermediate-mass stars exposed to external irradiation from nearby massive stars. We present high signal to noise MIRI-MRS spectroscopy of 12 disks located in three sub-clusters of the high-mass star-forming region NGC 6357. Based on their mid-infrared spectral energy distribution, we classify the XUE sources into Group I and II based on the Meeus scheme. We analyze their molecular emission features, and compare their spectral indices and 10 $\mu$m silicate emission profiles to those of nearby Herbig and intermediate T Tauri disks. Despite being more massive, the XUE stars host disks with molecular richness comparable to isolated T Tauri systems. The 10 $\mu$m silicate features show lower F$_{11.3}$/F$_{9.8}$ ratios at a given F$_{\mathrm{peak}}$, but current uncertainties prevent conclusions about their inner disk properties. Most disks display water emission from the inner disk, suggesting that even in these extreme environments rocky planets can form in the presence of water. The absence of strong line fluxes and other irradiation signatures suggests that the XUE disks have been truncated by external UV photons. However, this truncation does not appear to significantly impact the chemical richness of their inner regions. These findings indicate that even in extreme environments, IMTT disks can retain the ingredients necessary for rocky planet formation.         |

## Failed papers


|||
|---:|:---|
| [![arXiv](https://img.shields.io/badge/arXiv-2505.05945-b31b1b.svg)](https://arxiv.org/abs/2505.05945) | **Optical QPOs with dual periodicities 1103days and 243days in the blue quasar SDSS J100438.8+151056**  |
|| G. Liao, et al. -- incl., <mark>X. Zhang</mark> |
|*Appeared on*| *2025-05-12*|
|*Comments*| **|
|**Abstract**|            This manuscript investigates the possible existence of a binary supermassive black holes (BSMBH) system in the blue quasar SDSS J100438.8+151056 (=SDSS J1004+1510) at z=0.219 based on the detection of robust optical QPOs. We determine QPOs using multiple analysis methods applied to the CSS-V, ZTFg/r band light curves, and additionally, combined with the characteristics of broad emission lines and explores potential mechanisms for the QPOs, including jet and disk precession models. Two distinct periodicities, $1103\pm260$days and $243\pm29$days, are identified in the ZTF g/r-band light curves with confidence level exceeding $5\sigma$, through four different techniques. Meanwhile, the $1103\pm260$days periodicity is also clearly detected in the CSS V-band light curve. The optical periodicities suggest a BSMBH system candidate in SDSS J1004+1510, with an estimated total virial BH mass of $(1.13\pm0.14)\times10^8 M_{\odot}$ and a space separation of $0.0053\pm0.0016$pc for the periodicity of $1103\pm260$days. The second periodicity of $243\pm29$days could be attributed to harmonic oscillations, considering $(1103\pm260)/(243\pm29)\sim4.54\pm0.47$ with large scatters. However, if the periodicity of $243\pm29$days was from an independent QPO, a triple BH system candidate on sub-pc scale could be probably expected, with space separations of $0.00036\pm0.00004$pc between a close BSMBH system and of $0.0053\pm0.0016$pc between the BSMBH system and the third BH, after considering similar BH mass of the third BH as the total mass of the central BSMBH. These findings strongly demonstrate that combined light curves from the different sky survey projects can lead to more reliable QPOs candidates to be detected, and also indicate higher quality light curves could be helpful to find probably potential QPOs with multiple periodicities leading to rare detections of candidates for sub-pc triple BH systems.         |
|<p style="color:green"> **ERROR** </p>| <p style="color:green">affiliation error: mpia.affiliation_verifications: 'Heidelberg' keyword not found.</p> |


|||
|---:|:---|
| [![arXiv](https://img.shields.io/badge/arXiv-2505.06006-b31b1b.svg)](https://arxiv.org/abs/2505.06006) | **Periodic variation of magnetoionic environment of a fast radio burst source**  |
|| J. Xu, et al. -- incl., <mark>K. Lee</mark> |
|*Appeared on*| *2025-05-12*|
|*Comments*| *Submitted, comments are welcome*|
|**Abstract**|            Fast radio bursts (FRBs) are luminous, dispersed millisecond-duration radio bursts whose origin is poorly known. Recent observations suggest that some FRBs may reside in binary systems, even though conclusive evidence remains elusive. Here we report the detection of a 26.24$\pm$0.02 day periodicity in Faraday rotation measure (RM) of an actively repeating source named FRB 20201124A. The detection was made from 3,106 bursts collected with the Five-hundred-meter Aperture Spherical radio Telescope (FAST) over ~365 days. The RM periodicity is coherently phase-connected across ~14 cycles over a 1-year duration. Our detection of RM periodicity corresponds to a natural logarithmic Bayesian factor of 1,168. The detection significances vary between 5.9-34 {\sigma} under different assumptions. Such a periodicity provides evidence for the binary nature of FRB 20201124A, where the periodic RM variations arise from the orbital motion of the FRB source within the magnetoionic environment of the system. Together with previous observations, our result suggests that being in binary systems may be a common feature for actively repeating FRB sources.         |
|<p style="color:green"> **ERROR** </p>| <p style="color:green">affiliation error: mpia.affiliation_verifications: 'Heidelberg' keyword not found.</p> |


|||
|---:|:---|
| [![arXiv](https://img.shields.io/badge/arXiv-2505.06167-b31b1b.svg)](https://arxiv.org/abs/2505.06167) | **Pitch Angle Measurement Method based on Detector Counts Distribution. -I. Basic conception**  |
|| C. Wang, et al. -- incl., <mark>J. Liu</mark>, <mark>Y. Wang</mark> |
|*Appeared on*| *2025-05-12*|
|*Comments*| **|
|**Abstract**|            As an X-ray and gamma-ray all-sky monitor aiming for high energy astrophysical transients, Gravitational-wave high-energy Electromagnetic Counterpart All-sky Monitor (GECAM) has also made a series of observational discoveries on burst events of gamma-rays and particles in the low Earth orbit. Pitch angle is one of the key parameters of charged particles traveling around geomagnetic field. However, the usage of the GECAM-style instruments to measure the pitch angle of charged particles is still lacking. Here we propose a novel method for GECAM and similar instruments to measure the pitch angle of charged particles based on detector counts distribution. The basic conception of this method and simulation studies are described. With this method, the pitch angle of a peculiar electron precipitation event detected by GECAM-C is derived to be about 90$^\circ$, demonstrating the feasibility of our method. We note that the application of this method on GECAM-style instruments may open a new window for studying space particle events, such as Terrestrial Electron Beams (TEBs) and Lightning-induced Electron Precipitations (LEPs).         |
|<p style="color:green"> **ERROR** </p>| <p style="color:green">affiliation error: mpia.affiliation_verifications: 'Heidelberg' keyword not found.</p> |


|||
|---:|:---|
| [![arXiv](https://img.shields.io/badge/arXiv-2505.06192-b31b1b.svg)](https://arxiv.org/abs/2505.06192) | **GECAM Discovery of Peculiar Oscillating Particle Precipitation Events**  |
|| C. Wang, et al. -- incl., <mark>J. Liu</mark>, <mark>Y. Wang</mark> |
|*Appeared on*| *2025-05-12*|
|*Comments*| **|
|**Abstract**|            Charged particle precipitation typically manifests as a gradual increase and decrease of flux observed by space detectors. Cases with rapidly flux variation are very rare. Periodic events are even more extraordinary. These oscillating particle precipitation (OPP) events are usually attributed to the bounce motion of electrons, which are induced by lightning. Owing to the observation limitations, there has been debate regarding whether these oscillations originate from temporal flux evolution or spatial structure evolution. Here we report three peculiar charged particle precipitation events detected by GECAM during a geomagnetic storm on March 21, 2024, with two exhibiting significant periodicity. These events were observed around the same region during three consecutive orbits. Through comprehensive temporal and spectral analyses, we revealed that one of the OPP events exhibited a transition in spectral lag of mini-pulses, shifting from "softer-earlier" to "softer-later" while showing no significant time evolution in overall frequency characteristics. And there is no association found between these two OPP events and lightning activity. Several possible scenarios are discussed to explain these charged particles with a life time of more than 3.5 hours, but the nature of these three events remains an enigma. We suggest that these GECAM-detected OPP events may represent a new type of particle precipitation event or a peculiar Lightning-induced Electron Precipitations (LEPs).         |
|<p style="color:green"> **ERROR** </p>| <p style="color:green">affiliation error: mpia.affiliation_verifications: 'Heidelberg' keyword not found.</p> |

## Export documents

We now write the .md files and export relevant images

In [7]:
def export_markdown_summary(md: str, md_fname:str, directory: str):
    """Export MD document and associated relevant images

    The figures referenced in the markdown text (``[Fig...](path)`` links and
    ``<img src="path" .../>`` tags) are copied below ``directory``, keeping
    their relative path. Online figures (path containing ``http``) and missing
    files are skipped. The markdown text itself is written, UTF-8 encoded, to
    ``directory/md_fname``.

    :param md: markdown text to export
    :param md_fname: name of the markdown file to write
    :param directory: destination directory, created (with its parents) if needed
    :raises RuntimeError: if ``directory`` exists but is not a directory
    """
    import os
    import shutil
    import re

    if os.path.exists(directory) and not os.path.isdir(directory):
        raise RuntimeError(f"a non-directory file exists with name {directory:s}")

    if not os.path.exists(directory):
        print(f"creating directory {directory:s}")
        # makedirs rather than mkdir: parent directories may be missing as well
        os.makedirs(directory, exist_ok=True)

    fig_fnames = (re.compile(r'\[Fig.*\]\((.*)\)').findall(md) +
                  re.compile(r'\<img src="([^>\s]*)"[^>]*/>').findall(md))
    print("found figures", fig_fnames)
    for fname in fig_fnames:
        if 'http' in fname:
            # No need to copy online figures
            continue
        if not os.path.exists(fname):
            print("file not found", fname)
            continue
        print("copying ", fname, "to", directory)
        # the relative path of the figure is reproduced below the destination
        destdir = os.path.join(directory, os.path.dirname(fname))
        os.makedirs(destdir, exist_ok=True)
        shutil.copy(fname, os.path.join(destdir, os.path.basename(fname)))

    md_path = os.path.join(directory, md_fname)
    # explicit encoding: the text is not pure ASCII (names, symbols)
    with open(md_path, 'w', encoding='utf-8') as fout:
        fout.write(md)
    print("exported in ", md_path)
    for fk in fig_fnames:
        print("    + " + os.path.join(directory, fk))

In [8]:
# Write one <paper_id>.md file per successfully processed paper (with the
# figures it references) into the build directory
for paper_id, md in documents:
    export_markdown_summary(md, f"{paper_id:s}.md", '_build/html/')

found figures ['tmp_2505.05578/./mass_accrate.png', 'tmp_2505.05578/./medians.png', 'tmp_2505.05578/./surface_density.png']
copying  tmp_2505.05578/./mass_accrate.png to _build/html/
copying  tmp_2505.05578/./medians.png to _build/html/
copying  tmp_2505.05578/./surface_density.png to _build/html/
exported in  _build/html/2505.05578.md
    + _build/html/tmp_2505.05578/./mass_accrate.png
    + _build/html/tmp_2505.05578/./medians.png
    + _build/html/tmp_2505.05578/./surface_density.png
found figures ['tmp_2505.06093/./figures/CCD_CMD.png', 'tmp_2505.06093/./figures/4.9-5.3_region_overview.png', 'tmp_2505.06093/./figures/23.2-24.25_region_overview.png']
copying  tmp_2505.06093/./figures/CCD_CMD.png to _build/html/
copying  tmp_2505.06093/./figures/4.9-5.3_region_overview.png to _build/html/
copying  tmp_2505.06093/./figures/23.2-24.25_region_overview.png to _build/html/
exported in  _build/html/2505.06093.md
    + _build/html/tmp_2505.06093/./figures/CCD_CMD.png
    + _build/html/tmp_2

## Display the papers

Not necessary but allows for a quick check.

In [9]:
# Render the markdown text (second field of each entry) of every exported document
for _doc_entry in documents:
    display(Markdown(_doc_entry[1]))

<div class="macros" style="visibility:hidden;">
$\newcommand{\ensuremath}{}$
$\newcommand{\xspace}{}$
$\newcommand{\object}[1]{\texttt{#1}}$
$\newcommand{\farcs}{{.}''}$
$\newcommand{\farcm}{{.}'}$
$\newcommand{\arcsec}{''}$
$\newcommand{\arcmin}{'}$
$\newcommand{\ion}[2]{#1#2}$
$\newcommand{\textsc}[1]{\textrm{#1}}$
$\newcommand{\hl}[1]{\textrm{#1}}$
$\newcommand{\footnote}[1]{}$
$\newcommand{\tildea}[1]{\overset{\sim}{#1}}$
$\newcommand{\rev}[1]{#1}$
$\newcommand{\theequation}{A.\arabic{equation}}$</div>



<div id="title">

# The survivorship bias of protoplanetary disc populations: Internal photoevaporation causes an apparent increase of the median total disc mass with time

</div>
<div id="comments">

[![arXiv](https://img.shields.io/badge/arXiv-2505.05578-b31b1b.svg)](https://arxiv.org/abs/2505.05578)<mark>Appeared on: 2025-05-12</mark> -  _14 pages, 15 figures_

</div>
<div id="authors">

L. A. Malanga, et al. -- incl., <mark>A. Somigliana</mark>

</div>
<div id="abstract">

**Abstract:** The evolution of protoplanetary discs has a substantial impact on theories of planet formation.   To date, $\rev{neither}$ of the two main competing evolutionary models, namely the viscous-photoevaporative paradigm and the MHD winds model, has been ruled out by observations.   Due to the high number of sources observed by large surveys, population synthesis is a powerful tool to distinguish the evolution mechanism in observations.   We explore the evolution of the mass distribution of synthetic populations under the assumptions of turbulence-driven accretion and dispersal caused by internal photoevaporation.   We find that the rapid removal of light discs often results in an apparent increase of the median mass of the survived disc population.   This occurs both when the disc properties are independent of each other, and when typical correlations between these quantities and the stellar mass are assumed.   Furthermore, as MHD wind-driven accretion rarely manifests the same feature, this serves as a signature of the viscous-photoevaporative evolution when dispersal proceeds from inside-out.   Therefore, we propose the evolution of the median mass as a new method to distinguish this model in observed populations.   This survivorship bias is not shown by the median accretion rate, which, instead, decreases with time.   Moreover, we introduce a new criterion that estimates the disc lifetime as a function of initial conditions and an analytical relation to predict whether internal photoevaporation triggers an inside-out or an outside-in dispersal.   We verify both analytical relations with numerical simulations.

</div>

<div id="div_fig1">

<img src="tmp_2505.05578/./mass_accrate.png" alt="Fig2" width="100%"/>

**Figure 2. -** Evolution of the accretion rate (blue) and mass (black) of a disc with time. The vertical yellow line indicates the instant when the gap opens, which triggers a steep decrease of $\dot{M$. The red line corresponds to the transition between accreting and non-accreting. The grey region corresponds to lower values of disc mass and accretion rate than the respective observational threshold.
        } (*fig:mass_accrate*)

</div>
<div id="div_fig2">

<img src="tmp_2505.05578/./medians.png" alt="Fig6" width="100%"/>

**Figure 6. -** \small{Comparison between the evolution of the median of the synthetic mass distribution with time and the analytical prediction for both dispersal criteria. The shaded region corresponds to $t<t_\nu$, which is not considered in the model.
        } (*fig:medians*)

</div>
<div id="div_fig3">

<img src="tmp_2505.05578/./surface_density.png" alt="Fig1" width="100%"/>

**Figure 1. -** Evolution of the surface density of a disc with time, obtained integrating equation \eqref{eqn:master_pe with \texttt{Diskpop}. This disc has $M_0 = 1.5 \cdot 10^{-2}$ M$_\odot$, $R_0 = 32.8$ au, $\alpha = 10^{-3}$ and $\dot{M}_\text{w} = 2.5 \cdot 10^{-9}$ M$_\odot$ yr$^{-1}$. After the opening of the gap, the inner disc is dispersed within a short timescale.
        } (*fig:surfacedensity_gap*)

</div><div id="qrcode"><img src=https://api.qrserver.com/v1/create-qr-code/?size=100x100&data="https://arxiv.org/abs/2505.05578"></div>

<div class="macros" style="visibility:hidden;">
$\newcommand{\ensuremath}{}$
$\newcommand{\xspace}{}$
$\newcommand{\object}[1]{\texttt{#1}}$
$\newcommand{\farcs}{{.}''}$
$\newcommand{\farcm}{{.}'}$
$\newcommand{\arcsec}{''}$
$\newcommand{\arcmin}{'}$
$\newcommand{\ion}[2]{#1#2}$
$\newcommand{\textsc}[1]{\textrm{#1}}$
$\newcommand{\hl}[1]{\textrm{#1}}$
$\newcommand{\footnote}[1]{}$
$\newcommand{\vdag}{(v)^\dagger}$
$\newcommand$
$\newcommand$
$\newcommand$
$\newcommand{\green}[1]{\textcolor{mygreen}{#1}}$
$\newcommand{\red}[1]{\textcolor{myred}{#1}}$
$\newcommand{\yellow}[1]{\textcolor{myyellow}{#1}}$
$\newcommand{\purple}[1]{\textcolor{mypurple}{#1}}$
$\newcommand{\arraystretch}{1.3}$
$\newcommand{\nodata}{...}$
$\newcommand{\yes}{\green{\ding{51}}}$
$\newcommand{\no}{\red{\ding{55}}}$
$\newcommand{\maybe}{\yellow{\ding{51}^{?}}}$
$\newcommand{\ngc6357}{NGC 6357}$
$\newcommand{\mys}{MYStIX}$
$\newcommand{\Hii}{H {\sc ii}}$
$\newcommand{\micron}{\mum}$
$\newcommand{\kms}{km s^{-1}}$
$\newcommand{\cmss}{cm s^{-2}}$
$\newcommand{\lsol}{L_{\odot}}$
$\newcommand{\msun}{M_{\odot}}$
$\newcommand{\msol}{M_{\odot}}$
$\newcommand{\rsol}{R_{\odot}}$
$\newcommand{\rsun}{R_{\odot}}$
$\newcommand{\Rsun}{R_{\odot}}$
$\newcommand{\s}{\sigma}$
$\newcommand{\w}{\omega}$
$\newcommand{\vsini}{v \sin i}$
$\newcommand{\sigrms}{\sigma_\mathrm{rms}}$
$\newcommand{\srv}{\sigma_\mathrm{RV}}$
$\newcommand{\Msol}{M_\odot}$
$\newcommand{\Msun}{M_\odot}$
$\newcommand{\Lsol}{L_\odot}$
$\newcommand{\Lsun}{L_\odot}$
$\newcommand{\s}{\sigma}$
$\newcommand{\feros}{{\sc feros}}$
$\newcommand{\lco}{{\sc lco}}$
$\newcommand{\uves}{{\sc uves}}$
$\newcommand{\iacob}{{\sc iacob}}$
$\newcommand{\l}{\lambda}$
$\newcommand{\ll}{\lambda\lambda}$
$\newcommand{\palp}{Pa~\alpha}$
$\newcommand{\palph}{Pa~\alpha}$
$\newcommand{\palpha}{Pa~\alpha}$
$\newcommand{\pbet}{Pa~\beta}$
$\newcommand{\pbeta}{Pa~\beta}$
$\newcommand{\pdelt}{Pa~\delta}$
$\newcommand{\pgam}{Pa~\gamma}$
$\newcommand{\peps}{Pa~\epsilon}$
$\newcommand{\halp}{H~\alpha}$
$\newcommand{\halph}{H~\alpha}$
$\newcommand{\halpha}{H~\alpha}$
$\newcommand{\hbet}{H~\beta}$
$\newcommand{\hdelt}{H~\delta}$
$\newcommand{\hgam}{H~\gamma}$
$\newcommand{\ha}{H {\sc i}}$
$\newcommand{\hb}{H {\sc ii}}$
$\newcommand{\hea}{He {\sc i}}$
$\newcommand{\heb}{He {\sc ii}}$
$\newcommand{\nc}{N {\sc iii}}$
$\newcommand{\fea}{Fe {\sc i}}$
$\newcommand{\nd}{N {\sc iv}}$
$\newcommand{\ne}{N {\sc v}}$
$\newcommand{\mgb}{Mg {\sc ii}}$
$\newcommand{\ob}{O {\sc ii}}$
$\newcommand{\sic}{Si {\sc iii}}$
$\newcommand{\sid}{Si {\sc iv}}$
$\newcommand{\H2O}{H_{2}O}$
$\newcommand{\C2H2}{C_{2}H_{2}}$
$\newcommand{\CO2}{^{12}CO_{2}}$
$\newcommand{\13CO}{^{13}CO_{2}}$
$\newcommand{\13CO2}{^{13}CO_{2}}$
$\newcommand\CH{3+}$</div>



<div id="title">

# XUE. JWST spectroscopy of externally irradiated disks around young intermediate-mass stars$\thanks{Table \ref{tab:full_properties}, containing all the properties of the XUE sources is available in online form.}$

</div>
<div id="comments">

[![arXiv](https://img.shields.io/badge/arXiv-2505.06093-b31b1b.svg)](https://arxiv.org/abs/2505.06093)<mark>Appeared on: 2025-05-12</mark> -  _12 pages, 8 appendix, 16 figures, 2 tables. Submitted to A&A_

</div>
<div id="authors">

M. C. Ramírez-Tannus, et al. -- incl., <mark>A. Winter</mark>, <mark>T. Henning</mark>

</div>
<div id="abstract">

**Abstract:** Our knowledge of the initial conditions of terrestrial planet formation is mainly based on the study of  protoplanetary disks around nearby isolated low-mass stars. However, most young stars and therefore planetary systems form in high-mass star forming regions and are exposed to ultraviolet radiation, affecting the protoplanetary disk. These regions are located at large distances and only now with JWST become accessible to study the inner disks surrounding young stars. We present the eXtreme UV Environments (XUE) program, which provides the first detailed characterization of the physical and chemical properties of the inner disks  around young intermediate-mass ( $1$ – $4$  $\msun$ )  stars exposed to external irradiation from nearby massive stars. We present high signal to noise MIRI-MRS spectroscopy of 12 disks located in three sub-clusters of the high-mass star-forming region NGC 6357 ( $d\sim1690$ pc). Based on their mid-infrared spectral energy distribution, we classify the XUE sources into Group I and II based on the Meeus scheme. We analyze their molecular emission features, and compare their spectral indices and 10 $\micron$ silicate emission profiles to those of nearby Herbig and intermediate T Tauri (IMTT) disks. The XUE program provides the first detailed characterization of the rich molecular inventory within 10 au of IMTT disks, including water, CO, $CO_2$ , HCN, and $C_2H_2$.  Despite being more massive, the XUE stars host disks with molecular richness comparable to isolated T Tauri systems. The spectral indices are also consistent with similar-mass stars in nearby regions. The 10 $\micron$ silicate features in the XUE sample show lower F $_{11.3}$ /F $_{9.8}$ ratios at a given F $_{\mathrm{peak}}$ , although uncertainties in extinction prevent firm conclusions about their inner disk properties. 
The majority of disks display water emission from the inner disk, suggesting that even in these extreme environments rocky planets can form in the presence of water. Only one object shows PAH emission, contrasting with the higher PAH detection rates in IMTT surveys from lower-UV environments. The absence of strong line fluxes and other irradiation signatures suggests that the XUE disks have been truncated by external UV photons. However, this truncation does not appear to significantly impact the chemical richness of their inner regions. These findings indicate that even in extreme environments, IMTT disks can retain the ingredients necessary for rocky planet formation, comparable to those of lower mass T Tauri disks in low-mass star-forming regions.

</div>

<div id="div_fig1">

<img src="tmp_2505.06093/./figures/CCD_CMD.png" alt="Fig13" width="100%"/>

**Figure 13. -** Characteristics of XUE stars. In all panels, XUE stars are color-coded and labeled for clarity. The Orion Nebula young stars are represented by blue plus symbols, and NGC 6357 young stars are shown as grey crosses \citep[][]{2005ApJS..160..319G, 2013ApJS..209...32B, 2013ApJS..209...28K, 2019ApJS..244...28T, 2022ApJ...935...43G}. (a, b) UKIRT/VVV NIR color-magnitude and color-color diagrams. PARSEC 1.2S PMS isochrones for 0.7 and 2 Myr are shown as solid and dashed curves, respectively. Reddening vectors corresponding to $A_V = 10$ mag are drawn as solid and dashed lines, originating from G0 stars at 0.7 Myr and F0 stars at 2 Myr. (c) Spitzer-IRAC MIR color-color diagram of NGC 6357 young stars with known IRAC photometry. Grey squares represent known disky stars, while purple circles indicate diskless stars \citep[][]{2013ApJS..209...31P}. (d) Absolute $K$-band magnitude (uncorrected for extinction) as a function of stellar mass. (e) Intrinsic X-ray luminosity as a function of stellar mass. (f) X-ray column density plotted against visual-band extinction.
    The names of the sources are displayed on the markers and are visible when zooming in. (*fig:CMD_CCD*)

</div>
<div id="div_fig2">

<img src="tmp_2505.06093/./figures/4.9-5.3_region_overview.png" alt="Fig15" width="100%"/>

**Figure 15. -** Overview of the spectral region between 4.9 and 5.3 $\micron$ for all XUE sources. The lowermost spectrum shows the most prominent molecules in this region; CO at a temperature of 1500 K and a column density of 3$\times10^{17}$ cm$^{-2}$(yellow) and $H_2$O at 850 K and $10^{18}$ cm$^{-2}$(blue). (*fig:4.9-5.3_region*)

</div>
<div id="div_fig3">

<img src="tmp_2505.06093/./figures/23.2-24.25_region_overview.png" alt="Fig7" width="100%"/>

**Figure 7. -** Same as Figure \ref{fig:4.9-5.3_region} but for the region between 23.2 and 24.3 $\micron$. The lowermost spectrum shows warm and hot $H_2$O at 400 K and $2.2\times10^{16}$ cm$^{-2}$(blue) and at 900 K and $2.2\times10^{16}$ cm$^{-2}$(purple). (*fig:23.2-24.25_region*)

</div><div id="qrcode"><img src=https://api.qrserver.com/v1/create-qr-code/?size=100x100&data="https://arxiv.org/abs/2505.06093"></div>

# Create HTML index

In [10]:
from datetime import datetime, timedelta, timezone
from glob import glob
import os

# Count the publication files whose on-disk timestamp falls within the last
# `days` days and list them by descending path name.
files = glob('_build/html/*.md')
days = 7
# Use a timezone-aware "now": the file timestamps below are converted to aware
# UTC datetimes too, so the age is not skewed by the machine's UTC offset
# (the previous code compared a naive local time with a naive UTC time).
now = datetime.now(tz=timezone.utc)
max_age = timedelta(days=days)
recent = []
for fk in files:
    # st_ctime is the metadata-change time on Unix (creation time on Windows).
    changed = datetime.fromtimestamp(os.stat(fk).st_ctime, tz=timezone.utc)
    if now - changed <= max_age:
        recent.append(fk)
# Reverse lexicographic order of the paths, as before; the per-file age was
# never used for ordering, so it is no longer stored.
res = sorted(recent, reverse=True)
npub = len(res)
print(len(res), f" publications files modified in the last {days:d} days.")

484  publications files modified in the last 7 days.


In [11]:
import datetime
from glob import glob

def get_last_n_days(lst, days=1):
    """ Yield the documents dated within the last `days` days

    :param lst: iterable of ``(fname, date)`` pairs; ``date`` is a string that
                compares lexicographically against ``str(datetime.date)``
                (i.e. ``YYYY-MM-DD``)
    :param days: size of the look-back window, counted back from today
    :returns: generator over the matching file names, most recent date first
    """
    # The cutoff does not depend on the item: compute it once so that every
    # entry is judged against the same day.
    cutoff = str(datetime.date.today() - datetime.timedelta(days=days))
    for fname, date in sorted(lst, key=lambda x: x[1], reverse=True):
        if date < cutoff:
            # Entries are sorted newest first, so all remaining ones are
            # older than the cutoff as well.
            break
        yield fname

def extract_appearance_dates(lst_file):
    """ Collect the "Appeared on" date found in each file of `lst_file`

    Every file is read line by line; the first line containing
    ``Appeared on`` provides the date. Files without such a line contribute
    nothing to the result.

    :param lst_file: iterable of paths to the files to scan
    :returns: list of ``(fname, date)`` tuples, in the order of `lst_file`
    """
    def get_date(line):
        # Keep the text between 'Appeared on:' and the closing '</mark>' tag.
        return line\
            .split('Appeared on:')[-1]\
            .split('</mark>')[0].strip()

    dates = []
    # Iterate over the argument itself: the previous version ignored
    # `lst_file` and silently read the global `lst` instead.
    for fname in lst_file:
        with open(fname, 'r') as f:
            for line in f:
                if "Appeared on" in line:
                    dates.append((fname, get_date(line)))
                    # Only the first occurrence per file is relevant.
                    break
    return dates

from glob import glob
# Select the publications that appeared during the last `days` days, based on
# the "Appeared on" date written inside each generated markdown file.
days = 7
lst = glob('_build/html/*md')
dates = extract_appearance_dates(lst)
res = [*get_last_n_days(dates, days)]
npub = len(res)
print(npub, f" publications in the last {days:d} days.")

10  publications in the last 7 days.


In [12]:
def create_carousel(npub=4):
    """ Build the HTML markup of a carousel holding `npub` slides

    Slides are numbered from 1; slide ``k`` is a ``carousel-cell`` wrapping a
    ``md_view`` div with id ``slide<k>`` and placeholder text ``Content <k>``.

    :param npub: number of slides to generate
    :returns: the HTML code as a single newline-joined string
    """
    opening = (
        """  <div class="carousel" """,
        """       data-flickity='{ "autoPlay": 10000, "adaptiveHeight": true, "resize": true, "wrapAround": true, "pauseAutoPlayOnHover": true, "groupCells": 1 }' id="asyncTypeset">""",
    )
    cells = [
        f"""    <div class="carousel-cell"> <div id="slide{idx}" class="md_view">Content {idx}</div> </div>"""
        for idx in range(1, npub + 1)
    ]
    return '\n'.join([*opening, *cells, "  </div>"])

def create_grid(npub=4):
    """ Build the HTML markup of a flat grid holding `npub` slides

    Slides are numbered from 1; slide ``k`` is a ``grid-item`` wrapping a
    ``md_view`` div with id ``slide<k>`` and placeholder text ``Content <k>``.

    :param npub: number of slides to generate
    :returns: the HTML code as a single newline-joined string
    """
    items = [
        f"""    <div class="grid-item"> <div id="slide{idx}" class="md_view">Content {idx}</div> </div>"""
        for idx in range(1, npub + 1)
    ]
    return '\n'.join(["""  <div class="grid"> """, *items, "  </div>"])

In [13]:
# Render the 7-day archive page: fill the placeholders of the daily template
# with the carousel markup, the document names and the slide ids.
carousel = create_carousel(npub)
# Quoted, comma-separated file names (path stripped) and slide ids.
docs = ', '.join(f'"{path.split("/")[-1]}"' for path in res)
slides = ', '.join(f'"slide{num}"' for num in range(1, npub + 1))

with open("daily_template.html", "r") as tpl:
    page = tpl.read()

# Substitute the placeholders one after the other, in the original order.
for placeholder, value in (("{%-- carousel:s --%}", carousel),
                           ("{%-- suptitle:s --%}", "7-day archives"),
                           ("{%-- docs:s --%}", docs),
                           ("{%-- slides:s --%}", slides)):
    page = page.replace(placeholder, value)

with open("_build/html/index_7days.html", 'w') as fout:
    fout.write(page)

In [14]:
# Same page generation as for the 7-day archive, restricted to the last day.
days = 1
res = [*get_last_n_days(dates, days)]
npub = len(res)
print(npub, " publications in the last day.")

carousel = create_carousel(npub)
# Quoted, comma-separated file names (path stripped) and slide ids.
docs = ', '.join(f'"{path.split("/")[-1]}"' for path in res)
slides = ', '.join(f'"slide{num}"' for num in range(1, npub + 1))

with open("daily_template.html", "r") as tpl:
    page = tpl.read()

# Substitute the placeholders one after the other, in the original order.
for placeholder, value in (("{%-- carousel:s --%}", carousel),
                           ("{%-- suptitle:s --%}", "Daily"),
                           ("{%-- docs:s --%}", docs),
                           ("{%-- slides:s --%}", slides)):
    page = page.replace(placeholder, value)

with open("_build/html/index_daily.html", 'w') as fout:
    fout.write(page)

2  publications in the last day.


In [15]:
# Create the flat grid of the last N papers (fixed number regardless of dates)
from itertools import islice 

# Number of papers shown in the grid; it drives the selection, the number of
# grid cells and the page title.
npub = 6
# Take the `npub` entries with the latest appearance date. The limit now
# follows `npub` instead of a second hard-coded 6, so the selection cannot
# get out of sync with the grid. reversed(sorted(...)) is kept as is so
# that entries sharing a date come out in the same order as before.
res = [k[0] for k in islice(reversed(sorted(dates, key=lambda x: x[1])), npub)]
print(len(res), f" {npub} publications selected.")

grid = create_grid(npub)
# Quoted, comma-separated file names (path stripped) and slide ids.
docs = ', '.join(['"{0:s}"'.format(k.split('/')[-1]) for k in res])
slides = ', '.join([f'"slide{k}"' for k in range(1, npub + 1)])

# Fill the placeholders of the grid template.
with open("grid_template.html", "r") as tpl:
    page = tpl.read()
    page = page.replace("{%-- grid-content:s --%}", grid)\
               .replace("{%-- suptitle:s --%}",  f"Last {npub:,d} papers" )\
               .replace("{%-- docs:s --%}", docs)\
               .replace("{%-- slides:s --%}", slides)

with open("_build/html/index_npub_grid.html", 'w') as fout:
    fout.write(page)

6  6 publications selected.
