# MPIA Arxiv on Deck 2

Contains the steps to produce the paper extractions.

In [1]:
# Imports
import os
from IPython.display import Markdown, display
from tqdm.notebook import tqdm
import warnings
from PIL import Image 
import re

# requires arxiv_on_deck_2

from arxiv_on_deck_2.arxiv2 import (get_new_papers, 
                                    get_paper_from_identifier,
                                    retrieve_document_source, 
                                    get_markdown_badge)
from arxiv_on_deck_2 import (latex,
                             latex_bib,
                             mpia,
                             highlight_authors_in_list)

# Sometimes images are really big
Image.MAX_IMAGE_PIXELS = 1000000000 

In [2]:
# Some useful definitions.

class AffiliationWarning(UserWarning):
    pass

class AffiliationError(RuntimeError):
    pass

def validation(source: str):
    """Raises error paper during parsing of source file
    
    Allows checks before parsing TeX code.
    
    Raises AffiliationWarning
    """
    check = mpia.affiliation_verifications(source, verbose=True)
    if check is not True:
        raise AffiliationError("mpia.affiliation_verifications: " + check)

        
warnings.simplefilter('always', AffiliationWarning)


def get_markdown_qrcode(paper_id: str):
    """ Generate a qrcode to the arxiv page using qrserver.com
    
    :param paper: Arxiv paper
    :returns: markdown text
    """
    url = r"https://api.qrserver.com/v1/create-qr-code/?size=100x100&data="
    txt = f"""<img src={url}"https://arxiv.org/abs/{paper_id}">"""
    txt = '<div id="qrcode">' + txt + '</div>'
    return txt


def clean_non_western_encoded_characters_commands(text: str) -> str:
    """ Remove non-western encoded characters from a string
    List may need to grow.
    
    :param text: the text to clean
    :return: the cleaned text
    """
    text = re.sub(r"(\\begin{CJK}{UTF8}{gbsn})(.*?)(\\end{CJK})", r"\2", text)
    return text


def get_initials(name: str) -> str:
    """ Get the short name, e.g., A.-B. FamName
    :param name: full name
    :returns: initials
    """
    initials = []
    # account for non western names often in ()
    if '(' in name:
        name = clean_non_western_encoded_characters_commands(name)
        suffix = re.findall(r"\((.*?)\)", name)[0]
        name = name.replace(f"({suffix})", '')
    else:
        suffix = ''
    split = name.split()
    for token in split[:-1]:
        if '-' in token:
            current = '-'.join([k[0] + '.' for k in token.split('-')])
        else:
            current = token[0] + '.'
        initials.append(current)
    initials.append(split[-1].strip())
    if suffix:
        initials.append(f"({suffix})")
    return ' '.join(initials)

## get list of arxiv paper candidates

We use the MPIA mitarbeiter list webpage from mpia.de to get author names
We then get all new papers from Arxiv and match authors

In [3]:
# deal with the author list and edge cases of people that cannot be consistent on their name  

def filter_non_scientists(name: str) -> bool:
    """ Loose filter on expected authorships

    removing IT, administration, technical staff
    :param name: name
    :returns: False if name is not a scientist
    """
    remove_list = ['Licht', 'Binroth', 'Witzel', 'Jordan',
                   'Zähringer', 'Scheerer', 'Hoffmann', 'Düe',
                   'Hellmich', 'Enkler-Scharpegge', 'Witte-Nguy',
                   'Dehen', 'Beckmann', 'Jager', 'Jäger'
                  ]

    for k in remove_list:
        if k in name:
            return False
    return True

def add_author_to_list(author_list: list) -> list:
    """ Add author to list if not already in list
    
    :param author: author name
    :param author_list: list of authors
    :returns: updated list of authors
    """
    add_list = ['T. Henning']

    for author in add_list:
        if author not in author_list:
            author_list.append(author)
    return author_list

# get list from MPIA website
# filter for non-scientists (mpia.get_mpia_mitarbeiter_list() does some filtering)
mpia_authors = [k[1] for k in mpia.get_mpia_mitarbeiter_list() if filter_non_scientists(k[1])]
# add some missing author because of inconsistencies in their MPIA name and author name on papers
mpia_authors = add_author_to_list(mpia_authors)

In [4]:
new_papers = get_new_papers()
# add manual references
add_paper_refs = []
new_papers.extend([get_paper_from_identifier(k) for k in add_paper_refs])

def robust_call(fn, value, *args, **kwargs):
    try:
        return fn(value, *args, **kwargs)
    except Exception:
        return value

candidates = []
for paperk in new_papers:
    # Check author list with their initials
    normed_author_list = [robust_call(mpia.get_initials, k) for k in paperk['authors']]
    hl_authors = highlight_authors_in_list(normed_author_list, mpia_authors, verbose=True)
    matches = [(hl, orig) for hl, orig in zip(hl_authors, paperk['authors']) if 'mark' in hl]
    paperk['authors'] = hl_authors
    if matches:
        # only select paper if an author matched our list
        candidates.append(paperk)
print("""Arxiv has {0:,d} new papers today""".format(len(new_papers)))        
print("""          {0:,d} with possible author matches""".format(len(candidates)))

N. Bachmann  ->  N. Bachmann  |  ['N. Bachmann']
L. Kreidberg  ->  L. Kreidberg  |  ['L. Kreidberg']
P. Mollière  ->  P. Mollière  |  ['P. Mollière']
F. Zagaria  ->  F. Zagaria  |  ['F. Zagaria']
H. Jiang  ->  H. Jiang  |  ['H. Jiang']
M. Benisty  ->  M. Benisty  |  ['M. Benisty']
D. Fasano  ->  D. Fasano  |  ['D. Fasano']
I. Hammond  ->  I. Hammond  |  ['I. Hammond']
A. Winter  ->  A. Winter  |  ['A. Winter']
K. El-Badry  ->  K. El-Badry  |  ['K. El-Badry']
H. Klahr  ->  H. Klahr  |  ['H. Klahr']


Arxiv has 104 new papers today
          4 with possible author matches


# Parse sources and generate relevant outputs

From the candidates, we do the following steps:
* get their tarball from ArXiv (and extract data)
* find the main .tex file: find one with \documentclass{...} (sometimes it's non trivial)
* Check affiliations with :func:`validation`, which uses :func:`mpia.affiliation_verifications`
* If passing the affiliations: we parse the .tex source
   * inject sub-documents into the main (flatten the main document)
   * parse structure, extract information (title, abstract, authors, figures...)
   * handles `\graphicspath` if provided
* Generate the .md document.

In [5]:
documents = []
failed = []
for paper in tqdm(candidates):
    # debug crap
    paper['identifier'] = paper['identifier'].lower().replace('arxiv:', '').replace(r'\n', '').strip()
    paper_id = paper['identifier']
    
    folder = f'tmp_{paper_id}'

    try:
        if not os.path.isdir(folder):
            folder = retrieve_document_source(f"{paper_id}", f'tmp_{paper_id}')
        
        try:
            doc = latex.LatexDocument(folder, validation=validation)    
        except AffiliationError as affilerror:
            msg = f"ArXiv:{paper_id:s} is not an MPIA paper... " + str(affilerror)
            failed.append((paper, "affiliation error: " + str(affilerror) ))
            continue
        
        # Hack because sometimes author parsing does not work well
        if (len(doc.authors) != len(paper['authors'])):
            doc._authors = paper['authors']
        else:
            # highlight authors (FIXME: doc.highlight_authors)
            # done on arxiv paper already
            doc._authors = highlight_authors_in_list(
                [get_initials(k) for k in doc.authors], 
                mpia_authors, verbose=True)
        if (doc.abstract) in (None, ''):
            doc._abstract = paper['abstract']
            
        doc.comment = (get_markdown_badge(paper_id) + 
                       "<mark>Appeared on: " + paper['date'] + "</mark> - ")
        if paper['comments']:
            doc.comment += " _" + paper['comments'] + "_"
        
        full_md = doc.generate_markdown_text()
        
        full_md += get_markdown_qrcode(paper_id)
        
        # replace citations
        try:
            bibdata = latex_bib.LatexBib.from_doc(doc)
            full_md = latex_bib.replace_citations(full_md, bibdata)
        except Exception as e:
            print("Issues with the citations")
            print(e)
        
        documents.append((paper_id, full_md))
    except Exception as e:
        warnings.warn(latex.LatexWarning(f"{paper_id:s} did not run properly\n" +
                                         str(e)
                                        ))
        failed.append((paper, "latex error " + str(e)))

  0%|          | 0/4 [00:00<?, ?it/s]

Retrieving document from  https://arxiv.org/e-print/2506.16232
extracting tarball to tmp_2506.16232...

 done.


N. Bachmann  ->  N. Bachmann  |  ['N. Bachmann']
L. Kreidberg  ->  L. Kreidberg  |  ['L. Kreidberg']
P. Mollière  ->  P. Mollière  |  ['P. Mollière']


Found 73 bibliographic references in tmp_2506.16232/aa55577-25.bbl.
Retrieving document from  https://arxiv.org/e-print/2506.16481
extracting tarball to tmp_2506.16481... done.


F. Zagaria  ->  F. Zagaria  |  ['F. Zagaria']
H. Jiang  ->  H. Jiang  |  ['H. Jiang']
M. Benisty  ->  M. Benisty  |  ['M. Benisty']
D. Fasano  ->  D. Fasano  |  ['D. Fasano']
I. Hammond  ->  I. Hammond  |  ['I. Hammond']
A. Winter  ->  A. Winter  |  ['A. Winter']


Found 178 bibliographic references in tmp_2506.16481/ms.bbl.
Retrieving document from  https://arxiv.org/e-print/2506.16513
extracting tarball to tmp_2506.16513...

 done.
Retrieving document from  https://arxiv.org/e-print/2506.17024
extracting tarball to tmp_2506.17024...

 done.


H. Klahr  ->  H. Klahr  |  ['H. Klahr']


Found 70 bibliographic references in tmp_2506.17024/aa52153-24.bbl.
Issues with the citations
syntax error in line 471: '}' expected


### Export the logs

Throughout, we also keep track of the logs per paper. see `logs-{today date}.md` 

In [6]:
import datetime
today = str(datetime.date.today())
logfile = f"_build/html/logs/log-{today}.md"


with open(logfile, 'w') as logs:
    # Success
    logs.write(f'# Arxiv on Deck 2: Logs - {today}\n\n')
    logs.write("""* Arxiv had {0:,d} new papers\n""".format(len(new_papers)))
    logs.write("""    * {0:,d} with possible author matches\n\n""".format(len(candidates)))
    logs.write("## Sucessful papers\n\n")
    display(Markdown("## Successful papers"))
    success = [k[0] for k in documents]
    for candid in candidates:
        if candid['identifier'].split(':')[-1] in success:
            display(candid)
            logs.write(candid.generate_markdown_text() + '\n\n')

    ## failed
    logs.write("## Failed papers\n\n")
    display(Markdown("## Failed papers"))
    failed = sorted(failed, key=lambda x: x[1])
    current_reason = ""
    for paper, reason in failed:
        if 'affiliation' in reason:
            color = 'green'
        else:
            color = 'red'
        data = Markdown(
                paper.generate_markdown_text() + 
                f'\n|<p style="color:{color:s}"> **ERROR** </p>| <p style="color:{color:s}">{reason:s}</p> |'
               )
        if reason != current_reason:
            logs.write(f'### {reason:s} \n\n')
            current_reason = reason
        logs.write(data.data + '\n\n')
        
        # only display here the important errors (all in logs)
        # if color in ('red',):
        display(data)

## Successful papers


|||
|---:|:---|
| [![arXiv](https://img.shields.io/badge/arXiv-2506.16232-b31b1b.svg)](https://arxiv.org/abs/2506.16232) | **Osiris revisited: Confirming a solar metallicity and low C/O in HD 209458b**  |
|| <mark>N. Bachmann</mark>, et al. -- incl., <mark>L. Kreidberg</mark>, <mark>P. Mollière</mark> |
|*Appeared on*| *2025-06-23*|
|*Comments*| *22 pages, 18 figures, 9 tables; accepted for publication in A&A*|
|**Abstract**|            HD 209458b is the prototypical hot Jupiter and one of the best targets available for precise atmosphere characterisation. Now that spectra from both Hubble Space Telescope (HST) and James Webb Space Telescope (JWST) are available, we can reveal the atmospheric properties in unprecedented detail. In this study, we perform a new data reduction and analysis of the original HST/WFC3 spectrum, accounting for the wavelength dependence of the instrument systematics that was not considered in previous analyses. This allows us to precisely and robustly measure the much-debated H$_2$O abundance in HD 209458b's atmosphere. We combine the newly reduced spectrum with archival JWST/NIRCam data and run free chemistry atmospheric retrievals over the 1.0 - 5.1 $\mu$m wavelength range, covering possible features of multiple absorbing species, including CO$_2$, CO, CH$_4$, NH$_3$, HCN, Na, SO$_2$, and H$_2$S. We detect H$_2$O and CO$_2$ robustly at above 7 $\sigma$ significance, and find a 3.6 $\sigma$ preference for cloudy models compared to a clear atmosphere. For all other absorbers we tested, only upper limits of abundance can be measured. We use Bayesian model averaging to account for a range of different assumptions about the cloud properties, resulting in a water volume mixing ratio of $0.95^{+0.35}_{-0.17} \:\times$ solar and a carbon dioxide abundance of $0.94^{+0.16}_{-0.09} \:\times$ solar. Both results are consistent with solar values and comparable to predictions from the VULCAN 1D photochemistry model. Combining these values with a prior on the CO abundance from ground-based measurements, we derive an overall atmospheric composition comparable to solar metallicity of $\mathrm{[M/H]} = 0.10^{+0.41}_{-0.40}$ and very low C/O of $0.054^{+0.080}_{-0.034}$ with a 3 $\sigma$ upper limit of 0.454. This indicates a strong enrichment in oxygen and depletion in carbon during HD 209458b's formation.         |


|||
|---:|:---|
| [![arXiv](https://img.shields.io/badge/arXiv-2506.16481-b31b1b.svg)](https://arxiv.org/abs/2506.16481) | **SO emission in the dynamically perturbed protoplanetary disks around CQ Tau and MWC 758**  |
|| <mark>F. Zagaria</mark>, et al. -- incl., <mark>H. Jiang</mark>, <mark>M. Benisty</mark>, <mark>D. Fasano</mark>, <mark>I. Hammond</mark>, <mark>A. Winter</mark> |
|*Appeared on*| *2025-06-23*|
|*Comments*| *Accepted for publication in ApJ. 23 pages 7 figures*|
|**Abstract**|            We report the serendipitous detection of the SO $J_N=6_5-5_4$ (219.949 GHz) rotational transition in archival Atacama Large Millimeter/submillimeter Array (ALMA) observations of the spiral hosting protoplanetary disks around CQ Tau (with $\approx4.9\sigma$ significance) and MWC 758 (with $\approx3.4\sigma$ significance). In the former, the SO emission comes in the shape of a ring, arises from the edge of the continuum cavity, and is qualitatively consistent, at the currently available spectral resolution, with being in Keplerian rotation. In the latter, instead, while arising primarily from inside the continuum cavity, the SO emission also extends to the continuum ring(s), and its morphology and kinematics are less clear. We put these sources in the context of the other protoplanetary disks where SO detections have been previously reported in the literature and discuss the possible origins of SO in terms of (thermal) desorption or formation in the gas phase. We argue that these processes might be fostered by dynamical perturbations caused by unseen embedded massive companions, shadows, or late-time infall, thus suggesting a possible link between perturbed dynamics and SO emission in (these) protoplanetary disks. If confirmed, our interpretation would imply that chemical evolution timescales could be significantly shorter in these systems than is commonly assumed, indicating that dynamical perturbations might influence the composition of newborn (proto-)planets by altering the volatile makeup of their formation environment.         |


|||
|---:|:---|
| [![arXiv](https://img.shields.io/badge/arXiv-2506.17024-b31b1b.svg)](https://arxiv.org/abs/2506.17024) | **Feasibility of interferometric observations and characterization of planet-induced structures at sub au to au scales in protoplanetary disks**  |
|| L. Hildebrandt, et al. -- incl., <mark>H. Klahr</mark> |
|*Appeared on*| *2025-06-23*|
|*Comments*| *Accepted for publication in A&A, 16 pages, 11 figures, 1 table*|
|**Abstract**|            Interferometric observations of protoplanetary disks by VLTI and ALMA have greatly improved our understanding of the detailed structure of these planetary birthplaces. These observations have revealed a variety of large-scale disk substructures, including rings, gaps, and spirals, spanning tens to hundreds of au, supporting the predictions of planet formation models. Recent instruments, such as MATISSE at the VLTI, allow one to resolve and investigate the inner few au of protoplanetary disks in nearby star formation regions, shedding light on the traces of planet formation and evolution at these small scales. The aim of this work is to assess the feasibility of interferometric observations of small-scale planet-induced substructures in protoplanetary disks in nearby star-forming regions. We aim to characterize these substructures in multi-wavelength and multi-epoch observations and subsequently differentiate between simulation parameters. On the basis of 3D hydrodynamic simulations of embedded planetary companions and subsequent 3D Monte Carlo radiative transfer simulations, we calculated and analyzed interferometric observables, assuming observations with the VLTI in the K, L, M, and N bands. The hydrodynamic simulations exhibit mass-dependent planet-induced density waves that create observable substructures, most notably for the considered case of a 300 $M_{\oplus}$ planet. These substructures share similarities with observed large-scale structures and feature a prominent accretion region around the embedded planet. The visibilities show a detectable variability for multi-epoch VLTI/GRAVITY and VLTI/MATISSE observations, caused by the orbital motion of the planet, that are distinguishable from other sources of variability due to their unique combination of timescale and amplitude.         |

## Failed papers


|||
|---:|:---|
| [![arXiv](https://img.shields.io/badge/arXiv-2506.16513-b31b1b.svg)](https://arxiv.org/abs/2506.16513) | **10,000 Resolved Triples from Gaia: Empirical Constraints on Triple Star Populations**  |
|| C. Shariat, <mark>K. El-Badry</mark>, S. Naoz |
|*Appeared on*| *2025-06-23*|
|*Comments*| *Submitted to PASP, all comments are welcome. Relevant code and data can be found at this https URL*|
|**Abstract**|            We present a catalog of $\sim 10,000$ resolved triple star systems within 500 pc of the Sun, constructed using Gaia data. The triples include main-sequence, red giant, and white dwarf components spanning separations of 10 to 50,000 au. A well-characterized selection function allows us to constrain intrinsic demographics of the triple star population. We find that (a) all systems are compatible with being hierarchical and dynamically stable; (b) mutual orbital inclinations are isotropic for wide triples but show modest alignment as the systems become more compact; (c) primary masses follow a Kroupa initial mass function weighted by the triple fraction; (d) inner binary orbital periods, eccentricities, and mass ratios mirror those of isolated binaries, including a pronounced twin excess (mass ratios greater than 0.95) out to separations of 1000+ au, suggesting a common formation pathway; (e) tertiary mass ratios follow a power-law distribution with slope -1.4; (f) tertiary orbits are consistent with a log-normal period distribution and thermal eccentricities, subject to dynamical stability. Informed by these observations, we develop a publicly available prescription for generating mock triple star populations. Finally, we estimate the catalog's completeness and infer the intrinsic triple fraction, which rises steadily with primary mass: from $5\%$ at $\lesssim 0.5\,{\rm M_\odot}$ to $35\%$ at $2\,{\rm M_\odot}$. The public catalog provides a robust testbed for models of triple star formation and evolution.         |
|<p style="color:green"> **ERROR** </p>| <p style="color:green">affiliation error: mpia.affiliation_verifications: 'Heidelberg' keyword not found.</p> |

## Export documents

We now write the .md files and export relevant images

In [7]:
def export_markdown_summary(md: str, md_fname:str, directory: str):
    """Export MD document and associated relevant images"""
    import os
    import shutil
    import re

    if (os.path.exists(directory) and not os.path.isdir(directory)):
        raise RuntimeError(f"a non-directory file exists with name {directory:s}")

    if (not os.path.exists(directory)):
        print(f"creating directory {directory:s}")
        os.mkdir(directory)

    fig_fnames = (re.compile(r'\[Fig.*\]\((.*)\)').findall(md) + 
                  re.compile(r'\<img src="([^>\s]*)"[^>]*/>').findall(md))
    print("found figures", fig_fnames)
    for fname in fig_fnames:
        if 'http' in fname:
            # No need to copy online figures
            continue
        if not os.path.exists(fname):
            print("file not found", fname)
            continue
        print("copying ", fname, "to", directory)
        destdir = os.path.join(directory, os.path.dirname(fname))
        destfname = os.path.join(destdir, os.path.basename(fname))
        try:
            os.makedirs(destdir)
        except FileExistsError:
            pass
        shutil.copy(fname, destfname)
    with open(os.path.join(directory, md_fname), 'w') as fout:
        fout.write(md)
    print("exported in ", os.path.join(directory, md_fname))
    [print("    + " + os.path.join(directory,fk)) for fk in fig_fnames]

In [8]:
for paper_id, md in documents:
    export_markdown_summary(md, f"{paper_id:s}.md", '_build/html/')

found figures ['tmp_2506.16232/./figures/opacity_contributions_small_CO_prior_with_pressure_lines.png', 'tmp_2506.16232/./figures/compare_Vulcan_model_with_S.png', 'tmp_2506.16232/./figures/BMA_joined_ret_H2O.png', 'tmp_2506.16232/./figures/BMA_joined_ret_CO2.png']
copying  tmp_2506.16232/./figures/opacity_contributions_small_CO_prior_with_pressure_lines.png to _build/html/
copying  tmp_2506.16232/./figures/compare_Vulcan_model_with_S.png to _build/html/
copying  tmp_2506.16232/./figures/BMA_joined_ret_H2O.png to _build/html/
copying  tmp_2506.16232/./figures/BMA_joined_ret_CO2.png to _build/html/
exported in  _build/html/2506.16232.md
    + _build/html/tmp_2506.16232/./figures/opacity_contributions_small_CO_prior_with_pressure_lines.png
    + _build/html/tmp_2506.16232/./figures/compare_Vulcan_model_with_S.png
    + _build/html/tmp_2506.16232/./figures/BMA_joined_ret_H2O.png
    + _build/html/tmp_2506.16232/./figures/BMA_joined_ret_CO2.png
found figures ['tmp_2506.16481/./figures/summ

## Display the papers

Not necessary but allows for a quick check.

In [9]:
[display(Markdown(k[1])) for k in documents];

<div class="macros" style="visibility:hidden;">
$\newcommand{\ensuremath}{}$
$\newcommand{\xspace}{}$
$\newcommand{\object}[1]{\texttt{#1}}$
$\newcommand{\farcs}{{.}''}$
$\newcommand{\farcm}{{.}'}$
$\newcommand{\arcsec}{''}$
$\newcommand{\arcmin}{'}$
$\newcommand{\ion}[2]{#1#2}$
$\newcommand{\textsc}[1]{\textrm{#1}}$
$\newcommand{\hl}[1]{\textrm{#1}}$
$\newcommand{\footnote}[1]{}$
$\newcommand{\planet}{HD~209458 b}$
$\newcommand{\arraystretch}{1.5}$
$\newcommand{\arraystretch}{1.5}$
$\newcommand{\arraystretch}{1.2}$
$\newcommand{\arraystretch}{1.5}$
$\newcommand{\arraystretch}{1.5}$</div>



<div id="title">

# Osiris revisited: Confirming a solar metallicity and low C/O in $\planet$

</div>
<div id="comments">

[![arXiv](https://img.shields.io/badge/arXiv-2506.16232-b31b1b.svg)](https://arxiv.org/abs/2506.16232)<mark>Appeared on: 2025-06-23</mark> -  _22 pages, 18 figures, 9 tables; accepted for publication in A&A_

</div>
<div id="authors">

<mark>N. Bachmann</mark>, et al. -- incl., <mark>L. Kreidberg</mark>, <mark>P. Mollière</mark>

</div>
<div id="abstract">

**Abstract:** $\planet$ is the prototypical hot Jupiter and one of the best targets available for precise atmosphere characterisation. Now that spectra from both Hubble Space Telescope (HST) and James Webb Space Telescope (JWST) are available, we can reveal the atmospheric properties in unprecedented detail. In this study, we perform a new data reduction and analysis of the original HST/WFC3 spectrum, accounting for the wavelength dependence of the instrument systematics that was not considered in previous analyses. This allows us to precisely and robustly measure the much-debated $\mathrm{H_2O}$ abundance in $\planet$ 's atmosphere. We combine the newly reduced spectrum with archival JWST/NIRCam data and run free chemistry atmospheric retrievals over the $\SI{1.0}  - \SI{5.1}{\micro\metre}$ wavelength range, covering possible features of multiple absorbing species, including $\mathrm{CO_2}$ , $\mathrm{CO}$ , $\mathrm{CH_4}$ , $\mathrm{NH_3}$ , $\mathrm{HCN}$ , $\mathrm{Na}$ , $\mathrm{SO_2}$ , and $\mathrm{H_2S}$ . We detect $\mathrm{H_2O}$ and $\mathrm{CO_2}$ robustly at above $\SI{7}{\sigma}$ significance, and find a $\SI{3.6}{\sigma}$ preference for cloudy models compared to a clear atmosphere. For all other absorbers we tested, only upper limits of abundance can be measured. We use Bayesian model averaging to account for a range of different assumptions about the cloud properties, resulting in a water volume mixing ratio of $0.95^{+0.35}_{-0.17} \:\times$ solar and a carbon dioxide abundance of $0.94^{+0.16}_{-0.09} \:\times$ solar. Both results are consistent with solar values and comparable to predictions from the \texttt{VULCAN} 1D photochemistry model. Combining these values with a prior on the $\mathrm{CO}$ abundance from ground-based measurements, we derive an overall atmospheric composition comparable to solar metallicity of $\mathrm{[M/H]} = 0.10^{+0.41}_{-0.40}$ and very low C/O of $0.054^{+0.080}_{-0.034}$ with a $\SI{3}{\sigma}$ upper limit of $0.454$ . This indicates a strong enrichment in oxygen and depletion in carbon during $\planet$ 's formation.

</div>

<div id="div_fig1">

<img src="tmp_2506.16232/./figures/opacity_contributions_small_CO_prior_with_pressure_lines.png" alt="Fig12" width="100%"/>

**Figure 12. -** Contribution of the opacities of line-absorbing species to the transmission spectrum. The best-fit spectrum shows the cloudy+CO prior model (see text). The absorption features of $\mathrm{H_2O}$ and $\mathrm{CO_2}$ are clearly visible, for the latter due to its strong absorption even though the abundance is low (see \cref{fig:BMA_joint_H2O_CO2}). (*fig:opacity_contribution*)

</div>
<div id="div_fig2">

<img src="tmp_2506.16232/./figures/compare_Vulcan_model_with_S.png" alt="Fig5" width="100%"/>

**Figure 5. -** Comparison of the cloudy+CO prior retrieval results with the \texttt{VULCAN} 1D photochemical kinetics model by [Tsai, Malik and Kitzmann (2021)]()(solid lines) and thermochemical equilibrium abundances (dashed lines). The dots represent the volume mixing ratios from our retrievals (with errorbars for $\mathrm{H_2O}$, $\mathrm{CO_2}$, and $\mathrm{CO}$; upper limits for $\mathrm{CH_4}$, $\mathrm{NH_3}$, $\mathrm{HCN}$, $\mathrm{SO_2}$, and $\mathrm{H_2S}$). The shaded regions represent the atmospheric pressure levels at which the absorption is most active for the species, starting at the cloud level at $\SI{1.44}{\milli\bar}$(see \cref{fig:opacity_contribution} and \cref{tab:joint_ret_results}). For $\mathrm{CO_2}$(light green) and $\mathrm{H_2O}$(light blue), the absorption pressure level reaches higher up in the atmosphere. The Bayesian model averaged results are similar, except for the $\mathrm{H_2O}$, which is smaller than the \texttt{VULCAN} 1D results, and the $\mathrm{CO_2}$ abundance, which agrees with the model prediction. (*fig:compare_VULCAN*)

</div>
<div id="div_fig3">

<img src="tmp_2506.16232/./figures/BMA_joined_ret_H2O.png" alt="Fig11.1" width="50%"/><img src="tmp_2506.16232/./figures/BMA_joined_ret_CO2.png" alt="Fig11.2" width="50%"/>

**Figure 11. -** Bayesian model averaged posterior distributions for $\mathrm{H_2O}$(left) and $\mathrm{CO_2}$(right). The black line indicates the median of the distributions, and the dashed lines are the $\pm34.1\%$ confidence regions. The orange lines show the solar value of $\mathrm{log (\chi_{H_2O})} = -3.70$ and $\mathrm{log (\chi_{CO_2})} = -7.05$ at $\SI{1}{\milli\bar}$ and $\SI{1200}{\kelvin}$. (*fig:BMA_joint_H2O_CO2*)

</div><div id="qrcode"><img src=https://api.qrserver.com/v1/create-qr-code/?size=100x100&data="https://arxiv.org/abs/2506.16232"></div>

<div class="macros" style="visibility:hidden;">
$\newcommand{\ensuremath}{}$
$\newcommand{\xspace}{}$
$\newcommand{\object}[1]{\texttt{#1}}$
$\newcommand{\farcs}{{.}''}$
$\newcommand{\farcm}{{.}'}$
$\newcommand{\arcsec}{''}$
$\newcommand{\arcmin}{'}$
$\newcommand{\ion}[2]{#1#2}$
$\newcommand{\textsc}[1]{\textrm{#1}}$
$\newcommand{\hl}[1]{\textrm{#1}}$
$\newcommand{\footnote}[1]{}$
$\newcommand{\vdag}{(v)^\dagger}$
$\newcommand$
$\newcommand$
$\newcommand{\hjc}[1]{\textcolor{emerald}{[\small HJ: \textit{\small #1}]}}$
$\newcommand{\hjadd}[1]{\textcolor{emerald}{#1}}$
$\newcommand{\fg}[1]{Fig.~\ref{fig:#1}}$
$\newcommand{\Fg}[1]{Figure~\ref{fig:#1}}$
$\newcommand{\fgs}[2]{Figs. \ref{fig:#1} and \ref{fig:#2}}$
$\newcommand{\Fgs}[2]{Figures \ref{fig:#1} and \ref{fig:#2}}$
$\newcommand{\eq}[1]{Eq.~(\ref{eq:#1})\xspace}$
$\newcommand{\Eq}[1]{Equation~(\ref{eq:#1})\xspace}$
$\newcommand{\eqs}[2]{Eqs. (\ref{eq:#1}) and (\ref{eq:#2})}$
$\newcommand{\Eqs}[2]{Equations \ref{eq:#1} and \ref{eq:#2}}$
$\newcommand{\tb}[1]{Table~\ref{tab:#1}\xspace}$
$\newcommand{\Tb}[1]{Table~\ref{tab:#1}\xspace}$
$\newcommand{\se}[1]{Sect.~\ref{sec:#1}\xspace}$
$\newcommand{\Se}[1]{Section~\ref{sec:#1}\xspace}$
$\newcommand{\ses}[2]{Sects. \ref{sec:#1} and \ref{sec:#2}}$
$\newcommand{\sef}[1]{\ref{sec:#1}\xspace}$
$\newcommand{\App}[1]{Appendix~\ref{app:#1}\xspace}$
$\newcommand{\red}[1]{\textcolor{red}{#1}}$
$\newcommand{\cyan}[1]{\textcolor{cyan}{#1}}$
$\newcommand{\arraystretch}{1.25}$</div>



<div id="title">

# SO emission in the dynamically perturbed protoplanetary disks around CQ Tau and MWC 758

</div>
<div id="comments">

[![arXiv](https://img.shields.io/badge/arXiv-2506.16481-b31b1b.svg)](https://arxiv.org/abs/2506.16481)<mark>Appeared on: 2025-06-23</mark> -  _Accepted for publication in ApJ. 23 pages 7 figures_

</div>
<div id="authors">

<mark>F. Zagaria</mark>, et al. -- incl., <mark>H. Jiang</mark>, <mark>M. Benisty</mark>, <mark>D. Fasano</mark>, <mark>I. Hammond</mark>, <mark>A. Winter</mark>

</div>
<div id="abstract">

**Abstract:** We report the serendipitous detection of the SO $J_N=6_5-5_4$ (219.949 GHz) rotational transition in archival Atacama Large Millimeter/submillimeter Array (ALMA) observations of the spiral hosting protoplanetary disks around CQ Tau (with $\approx4.9\sigma$ significance) and MWC 758 (with $\approx3.4\sigma$ significance). In the former, the SO emission comes in the shape of a ring, arises from the edge of the continuum cavity, and is qualitatively consistent, at the currently available spectral resolution, with being in Keplerian rotation.In the latter, instead, while arising primarily from inside the continuum cavity, the SO emission also extends to the continuum ring(s) and its morphology and kinematics are less clear. We put these sources in the context of the other protoplanetary disks where SO detections have been previously reported in the literature and discuss the possible origins of SO in terms of (thermal) desorption or formation in the gas phase. We argue that these processes might be fostered by dynamical perturbations caused by unseen embedded massive companions, shadows, or late-time infall, thus suggestinga possible link between perturbed dynamics and SO emission in (these) protoplanetary disks. If confirmed, our interpretationwould imply that chemical evolution timescales could be significantly shorter in these systems than is commonly assumed,indicating that dynamical perturbations might influence the composition of newborn (proto-)planets by altering the volatile makeup of their formation environment.

</div>

<div id="div_fig1">

<img src="tmp_2506.16481/./figures/summary_SO_spectra.png" alt="Fig2" width="100%"/>

**Figure 2. -** Left: Shifted and stacked SO spectra (blue).
    The zoom-in inserts around the systematic velocity also display the native spectra (gray) for comparison. The plum areas highlight the velocity ranges adopted to measure the SO flux and generate moment maps (see \autoref{app:imaging}). Right: Teardrop plots. The dotted white lines mark the region where Keplerian emission from the disk is expected. Those correspond to the location where the CO emission (from the fiducial exoALMA cubes,  ([Teague, Benisty and Facchini 2025]()) ) drops below 3 K. The white arrows indicate the apertures over which the spectra were extracted and integrated over to measure the SO flux. The dust cavity radius is indicated with a white dashed line. SO emission is clearly detected and primarily originates within the cavity. (*fig:2_spectra*)

</div>
<div id="div_fig2">

<img src="tmp_2506.16481/./figures/summary_SO_images_new.png" alt="Fig3" width="100%"/>

**Figure 3. -** From left to right: SO integrated intensity ("moment 0"), peak intensity ("moment 8"), velocity ("moment 1") maps, and comparison between scattered-light images and SO peak intensity for CQ Tau (top row) and MWC 758 (bottom row). The dotted contours display the $[5,65]\times\sigma$(CQ Tau) and the $[5,40]\times\sigma$(MWC 758) emission levels. The white (black for panels 3c and 3g) solid contours, instead, mark the $[3,5]\times\sigma$ SO detection levels. The synthesized CLEAN beam is shown as an ellipse in the bottom left corner of each panel. The regions within 0$\farcs$1 of the scattered light images cannot be accessed because of the coronagraph and are masked out. (*fig:3_summary*)

</div>
<div id="div_fig3">

<img src="tmp_2506.16481/./figures/summary_continuum_vert.png" alt="Fig1" width="100%"/>

**Figure 1. -** 1.3 mm continuum emission map of CQ Tau (top) and MWC 758 (bottom). The dotted gray lines highlight the $[5,65]\times\sigma$(CQ Tau) and the $[5,40]\times\sigma$(MWC 758) emission contours. The ellipse in the bottom left corner of each panel displays the synthesized CLEAN beam. (*fig:1_continuum*)

</div><div id="qrcode"><img src=https://api.qrserver.com/v1/create-qr-code/?size=100x100&data="https://arxiv.org/abs/2506.16481"></div>

<div class="macros" style="visibility:hidden;">
$\newcommand{\ensuremath}{}$
$\newcommand{\xspace}{}$
$\newcommand{\object}[1]{\texttt{#1}}$
$\newcommand{\farcs}{{.}''}$
$\newcommand{\farcm}{{.}'}$
$\newcommand{\arcsec}{''}$
$\newcommand{\arcmin}{'}$
$\newcommand{\ion}[2]{#1#2}$
$\newcommand{\textsc}[1]{\textrm{#1}}$
$\newcommand{\hl}[1]{\textrm{#1}}$
$\newcommand{\footnote}[1]{}$
$\newcommand{\ME}{\(\text{M}_\Earth\) }$
$\newcommand{\ca}{\sim}$</div>



<div id="title">

# Feasibility of interferometric observations and characterization of planet-induced structures at sub au to au scales in protoplanetary disks

</div>
<div id="comments">

[![arXiv](https://img.shields.io/badge/arXiv-2506.17024-b31b1b.svg)](https://arxiv.org/abs/2506.17024)<mark>Appeared on: 2025-06-23</mark> -  _Accepted for publication in A&A, 16 pages, 11 figures, 1 table_

</div>
<div id="authors">

L. Hildebrandt, et al. -- incl., <mark>H. Klahr</mark>

</div>
<div id="abstract">

**Abstract:** Interferometric observations of protoplanetary disks by VLTI and ALMA have greatly improved our understanding of the detailed structure of these planetary birthplaces.   These observations have revealed a variety of large-scale disk substructures, including rings, gaps, and spirals, spanning tens to hundreds of au, supporting the predictions of planet formation models.   Recent instruments, such as MATISSE at the VLTI, allow one to resolve and investigate the inner few au of protoplanetary disks in nearby star formation regions, shedding light on the traces of planet formation and evolution at these small scales. The aim of this work is to assess the feasibility of interferometric observations of small-scale planet-induced substructures in protoplanetary disks in nearby star-forming regions.   We aim to characterize these substructures in multi-wavelength and multi-epoch observations and subsequently differentiate between simulation parameters. On the basis of 3D hydrodynamic simulations of embedded planetary companions and subsequent 3D Monte Carlo radiative transfer simulations, we calculated and analyzed interferometric observables, assuming observations with the VLTI in the K, L, M, and N bands. The hydrodynamic simulations exhibit mass-dependent planet-induced density waves that create observable substructures, most notably for the considered case of a 300 $\ME$ planet. These substructures share similarities with observed large-scale structures and feature a prominent accretion region around the embedded planet. The visibilities show a detectable variability for multi-epoch VLTI/GRAVITY and VLTI/MATISSE observations, caused by the orbital motion of the planet, that are distinguishable from other sources of variability due to their unique combination of timescale and amplitude. Additionally, the non-uniform change of the visibilities at different baselines can be used to identify asymmetric structures. Furthermore, we show that multi-wavelength observations provide an approach to identify the fainter substructures and the signal of the accretion region.

</div>

<div id="div_fig1">

<img src="tmp_2506.17024/./Images/UV-planes/Visibilities_uv_all.png" alt="Fig4" width="100%"/>

**Figure 4. -** Simulated uv-plane visibilities in the K band (first row), L band (second), M band (third), and N band (fourth)  for the simulations with planet masses of 1 $\ME$(left column), 10 $\ME$(center), and 300 $\ME$(right).   (*fig:visibilities*)

</div>
<div id="div_fig2">

<img src="tmp_2506.17024/./Images/Fluxmaps/Fluxmaps_complete.png" alt="Fig3" width="100%"/>

**Figure 3. -** Flux maps for the simulations with planet masses of 1 $\ME$(left column), 10 $\ME$(center), and 300 $\ME$(right) in the K band (first row), L band (second), M band (third), and N band (fourth). The location of the planet, at a radial distance of 1 au north of the center, is indicated with a blue arrow in each simulation. The images are normalized with regard to the flux value of the central pixel where the star is located. (*fig:fluxcrossN*)

</div>
<div id="div_fig3">

<img src="tmp_2506.17024/./Images/Rotations/300_N_data_variance_rotation.png" alt="Fig6" width="100%"/>

**Figure 6. -** Interferometric variability in the simulated N band visibilities at radii in the uv-plane corresponding to all UT baselines for an embedded planet of 300 $\ME$. Shown are the maximum variability, dependent on baseline length (left), and the measured visibility for a given baseline, dependent on the orbital motion of the planet and other disk substructures (right). (*fig:varrot300*)

</div><div id="qrcode"><img src=https://api.qrserver.com/v1/create-qr-code/?size=100x100&data="https://arxiv.org/abs/2506.17024"></div>

# Create HTML index

In [10]:
from datetime import datetime, timedelta, timezone
from glob import glob
import os

files = glob('_build/html/*.md')
days = 7
now = datetime.today()
res = []
for fk in files:
    stat_result = os.stat(fk).st_ctime
    modified = datetime.fromtimestamp(stat_result, tz=timezone.utc).replace(tzinfo=None)
    delta = now.today() - modified
    if delta <= timedelta(days=days):
        res.append((delta.seconds, fk))
res = [k[1] for k in reversed(sorted(res, key=lambda x:x[1]))]
npub = len(res)
print(len(res), f" publications files modified in the last {days:d} days.")
# [ print('\t', k) for k in res ];

535  publications files modified in the last 7 days.


In [11]:
import datetime
from glob import glob

def get_last_n_days(lst, days=1):
    """ Get the documents from the last n days """
    sorted_lst = sorted(lst, key=lambda x: x[1], reverse=True)
    for fname, date in sorted_lst:
        if date >= str(datetime.date.today() - datetime.timedelta(days=days)):
            yield fname

def extract_appearance_dates(lst_file):
    dates = []

    def get_date(line):
        return line\
            .split('Appeared on:')[-1]\
            .split('</mark>')[0].strip()

    for fname in lst:
        with open(fname, 'r') as f:
            found_date = False
            for line in f:
                if not found_date:
                    if "Appeared on" in line:
                        found_date = True
                        dates.append((fname, get_date(line)))
                else:
                    break
    return dates

from glob import glob
lst = glob('_build/html/*md')
days = 7
dates = extract_appearance_dates(lst)
res = list(get_last_n_days(dates, days))
npub = len(res)
print(len(res), f" publications in the last {days:d} days.")

14  publications in the last 7 days.


In [12]:
def create_carousel(npub=4):
    """ Generate the HTML code for a carousel with `npub` slides """
    carousel = ["""  <div class="carousel" """,
                """       data-flickity='{ "autoPlay": 10000, "adaptiveHeight": true, "resize": true, "wrapAround": true, "pauseAutoPlayOnHover": true, "groupCells": 1 }' id="asyncTypeset">"""
                ]
    
    item_str = """    <div class="carousel-cell"> <div id="slide{k}" class="md_view">Content {k}</div> </div>"""
    for k in range(1, npub + 1):
        carousel.append(item_str.format(k=k))
    carousel.append("  </div>")
    return '\n'.join(carousel)

def create_grid(npub=4):
    """ Generate the HTML code for a flat grid with `npub` slides """
    grid = ["""  <div class="grid"> """,
                ]
    
    item_str = """    <div class="grid-item"> <div id="slide{k}" class="md_view">Content {k}</div> </div>"""
    for k in range(1, npub + 1):
        grid.append(item_str.format(k=k))
    grid.append("  </div>")
    return '\n'.join(grid)

In [13]:
carousel = create_carousel(npub)
docs = ', '.join(['"{0:s}"'.format(k.split('/')[-1]) for k in res])
slides = ', '.join([f'"slide{k}"' for k in range(1, npub + 1)])

with open("daily_template.html", "r") as tpl:
    page = tpl.read()
    page = page.replace("{%-- carousel:s --%}", carousel)\
               .replace("{%-- suptitle:s --%}",  "7-day archives" )\
               .replace("{%-- docs:s --%}", docs)\
               .replace("{%-- slides:s --%}", slides)
    
with open("_build/html/index_7days.html", 'w') as fout:
    fout.write(page)

In [14]:
# redo for today
days = 1
res = list(get_last_n_days(dates, days))
npub = len(res)
print(len(res), f" publications in the last day.")

carousel = create_carousel(npub)
docs = ', '.join(['"{0:s}"'.format(k.split('/')[-1]) for k in res])
slides = ', '.join([f'"slide{k}"' for k in range(1, npub + 1)])

with open("daily_template.html", "r") as tpl:
    page = tpl.read()
    page = page.replace("{%-- carousel:s --%}", carousel)\
               .replace("{%-- suptitle:s --%}",  "Daily" )\
               .replace("{%-- docs:s --%}", docs)\
               .replace("{%-- slides:s --%}", slides)
    
# print(carousel, docs, slides)
# print(page)
with open("_build/html/index_daily.html", 'w') as fout:
    fout.write(page)

3  publications in the last day.


In [15]:
# Create the flat grid of the last N papers (fixed number regardless of dates)
from itertools import islice 

npub = 6
res = [k[0] for k in (islice(reversed(sorted(dates, key=lambda x: x[1])), 6))]
print(len(res), f" {npub} publications selected.")

grid = create_grid(npub)
docs = ', '.join(['"{0:s}"'.format(k.split('/')[-1]) for k in res])
slides = ', '.join([f'"slide{k}"' for k in range(1, npub + 1)])

with open("grid_template.html", "r") as tpl:
    page = tpl.read()
    page = page.replace("{%-- grid-content:s --%}", grid)\
               .replace("{%-- suptitle:s --%}",  f"Last {npub:,d} papers" )\
               .replace("{%-- docs:s --%}", docs)\
               .replace("{%-- slides:s --%}", slides)
    
# print(grid, docs, slides)
# print(page)
with open("_build/html/index_npub_grid.html", 'w') as fout:
    fout.write(page)

6  6 publications selected.
