# MPIA Arxiv on Deck 2

Contains the steps to produce the paper extractions.

In [1]:
# Imports
import os
from IPython.display import Markdown, display
from tqdm.notebook import tqdm
import warnings
from PIL import Image 
import re

# requires arxiv_on_deck_2

from arxiv_on_deck_2.arxiv2 import (get_new_papers, 
                                    get_paper_from_identifier,
                                    retrieve_document_source, 
                                    get_markdown_badge)
from arxiv_on_deck_2 import (latex,
                             latex_bib,
                             mpia,
                             highlight_authors_in_list)

# Sometimes images are really big
Image.MAX_IMAGE_PIXELS = 1000000000 

In [2]:
# Some useful definitions.

class AffiliationWarning(UserWarning):
    pass

class AffiliationError(RuntimeError):
    pass

def validation(source: str):
    """Raises error paper during parsing of source file
    
    Allows checks before parsing TeX code.
    
    Raises AffiliationWarning
    """
    check = mpia.affiliation_verifications(source, verbose=True)
    if check is not True:
        raise AffiliationError("mpia.affiliation_verifications: " + check)

        
warnings.simplefilter('always', AffiliationWarning)


def get_markdown_qrcode(paper_id: str):
    """ Generate a qrcode to the arxiv page using qrserver.com
    
    :param paper: Arxiv paper
    :returns: markdown text
    """
    url = r"https://api.qrserver.com/v1/create-qr-code/?size=100x100&data="
    txt = f"""<img src={url}"https://arxiv.org/abs/{paper_id}">"""
    txt = '<div id="qrcode">' + txt + '</div>'
    return txt


def clean_non_western_encoded_characters_commands(text: str) -> str:
    """ Remove non-western encoded characters from a string
    List may need to grow.
    
    :param text: the text to clean
    :return: the cleaned text
    """
    text = re.sub(r"(\\begin{CJK}{UTF8}{gbsn})(.*?)(\\end{CJK})", r"\2", text)
    return text


def get_initials(name: str) -> str:
    """ Get the short name, e.g., A.-B. FamName
    :param name: full name
    :returns: initials
    """
    initials = []
    # account for non western names often in ()
    if '(' in name:
        name = clean_non_western_encoded_characters_commands(name)
        suffix = re.findall(r"\((.*?)\)", name)[0]
        name = name.replace(f"({suffix})", '')
    else:
        suffix = ''
    split = name.split()
    for token in split[:-1]:
        if '-' in token:
            current = '-'.join([k[0] + '.' for k in token.split('-')])
        else:
            current = token[0] + '.'
        initials.append(current)
    initials.append(split[-1].strip())
    if suffix:
        initials.append(f"({suffix})")
    return ' '.join(initials)

## get list of arxiv paper candidates

We use the MPIA mitarbeiter list webpage from mpia.de to get author names
We then get all new papers from Arxiv and match authors

In [3]:
# deal with the author list and edge cases of people that cannot be consistent on their name  

def filter_non_scientists(name: str) -> bool:
    """ Loose filter on expected authorships

    removing IT, administration, technical staff
    :param name: name
    :returns: False if name is not a scientist
    """
    remove_list = ['Licht', 'Binroth', 'Witzel', 'Jordan',
                   'Zähringer', 'Scheerer', 'Hoffmann', 'Düe',
                   'Hellmich', 'Enkler-Scharpegge', 'Witte-Nguy',
                   'Dehen', 'Beckmann', 'Jager', 'Jäger'
                  ]

    for k in remove_list:
        if k in name:
            return False
    return True

def add_author_to_list(author_list: list) -> list:
    """ Add author to list if not already in list
    
    :param author: author name
    :param author_list: list of authors
    :returns: updated list of authors
    """
    add_list = ['T. Henning']

    for author in add_list:
        if author not in author_list:
            author_list.append(author)
    return author_list

# get list from MPIA website
# filter for non-scientists (mpia.get_mpia_mitarbeiter_list() does some filtering)
mpia_authors = [k[1] for k in mpia.get_mpia_mitarbeiter_list() if filter_non_scientists(k[1])]
# add some missing author because of inconsistencies in their MPIA name and author name on papers
mpia_authors = add_author_to_list(mpia_authors)

In [4]:
new_papers = get_new_papers()
# add manual references
add_paper_refs = []
new_papers.extend([get_paper_from_identifier(k) for k in add_paper_refs])

def robust_call(fn, value, *args, **kwargs):
    try:
        return fn(value, *args, **kwargs)
    except Exception:
        return value

candidates = []
for paperk in new_papers:
    # Check author list with their initials
    normed_author_list = [robust_call(mpia.get_initials, k) for k in paperk['authors']]
    hl_authors = highlight_authors_in_list(normed_author_list, mpia_authors, verbose=True)
    matches = [(hl, orig) for hl, orig in zip(hl_authors, paperk['authors']) if 'mark' in hl]
    paperk['authors'] = hl_authors
    if matches:
        # only select paper if an author matched our list
        candidates.append(paperk)
print("""Arxiv has {0:,d} new papers today""".format(len(new_papers)))        
print("""          {0:,d} with possible author matches""".format(len(candidates)))

K. A. Kahle  ->  K. A. Kahle  |  ['K. A. Kahle']
L. Kreidberg  ->  L. Kreidberg  |  ['L. Kreidberg']
P. Mollière  ->  P. Mollière  |  ['P. Mollière']
L. Acuña  ->  L. Acuña  |  ['L. Acuña']
C. Gapp  ->  C. Gapp  |  ['C. Gapp']
Y. Wang  ->  Y. Wang  |  ['Y. Wang']
X. Zhang  ->  X. Zhang  |  ['X. Zhang']
K. El-Badry  ->  K. El-Badry  |  ['K. El-Badry']


A. Winter  ->  A. Winter  |  ['A. Winter']
T. Henning  ->  T. Henning  |  ['T. Henning']
G. Perotti  ->  G. Perotti  |  ['G. Perotti']
Arxiv has 69 new papers today
          6 with possible author matches


# Parse sources and generate relevant outputs

From the candidates, we do the following steps:
* get their tarball from ArXiv (and extract data)
* find the main .tex file: find one with \documentclass{...} (sometimes it's non trivial)
* Check affiliations with :func:`validation`, which uses :func:`mpia.affiliation_verifications`
* If passing the affiliations: we parse the .tex source
   * inject sub-documents into the main (flatten the main document)
   * parse structure, extract information (title, abstract, authors, figures...)
   * handles `\graphicspath` if provided
* Generate the .md document.

In [5]:
documents = []
failed = []
for paper in tqdm(candidates):
    # debug crap
    paper['identifier'] = paper['identifier'].lower().replace('arxiv:', '').replace(r'\n', '').strip()
    paper_id = paper['identifier']
    
    folder = f'tmp_{paper_id}'

    try:
        if not os.path.isdir(folder):
            folder = retrieve_document_source(f"{paper_id}", f'tmp_{paper_id}')
        
        try:
            doc = latex.LatexDocument(folder, validation=validation)    
        except AffiliationError as affilerror:
            msg = f"ArXiv:{paper_id:s} is not an MPIA paper... " + str(affilerror)
            failed.append((paper, "affiliation error: " + str(affilerror) ))
            continue
        
        # Hack because sometimes author parsing does not work well
        if (len(doc.authors) != len(paper['authors'])):
            doc._authors = paper['authors']
        else:
            # highlight authors (FIXME: doc.highlight_authors)
            # done on arxiv paper already
            doc._authors = highlight_authors_in_list(
                [get_initials(k) for k in doc.authors], 
                mpia_authors, verbose=True)
        if (doc.abstract) in (None, ''):
            doc._abstract = paper['abstract']
            
        doc.comment = (get_markdown_badge(paper_id) + 
                       "<mark>Appeared on: " + paper['date'] + "</mark> - ")
        if paper['comments']:
            doc.comment += " _" + paper['comments'] + "_"
        
        full_md = doc.generate_markdown_text()
        
        full_md += get_markdown_qrcode(paper_id)
        
        # replace citations
        try:
            bibdata = latex_bib.LatexBib.from_doc(doc)
            full_md = latex_bib.replace_citations(full_md, bibdata)
        except Exception as e:
            print("Issues with the citations")
            print(e)
        
        documents.append((paper_id, full_md))
    except Exception as e:
        warnings.warn(latex.LatexWarning(f"{paper_id:s} did not run properly\n" +
                                         str(e)
                                        ))
        failed.append((paper, "latex error " + str(e)))

  0%|          | 0/6 [00:00<?, ?it/s]

Retrieving document from  https://arxiv.org/e-print/2507.13439


extracting tarball to tmp_2507.13439...

 done.


K. A. Kahle  ->  K. A. Kahle  |  ['K. A. Kahle']
L. Kreidberg  ->  L. Kreidberg  |  ['L. Kreidberg']
P. Mollière  ->  P. Mollière  |  ['P. Mollière']
L. Acuña  ->  L. Acuña  |  ['L. Acuña']
C. Gapp  ->  C. Gapp  |  ['C. Gapp']




Found 157 bibliographic references in tmp_2507.13439/aa54916-25.bbl.
Retrieving document from  https://arxiv.org/e-print/2507.13448


extracting tarball to tmp_2507.13448... done.
Retrieving document from  https://arxiv.org/e-print/2507.13771



  exec(code_obj, self.user_global_ns, self.user_ns)

  exec(code_obj, self.user_global_ns, self.user_ns)


invalid header


Retrieving document from  https://arxiv.org/e-print/2507.13811


extracting tarball to tmp_2507.13811... done.
Retrieving document from  https://arxiv.org/e-print/2507.13850


extracting tarball to tmp_2507.13850...

 done.
  0: tmp_2507.13850/latest_draft.tex, 1,259 lines
  1: tmp_2507.13850/J2008_paper_ARXIV_submission/latest_draft.tex, 1,259 lines
Retrieving document from  https://arxiv.org/e-print/2507.13921
extracting tarball to tmp_2507.13921...


  exec(code_obj, self.user_global_ns, self.user_ns)
  exec(code_obj, self.user_global_ns, self.user_ns)
  exec(code_obj, self.user_global_ns, self.user_ns)


 done.


A. Winter  ->  A. Winter  |  ['A. Winter']
T. Henning  ->  T. Henning  |  ['T. Henning']
G. Perotti  ->  G. Perotti  |  ['G. Perotti']


Found 183 bibliographic references in tmp_2507.13921/aa55718-25.bbl.
Issues with the citations
syntax error in line 16: '}' expected


### Export the logs

Throughout, we also keep track of the logs per paper. see `logs-{today date}.md` 

In [6]:
import datetime
today = str(datetime.date.today())
logfile = f"_build/html/logs/log-{today}.md"


with open(logfile, 'w') as logs:
    # Success
    logs.write(f'# Arxiv on Deck 2: Logs - {today}\n\n')
    logs.write("""* Arxiv had {0:,d} new papers\n""".format(len(new_papers)))
    logs.write("""    * {0:,d} with possible author matches\n\n""".format(len(candidates)))
    logs.write("## Sucessful papers\n\n")
    display(Markdown("## Successful papers"))
    success = [k[0] for k in documents]
    for candid in candidates:
        if candid['identifier'].split(':')[-1] in success:
            display(candid)
            logs.write(candid.generate_markdown_text() + '\n\n')

    ## failed
    logs.write("## Failed papers\n\n")
    display(Markdown("## Failed papers"))
    failed = sorted(failed, key=lambda x: x[1])
    current_reason = ""
    for paper, reason in failed:
        if 'affiliation' in reason:
            color = 'green'
        else:
            color = 'red'
        data = Markdown(
                paper.generate_markdown_text() + 
                f'\n|<p style="color:{color:s}"> **ERROR** </p>| <p style="color:{color:s}">{reason:s}</p> |'
               )
        if reason != current_reason:
            logs.write(f'### {reason:s} \n\n')
            current_reason = reason
        logs.write(data.data + '\n\n')
        
        # only display here the important errors (all in logs)
        # if color in ('red',):
        display(data)

## Successful papers


|||
|---:|:---|
| [![arXiv](https://img.shields.io/badge/arXiv-2507.13439-b31b1b.svg)](https://arxiv.org/abs/2507.13439) | **The SPACE Program I: The featureless spectrum of HD 86226 c challenges sub-Neptune atmosphere trends**  |
|| <mark>K. A. Kahle</mark>, et al. -- incl., <mark>L. Kreidberg</mark>, <mark>P. Mollière</mark>, <mark>L. Acuña</mark>, <mark>C. Gapp</mark> |
|*Appeared on*| *2025-07-21*|
|*Comments*| *Accepted for publication in Astronomy & Astrophysics*|
|**Abstract**|            Sub-Neptune exoplanets are the most abundant type of planet known today. As they do not have a Solar System counterpart, many open questions exist about their composition and formation. Previous spectroscopic studies rule out aerosol-free hydrogen-helium-dominated atmospheres for many characterized sub-Neptunes but are inconclusive about their exact atmospheric compositions. Here we characterize the hot (Teq=1311K) sub-Neptune HD 86226 c, which orbits its G-type host star. Its high equilibrium temperature prohibits methane-based haze formation, increasing the chances for a clear atmosphere on this planet. We use HST data taken with WFC3 and STIS from the Sub-neptune Planetary Atmosphere Characterization Experiment (SPACE) Program to perform near-infrared 1.1-1.7micrometer transmission spectroscopy and UV characterization of the host star. We report a featureless transmission spectrum that is consistent within 0.4 sigma with a constant transit depth of 418+-14ppm. The amplitude of this spectrum is only 0.01 scale heights for a H/He-dominated atmosphere, excluding a cloud-free solar-metallicity atmosphere on HD 86226 c with a confidence of 6.5 sigma. Based on an atmospheric retrieval analysis and forward models of cloud and haze formation, we find that the featureless spectrum could be due to a metal enrichment [M/H] above 2.3 (3 sigma confidence lower limit) of a cloudless atmosphere, or silicate (MgSiO3), iron (Fe), or manganese sulfide (MnS) clouds. For these species, we perform an investigation of cloud formation in high-metallicity, high-temperature atmospheres. Our results highlight that HD 86226c does not follow the aerosol trend of sub-Neptunes found by previous studies. Follow-up observations with the JWST could determine whether this planet aligns with the recent detections of metal-enriched atmospheres or if it harbors a cloud species otherwise atypical for sub-Neptunes.         |


|||
|---:|:---|
| [![arXiv](https://img.shields.io/badge/arXiv-2507.13921-b31b1b.svg)](https://arxiv.org/abs/2507.13921) | **XUE 10. The CO$_2$-rich terrestrial planet-forming region of an externally irradiated Herbig disk**  |
|| J. Frediani, et al. -- incl., <mark>A. Winter</mark>, <mark>T. Henning</mark>, <mark>G. Perotti</mark> |
|*Appeared on*| *2025-07-21*|
|*Comments*| *20 pages, 13 figures, 4 tables*|
|**Abstract**|            We investigate the James Webb Space Telescope (JWST) MIRI MRS gas molecular content of an externally irradiated Herbig disk, the F-type XUE 10 source, in the context of the eXtreme UV Environments (XUE) program. XUE 10 belongs to the massive star cluster NGC 6357 (1.69 kpc), where it is exposed to an external far-ultraviolet (FUV) radiation $\approx$ 10$^3$ times stronger than in the Solar neighborhood. We modeled the molecular features in the mid-infrared spectrum with Local Thermodynamic Equilibrium (LTE) 0D slab models. We derived basic parameters of the stellar host from a VLT FORS2 optical spectrum using PHOENIX stellar templates. We detect bright CO2 gas with the first simultaneous detection (> 5$\sigma$) of four isotopologues (12CO2, 13CO2, 16O12C18O, 16O12C17O) in a protoplanetary disk. We also detect faint CO emission (2$\sigma$) and the HI Pf$\alpha$ line (8$\sigma$). We also place strict upper limits on the water content, finding a total column density $\lesssim$ 10$^{18}$ cm$^{-2}$. The CO2 species trace low gas temperatures (300-370 K) with a range of column densities of 7.4 $\times$ 10$^{17}$ cm$^{-2}$ (16O12C17O)-1.3 $\times$ 10$^{20}$ cm$^{-2}$ (12CO2) in an equivalent emitting radius of 1.15 au. The emission of 13CO2 is likely affected by line optical depth effects. 16O12C18O and 16O12C17O abundances may be isotopically anomalous compared to the 16O/18O and 16O/17O ratios measured in the interstellar medium and the Solar System. We propose that the mid-infrared spectrum of XUE 10 is explained by H2O removal either via advection or strong photo-dissociation by stellar UV irradiation, and enhanced local CO2 gas-phase production. Outer disk truncation supports the observed CO2-H2O dichotomy. A CO2 vapor enrichment in 18O and 17O can be explained by means of external UV irradiation and early on (10$^{4-5}$ yr) delivery of isotopically anomalous water ice to the inner disk.         |

## Failed papers


|||
|---:|:---|
| [![arXiv](https://img.shields.io/badge/arXiv-2507.13850-b31b1b.svg)](https://arxiv.org/abs/2507.13850) | **A half-ring of ionized circumstellar material trapped in the magnetosphere of a white dwarf merger remnant**  |
|| A. A. Cristea, et al. -- incl., <mark>K. El-Badry</mark> |
|*Appeared on*| *2025-07-21*|
|*Comments*| *Submitted to A&A, 36 pages, 27 figures. Comments are very welcome*|
|**Abstract**|            Many white dwarfs are observed in compact double white dwarf binaries and, through the emission of gravitational waves, a large fraction are destined to merge. The merger remnants that do not explode in a Type Ia supernova are expected to initially be rapidly rotating and highly magnetized. We here present our discovery of the variable white dwarf ZTF J200832.79+444939.67, hereafter ZTF J2008+4449, as a likely merger remnant showing signs of circumstellar material without a stellar or substellar companion. The nature of ZTF J2008+4449 as a merger remnant is supported by its physical properties: hot ($35,500\pm300$ K) and massive ($1.12\pm0.03$ M$_\odot$), the white dwarf is rapidly rotating with a period of $\approx$ 6.6 minutes and likely possesses exceptionally strong magnetic fields ($\sim$ 400-600 MG) at its surface. Remarkably, we detect a significant period derivative of $(1.80\pm0.09)\times10^{-12}$ s/s, indicating that the white dwarf is spinning down, and a soft X-ray emission that is inconsistent with photospheric emission. As the presence of a mass-transferring stellar or brown dwarf companion is excluded by infrared photometry, the detected spin down and X-ray emission could be tell-tale signs of a magnetically driven wind or of interaction with circumstellar material, possibly originating from the fallback of gravitationally bound merger ejecta or from the tidal disruption of a planetary object. We also detect Balmer emission, which requires the presence of ionized hydrogen in the vicinity of the white dwarf, showing Doppler shifts as high as $\approx$ 2000 km s$^{-1}$. The unusual variability of the Balmer emission on the spin period of the white dwarf is consistent with the trapping of a half ring of ionised gas in the magnetosphere of the white dwarf.         |
|<p style="color:green"> **ERROR** </p>| <p style="color:green">affiliation error: mpia.affiliation_verifications: '69117' keyword not found.</p> |


|||
|---:|:---|
| [![arXiv](https://img.shields.io/badge/arXiv-2507.13448-b31b1b.svg)](https://arxiv.org/abs/2507.13448) | **Dwarf Galaxies at Cosmic Noon: New JWST Constraints on Satellite Models and Subhalo Tidal Evolution**  |
|| J. T. Wan, et al. -- incl., <mark>Y. Wang</mark> |
|*Appeared on*| *2025-07-21*|
|*Comments*| *Submitted to ApJ; comments welcome!*|
|**Abstract**|            The advent of JWST has revolutionized the study of faint satellite galaxies at $z \gtrsim 1$, enabling statistical constraints on galaxy evolution and the galaxy$-$halo connection in a previously unexplored mass and redshift regime. We compare satellite abundances at $1 < z < 3.5$ from recent JWST observations with predictions from cosmological dark matter-only zoom-in simulations. We identify and quantify several sources of biases that can impact theoretical satellite counts, finding that assumptions about subhalo tidal evolution introduce the largest uncertainty in predictions for the satellite mass function. Using a flexible galaxy disruption model, we explore a range of disruption scenarios, spanning hydrodynamically motivated and idealized prescriptions, to bracket plausible physical outcomes. We show that varying galaxy durability can change the predicted satellite mass functions by a factor of $\sim3.5$. The JWST data and our fiducial model are consistent within $1-2\sigma$ across the full redshift ($1 < z < 3.5$) and stellar mass ($M_\star> 10^7~\mathrm{M}_\odot$) range probed. We find evidence that subhalos are at least as long-lived as predicted by hydrodynamic simulations. Our framework will enable robust constraints on the tidal evolution of subhalos with future observations. This work presents the first direct comparison between cosmological models and observations of the high-redshift satellite population in this low-mass regime. These results showcase JWST's emerging power to test structure formation in the first half of the Universe in a new domain and to constrain the physical processes driving the evolution of low-mass galaxies across cosmic time.         |
|<p style="color:green"> **ERROR** </p>| <p style="color:green">affiliation error: mpia.affiliation_verifications: 'Heidelberg' keyword not found.</p> |


|||
|---:|:---|
| [![arXiv](https://img.shields.io/badge/arXiv-2507.13811-b31b1b.svg)](https://arxiv.org/abs/2507.13811) | **Testing the cosmic distance duality relation with baryon acoustic oscillations and supernovae data**  |
|| T.-N. Li, et al. -- incl., <mark>X. Zhang</mark> |
|*Appeared on*| *2025-07-21*|
|*Comments*| *9 pages, 4 figures*|
|**Abstract**|            One of the most fundamental relationships in modern cosmology is the cosmic distance duality relation (CDDR), which describes the relationship between the angular diameter distance ($D_{\rm A}$) and the luminosity distance ($D_{\rm L}$), and is expressed as: $\eta(z)=D_{\rm L}(z)(1+z)^{-2}/D_{\rm A}(z)=1$. In this work, we conduct a comprehensive test of the CDDR by combining baryon acoustic oscillation (BAO) data from the SDSS and DESI surveys with type Ia supernova (SN) data from PantheonPlus and DESY5. We utilize an artificial neural network approach to match the SN and BAO data at the same redshift. To explore potential violations of the CDDR, we consider three different parameterizations: (i) $\eta(z)=1+\eta_0z$; (ii) $\eta(z)=1+\eta_0z/(1+z)$; (iii) $\eta(z)=1+\eta_0\ln(1+z)$. Our results indicate that the calibration of the SN absolute magnitude $M_{\rm B}$ plays a crucial role in testing potential deviations from the CDDR, as there exists a significant negative correlation between $\eta_0$ and $M_{\rm B}$. For PantheonPlus analysis, when $M_{\rm B}$ is treated as a free parameter, no evidence of CDDR violation is found. In contrast, fixing $M_{\rm B}$ to the $M_{\rm B}^{\rm D20}$ prior with $-19.230\pm0.040$ mag leads to a deviation at approximately the $2\sigma$ level, while fixing $M_{\rm B}$ to the $M_{\rm B}^{\rm B23}$ prior with $-19.396\pm0.016$ mag remains in agreement with the CDDR. Furthermore, overall analyses based on the SDSS+DESY5 and DESI+DESY5 data consistently show no evidence of the deviation from the CDDR.         |
|<p style="color:green"> **ERROR** </p>| <p style="color:green">affiliation error: mpia.affiliation_verifications: 'Heidelberg' keyword not found.</p> |


|||
|---:|:---|
| [![arXiv](https://img.shields.io/badge/arXiv-2507.13771-b31b1b.svg)](https://arxiv.org/abs/2507.13771) | **The Pierre Auger Observatory: Contributions to the 39th International Cosmic Rays Conference**  |
|| P. A. Collaboration, et al. |
|*Appeared on*| *2025-07-21*|
|*Comments*| **|
|**Abstract**|            The Pierre Auger Observatory, located in La Pampa Amarilla, Argentina, has been continuously acquiring data since 2004. It comprises a surface detector array covering 3,000 km$^2$ and 27 fluorescence telescopes, designed to detect extensive air showers initiated by ultra-high-energy cosmic rays. An upgrade to the Observatory was commissioned in 2024, enhancing the existing water-Cherenkov detectors with additional radio antennas, surface scintillator detectors, and a buried scintillator array. This compilation of contributions to the 39th International Cosmic Ray Conference, held in Geneva, Switzerland (July 15-24, 2025), presents recent results from the Pierre Auger Collaboration, addressing a wide range of fundamental questions in astroparticle physics. The included papers cover measurements of the energy spectrum, mass composition, and arrival directions of ultra-high-energy cosmic rays, investigations of hadronic interactions in extensive air showers, and searches for ultra-high-energy photons and neutrinos. Additional topics include radio detection techniques, solar-related phenomena, and atmospheric events such as ELVES and TGFs. The list also contains first results and performance evaluations of the upgraded detectors, AugerPrime, along with reports on outreach and social engagement initiatives conducted by the Collaboration.         |
|<p style="color:red"> **ERROR** </p>| <p style="color:red">latex error invalid header</p> |

## Export documents

We now write the .md files and export relevant images

In [7]:
def export_markdown_summary(md: str, md_fname:str, directory: str):
    """Export MD document and associated relevant images"""
    import os
    import shutil
    import re

    if (os.path.exists(directory) and not os.path.isdir(directory)):
        raise RuntimeError(f"a non-directory file exists with name {directory:s}")

    if (not os.path.exists(directory)):
        print(f"creating directory {directory:s}")
        os.mkdir(directory)

    fig_fnames = (re.compile(r'\[Fig.*\]\((.*)\)').findall(md) + 
                  re.compile(r'\<img src="([^>\s]*)"[^>]*/>').findall(md))
    print("found figures", fig_fnames)
    for fname in fig_fnames:
        if 'http' in fname:
            # No need to copy online figures
            continue
        if not os.path.exists(fname):
            print("file not found", fname)
            continue
        print("copying ", fname, "to", directory)
        destdir = os.path.join(directory, os.path.dirname(fname))
        destfname = os.path.join(destdir, os.path.basename(fname))
        try:
            os.makedirs(destdir)
        except FileExistsError:
            pass
        shutil.copy(fname, destfname)
    with open(os.path.join(directory, md_fname), 'w') as fout:
        fout.write(md)
    print("exported in ", os.path.join(directory, md_fname))
    [print("    + " + os.path.join(directory,fk)) for fk in fig_fnames]

In [8]:
for paper_id, md in documents:
    export_markdown_summary(md, f"{paper_id:s}.md", '_build/html/')

found figures ['', '', '', '', '', '', '', '', '', '', '']
file not found 
file not found 
file not found 
file not found 
file not found 
file not found 
file not found 
file not found 
file not found 
file not found 
file not found 
exported in  _build/html/2507.13439.md
    + _build/html/
    + _build/html/
    + _build/html/
    + _build/html/
    + _build/html/
    + _build/html/
    + _build/html/
    + _build/html/
    + _build/html/
    + _build/html/
    + _build/html/
found figures ['tmp_2507.13921/./XUE10_chi2maps-cropped.png', 'tmp_2507.13921/./XUE10_CO2models-cropped.png', 'tmp_2507.13921/./XUE10_H2O_models-cropped.png']
copying  tmp_2507.13921/./XUE10_chi2maps-cropped.png to _build/html/
copying  tmp_2507.13921/./XUE10_CO2models-cropped.png to _build/html/
copying  tmp_2507.13921/./XUE10_H2O_models-cropped.png to _build/html/
exported in  _build/html/2507.13921.md
    + _build/html/tmp_2507.13921/./XUE10_chi2maps-cropped.png
    + _build/html/tmp_2507.13921/./XUE10_CO2mod

## Display the papers

Not necessary but allows for a quick check.

In [9]:
[display(Markdown(k[1])) for k in documents];

<div class="macros" style="visibility:hidden;">
$\newcommand{\ensuremath}{}$
$\newcommand{\xspace}{}$
$\newcommand{\object}[1]{\texttt{#1}}$
$\newcommand{\farcs}{{.}''}$
$\newcommand{\farcm}{{.}'}$
$\newcommand{\arcsec}{''}$
$\newcommand{\arcmin}{'}$
$\newcommand{\ion}[2]{#1#2}$
$\newcommand{\textsc}[1]{\textrm{#1}}$
$\newcommand{\hl}[1]{\textrm{#1}}$
$\newcommand{\footnote}[1]{}$
$\newcommand{\orcit}[1]{\protect\href{https://orcid.org/#1}{\protect\includegraphics[width=8pt]{figs/orcid.png}}}$
$\newcommand$
$\newcommand{\HD}{HD 86226 c\xspace}$
$\newcommand{\U}{\mathrm}$
$\newcommand{\tr}{\textcolor{red}}$
$\newcommand{\tg}{\textcolor{green}}$
$\newcommand$
$\newcommand{\hii}{\hbox{{\rm H {\scriptsize II}}}~}$</div>



<div id="title">

# The SPACE Program I: The featureless spectrum of $\HD$ challenges sub-Neptune atmosphere trends

</div>
<div id="comments">

[![arXiv](https://img.shields.io/badge/arXiv-2507.13439-b31b1b.svg)](https://arxiv.org/abs/2507.13439)<mark>Appeared on: 2025-07-21</mark> -  _Accepted for publication in Astronomy & Astrophysics_

</div>
<div id="authors">

<mark>K. A. Kahle</mark>, et al. -- incl., <mark>L. Kreidberg</mark>, <mark>P. Mollière</mark>, <mark>L. Acuña</mark>, <mark>C. Gapp</mark>

</div>
<div id="abstract">

**Abstract:** Sub-Neptune exoplanets are the most abundant type of planet known today. As they do not have a Solar System counterpart, many open questions exist about their composition and formation.    Previous spectroscopic studies rule out aerosol-free hydrogen-helium-dominated atmospheres for many characterized sub-Neptunes but are inconclusive about their exact atmospheric compositions. Here we characterize the hot (T $_\mathrm{eq}$ =1311 K) sub-Neptune HD 86226 c (R=2.2 $ \si{\rearth}$ , M= $\SI{7.25}{\mearth}$ ), which orbits its G-type host star on a 4 day orbit. The planet is located in a special part of the sub-Neptune parameter space:    its high equilibrium temperature prohibits methane-based haze formation, increasing the chances for a clear atmosphere on this planet.    We use HST data taken with WFC3 and STIS from the Sub-neptune Planetary Atmosphere Characterization Experiment (SPACE) Program to perform near-infrared ( $1.1$ -- $1.7$ $\si{\micro\meter}$ ) transmission spectroscopy and ultraviolet characterization of the host star.    We report a featureless transmission spectrum that is consistent within $0.4 \sigma$ with a constant transit depth of $418\pm14$ ppm. The amplitude of this spectrum is only 0.01 scale heights for a H/He-dominated atmosphere, excluding a cloud-free solar-metallicity atmosphere on HD 86226 c with a confidence of $6.5 \sigma$ .    Based on an atmospheric retrieval analysis and forward models of cloud and haze formation, we find that the featureless spectrum could be due to a metal enrichment $[\mathrm{M}/\mathrm{H}]>2.3$ ( $3 \sigma$ confidence lower limit) of a cloudless atmosphere, or alternatively silicate ($MgSiO_3$ ), iron (Fe), or manganese sulfide (MnS) clouds. For these species, we perform a detailed investigation of cloud formation in high metallicity, high-temperature atmospheres. Our results highlight that HD 86226 c does not follow the aerosol trend of sub-Neptunes found by previous studies. Follow-up observations with the James Webb Space Telescope could determine whether this planet aligns with the recent detections of metal-enriched atmospheres or if it harbors a cloud species otherwise atypical for sub-Neptunes.

</div>

<div id="div_fig1">

<img src="" alt="Fig12.1" width="16%"/><img src="" alt="Fig12.2" width="16%"/><img src="" alt="Fig12.3" width="16%"/><img src="" alt="Fig12.4" width="16%"/><img src="" alt="Fig12.5" width="16%"/><img src="" alt="Fig12.6" width="16%"/>

**Figure 12. -**  Comparison between different grid models and the goodness of fit for the MnS (top row), $MgSiO_3$(middle row), and Fe (bottom row) clouds calculated at the planetary radius set to 2.313 $\si{\rearth}$. For each species, the left panel shows our observations with uncertainties compared against some selected lowest reduced chi-squared ($\chi^2_\nu$) models, with the corresponding $\chi^2_\nu$ values in the bottom legend. The parameter values for clear, no-cloud models are given in the left legend, and for cloudy models, they are given in the right legend. The flat model is drawn as a dotted line at a transit depth of 418 ppm. The tables in the middle and right panels display the models' $\chi^2_\nu$ values on a grid of cloud droplet radius $r_g$, droplet volume mixing ratio $q^*$, and metallicity [M/H]. The table titles list fixed parameters. The color bar extent is set between 1 and 2 to highlight only the models that best fit the data.  (*fig:JB_MnS_MgSiO3_Fe*)

</div>
<div id="div_fig2">

<img src="" alt="Fig1.1" width="50%"/><img src="" alt="Fig1.2" width="50%"/>

**Figure 1. -** SPACE targetsSub-Neptune targets of the SPACE program. $\HD$ is shown with a green square marker, and the other SPACE targets are shown in purple. Top: Temperature-radius plane. Gray circles show the known population of planets with radii between $\SI{1.8}{\rearth}$ and $\SI{4}{\rearth}$ based on the entries of the NASA Exoplanet Archive$\footnote$mark in May 2024. Black crosses mark JWST Cycle 1-4 targets, except for the SPACE targets TOI-431d and $\HD$, which will be observed in Cycle 4. Bottom: Mass-radius plane. Color curves show models from [Zeng, Jacobsen and Sasselov (2019)]() for various planetary compositions and temperatures. (*fig:intro:SPACE*)

</div>
<div id="div_fig3">

<img src="" alt="Fig11.1" width="33%"/><img src="" alt="Fig11.2" width="33%"/><img src="" alt="Fig11.3" width="33%"/>

**Figure 11. -** Cloud scattering and absorption coefficients of MnS, $MgSiO_3$, and Fe condensates. All species are featureless on the narrow wavelength range of our observations, 1.1 -- 1.7 $\si{\micro\meter}$(marked with blue vertical lines), for the expected sizes of the cloud droplets of 0.01 -- 100 $\si{\micro\meter}$. As shown, this is not the case for MnS and $MgSiO_3$ clouds in the range of 0.2 -- 15 $\si{\micro\meter}$, where their effect on the spectra should be distinguishable, providing high-resolution observations. Note that values are plotted in log-log space to highlight the narrow wavelength range of our observations. (*fig:JB_Mieff_coeffs*)

</div><div id="qrcode"><img src=https://api.qrserver.com/v1/create-qr-code/?size=100x100&data="https://arxiv.org/abs/2507.13439"></div>

<div class="macros" style="visibility:hidden;">
$\newcommand{\ensuremath}{}$
$\newcommand{\xspace}{}$
$\newcommand{\object}[1]{\texttt{#1}}$
$\newcommand{\farcs}{{.}''}$
$\newcommand{\farcm}{{.}'}$
$\newcommand{\arcsec}{''}$
$\newcommand{\arcmin}{'}$
$\newcommand{\ion}[2]{#1#2}$
$\newcommand{\textsc}[1]{\textrm{#1}}$
$\newcommand{\hl}[1]{\textrm{#1}}$
$\newcommand{\footnote}[1]{}$
$\newcommand{\vdag}{(v)^\dagger}$
$\newcommand$
$\newcommand$</div>



<div id="title">

# XUE: The $CO_2$-rich terrestrial planet-forming region\ of an externally irradiated Herbig disk

</div>
<div id="comments">

[![arXiv](https://img.shields.io/badge/arXiv-2507.13921-b31b1b.svg)](https://arxiv.org/abs/2507.13921)<mark>Appeared on: 2025-07-21</mark> -  _20 pages, 13 figures, 4 tables_

</div>
<div id="authors">

J. Frediani, et al. -- incl., <mark>A. Winter</mark>, <mark>T. Henning</mark>, <mark>G. Perotti</mark>

</div>
<div id="abstract">

**Abstract:**            We investigate the James Webb Space Telescope (JWST) MIRI MRS gas molecular content of an externally irradiated Herbig disk, the F-type XUE 10 source, in the context of the eXtreme UV Environments (XUE) program. XUE 10 belongs to the massive star cluster NGC 6357 (1.69 kpc), where it is exposed to an external far-ultraviolet (FUV) radiation $\approx$ 10$^3$ times stronger than in the Solar neighborhood. We modeled the molecular features in the mid-infrared spectrum with Local Thermodynamic Equilibrium (LTE) 0D slab models. We derived basic parameters of the stellar host from a VLT FORS2 optical spectrum using PHOENIX stellar templates. We detect bright CO2 gas with the first simultaneous detection (> 5$\sigma$) of four isotopologues (12CO2, 13CO2, 16O12C18O, 16O12C17O) in a protoplanetary disk. We also detect faint CO emission (2$\sigma$) and the HI Pf$\alpha$ line (8$\sigma$). We also place strict upper limits on the water content, finding a total column density $\lesssim$ 10$^{18}$ cm$^{-2}$. The CO2 species trace low gas temperatures (300-370 K) with a range of column densities of 7.4 $\times$ 10$^{17}$ cm$^{-2}$ (16O12C17O)-1.3 $\times$ 10$^{20}$ cm$^{-2}$ (12CO2) in an equivalent emitting radius of 1.15 au. The emission of 13CO2 is likely affected by line optical depth effects. 16O12C18O and 16O12C17O abundances may be isotopically anomalous compared to the 16O/18O and 16O/17O ratios measured in the interstellar medium and the Solar System. We propose that the mid-infrared spectrum of XUE 10 is explained by H2O removal either via advection or strong photo-dissociation by stellar UV irradiation, and enhanced local CO2 gas-phase production. Outer disk truncation supports the observed CO2-H2O dichotomy. A CO2 vapor enrichment in 18O and 17O can be explained by means of external UV irradiation and early on (10$^{4-5}$ yr) delivery of isotopically anomalous water ice to the inner disk.         

</div>

<div id="div_fig1">

<img src="tmp_2507.13921/./XUE10_chi2maps-cropped.png" alt="Fig13" width="100%"/>

**Figure 13. -** Reduced chi-square $N_{\rm tot}$--$T_{\rm gas}$ maps resulting from the fitting of 0D LTE gas slab models to the MIRI MRS spectrum for various molecular species at increasing wavelength.
    The fit of the carbon dioxide isotopologues, $^{13}$$CO_2$, $^{16}$O$^{12}$C$^{18}$O, and $^{16}$O$^{12}$C$^{17}$O with fixed/free emitting radius are labeled in figure. In colorbar are the $\chi^{2}_{red}/\chi^{2}$ values, with 1.0 corresponding to the best-fit model. The white contours show the range of fitted emitting radii in au, while the red contours show the 1$\sigma$, 2$\sigma$, and 3$\sigma$ confidence intervals. The location of the best-fit models in the parameter space is indicated by a black cross marker. (*fig:chisquaremaps*)

</div>
<div id="div_fig2">

<img src="tmp_2507.13921/./XUE10_CO2models-cropped.png" alt="Fig5" width="100%"/>

**Figure 5. -** Continuum-subtracted MIRI spectrum of XUE 10 (black) with overlaid best-fit slab models of the identified carbon dioxide isotopologues, $^{12}$$CO_2$(red), $^{13}$$CO_2$(orange), $^{16}$O$^{12}$C$^{18}$O (green), $^{16}$O$^{12}$C$^{17}$O (purple) between 12.93 $\mu$m and 17.6 $\mu$m (from top to bottom panel). The vibrational quantum numbers $(\upsilon_1 \upsilon_2 \upsilon_3)$ corresponding to the fundamental $\upsilon_2$$\mathit{Q}$-branch and its associated hot bands are labeled for each species. The inset in the bottom panel shows the fiducial LTE slab model of $H_2$O that matches the observed line luminosity between 17.08--17.4 $\mu$m. The colored horizontal bars indicate the fitted wavelength ranges for $CO_2$ listed in Table \ref{tab:fitting_ranges}. The spectral uncertainty is indicated in each panel. (*fig:13-18mu_slabmodels*)

</div>
<div id="div_fig3">

<img src="tmp_2507.13921/./XUE10_H2O_models-cropped.png" alt="Fig8" width="100%"/>

**Figure 8. -** Continuum-subtracted MIRI spectrum of XUE 10 (black) and fiducial LTE slab models of $H_2$O between 6.4 $\mu$m and 7.6 $\mu$m (top panel), and between 18 $\mu$m and 25 $\mu$m (middle and bottom panel). The observed spectrum is vertically offset for visual clarity. The vertical dotted line marks the detected H$\alpha$ line of the Pfund series. The colored horizontal bars indicate where $H_2$O lines are tentatively identified. The spectral uncertainty is indicated in each panel. (*fig:1825mu_slabmodels*)

</div><div id="qrcode"><img src=https://api.qrserver.com/v1/create-qr-code/?size=100x100&data="https://arxiv.org/abs/2507.13921"></div>

# Create HTML index

In [10]:
from datetime import datetime, timedelta, timezone
from glob import glob
import os

files = glob('_build/html/*.md')
days = 7
now = datetime.today()
res = []
for fk in files:
    stat_result = os.stat(fk).st_ctime
    modified = datetime.fromtimestamp(stat_result, tz=timezone.utc).replace(tzinfo=None)
    delta = now.today() - modified
    if delta <= timedelta(days=days):
        res.append((delta.seconds, fk))
res = [k[1] for k in reversed(sorted(res, key=lambda x:x[1]))]
npub = len(res)
print(len(res), f" publications files modified in the last {days:d} days.")
# [ print('\t', k) for k in res ];

145  publications files modified in the last 7 days.


In [11]:
import datetime
from glob import glob

def get_last_n_days(lst, days=1):
    """ Get the documents from the last n days """
    sorted_lst = sorted(lst, key=lambda x: x[1], reverse=True)
    for fname, date in sorted_lst:
        if date >= str(datetime.date.today() - datetime.timedelta(days=days)):
            yield fname

def extract_appearance_dates(lst_file):
    dates = []

    def get_date(line):
        return line\
            .split('Appeared on:')[-1]\
            .split('</mark>')[0].strip()

    for fname in lst:
        with open(fname, 'r') as f:
            found_date = False
            for line in f:
                if not found_date:
                    if "Appeared on" in line:
                        found_date = True
                        dates.append((fname, get_date(line)))
                else:
                    break
    return dates

from glob import glob
lst = glob('_build/html/*md')
days = 7
dates = extract_appearance_dates(lst)
res = list(get_last_n_days(dates, days))
npub = len(res)
print(len(res), f" publications in the last {days:d} days.")

18  publications in the last 7 days.


In [12]:
def create_carousel(npub=4):
    """ Generate the HTML code for a carousel with `npub` slides """
    carousel = ["""  <div class="carousel" """,
                """       data-flickity='{ "autoPlay": 10000, "adaptiveHeight": true, "resize": true, "wrapAround": true, "pauseAutoPlayOnHover": true, "groupCells": 1 }' id="asyncTypeset">"""
                ]
    
    item_str = """    <div class="carousel-cell"> <div id="slide{k}" class="md_view">Content {k}</div> </div>"""
    for k in range(1, npub + 1):
        carousel.append(item_str.format(k=k))
    carousel.append("  </div>")
    return '\n'.join(carousel)

def create_grid(npub=4):
    """ Generate the HTML code for a flat grid with `npub` slides """
    grid = ["""  <div class="grid"> """,
                ]
    
    item_str = """    <div class="grid-item"> <div id="slide{k}" class="md_view">Content {k}</div> </div>"""
    for k in range(1, npub + 1):
        grid.append(item_str.format(k=k))
    grid.append("  </div>")
    return '\n'.join(grid)

In [13]:
carousel = create_carousel(npub)
docs = ', '.join(['"{0:s}"'.format(k.split('/')[-1]) for k in res])
slides = ', '.join([f'"slide{k}"' for k in range(1, npub + 1)])

with open("daily_template.html", "r") as tpl:
    page = tpl.read()
    page = page.replace("{%-- carousel:s --%}", carousel)\
               .replace("{%-- suptitle:s --%}",  "7-day archives" )\
               .replace("{%-- docs:s --%}", docs)\
               .replace("{%-- slides:s --%}", slides)
    
with open("_build/html/index_7days.html", 'w') as fout:
    fout.write(page)

In [14]:
# redo for today
days = 1
res = list(get_last_n_days(dates, days))
npub = len(res)
print(len(res), f" publications in the last day.")

carousel = create_carousel(npub)
docs = ', '.join(['"{0:s}"'.format(k.split('/')[-1]) for k in res])
slides = ', '.join([f'"slide{k}"' for k in range(1, npub + 1)])

with open("daily_template.html", "r") as tpl:
    page = tpl.read()
    page = page.replace("{%-- carousel:s --%}", carousel)\
               .replace("{%-- suptitle:s --%}",  "Daily" )\
               .replace("{%-- docs:s --%}", docs)\
               .replace("{%-- slides:s --%}", slides)
    
# print(carousel, docs, slides)
# print(page)
with open("_build/html/index_daily.html", 'w') as fout:
    fout.write(page)

2  publications in the last day.


In [15]:
# Create the flat grid of the last N papers (fixed number regardless of dates)
from itertools import islice 

npub = 6
res = [k[0] for k in (islice(reversed(sorted(dates, key=lambda x: x[1])), 6))]
print(len(res), f" {npub} publications selected.")

grid = create_grid(npub)
docs = ', '.join(['"{0:s}"'.format(k.split('/')[-1]) for k in res])
slides = ', '.join([f'"slide{k}"' for k in range(1, npub + 1)])

with open("grid_template.html", "r") as tpl:
    page = tpl.read()
    page = page.replace("{%-- grid-content:s --%}", grid)\
               .replace("{%-- suptitle:s --%}",  f"Last {npub:,d} papers" )\
               .replace("{%-- docs:s --%}", docs)\
               .replace("{%-- slides:s --%}", slides)
    
# print(grid, docs, slides)
# print(page)
with open("_build/html/index_npub_grid.html", 'w') as fout:
    fout.write(page)

6  6 publications selected.
