# MPIA Arxiv on Deck 2

Contains the steps to produce the paper extractions.

In [1]:
# Imports
import os
from IPython.display import Markdown, display
from tqdm.notebook import tqdm
import warnings
from PIL import Image 
import re

# requires arxiv_on_deck_2

from arxiv_on_deck_2.arxiv2 import (get_new_papers, 
                                    get_paper_from_identifier,
                                    retrieve_document_source, 
                                    get_markdown_badge)
from arxiv_on_deck_2 import (latex,
                             latex_bib,
                             mpia,
                             highlight_authors_in_list)

# Sometimes images are really big
Image.MAX_IMAGE_PIXELS = 1000000000 

In [2]:
# Some useful definitions.

class AffiliationWarning(UserWarning):
    """Warning category for affiliation-related issues (plain ``UserWarning`` subclass)."""

class AffiliationError(RuntimeError):
    """Error raised when a paper does not pass the affiliation check."""

def validation(source: str):
    """Check the affiliations of a paper before its TeX source is parsed.

    Delegates to ``mpia.affiliation_verifications`` and turns any outcome
    other than ``True`` into an exception, so that the caller can stop early.

    :param source: content of the source file to check
    :raises AffiliationError: when the verification does not return ``True``;
        the value returned by the verification is appended to the message
    """
    check = mpia.affiliation_verifications(source, verbose=True)
    if check is not True:
        # format instead of concatenating: the message is still built when the
        # verification returns something that is not a string
        raise AffiliationError(f"mpia.affiliation_verifications: {check}")

        
# Report AffiliationWarning every time it is issued (the 'always' action
# disables the default once-per-location deduplication of warnings).
warnings.simplefilter('always', AffiliationWarning)


def get_markdown_qrcode(paper_id: str):
    """ Generate a qrcode to the arxiv page using qrserver.com

    :param paper_id: arXiv identifier of the paper (e.g., ``2410.02864``)
    :returns: markdown text (an HTML ``<div id="qrcode">`` wrapping the image)
    """
    url = r"https://api.qrserver.com/v1/create-qr-code/?size=100x100&data="
    # The complete URL (API endpoint + encoded target) must sit inside a single
    # pair of quotes; quoting only the arXiv link leaves `src` malformed.
    txt = f"""<img src="{url}https://arxiv.org/abs/{paper_id}">"""
    txt = '<div id="qrcode">' + txt + '</div>'
    return txt


def clean_non_western_encoded_characters_commands(text: str) -> str:
    """ Strip the LaTeX commands wrapping non-western encoded characters

    Currently only the ``CJK`` environment with the ``UTF8``/``gbsn`` options
    is handled: the environment markers are dropped and its content is kept.
    The list of handled commands may need to grow.

    :param text: the text to clean
    :return: the cleaned text
    """
    cjk_environment = r"(\\begin{CJK}{UTF8}{gbsn})(.*?)(\\end{CJK})"
    return re.sub(cjk_environment, r"\2", text)


def get_initials(name: str) -> str:
    """ Get the short name, e.g., A.-B. FamName

    Every token but the last is reduced to its initial (hyphenated tokens keep
    their hyphen: ``Eva-Maria`` -> ``E.-M.``); the last token is kept in full.
    A parenthesised part of the name (often the non-western spelling) is moved
    unchanged to the end of the result.

    :param name: full name
    :returns: initials followed by the last token (and the parenthesised
        suffix, if any); an empty string when the name has no token at all
    """
    suffix = ''
    # account for non western names often in ()
    if '(' in name:
        name = clean_non_western_encoded_characters_commands(name)
        found = re.findall(r"\((.*?)\)", name)
        # an unbalanced "(" gives no match: leave the name untouched in that case
        if found:
            suffix = found[0]
            name = name.replace(f"({suffix})", '')
    split = name.split()
    if not split:
        # empty name, or nothing but the parenthesised suffix
        return f"({suffix})" if suffix else ''
    initials = []
    for token in split[:-1]:
        # ignore empty pieces produced by leading/trailing/double hyphens
        parts = [k for k in token.split('-') if k]
        if not parts:
            continue
        initials.append('-'.join(k[0] + '.' for k in parts))
    initials.append(split[-1].strip())
    if suffix:
        initials.append(f"({suffix})")
    return ' '.join(initials)

## get list of arxiv paper candidates

We use the MPIA staff (Mitarbeiter) list from the mpia.de website to get the author names.
We then get all new papers from arXiv and match their authors against that list.

In [3]:
# deal with the author list and edge cases of people that cannot be consistent on their name  

def filter_non_scientists(name: str) -> bool:
    """ Loose filter on expected authorships

    Rejects the names containing one of the listed family names
    (IT, administration, technical staff). The test is a plain substring
    match on `name`.

    :param name: name
    :returns: False if name is not a scientist, True otherwise
    """
    excluded = ('Licht', 'Binroth', 'Witzel', 'Jordan',
                'Zähringer', 'Scheerer', 'Hoffmann', 'Düe',
                'Hellmich', 'Enkler-Scharpegge', 'Witte-Nguy',
                'Dehen', 'Beckmann', 'Jager', 'Jäger')
    return not any(fragment in name for fragment in excluded)

def add_author_to_list(author_list: list) -> list:
    """ Make sure the manually maintained extra authors are in the list

    The given list is extended in place (names already present are left
    alone) and is also returned.

    :param author_list: list of authors
    :returns: updated list of authors (the same list object)
    """
    for extra in ('T. Henning',):
        if extra in author_list:
            continue
        author_list.append(extra)
    return author_list

# Author list: names from the MPIA website (mpia.get_mpia_mitarbeiter_list()
# already does some filtering; the second field of each entry is used as the
# name), minus the non-scientists, plus the authors whose MPIA name and paper
# name are inconsistent.
mpia_authors = add_author_to_list(
    [entry[1]
     for entry in mpia.get_mpia_mitarbeiter_list()
     if filter_non_scientists(entry[1])]
)

In [4]:
# Papers of the current arXiv listing (as returned by get_new_papers)
new_papers = get_new_papers()
# add manual references: identifiers listed here are fetched one by one with
# get_paper_from_identifier and appended to the listing (empty by default)
add_paper_refs = []
new_papers.extend([get_paper_from_identifier(k) for k in add_paper_refs])

def robust_call(fn, value, *args, **kwargs):
    """Apply `fn` to `value`, falling back to `value` itself on failure.

    :param fn: callable invoked as ``fn(value, *args, **kwargs)``
    :param value: first argument of the call, also the fallback result
    :returns: the result of the call, or `value` unchanged if the call
        raised an ``Exception`` (best effort: the error is not reported)
    """
    try:
        result = fn(value, *args, **kwargs)
    except Exception:
        result = value
    return result

# Select the papers with at least one author matching the MPIA list
candidates = []
for paperk in new_papers:
    # Check author list with their initials
    # (a name that cannot be shortened is used as it is)
    normed_author_list = [
        robust_call(mpia.get_initials, k) for k in paperk['authors']
    ]
    hl_authors = highlight_authors_in_list(normed_author_list,
                                           mpia_authors,
                                           verbose=True)
    # a highlighted name carries a 'mark' tag
    matches = [
        (hl, orig)
        for hl, orig in zip(hl_authors, paperk['authors'])
        if 'mark' in hl
    ]
    # the paper keeps the normalized/highlighted names from now on
    paperk['authors'] = hl_authors
    if not matches:
        continue
    # only select paper if an author matched our list
    candidates.append(paperk)
print("""Arxiv has {0:,d} new papers today""".format(len(new_papers)))
print("""          {0:,d} with possible author matches""".format(len(candidates)))

E. Schinnerer  ->  E. Schinnerer  |  ['E. Schinnerer']
A. Hughes  ->  A. Hughes  |  ['A. Hughes']
S. Hannon  ->  S. Hannon  |  ['S. Hannon']
E.-M. Ahrer  ->  E.-M. Ahrer  |  ['E.-M. Ahrer']
D. Christie  ->  D. Christie  |  ['D. Christie']
C. Gapp  ->  C. Gapp  |  ['C. Gapp']
K. El-Badry  ->  K. El-Badry  |  ['K. El-Badry']
Arxiv has 67 new papers today
          3 with possible author matches


# Parse sources and generate relevant outputs

From the candidates, we do the following steps:
* get their tarball from ArXiv (and extract data)
* find the main .tex file: find one with \documentclass{...} (sometimes it's non trivial)
* Check affiliations with :func:`validation`, which uses :func:`mpia.affiliation_verifications`
* If passing the affiliations: we parse the .tex source
   * inject sub-documents into the main (flatten the main document)
   * parse structure, extract information (title, abstract, authors, figures...)
   * handles `\graphicspath` if provided
* Generate the .md document.

In [5]:
# Process every candidate paper: get its source, check the affiliations,
# parse the TeX and build the markdown summary.
#   documents: (paper_id, markdown text) pairs of the papers that went through
#   failed: (paper, reason) pairs of the papers that did not
documents = []
failed = []
for paper in tqdm(candidates):
    # Normalize the identifier: lower case, without the 'arxiv:' prefix and
    # surrounding whitespace.
    # NOTE(review): r'\n' is a raw string, i.e. it removes a literal backslash
    # followed by "n", not a newline character -- confirm this is intended.
    paper['identifier'] = paper['identifier'].lower().replace('arxiv:', '').replace(r'\n', '').strip()
    paper_id = paper['identifier']
    
    folder = f'tmp_{paper_id}'

    try:
        # only retrieve the source when the folder is not already on disk
        if not os.path.isdir(folder):
            folder = retrieve_document_source(f"{paper_id}", f'tmp_{paper_id}')
        
        try:
            # `validation` raises AffiliationError when the affiliation check fails
            doc = latex.LatexDocument(folder, validation=validation)    
        except AffiliationError as affilerror:
            # NOTE(review): `msg` is built but not used in the code visible here
            msg = f"ArXiv:{paper_id:s} is not an MPIA paper... " + str(affilerror)
            failed.append((paper, "affiliation error: " + str(affilerror) ))
            continue
        
        # Hack because sometimes author parsing does not work well:
        # when the number of parsed authors differs from the arXiv record,
        # the author list of the arXiv record is used instead
        if (len(doc.authors) != len(paper['authors'])):
            doc._authors = paper['authors']
        else:
            # highlight authors (FIXME: doc.highlight_authors)
            # done on arxiv paper already
            doc._authors = highlight_authors_in_list(
                [get_initials(k) for k in doc.authors], 
                mpia_authors, verbose=True)
        # fall back on the abstract of the arXiv record when none was parsed
        if (doc.abstract) in (None, ''):
            doc._abstract = paper['abstract']
            
        # comment line: arXiv badge, date and (if any) the arXiv comments
        doc.comment = (get_markdown_badge(paper_id) + 
                       "<mark>Appeared on: " + paper['date'] + "</mark> - ")
        if paper['comments']:
            doc.comment += " _" + paper['comments'] + "_"
        
        full_md = doc.generate_markdown_text()
        
        full_md += get_markdown_qrcode(paper_id)
        
        # replace citations (best effort: on failure the issue is printed and
        # the text is kept as it is)
        try:
            bibdata = latex_bib.LatexBib.from_doc(doc)
            full_md = latex_bib.replace_citations(full_md, bibdata)
        except Exception as e:
            print("Issues with the citations")
            print(e)
        
        documents.append((paper_id, full_md))
    except Exception as e:
        # any other failure: warn, record the paper as failed and move on
        warnings.warn(latex.LatexWarning(f"{paper_id:s} did not run properly\n" +
                                         str(e)
                                        ))
        failed.append((paper, "latex error " + str(e)))

  0%|          | 0/3 [00:00<?, ?it/s]

Retrieving document from  https://arxiv.org/e-print/2410.02864
extracting tarball to tmp_2410.02864...

 done.



  exec(code_obj, self.user_global_ns, self.user_ns)

  exec(code_obj, self.user_global_ns, self.user_ns)


E. Schinnerer  ->  E. Schinnerer  |  ['E. Schinnerer']
S. Hannon  ->  S. Hannon  |  ['S. Hannon']




































Found 121 bibliographic references in tmp_2410.02864/pap_phangs_ml_pahs.bbl.
Retrieving document from  https://arxiv.org/e-print/2410.03527


extracting tarball to tmp_2410.03527...

 done.



  exec(code_obj, self.user_global_ns, self.user_ns)

  exec(code_obj, self.user_global_ns, self.user_ns)


E.-M. Ahrer  ->  E.-M. Ahrer  |  ['E.-M. Ahrer']
D. Christie  ->  D. Christie  |  ['D. Christie']
L. Acuña  ->  L. Acuña  |  ['L. Acuña']
C. Gapp  ->  C. Gapp  |  ['C. Gapp']


Found 148 bibliographic references in tmp_2410.03527/main.bbl.
Retrieving document from  https://arxiv.org/e-print/2410.03589


extracting tarball to tmp_2410.03589...

 done.



  exec(code_obj, self.user_global_ns, self.user_ns)

  exec(code_obj, self.user_global_ns, self.user_ns)


### Export the logs

Throughout, we also keep track of the logs per paper; see `_build/html/logs/log-{today's date}.md`.

In [6]:
# Write the daily log (markdown) and display the same content in the notebook:
# first the papers that were processed, then the failed ones grouped by reason.
import datetime
today = str(datetime.date.today())
logfile = f"_build/html/logs/log-{today}.md"

# the log folder may not exist yet
os.makedirs(os.path.dirname(logfile), exist_ok=True)

# explicit encoding: author names and abstracts contain non-ASCII characters,
# which the platform default encoding may not be able to write
with open(logfile, 'w', encoding='utf-8') as logs:
    # Success
    logs.write(f'# Arxiv on Deck 2: Logs - {today}\n\n')
    logs.write("""* Arxiv had {0:,d} new papers\n""".format(len(new_papers)))
    logs.write("""    * {0:,d} with possible author matches\n\n""".format(len(candidates)))
    logs.write("## Successful papers\n\n")
    display(Markdown("## Successful papers"))
    # identifiers of the papers for which a document was generated
    success = [k[0] for k in documents]
    for candid in candidates:
        if candid['identifier'].split(':')[-1] in success:
            display(candid)
            logs.write(candid.generate_markdown_text() + '\n\n')

    ## failed
    logs.write("## Failed papers\n\n")
    display(Markdown("## Failed papers"))
    # sort by reason so that identical reasons end up under a single heading
    failed = sorted(failed, key=lambda x: x[1])
    current_reason = ""
    for paper, reason in failed:
        # affiliation rejections are expected (green), anything else is an
        # actual processing error (red)
        if 'affiliation' in reason:
            color = 'green'
        else:
            color = 'red'
        data = Markdown(
                paper.generate_markdown_text() + 
                f'\n|<p style="color:{color:s}"> **ERROR** </p>| <p style="color:{color:s}">{reason:s}</p> |'
               )
        # new heading every time the reason changes
        if reason != current_reason:
            logs.write(f'### {reason:s} \n\n')
            current_reason = reason
        logs.write(data.data + '\n\n')
        
        # only display here the important errors (all in logs)
        # if color in ('red',):
        display(data)

## Successful papers


|||
|---:|:---|
| [![arXiv](https://img.shields.io/badge/arXiv-2410.02864-b31b1b.svg)](https://arxiv.org/abs/2410.02864) | **PHANGS-ML: the universal relation between PAH band and optical line ratios across nearby star-forming galaxies**  |
|| D. Baron, et al. -- incl., <mark>E. Schinnerer</mark>, <mark>A. Hughes</mark>, <mark>S. Hannon</mark> |
|*Appeared on*| *2024-10-07*|
|*Comments*| *resubmitted to ApJ after addressing referee report; Figure 12 summarizes the results*|
|**Abstract**|            The structure and chemistry of the dusty interstellar medium (ISM) are shaped by complex processes that depend on the local radiation field, gas composition, and dust grain properties. Of particular importance are Polycyclic Aromatic Hydrocarbons (PAHs), which emit strong vibrational bands in the mid-infrared, and play a key role in the ISM energy balance. We recently identified global correlations between PAH band and optical line ratios across three nearby galaxies, suggesting a connection between PAH heating and gas ionization throughout the ISM. In this work, we perform a census of the PAH heating -- gas ionization connection using $\sim$700,000 independent pixels that probe scales of 40--150 pc in nineteen nearby star-forming galaxies from the PHANGS survey. We find a universal relation between $\log$PAH(11.3 \mic/7.7 \mic) and $\log$([SII]/H$\alpha$) with a slope of $\sim$0.2 and a scatter of $\sim$0.025 dex. The only exception is a group of anomalous pixels that show unusually high (11.3 \mic/7.7 \mic) PAH ratios in regions with old stellar populations and high starlight-to-dust emission ratios. Their mid-infrared spectra resemble those of elliptical galaxies. AGN hosts show modestly steeper slopes, with a $\sim$10\% increase in PAH(11.3 \mic/7.7 \mic) in the diffuse gas on kpc scales. This universal relation implies an emerging simplicity in the complex ISM, with a sequence that is driven by a single varying property: the spectral shape of the interstellar radiation field. This suggests that other properties, such as gas-phase abundances, gas ionization parameter, and grain charge distribution, are relatively uniform in all but specific cases.         |


|||
|---:|:---|
| [![arXiv](https://img.shields.io/badge/arXiv-2410.03527-b31b1b.svg)](https://arxiv.org/abs/2410.03527) | **JWST/NIRISS reveals the water-rich "steam world" atmosphere of GJ 9827 d**  |
|| C. Piaulet-Ghorayeb, et al. -- incl., <mark>E.-M. Ahrer</mark>, <mark>D. Christie</mark>, <mark>C. Gapp</mark> |
|*Appeared on*| *2024-10-07*|
|*Comments*| *37 pages, 18 figures, Accepted for publication in ApJL*|
|**Abstract**|            With sizable volatile envelopes but smaller radii than the solar system ice giants, sub-Neptunes have been revealed as one of the most common types of planet in the galaxy. While the spectroscopic characterization of larger sub-Neptunes (2.5-4R$_\oplus$) has revealed hydrogen-dominated atmospheres, smaller sub-Neptunes (1.6--2.5R$_\oplus$) could either host thin, rapidly evaporating hydrogen-rich atmospheres or be stable metal-rich "water worlds" with high mean molecular weight atmospheres and a fundamentally different formation and evolutionary history. Here, we present the 0.6--2.8$\mu$m JWST NIRISS/SOSS transmission spectrum of GJ 9827 d, the smallest (1.98 R$_\oplus$) warm (T$_\mathrm{eq, A_B=0.3} \sim 620$K) sub-Neptune where atmospheric absorbers have been detected to date. Our two transit observations with NIRISS/SOSS, combined with the existing HST/WFC3 spectrum, enable us to break the clouds-metallicity degeneracy. We detect water in a highly metal-enriched "steam world" atmosphere (O/H of $\sim 4$ by mass and H$_2$O found to be the background gas with a volume mixing ratio of >31%). We further show that these results are robust to stellar contamination through the transit light source effect. We do not detect escaping metastable He, which, combined with previous nondetections of escaping He and H, supports the steam atmosphere scenario. In water-rich atmospheres, hydrogen loss driven by water photolysis happens predominantly in the ionized form which eludes observational constraints. We also detect several flares in the NIRISS/SOSS light-curves with far-UV energies of the order of 10$^{30}$ erg, highlighting the active nature of the star. Further atmospheric characterization of GJ 9827 d probing carbon or sulfur species could reveal the origin of its high metal enrichment.         |

## Failed papers


|||
|---:|:---|
| [![arXiv](https://img.shields.io/badge/arXiv-2410.03589-b31b1b.svg)](https://arxiv.org/abs/2410.03589) | **Variability of Central Stars of Planetary Nebulae with the Zwicky Transient Facility. I. Methods, Short-Timescale Variables, Binary Candidates, and the Unusual Nucleus of WeSb 1**  |
|| S. Bhattacharjee, et al. -- incl., <mark>K. El-Badry</mark> |
|*Appeared on*| *2024-10-07*|
|*Comments*| *19 pages + 8 pages appendix, 5 tables, 17 figures; Submitted to PASP; Comments are welcome!*|
|**Abstract**|            Over the past several decades, time-series photometry of CSPNe has yielded significant results including, but not limited to, discoveries of nearly 100 binary systems, insights into pulsations and winds in young white dwarfs, and studies of stars undergoing very late thermal pulses. We have undertaken a systematic study of optical photometric variability of cataloged CSPNe, using the epochal photometric data from the Zwicky Transient Facility (ZTF). By applying appropriate variability metrics, we arrive at a list of 94 significantly variable CSPNe. Based on the timescales of the light-curve activity, we classify the variables broadly into short- and long-timescale variables. In this first paper in this series, we focus on the former, which is the majority class comprising 83 objects. We infer periods for six sources for the first time, and recover several known periodic variables. Among the aperiodic sources, most exhibit a jitter around a median flux with a stable amplitude, and a few show outbursts. We draw attention to WeSb 1, which shows a different kind of variability: prominent deep and aperiodic dips, resembling transits from a dust/debris disk. We find strong evidence for a binary nature of WeSb 1 (possibly an A- to G-type companion). The compactness of the emission lines and inferred high electron densities make WeSb 1 a candidate for either an EGB 6-type planetary nucleus, or a symbiotic system inside an evolved planetary nebula, both of which are rare objects. To demonstrate further promise with ZTF, we report three additional newly identified periodic sources that do not appear in the list of highly variable sources. Finally, we also introduce a two-dimensional metric space defined by the von Neumann statistics and Pearson Skew and demonstrate its effectiveness in identifying unique variables of astrophysical interest, like WeSb 1.         |
|<p style="color:green"> **ERROR** </p>| <p style="color:green">affiliation error: mpia.affiliation_verifications: 'Heidelberg' keyword not found.</p> |

## Export documents

We now write the .md files and export relevant images

In [7]:
def export_markdown_summary(md: str, md_fname:str, directory: str):
    """Export MD document and associated relevant images

    The figures referenced in the markdown text (``[Fig...](path)`` links and
    ``<img src="path" .../>`` tags) are copied below `directory`, keeping
    their relative path, and the markdown text is written to
    ``directory/md_fname``. Online figures (path containing ``http``) and
    missing files are skipped. Progress is reported with ``print``.

    :param md: markdown text to export
    :param md_fname: name of the markdown file to create in `directory`
    :param directory: destination folder, created (with its parents) if needed
    :raises RuntimeError: if `directory` exists but is not a directory
    """
    import os
    import shutil
    import re

    if (os.path.exists(directory) and not os.path.isdir(directory)):
        raise RuntimeError(f"a non-directory file exists with name {directory:s}")

    if (not os.path.exists(directory)):
        print(f"creating directory {directory:s}")
        # makedirs: the parents of the destination may be missing as well
        os.makedirs(directory, exist_ok=True)

    fig_fnames = (re.compile(r'\[Fig.*\]\((.*)\)').findall(md) + 
                  re.compile(r'\<img src="([^>\s]*)"[^>]*/>').findall(md))
    print("found figures", fig_fnames)
    for fname in fig_fnames:
        if 'http' in fname:
            # No need to copy online figures
            continue
        if not os.path.exists(fname):
            print("file not found", fname)
            continue
        print("copying ", fname, "to", directory)
        # keep the relative location of the figure below the destination
        destdir = os.path.join(directory, os.path.dirname(fname))
        destfname = os.path.join(destdir, os.path.basename(fname))
        os.makedirs(destdir, exist_ok=True)
        shutil.copy(fname, destfname)
    # explicit encoding: the text may contain non-ASCII characters
    with open(os.path.join(directory, md_fname), 'w', encoding='utf-8') as fout:
        fout.write(md)
    print("exported in ", os.path.join(directory, md_fname))
    for fk in fig_fnames:
        print("    + " + os.path.join(directory, fk))

In [8]:
# Write one `<paper_id>.md` file per generated document (with its figures)
# into the build folder
for paper_id, md in documents:
    export_markdown_summary(md, f"{paper_id:s}.md", '_build/html/')

found figures ['tmp_2410.02864/./figures/correlations_sep_ordered_by_slope.png', 'tmp_2410.02864/./figures/feature_display.png', 'tmp_2410.02864/./figures/emerging_picture_PAH_ionized_gas.png']
copying  tmp_2410.02864/./figures/correlations_sep_ordered_by_slope.png to _build/html/
copying  tmp_2410.02864/./figures/feature_display.png to _build/html/
copying  tmp_2410.02864/./figures/emerging_picture_PAH_ionized_gas.png to _build/html/
exported in  _build/html/2410.02864.md
    + _build/html/tmp_2410.02864/./figures/correlations_sep_ordered_by_slope.png
    + _build/html/tmp_2410.02864/./figures/feature_display.png
    + _build/html/tmp_2410.02864/./figures/emerging_picture_PAH_ionized_gas.png
found figures ['tmp_2410.03527/./scarlet_poseidon_posteriors.png', 'tmp_2410.03527/./metallicity_pcloud_Zatm_3cases_new.png', 'tmp_2410.03527/./wlc_slc_fit_supremeSPOON.png']
copying  tmp_2410.03527/./scarlet_poseidon_posteriors.png to _build/html/
copying  tmp_2410.03527/./metallicity_pcloud_Zatm

## Display the papers

Not necessary but allows for a quick check.

In [9]:
# Render the markdown text of every generated document for a visual check
for _entry in documents:
    display(Markdown(_entry[1]))

<div class="macros" style="visibility:hidden;">
$\newcommand{\ensuremath}{}$
$\newcommand{\xspace}{}$
$\newcommand{\object}[1]{\texttt{#1}}$
$\newcommand{\farcs}{{.}''}$
$\newcommand{\farcm}{{.}'}$
$\newcommand{\arcsec}{''}$
$\newcommand{\arcmin}{'}$
$\newcommand{\ion}[2]{#1#2}$
$\newcommand{\textsc}[1]{\textrm{#1}}$
$\newcommand{\hl}[1]{\textrm{#1}}$
$\newcommand{\footnote}[1]{}$
$\newcommand{\vdag}{(v)^\dagger}$
$\newcommand$
$\newcommand$
$\newcommand{\halpha}{H\alpha\xspace}$
$\newcommand{\hbeta}{H\beta\xspace}$
$\newcommand{\oiiifull}{\text{[O {\sc iii}]}\lambda   5007\mathrm{Å}\xspace}$
$\newcommand{\oiii}{\text{[O {\sc iii}]}\xspace}$
$\newcommand{\oifull}{\text{[O {\sc i}]}\lambda   6300\mathrm{Å}\xspace}$
$\newcommand{\oi}{\text{[O {\sc i}]}\xspace}$
$\newcommand{\niifull}{\text{[N {\sc ii}]}\lambda   6584\mathrm{Å}\xspace}$
$\newcommand{\nii}{\text{[N {\sc ii}]}\xspace}$
$\newcommand{\siifull}{\text{[S {\sc ii}]}\lambda\lambda   6717\mathrm{Å}+6731\mathrm{Å}\xspace}$
$\newcommand{\sii}{\text{[S {\sc ii}]}\xspace}$
$\newcommand{\oiiihbeta}{\log (\text{[O {\sc iii}]}/\text{H}\beta)\xspace}$
$\newcommand{\niihalpha}{\log (\text{[N {\sc ii}]}/\text{H}\alpha)\xspace}$
$\newcommand{\siihalpha}{\log (\text{[S {\sc ii}]}/\text{H}\alpha)\xspace}$
$\newcommand{\oihalpha}{\log (\text{[O {\sc i}]}/\text{H}\alpha)\xspace}$
$\newcommand{\cofull}{\mathrm{^{12}CO(2-1)} }$
$\newcommand{\mic}{\mathrm{\mu m}\xspace}$</div>



<div id="title">

# PHANGS-ML: the universal relation between PAH band and optical line ratios across nearby star-forming galaxies

</div>
<div id="comments">

[![arXiv](https://img.shields.io/badge/arXiv-2410.02864-b31b1b.svg)](https://arxiv.org/abs/2410.02864)<mark>Appeared on: 2024-10-07</mark> -  _resubmitted to ApJ after addressing referee report; Figure 12 summarizes the results_

</div>
<div id="authors">

D. Baron, et al. -- incl., <mark>E. Schinnerer</mark>, <mark>S. Hannon</mark>

</div>
<div id="abstract">

**Abstract:** The structure and chemistry of the dusty interstellar medium (ISM) are shaped by complex processes that depend on the local radiation field, gas composition, and dust grain properties. Of particular importance are Polycyclic Aromatic Hydrocarbons (PAHs), which emit strong vibrational bands in the mid-infrared, and play a key role in the ISM energy balance. We recently identified global correlations between PAH band and optical line ratios across three nearby galaxies, suggesting a connection between PAH heating and gas ionization throughout the ISM. In this work, we perform a census of the PAH heating -- gas ionization connection using $\sim$ 700,000 independent pixels that probe scales of 40--150 pc in nineteen nearby star-forming galaxies from the PHANGS survey. We find a universal relation between $\log$ PAH(11.3 $\mic$ /7.7 $\mic$ ) and $\log$ ( [ SII ] /H $\alpha$ ) with a slope of $\sim$ 0.2 and a scatter of $\sim$ 0.025 dex. The only exception is a group of anomalous pixels that show unusually high (11.3 $\mic$ /7.7 $\mic$ ) PAH ratios in regions with old stellar populations and high starlight-to-dust emission ratios. Their mid-infrared spectra resemble those of elliptical galaxies. AGN hosts show modestly steeper slopes, with a $\sim$ 10 \% increase in PAH(11.3 $\mic$ /7.7 $\mic$ ) in the diffuse gas on kpc scales. This universal relation implies an emerging simplicity in the complex ISM, with a sequence that is driven by a single varying property: the spectral shape of the interstellar radiation field. This suggests that other properties, such as gas-phase abundances, gas ionization parameter, and grain charge distribution, are relatively uniform in all but specific cases.

</div>

<div id="div_fig1">

<img src="tmp_2410.02864/./figures/correlations_sep_ordered_by_slope.png" alt="Fig6" width="100%"/>

**Figure 6. -** **The $\mathrm{\log**$PAH(11.3/7.7) versus $\sii$halpha relations across individual PHANGS galaxies on scales of 40--120 pc.} Each panel shows the 2D distribution of the $\log$PAH(11.3/7.7) band ratio versus the $\sii$halpha optical line ratio across a single PHANGS galaxy. The gray color-coding represent the number of pixels with the corresponding PAH and optical line ratios, where the total number of pixels ranges from 9 000 to 80 000, with most galaxies having around 20 000 spatially independent pixels. The black contours encompass the regions within which the counts are 5, 20, and 50. The relations are obtained using the maps at the $C_{\mathrm{opt}}$ resolution, and each panel notes the spatial scale in parsec probed for the galaxy. The red error bars represent 6 bins in $\sii$halpha and their medians and median absolute deviations of the $\log$PAH(11.3/7.7) ratio in the bin. The red solid lines represent the best-fitting linear relations of the bins. The galaxies are ordered by their best-fitting slope, from the shallowest (NGC 1087) to the steepest (NGC 1365). Galaxies with pixels belonging to the anomalous group identified in Section \ref{sec:results:PCA} and studied separately in Section \ref{sec:results:anomalous_PAHs} are marked with orange edges. These pixels are filtered out and excluded from 2D histograms, contours, and best fits. Galaxies with known Seyfert nuclei are marked with blue edges. (*f:correlations_sep_ordered_by_slope*)

</div>
<div id="div_fig2">

<img src="tmp_2410.02864/./figures/feature_display.png" alt="Fig5" width="100%"/>

**Figure 5. -** **Two-dimensional visualization by {\sc pca** of the optical-infrared feature space spanned by $\sim$100 000 150 pc-scale pixels from the 19 PHANGS galaxies.}
The top left panel shows the result of the {\sc pca} decomposition applied to 108 403 spatially independent pixels that trace different optical and infrared features measured over a 150 pc scale. The bars represent the fraction of explained variance by each of the principal components, with the first component accounting for 41\% of the full variance, and the second for 17\%. Together, they account for 58\% of the total variance in the data. The inset in the top left panel shows the location of the pixels in the two-dimensional plane spanned by these first two orthogonal principal components. In the rest of the panels, the distribution of the pixels in this 2D plane is color-coded by different features of interest, where it can be seen that the first principal component (x-axis) aligns roughly with the $\log$PAH(11.3/7.7) versus optical line sequence, and the second component (y-axis) aligns roughly with the PAH-to-total dust mass fraction, $R_{\mathrm{PAH}}$. In the bottom row, we mark a group of pixels that show anomalously high $\log$PAH(11.3/7.7) ratios of $\sim$0.7 dex (typical $\log$PAH(11.3/7.7) ratios show a maximum of $\sim$0.4 dex, see Section \ref{sec:results:correlations}). The same group of pixels originates from regions dominated by old stellar populations, very high stellar-to-mid infrared emission ratio (suggesting old and bright populations), and relatively low PAH-to-hot dust ratio. We study this anomalous group in Section \ref{sec:results:anomalous_PAHs}. (*f:feature_display*)

</div>
<div id="div_fig3">

<img src="tmp_2410.02864/./figures/emerging_picture_PAH_ionized_gas.png" alt="Fig14" width="100%"/>

**Figure 14. -** **Emerging picture of PAH band--optical line ratios correlations across nearby star-forming galaxies.****The left panel** depicts a typical nearby star-forming galaxy, where there is a tight correlation between $\log$PAH(11.3/7.7) and $\sii$halpha ratios on 40--150 pc scales. Similarly tight correlations are observed with other optical line ratios: $\oiii$hbeta, $\nii$halpha, and $\oi$halpha. The bottom left part of the relation is dominated by star-forming regions, where young massive stars ionize the gas. The top right part of the relation corresponds to diffuse regions on kpc scales, where a combination of radiation leaking from HII regions and from hot and evolved stars ionizes the gas, giving rise to LINER/LIER-like optical line ratios. The slope of the relation is $\beta \sim 0.2$, roughly matching the expected relation for a varying radiation field that heats the PAHs and ionizes the gas (see \citetalias{baron24}). The small scatter in the relation, $\sim$ 0.025 dex, suggests uniform PAH ionization fraction across different environments and galaxies. **The middle panel** represents a small group of pixels observed in a handful of galaxies, showing anomalously-high $\log$PAH(11.3/7.7) ratios for a constant $\sii$halpha ratio. Such PAH ratios are observed in regions with old stellar populations and unusually high starlight-to-dust emission ratio, $\log$F200W/F770W$>$ 0.4 dex. The anomalous ratios are consistent with PAH populations with larger grain sizes, which can be either the result of processes that change the typical ISM grain population as it streams into the center (destruction via shocks or under-production in grain-grain collisions), or the result of a varying mixing between two grain populations: those of the typical dusty ISM and those produced in AGB star atmospheres. **The right panel** shows a star-forming galaxy with a low luminosity AGN in its center. 
The AGN contributes to the total radiation field affecting the PAHs and ionized gas on kpc scales, resulting in a small enhancement of $\log$PAH(11.3/7.7)$\sim$0.05 dex$\sim$10\%. The AGN has little impact on the $\sii$halpha ratio, but a stronger impact on the $\oiii$hbeta ratio, with the two optical line ratios consistent with Seyfert-like ionization in standard line diagnostic diagrams. (*f:emerging_picture_PAH_ionized_gas*)

</div><div id="qrcode"><img src=https://api.qrserver.com/v1/create-qr-code/?size=100x100&data="https://arxiv.org/abs/2410.02864"></div>

<div class="macros" style="visibility:hidden;">
$\newcommand{\ensuremath}{}$
$\newcommand{\xspace}{}$
$\newcommand{\object}[1]{\texttt{#1}}$
$\newcommand{\farcs}{{.}''}$
$\newcommand{\farcm}{{.}'}$
$\newcommand{\arcsec}{''}$
$\newcommand{\arcmin}{'}$
$\newcommand{\ion}[2]{#1#2}$
$\newcommand{\textsc}[1]{\textrm{#1}}$
$\newcommand{\hl}[1]{\textrm{#1}}$
$\newcommand{\footnote}[1]{}$
$\newcommand{\vdag}{(v)^\dagger}$
$\newcommand$
$\newcommand$
$\newcommand{\logX}[1]{\ensuremath{\log(\mathrm{X_{\ce{#1}}})}}$
$\newcommand{\logXratio}[2]{\ensuremath{\log(\mathrm{X_{\ce{#1}} / X_{\ce{#2}} })}}$
$\newcommand{\umontreal}{Department of Physics and Trottier Institute for Research on Exoplanets, Université de Montréal, Montreal, QC, Canada \href{mailto:caroline.piaulet@umontreal.ca}{caroline.piaulet@umontreal.ca}}$
$\newcommand{\thefigure}{A\arabic{figure}}$
$\newcommand{\thetable}{A\arabic{table}}$</div>



<div id="title">

# JWST/NIRISS reveals the water-rich "steam world" atmosphere of GJ 9827 d

</div>
<div id="comments">

[![arXiv](https://img.shields.io/badge/arXiv-2410.03527-b31b1b.svg)](https://arxiv.org/abs/2410.03527)<mark>Appeared on: 2024-10-07</mark> -  _37 pages, 18 figures, Accepted for publication in ApJL_

</div>
<div id="authors">

C. Piaulet-Ghorayeb, et al. -- incl., <mark>E.-M. Ahrer</mark>, <mark>D. Christie</mark>, <mark>L. Acuña</mark>, <mark>C. Gapp</mark>

</div>
<div id="abstract">

**Abstract:** With sizable volatile envelopes but smaller radii than the solar system ice giants, sub-Neptunes have been revealed as one of the most common types of planet in the galaxy. While the spectroscopic characterization of larger sub-Neptunes (2.5--4R $_\oplus$ ) has revealed hydrogen-dominated atmospheres, smaller sub-Neptunes (1.6--2.5R $_\oplus$ ) could either host thin, rapidly evaporating hydrogen-rich atmospheres or be stable metal-rich “water worlds” with high mean molecular weight atmospheres and a fundamentally different formation and evolutionary history. Here, we present the 0.6--2.8 $\mu$ m JWST NIRISS/SOSS transmission spectrum of GJ 9827 d, the smallest (1.98 R $_\oplus$ ) warm (T $_\mathrm{eq, A_B=0.3} \sim 620$ K) sub-Neptune where atmospheric absorbers have been detected to date. Our two transit observations with NIRISS/SOSS, combined with the existing _HST_ /WFC3 spectrum, enable us to break the clouds-metallicity degeneracy. We detect water in a highly metal-enriched "steam world" atmosphere (O/H of $\sim 4$ by mass and $H_2$ O found to be the background gas with a volume mixing ratio (VMR) of $>31$ \% ). We further show that these results are robust to stellar contamination through the transit light source effect. We do not detect escaping metastable He, which, combined with previous nondetections of escaping He and H, supports the steam atmosphere scenario. In water-rich atmospheres, hydrogen loss driven by water photolysis happens predominantly in the ionized form which eludes observational constraints. We also detect several flares in the NIRISS/SOSS light-curves with far-UV energies of the order of 10 $^{30}$ erg, highlighting the active nature of the star. Further atmospheric characterization of GJ 9827 d probing carbon or sulfur species could reveal the origin of its high metal enrichment.

</div>

<div id="div_fig1">

<img src="tmp_2410.03527/./scarlet_poseidon_posteriors.png" alt="Fig17" width="100%"/>

**Figure 17. -** Posterior distributions for important atmospheric volatile species (top, blue) and the stellar heterogeneity component (bottom, orange), for the POSEIDON retrieval (top rows) using a centered-log-ratios (CLR) agnostic prior on the background gas, and for the SCARLET retrieval (bottom rows) where $H_2$/He is the filler (background) gas. Different colors correspond to different retrievals where spots or faculae are included or excluded (see legend). Our constraints on the HMMW atmospheric volatile species are broadly unaffected by our treatment of stellar contamination, and the retrieval only provides a lower limit on the $H_2$O abundance. For the SCARLET retrieval, the distributions on $T_\mathrm{spot}$ and $T_\mathrm{fac}$ are derived from the posterior distributions on $\Delta T_\mathrm{spot}$, $\Delta T_\mathrm{fac}$ and $T_\mathrm{phot,star}$. The POSEIDON prior on the spot temperature extends all the way to 2300 K, while SCARLET assumes that spots are at most 800 K colder than the photosphere. (*fig:1d_distri_retrieval*)

</div>
<div id="div_fig2">

<img src="tmp_2410.03527/./metallicity_pcloud_Zatm_3cases_new.png" alt="Fig10" width="100%"/>

**Figure 10. -** Constraints on the atmospheric composition from the POSEIDON retrieval with the agnostic (CLR) prior on the background gas (pink), the SCARLET retrieval with $H_2$/He assumed to be the background gas (blue), and SCARLET chemically-consistent retrieval (orange). _Left panel:_ Contours representing the 0.5, 1, 1.5, and 2$\sigma$ levels in the joint posterior probability distribution on the atmospheric metallicity and the gray cloud top pressure, for all three retrievals. _Right panel:_ Kernel density estimations of  the atmospheric metal mass fraction $Z_\mathrm{atm}$ obtained from all posterior samples, for each retrieval setup. (*fig:met_pcloud_zatm*)

</div>
<div id="div_fig3">

<img src="tmp_2410.03527/./wlc_slc_fit_supremeSPOON.png" alt="Fig8" width="100%"/>

**Figure 8. -** _Top panels:_ Results from the white light curve fit of the supreme-SPOON reduction for Visits 1 (left) and 2 (right), and fitted systematics models. The order 1 systematics-corrected light-curves are shown (teal points), binned for visual purposes, along with the best-fit transit model (black) and the systematics model including the GP component (red, with the GP scaled for visualization purposes). The vertical blue shaded regions indicate the three candidate flares with the highest signal-to-noise spectra and following typical flare evolution profiles (see Section \ref{ssec:intransit_varia}).
    _Bottom panels:_ Spectroscopic light-curve fits to the two NIRISS/SOSS transits of GJ 9827 d from the supreme-SPOON reduction. The left panel corresponds to the first visit and the right panel to the second visit. We show the detrended broadband light-curve for order 2, and seven spectroscopic light-curves from order 1, from top to bottom. The best-fit astrophysical model is shown for each light-curve (black) and light-curves are offset relative to each other for clarity.
     (*fig:wlc_slc_fit*)

</div><div id="qrcode"><img src=https://api.qrserver.com/v1/create-qr-code/?size=100x100&data="https://arxiv.org/abs/2410.03527"></div>

# Create HTML index

In [10]:
from datetime import datetime, timedelta, timezone
from glob import glob
import os

# Select the rendered markdown files whose on-disk timestamp falls within the
# last `days` days, and list them by file name in descending order.
files = glob('_build/html/*.md')
days = 7
# Timezone-aware "now" in UTC: file timestamps below are also built as aware
# UTC datetimes, so the subtraction is not skewed by the local UTC offset.
now = datetime.now(timezone.utc)
recent_files = []
for fk in files:
    # NOTE(review): st_ctime is the metadata-change time on Unix (creation time
    # on Windows), not the content-modification time (st_mtime) -- confirm that
    # this is the intended timestamp for "modified".
    stat_result = os.stat(fk).st_ctime
    modified = datetime.fromtimestamp(stat_result, tz=timezone.utc)
    delta = now - modified
    if delta <= timedelta(days=days):
        recent_files.append(fk)
# Paths returned by glob are unique, so a descending sort is all that is needed.
res = sorted(recent_files, reverse=True)
npub = len(res)
print(len(res), f" publications files modified in the last {days:d} days.")

208  publications files modified in the last 7 days.


In [11]:
import datetime
from glob import glob

def get_last_n_days(lst, days=1):
    """ Yield the names of the documents dated within the last `days` days.

    :param lst: iterable of (file name, date string) pairs; the date string is
                compared as text against `str()` of a `datetime.date`
    :param days: how many days back from today the cutoff date lies
    :returns: generator of file names, ordered by date string, latest first
    """
    newest_first = sorted(lst, key=lambda entry: entry[1], reverse=True)
    yield from (
        fname
        for fname, date in newest_first
        if date >= str(datetime.date.today() - datetime.timedelta(days=days))
    )

def extract_appearance_dates(lst_file):
    """ Collect the "Appeared on" date of each given document.

    Every file is read line by line and the first line containing
    "Appeared on" provides its date. Files without such a line contribute
    nothing to the result.

    :param lst_file: iterable of paths of the files to scan
    :returns: list of (file name, date string) tuples, in input order
    """

    def get_date(line):
        """ Return the text between 'Appeared on:' and '</mark>', stripped """
        return line\
            .split('Appeared on:')[-1]\
            .split('</mark>')[0].strip()

    dates = []
    # Iterate over the argument itself, not over a module-level list that
    # happens to be in scope when the function is called.
    for fname in lst_file:
        with open(fname, 'r') as f:
            for line in f:
                if "Appeared on" in line:
                    dates.append((fname, get_date(line)))
                    # Only the first match counts; skip the rest of the file.
                    break
    return dates

from glob import glob

# Scan every rendered markdown page for its appearance date, then keep the
# pages that appeared within the last `days` days.
lst = glob('_build/html/*md')
dates = extract_appearance_dates(lst)
days = 7
res = [fname for fname in get_last_n_days(dates, days)]
npub = len(res)
print(npub, f" publications in the last {days:d} days.")

9  publications in the last 7 days.


In [12]:
def create_carousel(npub=4):
    """ Build the HTML markup of a carousel holding `npub` slides.

    The carousel options are carried by the `data-flickity` attribute of the
    outer element.

    :param npub: number of slides; their ids run from "slide1" to "slide<npub>"
    :returns: HTML text, one element per line
    """
    opening = (
        """  <div class="carousel" """,
        """       data-flickity='{ "autoPlay": 10000, "adaptiveHeight": true, "resize": true, "wrapAround": true, "pauseAutoPlayOnHover": true, "groupCells": 1 }' id="asyncTypeset">""",
    )
    cells = [
        f"""    <div class="carousel-cell"> <div id="slide{num}" class="md_view">Content {num}</div> </div>"""
        for num in range(1, npub + 1)
    ]
    return '\n'.join([*opening, *cells, "  </div>"])

def create_grid(npub=4):
    """ Build the HTML markup of a flat grid holding `npub` slides.

    :param npub: number of grid items; their ids run from "slide1" to "slide<npub>"
    :returns: HTML text, one element per line
    """
    items = [
        f"""    <div class="grid-item"> <div id="slide{num}" class="md_view">Content {num}</div> </div>"""
        for num in range(1, npub + 1)
    ]
    return '\n'.join(["""  <div class="grid"> """, *items, "  </div>"])

In [13]:
# Build the 7-day archive page: one carousel slide per selected document.
carousel = create_carousel(npub)
# Quoted, comma-separated base names of the documents and ids of the slides,
# substituted into the template placeholders below.
docs = ', '.join('"{0:s}"'.format(path.split('/')[-1]) for path in res)
slides = ', '.join(f'"slide{num}"' for num in range(1, npub + 1))

with open("daily_template.html", "r") as tpl:
    page = tpl.read()

# Fill the template placeholders one at a time.
page = page.replace("{%-- carousel:s --%}", carousel)
page = page.replace("{%-- suptitle:s --%}", "7-day archives")
page = page.replace("{%-- docs:s --%}", docs)
page = page.replace("{%-- slides:s --%}", slides)

with open("_build/html/index_7days.html", 'w') as fout:
    fout.write(page)

In [14]:
# Same page as the 7-day archive, restricted to the documents of the last day.
days = 1
res = [fname for fname in get_last_n_days(dates, days)]
npub = len(res)
print(npub, " publications in the last day.")

carousel = create_carousel(npub)
# Quoted, comma-separated base names of the documents and ids of the slides,
# substituted into the template placeholders below.
docs = ', '.join('"{0:s}"'.format(path.split('/')[-1]) for path in res)
slides = ', '.join(f'"slide{num}"' for num in range(1, npub + 1))

with open("daily_template.html", "r") as tpl:
    page = tpl.read()

# Fill the template placeholders one at a time.
page = page.replace("{%-- carousel:s --%}", carousel)
page = page.replace("{%-- suptitle:s --%}", "Daily")
page = page.replace("{%-- docs:s --%}", docs)
page = page.replace("{%-- slides:s --%}", slides)

with open("_build/html/index_daily.html", 'w') as fout:
    fout.write(page)

2  publications in the last day.


In [15]:
# Create the flat grid of the last N papers (fixed number regardless of dates)
from itertools import islice 

# Maximum number of papers shown in the grid.
npub = 6
# Latest appearance dates first; take at most `npub` documents.
res = [k[0] for k in islice(reversed(sorted(dates, key=lambda x: x[1])), npub)]
# Fewer documents than requested may be available: size the grid, the slide
# ids and the title on what was actually selected so that they all agree.
npub = len(res)
print(f"{npub} publications selected.")

grid = create_grid(npub)
# Quoted, comma-separated base names of the documents and ids of the slides,
# substituted into the template placeholders below.
docs = ', '.join(['"{0:s}"'.format(k.split('/')[-1]) for k in res])
slides = ', '.join([f'"slide{k}"' for k in range(1, npub + 1)])

with open("grid_template.html", "r") as tpl:
    page = tpl.read()
    page = page.replace("{%-- grid-content:s --%}", grid)\
               .replace("{%-- suptitle:s --%}",  f"Last {npub:,d} papers" )\
               .replace("{%-- docs:s --%}", docs)\
               .replace("{%-- slides:s --%}", slides)
    
with open("_build/html/index_npub_grid.html", 'w') as fout:
    fout.write(page)

6  6 publications selected.
