# MPIA Arxiv on Deck 2

Contains the steps to produce the paper extractions.

In [1]:
# Imports
import os
from IPython.display import Markdown, display
from tqdm.notebook import tqdm
import warnings
from PIL import Image 
import re

# requires arxiv_on_deck_2

from arxiv_on_deck_2.arxiv2 import (get_new_papers, 
                                    get_paper_from_identifier,
                                    retrieve_document_source, 
                                    get_markdown_badge)
from arxiv_on_deck_2 import (latex,
                             latex_bib,
                             mpia,
                             highlight_authors_in_list)

# Sometimes images are really big
Image.MAX_IMAGE_PIXELS = 1000000000 

In [2]:
# Some useful definitions.

class AffiliationWarning(UserWarning):
    pass

class AffiliationError(RuntimeError):
    pass

def validation(source: str):
    """Raises error paper during parsing of source file
    
    Allows checks before parsing TeX code.
    
    Raises AffiliationWarning
    """
    check = mpia.affiliation_verifications(source, verbose=True)
    if check is not True:
        raise AffiliationError("mpia.affiliation_verifications: " + check)

        
warnings.simplefilter('always', AffiliationWarning)


def get_markdown_qrcode(paper_id: str):
    """ Generate a qrcode to the arxiv page using qrserver.com
    
    :param paper: Arxiv paper
    :returns: markdown text
    """
    url = r"https://api.qrserver.com/v1/create-qr-code/?size=100x100&data="
    txt = f"""<img src={url}"https://arxiv.org/abs/{paper_id}">"""
    txt = '<div id="qrcode">' + txt + '</div>'
    return txt


def clean_non_western_encoded_characters_commands(text: str) -> str:
    """ Remove non-western encoded characters from a string
    List may need to grow.
    
    :param text: the text to clean
    :return: the cleaned text
    """
    text = re.sub(r"(\\begin{CJK}{UTF8}{gbsn})(.*?)(\\end{CJK})", r"\2", text)
    return text


def get_initials(name: str) -> str:
    """ Get the short name, e.g., A.-B. FamName
    :param name: full name
    :returns: initials
    """
    initials = []
    # account for non western names often in ()
    if '(' in name:
        name = clean_non_western_encoded_characters_commands(name)
        suffix = re.findall(r"\((.*?)\)", name)[0]
        name = name.replace(f"({suffix})", '')
    else:
        suffix = ''
    split = name.split()
    for token in split[:-1]:
        if '-' in token:
            current = '-'.join([k[0] + '.' for k in token.split('-')])
        else:
            current = token[0] + '.'
        initials.append(current)
    initials.append(split[-1].strip())
    if suffix:
        initials.append(f"({suffix})")
    return ' '.join(initials)

## get list of arxiv paper candidates

We use the MPIA mitarbeiter list webpage from mpia.de to get author names
We then get all new papers from Arxiv and match authors

In [3]:
# deal with the author list and edge cases of people that cannot be consistent on their name  

def filter_non_scientists(name: str) -> bool:
    """ Loose filter on expected authorships

    removing IT, administration, technical staff
    :param name: name
    :returns: False if name is not a scientist
    """
    remove_list = ['Licht', 'Binroth', 'Witzel', 'Jordan',
                   'Zähringer', 'Scheerer', 'Hoffmann', 'Düe',
                   'Hellmich', 'Enkler-Scharpegge', 'Witte-Nguy',
                   'Dehen', 'Beckmann', 'Jager', 'Jäger'
                  ]

    for k in remove_list:
        if k in name:
            return False
    return True

def add_author_to_list(author_list: list) -> list:
    """ Add author to list if not already in list
    
    :param author: author name
    :param author_list: list of authors
    :returns: updated list of authors
    """
    add_list = ['T. Henning']

    for author in add_list:
        if author not in author_list:
            author_list.append(author)
    return author_list

# get list from MPIA website
# filter for non-scientists (mpia.get_mpia_mitarbeiter_list() does some filtering)
mpia_authors = [k[1] for k in mpia.get_mpia_mitarbeiter_list() if filter_non_scientists(k[1])]
# add some missing author because of inconsistencies in their MPIA name and author name on papers
mpia_authors = add_author_to_list(mpia_authors)

In [4]:
new_papers = get_new_papers()
# add manual references
add_paper_refs = []
new_papers.extend([get_paper_from_identifier(k) for k in add_paper_refs])

def robust_call(fn, value, *args, **kwargs):
    try:
        return fn(value, *args, **kwargs)
    except Exception:
        return value

candidates = []
for paperk in new_papers:
    # Check author list with their initials
    normed_author_list = [robust_call(mpia.get_initials, k) for k in paperk['authors']]
    hl_authors = highlight_authors_in_list(normed_author_list, mpia_authors, verbose=True)
    matches = [(hl, orig) for hl, orig in zip(hl_authors, paperk['authors']) if 'mark' in hl]
    paperk['authors'] = hl_authors
    if matches:
        # only select paper if an author matched our list
        candidates.append(paperk)
print("""Arxiv has {0:,d} new papers today""".format(len(new_papers)))        
print("""          {0:,d} with possible author matches""".format(len(candidates)))

C. Gapp  ->  C. Gapp  |  ['C. Gapp']
H. Beuther  ->  H. Beuther  |  ['H. Beuther']
T. Henning  ->  T. Henning  |  ['T. Henning']
X. Zhang  ->  X. Zhang  |  ['X. Zhang']
J. Li  ->  J. Li  |  ['J. Li']


Arxiv has 82 new papers today
          4 with possible author matches


# Parse sources and generate relevant outputs

From the candidates, we do the following steps:
* get their tarball from ArXiv (and extract data)
* find the main .tex file: find one with \documentclass{...} (sometimes it's non trivial)
* Check affiliations with :func:`validation`, which uses :func:`mpia.affiliation_verifications`
* If passing the affiliations: we parse the .tex source
   * inject sub-documents into the main (flatten the main document)
   * parse structure, extract information (title, abstract, authors, figures...)
   * handles `\graphicspath` if provided
* Generate the .md document.

In [5]:
documents = []
failed = []
for paper in tqdm(candidates):
    # debug crap
    paper['identifier'] = paper['identifier'].lower().replace('arxiv:', '').replace(r'\n', '').strip()
    paper_id = paper['identifier']
    
    folder = f'tmp_{paper_id}'

    try:
        if not os.path.isdir(folder):
            folder = retrieve_document_source(f"{paper_id}", f'tmp_{paper_id}')
        
        try:
            doc = latex.LatexDocument(folder, validation=validation)    
        except AffiliationError as affilerror:
            msg = f"ArXiv:{paper_id:s} is not an MPIA paper... " + str(affilerror)
            failed.append((paper, "affiliation error: " + str(affilerror) ))
            continue
        
        # Hack because sometimes author parsing does not work well
        if (len(doc.authors) != len(paper['authors'])):
            doc._authors = paper['authors']
        else:
            # highlight authors (FIXME: doc.highlight_authors)
            # done on arxiv paper already
            doc._authors = highlight_authors_in_list(
                [get_initials(k) for k in doc.authors], 
                mpia_authors, verbose=True)
        if (doc.abstract) in (None, ''):
            doc._abstract = paper['abstract']
            
        doc.comment = (get_markdown_badge(paper_id) + 
                       "<mark>Appeared on: " + paper['date'] + "</mark> - ")
        if paper['comments']:
            doc.comment += " _" + paper['comments'] + "_"
        
        full_md = doc.generate_markdown_text()
        
        full_md += get_markdown_qrcode(paper_id)
        
        # replace citations
        try:
            bibdata = latex_bib.LatexBib.from_doc(doc)
            full_md = latex_bib.replace_citations(full_md, bibdata)
        except Exception as e:
            print("Issues with the citations")
            print(e)
        
        documents.append((paper_id, full_md))
    except Exception as e:
        warnings.warn(latex.LatexWarning(f"{paper_id:s} did not run properly\n" +
                                         str(e)
                                        ))
        failed.append((paper, "latex error " + str(e)))

  0%|          | 0/4 [00:00<?, ?it/s]

Retrieving document from  https://arxiv.org/e-print/2501.02081


extracting tarball to tmp_2501.02081...

 done.


C. Gapp  ->  C. Gapp  |  ['C. Gapp']


Found 68 bibliographic references in tmp_2501.02081/main.bbl.
Retrieving document from  https://arxiv.org/e-print/2501.02085


extracting tarball to tmp_2501.02085...

 done.


Found 65 bibliographic references in tmp_2501.02085/new_paper_revised_v2_arxiv.bbl.
Retrieving document from  https://arxiv.org/e-print/2501.02206


extracting tarball to tmp_2501.02206...

 done.
Retrieving document from  https://arxiv.org/e-print/2501.02720


extracting tarball to tmp_2501.02720...

 done.


### Export the logs

Throughout, we also keep track of the logs per paper. see `logs-{today date}.md` 

In [6]:
import datetime
today = str(datetime.date.today())
logfile = f"_build/html/logs/log-{today}.md"


with open(logfile, 'w') as logs:
    # Success
    logs.write(f'# Arxiv on Deck 2: Logs - {today}\n\n')
    logs.write("""* Arxiv had {0:,d} new papers\n""".format(len(new_papers)))
    logs.write("""    * {0:,d} with possible author matches\n\n""".format(len(candidates)))
    logs.write("## Sucessful papers\n\n")
    display(Markdown("## Successful papers"))
    success = [k[0] for k in documents]
    for candid in candidates:
        if candid['identifier'].split(':')[-1] in success:
            display(candid)
            logs.write(candid.generate_markdown_text() + '\n\n')

    ## failed
    logs.write("## Failed papers\n\n")
    display(Markdown("## Failed papers"))
    failed = sorted(failed, key=lambda x: x[1])
    current_reason = ""
    for paper, reason in failed:
        if 'affiliation' in reason:
            color = 'green'
        else:
            color = 'red'
        data = Markdown(
                paper.generate_markdown_text() + 
                f'\n|<p style="color:{color:s}"> **ERROR** </p>| <p style="color:{color:s}">{reason:s}</p> |'
               )
        if reason != current_reason:
            logs.write(f'### {reason:s} \n\n')
            current_reason = reason
        logs.write(data.data + '\n\n')
        
        # only display here the important errors (all in logs)
        # if color in ('red',):
        display(data)

## Successful papers


|||
|---:|:---|
| [![arXiv](https://img.shields.io/badge/arXiv-2501.02081-b31b1b.svg)](https://arxiv.org/abs/2501.02081) | **Statistical trends in JWST transiting exoplanet atmospheres**  |
|| G. Fu, et al. -- incl., <mark>C. Gapp</mark> |
|*Appeared on*| *2025-01-07*|
|*Comments*| *Accepted to ApJ, JWST keeps on delivering!*|
|**Abstract**|            Our brains are hardwired for pattern recognition as correlations are useful for predicting and understanding nature. As more exoplanet atmospheres are being characterized with JWST, we are starting to unveil their properties on a population level. Here we present a framework for comparing exoplanet transmission spectroscopy from 3 to 5$\mu$m with four bands: L (2.9 - 3.7$\mu$m), SO$_2$ (3.95 - 4.1$\mu$m), CO$_2$ (4.25 - 4.4$\mu$m) and CO (4.5 - 4.9$\mu$m). Together, the four bands cover the major carbon, oxygen, nitrogen, and sulfur-bearing molecules including H$_2$O, CH$_4$, NH$_3$, H$_2$S, SO$_2$, CO$_2$, and CO. Among the eight high-precision gas giant exoplanet planet spectra we collected, we found strong correlations between the SO$_2$-L index and planet mass (r=-0.41$\pm$0.09) and temperature (r=-0.64$\pm$0.08), indicating SO$_2$ preferably exists (SO$_2$-L$>$-0.5) among low mass ($\sim<$0.3M$_J$) and cooler ($\sim<$1200K) targets. We also observe strong temperature dependency for both CO$_2$-L and CO-L indices. Under equilibrium chemistry and isothermal thermal structure assumptions, we find that the planet sample favors super-solar metallicity and low C/O ratio ($<$0.7). In addition, the presence of a mass-metallicity correlation is favored over uniform metallicity with the eight planets. We further introduce the SO$_2$-L versus CO$_2$-L diagram alike the color-magnitude diagram for stars and brown dwarfs. All reported trends here will be testable and be further quantified with existing and future JWST observations within the next few years.         |


|||
|---:|:---|
| [![arXiv](https://img.shields.io/badge/arXiv-2501.02085-b31b1b.svg)](https://arxiv.org/abs/2501.02085) | **JOYS: The [D/H] abundance derived from protostellar outflows across the Galactic disk measured with JWST**  |
|| L. Francis, et al. -- incl., <mark>H. Beuther</mark>, <mark>T. Henning</mark> |
|*Appeared on*| *2025-01-07*|
|*Comments*| *30 Pages, 20 Figures. Accepted for publication in Astronomy and Astrophysics*|
|**Abstract**|            The total deuterium abundance [D/H] in the universe is set by just two processes: the creation of deuterium in Big Bang Nucleosynthesis at an abundance of [D/H]$=2.58\pm0.13\times10^{-5}$, and its destruction within stellar interiors. Measurements of the total [D/H] abundance can potentially provide a probe of Galactic chemical evolution, however, most measurements of [D/H] are only sensitive to the gas-phase deuterium, and the amount of deuterium sequestered in carbonaceous dust grains is debated. With the launch of JWST, it is now possible to measure the gas-phase [D/H] at unprecedented sensitivity and distances through observation of mid-IR lines of H$_2$ and HD. We employ data from the JWST Observations of Young protoStars (JOYS) program to measure the gas-phase [D/H] abundance with a rotation diagram analysis towards 5 nearby low-mass and 5 distant high-mass protostellar outflows. The gas-phase [D/H] varies between low-mass sources by up to a factor of $\sim4$, despite these sources likely having formed in a region of the Galactic disk that would be expected to have nearly constant total [D/H]. Most measurements of gas-phase [D/H] from our work or previous studies produce [D/H] $\lesssim 1.0\times10^{-5}$, a factor of $2-4$ lower than found from local UV absorption lines and as expected from Galactic chemical evolution models. The variations in [D/H] between our low-mass sources and the low [D/H] with respect to Galactic chemical evolution models suggest that our observations are not sensitive to the total [D/H]. Significant depletion of deuterium onto carbonaceous dust grains is a possible explanation, and tentative evidence of enhanced [D/H] towards shock positions with higher gas-phase Fe abundance is seen in the HH 211 outflow. Deeper observations of HD and H$_2$ in shocked environments and modelling of dust-grain destruction are warranted to test for the effects of depletion.         |

## Failed papers


|||
|---:|:---|
| [![arXiv](https://img.shields.io/badge/arXiv-2501.02206-b31b1b.svg)](https://arxiv.org/abs/2501.02206) | **A Comprehensive Catalog of Emission-line Nebulae, Star Clusters, and Supergiants in M31 from the LAMOST Spectroscopic Survey**  |
|| P. Chen, et al. -- incl., <mark>X. Zhang</mark> |
|*Appeared on*| *2025-01-07*|
|*Comments*| *17 pages, 9 figures; Accepted for publication in AJ*|
|**Abstract**|            Spectroscopic observations of various tracers in nearby galaxies, such as Andromeda (M31), play a crucial role in identifying and classifying individual stellar populations and nebular objects, thereby enhancing our understanding of galactic composition, environment, and dynamics as well as stellar evolution. While the LAMOST (Large Sky Area Multi-Object Fibre Spectroscopic Telescope) survey of M31 has produced extensive datasets, a comprehensive catalog of emission-line nebulae, star clusters, and supergiants is yet to be completed. In this paper, we present a final catalog of 384 emission-line nebulae, 380 star clusters, and 375 supergiants and candidates in M31, as carefully selected and identified from the LAMOST spectroscopic database. These objects were classified using a random forest algorithm, followed by thorough visual examinations of their spectral characteristics as well as morphologies revealed by archive images. For emission-line nebulae, we measured radial velocities and relative fluxes of emission lines, enabling further classification of planetary nebulae and HII regions. Additionally, we identified 245 emission-line nebulae in M33. This work lays the data foundation for the study of M31, and offers valuable tracers to investigate M31's structure and evolution.         |
|<p style="color:green"> **ERROR** </p>| <p style="color:green">affiliation error: mpia.affiliation_verifications: 'Heidelberg' keyword not found.</p> |


|||
|---:|:---|
| [![arXiv](https://img.shields.io/badge/arXiv-2501.02720-b31b1b.svg)](https://arxiv.org/abs/2501.02720) | **Contribution of Unresolved Sources to Diffuse Gamma-Ray Emission from the Galactic Plane**  |
|| J. He, et al. -- incl., <mark>J. Li</mark> |
|*Appeared on*| *2025-01-07*|
|*Comments*| *9 pages, 5 figures, accepted for publication in ApJ*|
|**Abstract**|            The diffuse gamma-ray emission from the Milky Way serves as a crucial probe for understanding the propagation and interactions of cosmic rays within our galaxy. The Galactic diffuse gamma-ray emission between 10 TeV and 1 PeV has been recently measured by the square kilometer array (KM2A) of the Large High Altitude Air Shower Observatory (LHAASO). The flux is higher than predicted for cosmic rays interacting with the interstellar medium. In this work, we utilize a non-parametric method to derive the source count distribution using the published first LHAASO source catalog. Based on this distribution, we calculate the contribution of unresolved sources to the diffuse emission measured by KM2A. When comparing our results to the measured diffuse gamma-ray emission, we demonstrate that for the outer Galactic region, the contributions from unresolved sources and those predicted by models are roughly consistent with experimental observations within the uncertainty. However, for the inner Galactic region, additional components are required to account for the observed data.         |
|<p style="color:green"> **ERROR** </p>| <p style="color:green">affiliation error: mpia.affiliation_verifications: 'Heidelberg' keyword not found.</p> |

## Export documents

We now write the .md files and export relevant images

In [7]:
def export_markdown_summary(md: str, md_fname:str, directory: str):
    """Export MD document and associated relevant images"""
    import os
    import shutil
    import re

    if (os.path.exists(directory) and not os.path.isdir(directory)):
        raise RuntimeError(f"a non-directory file exists with name {directory:s}")

    if (not os.path.exists(directory)):
        print(f"creating directory {directory:s}")
        os.mkdir(directory)

    fig_fnames = (re.compile(r'\[Fig.*\]\((.*)\)').findall(md) + 
                  re.compile(r'\<img src="([^>\s]*)"[^>]*/>').findall(md))
    print("found figures", fig_fnames)
    for fname in fig_fnames:
        if 'http' in fname:
            # No need to copy online figures
            continue
        if not os.path.exists(fname):
            print("file not found", fname)
            continue
        print("copying ", fname, "to", directory)
        destdir = os.path.join(directory, os.path.dirname(fname))
        destfname = os.path.join(destdir, os.path.basename(fname))
        try:
            os.makedirs(destdir)
        except FileExistsError:
            pass
        shutil.copy(fname, destfname)
    with open(os.path.join(directory, md_fname), 'w') as fout:
        fout.write(md)
    print("exported in ", os.path.join(directory, md_fname))
    [print("    + " + os.path.join(directory,fk)) for fk in fig_fnames]

In [8]:
for paper_id, md in documents:
    export_markdown_summary(md, f"{paper_id:s}.md", '_build/html/')

found figures ['tmp_2501.02081/./SO2_models.png', 'tmp_2501.02081/./fig1.png', 'tmp_2501.02081/./fig3.png']
copying  tmp_2501.02081/./SO2_models.png to _build/html/
copying  tmp_2501.02081/./fig1.png to _build/html/
copying  tmp_2501.02081/./fig3.png to _build/html/
exported in  _build/html/2501.02081.md
    + _build/html/tmp_2501.02081/./SO2_models.png
    + _build/html/tmp_2501.02081/./fig1.png
    + _build/html/tmp_2501.02081/./fig3.png
found figures ['tmp_2501.02085/./HH211_moment0.png', 'tmp_2501.02085/./IRAS23385_SourceN_extinction_uncertainty.png', 'tmp_2501.02085/./DtoH_scatter_twoclumn.png']
copying  tmp_2501.02085/./HH211_moment0.png to _build/html/
copying  tmp_2501.02085/./IRAS23385_SourceN_extinction_uncertainty.png to _build/html/
copying  tmp_2501.02085/./DtoH_scatter_twoclumn.png to _build/html/
exported in  _build/html/2501.02085.md
    + _build/html/tmp_2501.02085/./HH211_moment0.png
    + _build/html/tmp_2501.02085/./IRAS23385_SourceN_extinction_uncertainty.png
    +

## Display the papers

Not necessary but allows for a quick check.

In [9]:
[display(Markdown(k[1])) for k in documents];

<div class="macros" style="visibility:hidden;">
$\newcommand{\ensuremath}{}$
$\newcommand{\xspace}{}$
$\newcommand{\object}[1]{\texttt{#1}}$
$\newcommand{\farcs}{{.}''}$
$\newcommand{\farcm}{{.}'}$
$\newcommand{\arcsec}{''}$
$\newcommand{\arcmin}{'}$
$\newcommand{\ion}[2]{#1#2}$
$\newcommand{\textsc}[1]{\textrm{#1}}$
$\newcommand{\hl}[1]{\textrm{#1}}$
$\newcommand{\footnote}[1]{}$</div>



<div id="title">

# Statistical trends in JWST transiting exoplanet atmospheres

</div>
<div id="comments">

[![arXiv](https://img.shields.io/badge/arXiv-2501.02081-b31b1b.svg)](https://arxiv.org/abs/2501.02081)<mark>Appeared on: 2025-01-07</mark> -  _Accepted to ApJ, JWST keeps on delivering!_

</div>
<div id="authors">

G. Fu, et al. -- incl., <mark>C. Gapp</mark>

</div>
<div id="abstract">

**Abstract:** Our brains are hardwired for pattern recognition as correlations are useful for predicting and understanding nature. As more exoplanet atmospheres are being characterized with JWST, we are starting to unveil their properties on a population level. Here we present a framework for comparing exoplanet transmission spectroscopy from 3 to 5 $\mu$ m with four bands: L (2.9 - 3.7 $\mu$ m), $SO_2$ (3.95 - 4.1 $\mu$ m), $CO_2$ (4.25 - 4.4 $\mu$ m) and CO (4.5 - 4.9 $\mu$ m). Together, the four bands cover the major carbon, oxygen, nitrogen, and sulfur-bearing molecules including $H_2$ O, $CH_4$ , $NH_3$ , $H_2$ S, $SO_2$ , $CO_2$ , and CO. Among the eight high-precision gas giant exoplanet planet spectra we collected, we found strong correlations between the $SO_2$ -L index and planet mass (r=-0.41 $\pm$ 0.09) and temperature (r=-0.64 $\pm$ 0.08), indicating $SO_2$ preferably exists ($SO_2$ -L $>$ -0.5) among low mass ( $\sim<$ 0.$3M_J$ ) and cooler ( $\sim<$ 1200K) targets. We also observe strong temperature dependency for both $CO_2$ -L and CO-L indices. Under equilibrium chemistry and isothermal thermal structure assumptions, we find that the planet sample favors super-solar metallicity and low C/O ratio ( $<$ 0.7). In addition, the presence of a mass-metallicity correlation is favored over uniform metallicity with the eight planets. We further introduce the $SO_2$ -L versus $CO_2$ -L diagram alike the color-magnitude diagram for stars and brown dwarfs. All reported trends here will be testable and be further quantified with existing and future JWST observations within the next few years.

</div>

<div id="div_fig1">

<img src="tmp_2501.02081/./SO2_models.png" alt="Fig4" width="100%"/>

**Figure 4. -** Volume mixing ratio of $SO_2$ in the atmosphere as a function of metallicity for the model grid at 1100K (Top left). Average $SO_2$ VMR across 10 to 0.01 mbar pressure levels versus temperature for five metallicity values (Top right). The model grid-predicted $SO_2$-L values versus temperature for the five metallicity values are shown in the bottom left, with the metallicities converged to planet masses (shown in color) assuming the solar system mass-metallicity relation. The bottom right shows the $SO_2$-L model values versus planet mass at constant temperatures, again with the model metallicities converted to mass via the solar system mass-metallicity relation. (*SO2_models*)

</div>
<div id="div_fig2">

<img src="tmp_2501.02081/./fig1.png" alt="Fig2" width="100%"/>

**Figure 2. -** Transmission spectra included in this study. All spectra are normalized by their respective atmospheric scale heights (H) and plotted with a vertical offset. All spectra cover the 2.7 to 5 $\mu$m wavelength range. We picked four bands: L (2.9 - 3.7$\mu$m), $SO_2$(3.95 - 4.1$\mu$m), $CO_2$(4.25 - 4.4$\mu$m) and CO (4.5 - 4.9$\mu$m) which are color shaded with blue, orange, red and grey respectively. These four bands cover major oxygen, carbon, and sulfur-bearing molecules such as $H_2$O, $CH_4$, $NH_3$, $H_2$S, $SO_2$, $CO_2$ and CO. (*fig1*)

</div>
<div id="div_fig3">

<img src="tmp_2501.02081/./fig3.png" alt="Fig5" width="100%"/>

**Figure 5. -** $CO_2$-L versus equilibrium temperature (upper left), planet mass (upper right), host star effective temperature (lower left), and planet surface gravity (lower right). There are no clear linear trends, which is expected as $CO_2$ is sensitive to temperature non-monotonically. We overplotted two sets of forward \texttt{PLATON} models with different Z and C/O in the top left panel. (*fig3*)

</div><div id="qrcode"><img src=https://api.qrserver.com/v1/create-qr-code/?size=100x100&data="https://arxiv.org/abs/2501.02081"></div>

<div class="macros" style="visibility:hidden;">
$\newcommand{\ensuremath}{}$
$\newcommand{\xspace}{}$
$\newcommand{\object}[1]{\texttt{#1}}$
$\newcommand{\farcs}{{.}''}$
$\newcommand{\farcm}{{.}'}$
$\newcommand{\arcsec}{''}$
$\newcommand{\arcmin}{'}$
$\newcommand{\ion}[2]{#1#2}$
$\newcommand{\textsc}[1]{\textrm{#1}}$
$\newcommand{\hl}[1]{\textrm{#1}}$
$\newcommand{\footnote}[1]{}$
$\newcommand{\lf}$
$\newcommand{\rgc}{R_\mathrm{GC}}$</div>



<div id="title">

# JOYS: The [D/H] abundance derived from protostellar outflows across the Galactic disk measured with JWST

</div>
<div id="comments">

[![arXiv](https://img.shields.io/badge/arXiv-2501.02085-b31b1b.svg)](https://arxiv.org/abs/2501.02085)<mark>Appeared on: 2025-01-07</mark> -  _30 Pages, 20 Figures. Accepted for publication in Astronomy and Astrophysics_

</div>
<div id="authors">

L. Francis, et al. -- incl., <mark>H. Beuther</mark>, <mark>T. Henning</mark>

</div>
<div id="abstract">

**Abstract:** The total deuterium abundance [ D/H ] in the universe is set by just two processes: the creation of deuterium in Big Bang Nucleosynthesis at an abundance of [ D/H ] $=2.58\pm0.13\times10^{-5}$ , and its destruction within stellar interiors (astration). Measurements of variations in the total [ D/H ] abundance can thus potentially provide a probe of Galactic chemical evolution. However, most observational measurements of [ D/H ] are only sensitive to the gas-phase deuterium, and the amount of deuterium sequestered in dust grains is debated. With the launch of the James Webb Space Telescope (JWST), it is now possible to measure the gas-phase [ D/H ] at unprecedented sensitivity and distances through observation of mid-IR lines of $H_2$ and HD. Comparisons of gas-phase [ D/H ] with the constraints on the total [ D/H ] from the primordial abundance and Galactic chemical evolution models can provide insight into the degree of Deuterium lock-up in grains and the star formation history of our Galaxy. We use data from the JWST Observations of Young protoStars (JOYS) program of 5 nearby and resolved low-mass protostellar outflows and 5 distant high-mass protostellar outflows taken with the JWST Mid Infrafred Instrument (MIRI) Medium Resolution Spectrometer (MRS) to measure gas-phase [ D/H ] via $H_2$ and HD lines, assuming the gas is fully molecular. We extract spectra from various locations in the outflows. Using a rotational diagram analysis covering lines of $H_2$ and HD with similar excitation energies, we derive the column density of HD and $H_2$ or their upper limits. We then calculate the gas-phase [ D/H ] from the column density results, and additionally apply a correction factor for the effect of chemical conversion of HD to atomic D and non-LTE excitation on the HD abundance in the shocks. To investigate the spatial distribution of the bulk gas and species refractory species associated with the dust grains, we also construct integrated line intensity maps of $H_2$ , HD, [ Fe II ] , [ Fe I ] , and [ S I ] lines. A comparison of gas-phase [ D/H ] between our low-mass sources shows variations of up to a factor of $\sim4$ , despite these sources likely having formed in nearly the same region of the Galactic disk that would be expected to have nearly constant total [ D/H ] . Most measurements of gas-phase [ D/H ] from our work or previous studies produce [ D/H ] $\lesssim 1.0\times10^{-5}$ , a factor of $2-4$ lower than found from local UV absorption lines and as expected from Galactic chemical evolution models. In the integrated line intensity maps, the morphology of the HD R(6) line emission is strongly correlated with the $H_2$ S(7), [ S I ] , and [ Fe I ] lines which mostly trace high velocity jet knots and bright bow-shocks. In our extracted spectra along the outflows, there is similarly a strong correlation between the $H_2$ and HD column density and the [ S I ] and [ Fe I ] line flux, however, no correlation is seen between [ D/H ] and the [ S I ] or [ Fe I ] line flux. The variations in [ D/H ] between our low-mass sources and the low [ D/H ] with respect to Galactic chemical evolution models suggest that our observations are not sensitive to the total [ D/H ] . Significant depletion of deuterium onto carbonaceous dust grains is a possible explanation, and tentative evidence of enhanced [ D/H ] towards positions with higher gas-phase Fe abundance is seen in the HH 211 outflow. Deeper observations of HD and $H_2$ across a wider range of shock conditions and modelling of the carbonaceous dust-grain destruction and shock conditions are warranted to test for the effects of depletion.

</div>

<div id="div_fig1">

<img src="tmp_2501.02085/./HH211_moment0.png" alt="Fig6" width="100%"/>

**Figure 6. -** Integrated line intensity maps for HH 211  of various lines and the continuum at 17 $\mu$m shown with a logarithmic stretch. The maps have been smoothed to a common resolution of 1$\arcsec$, shown by the white circle in the bottom-left. Apertures used for spectral extraction are shown by the green circles. An index for each aperture is provided in the top-left panel. The coordinates of the aperture centers can be found in Table \ref{tab:apertures} of App. \ref{sec:app_apertures}. (*fig:moment0_1*)

</div>
<div id="div_fig2">

<img src="tmp_2501.02085/./IRAS23385_SourceN_extinction_uncertainty.png" alt="Fig5" width="100%"/>

**Figure 5. -** Example of the effect of extinction uncertainty on the derived column densities and [D/H] for IRAS 23385+6053 ap 1. _ Upper panel_: Effect of varying extinction on derived column density for the warm $H_2$ component and HD. _ Lower panel_: Effect of varying extinction on the derived $\mathrm{[D/H]}$. The uncertainty in $A_K$ from the single component fit to S(1)-S(4) transitions of $H_2$ is shown as the green shaded region, while the equivalent uncertainty in $\mathrm{[D/H]}$ is shown as the blue shaded region. (*fig:extinction_uncertainty*)

</div>
<div id="div_fig3">

<img src="tmp_2501.02085/./DtoH_scatter_twoclumn.png" alt="Fig10" width="100%"/>

**Figure 10. -** Comparison of [D/H] abundance (Table \ref{tab:DtoH}) in this work. A correction factor of 2.54 has been applied to our [D/H] measurements (see text). Orange symbols show [D/H] in apertures for high-mass sources, while green symbols show [D/H] in apertures from low-mass sources. The dotted grey lines separate different sources. The blue shaded region shows the primordial total [D/H] abundance of  ([Cyburt, et. al 2016]()) . [D/H] upper limits above the primordial abundance are omitted. (*fig:DtoH_scatter*)

</div><div id="qrcode"><img src=https://api.qrserver.com/v1/create-qr-code/?size=100x100&data="https://arxiv.org/abs/2501.02085"></div>

# Create HTML index

In [10]:
from datetime import datetime, timedelta, timezone
from glob import glob
import os

files = glob('_build/html/*.md')
days = 7
now = datetime.today()
res = []
for fk in files:
    stat_result = os.stat(fk).st_ctime
    modified = datetime.fromtimestamp(stat_result, tz=timezone.utc).replace(tzinfo=None)
    delta = now.today() - modified
    if delta <= timedelta(days=days):
        res.append((delta.seconds, fk))
res = [k[1] for k in reversed(sorted(res, key=lambda x:x[1]))]
npub = len(res)
print(len(res), f" publications files modified in the last {days:d} days.")
# [ print('\t', k) for k in res ];

295  publications files modified in the last 7 days.


In [11]:
import datetime
from glob import glob

def get_last_n_days(lst, days=1):
    """ Get the documents from the last n days """
    sorted_lst = sorted(lst, key=lambda x: x[1], reverse=True)
    for fname, date in sorted_lst:
        if date >= str(datetime.date.today() - datetime.timedelta(days=days)):
            yield fname

def extract_appearance_dates(lst_file):
    dates = []

    def get_date(line):
        return line\
            .split('Appeared on:')[-1]\
            .split('</mark>')[0].strip()

    for fname in lst:
        with open(fname, 'r') as f:
            found_date = False
            for line in f:
                if not found_date:
                    if "Appeared on" in line:
                        found_date = True
                        dates.append((fname, get_date(line)))
                else:
                    break
    return dates

from glob import glob
lst = glob('_build/html/*md')
days = 7
dates = extract_appearance_dates(lst)
res = list(get_last_n_days(dates, days))
npub = len(res)
print(len(res), f" publications in the last {days:d} days.")

4  publications in the last 7 days.


In [12]:
def create_carousel(npub=4):
    """ Generate the HTML code for a carousel with `npub` slides """
    carousel = ["""  <div class="carousel" """,
                """       data-flickity='{ "autoPlay": 10000, "adaptiveHeight": true, "resize": true, "wrapAround": true, "pauseAutoPlayOnHover": true, "groupCells": 1 }' id="asyncTypeset">"""
                ]
    
    item_str = """    <div class="carousel-cell"> <div id="slide{k}" class="md_view">Content {k}</div> </div>"""
    for k in range(1, npub + 1):
        carousel.append(item_str.format(k=k))
    carousel.append("  </div>")
    return '\n'.join(carousel)

def create_grid(npub=4):
    """ Generate the HTML code for a flat grid with `npub` slides """
    grid = ["""  <div class="grid"> """,
                ]
    
    item_str = """    <div class="grid-item"> <div id="slide{k}" class="md_view">Content {k}</div> </div>"""
    for k in range(1, npub + 1):
        grid.append(item_str.format(k=k))
    grid.append("  </div>")
    return '\n'.join(grid)

In [13]:
carousel = create_carousel(npub)
docs = ', '.join(['"{0:s}"'.format(k.split('/')[-1]) for k in res])
slides = ', '.join([f'"slide{k}"' for k in range(1, npub + 1)])

with open("daily_template.html", "r") as tpl:
    page = tpl.read()
    page = page.replace("{%-- carousel:s --%}", carousel)\
               .replace("{%-- suptitle:s --%}",  "7-day archives" )\
               .replace("{%-- docs:s --%}", docs)\
               .replace("{%-- slides:s --%}", slides)
    
with open("_build/html/index_7days.html", 'w') as fout:
    fout.write(page)

In [14]:
# redo for today
days = 1
res = list(get_last_n_days(dates, days))
npub = len(res)
print(len(res), f" publications in the last day.")

carousel = create_carousel(npub)
docs = ', '.join(['"{0:s}"'.format(k.split('/')[-1]) for k in res])
slides = ', '.join([f'"slide{k}"' for k in range(1, npub + 1)])

with open("daily_template.html", "r") as tpl:
    page = tpl.read()
    page = page.replace("{%-- carousel:s --%}", carousel)\
               .replace("{%-- suptitle:s --%}",  "Daily" )\
               .replace("{%-- docs:s --%}", docs)\
               .replace("{%-- slides:s --%}", slides)
    
# print(carousel, docs, slides)
# print(page)
with open("_build/html/index_daily.html", 'w') as fout:
    fout.write(page)

2  publications in the last day.


In [15]:
# Create the flat grid of the last N papers (fixed number regardless of dates)
from itertools import islice 

npub = 6
res = [k[0] for k in (islice(reversed(sorted(dates, key=lambda x: x[1])), 6))]
print(len(res), f" {npub} publications selected.")

grid = create_grid(npub)
docs = ', '.join(['"{0:s}"'.format(k.split('/')[-1]) for k in res])
slides = ', '.join([f'"slide{k}"' for k in range(1, npub + 1)])

with open("grid_template.html", "r") as tpl:
    page = tpl.read()
    page = page.replace("{%-- grid-content:s --%}", grid)\
               .replace("{%-- suptitle:s --%}",  f"Last {npub:,d} papers" )\
               .replace("{%-- docs:s --%}", docs)\
               .replace("{%-- slides:s --%}", slides)
    
# print(grid, docs, slides)
# print(page)
with open("_build/html/index_npub_grid.html", 'w') as fout:
    fout.write(page)

6  6 publications selected.
