# MPIA Arxiv on Deck 2

Contains the steps to produce the paper extractions.

In [1]:
# Imports
import os
from IPython.display import Markdown, display
from tqdm.notebook import tqdm
import warnings
from PIL import Image 

# requires arxiv_on_deck_2

from arxiv_on_deck_2.arxiv2 import (get_new_papers, 
                                    get_paper_from_identifier,
                                    retrieve_document_source, 
                                    get_markdown_badge)
from arxiv_on_deck_2 import (latex,
                             latex_bib,
                             mpia,
                             highlight_authors_in_list)

# Paper figures can be huge: raise the pixel-count limit PIL applies to images
Image.MAX_IMAGE_PIXELS = 1_000_000_000

In [2]:
# Some useful definitions.

class AffiliationWarning(UserWarning):
    """Warning category for affiliation checks (registered with an 'always' filter in this cell)."""

class AffiliationError(RuntimeError):
    """Raised by :func:`validation` when the affiliation check of a source fails."""

def validation(source: str):
    """Check the affiliations found in a source before it gets parsed.

    Meant to be passed as the ``validation`` hook of
    :class:`latex.LatexDocument`, so that the check happens before parsing
    TeX code.

    :param source: source to check (forwarded unchanged to
                   :func:`mpia.affiliation_verifications`)
    :raises AffiliationError: if :func:`mpia.affiliation_verifications` does
                              not return ``True``; the returned value is
                              included in the error message as the reason.
    """
    check = mpia.affiliation_verifications(source, verbose=True)
    if check is not True:
        # f-string instead of `+`: a non-string failure value must not turn
        # the affiliation error into an unrelated TypeError.
        raise AffiliationError(f"mpia.affiliation_verifications: {check}")

        
# 'always' action: report every AffiliationWarning occurrence instead of
# only the first one per code location.
warnings.simplefilter('always', AffiliationWarning)


def get_markdown_qrcode(paper_id: str):
    """ Generate a qrcode to the arxiv page using qrserver.com

    :param paper_id: Arxiv paper identifier (e.g. ``2406.09145``)
    :returns: HTML snippet (embeddable in markdown) showing the QR code
    """
    url = r"https://api.qrserver.com/v1/create-qr-code/?size=100x100&data="
    # The complete image URL (API endpoint + target page) must sit inside ONE
    # pair of quotes. Quoting only the target page leaves `src` unquoted and
    # makes the literal quote characters part of the encoded QR payload.
    txt = f'<img src="{url}https://arxiv.org/abs/{paper_id}">'
    txt = '<div id="qrcode">' + txt + '</div>'
    return txt

## get list of arxiv paper candidates

We use the MPIA staff list ("Mitarbeiter") webpage from mpia.de to get the author names.
We then get all new papers from arXiv and match their authors against this list.

In [3]:
# deal with the author list and edge cases of people that cannot be consistent on their name  

def filter_non_scientists(name: str) -> bool:
    """ Loose filter on expected authorships

    Rejects names containing one of the listed family names (IT,
    administration, technical staff). The test is a plain substring match.

    :param name: name to test
    :returns: False if name is not a scientist, True otherwise
    """
    excluded = ('Wolf', 'Licht', 'Binroth', 'Witzel', 'Jordan',
                'Zähringer', 'Scheerer', 'Hoffmann', 'Düe',
                'Hellmich', 'Enkler-Scharpegge', 'Witte-Nguy',
                'Dehen', 'Beckmann', 'Jager', 'Jäger')

    return not any(fragment in name for fragment in excluded)

def add_author_to_list(author_list: list) -> list:
    """ Append the hard-coded extra authors that are missing from the list

    The list is modified in place and also returned.

    :param author_list: list of authors
    :returns: updated list of authors (same object as `author_list`)
    """
    extra_authors = ('T. Henning',)

    for name in extra_authors:
        if name in author_list:
            # already present: nothing to do
            continue
        author_list.append(name)
    return author_list

# Build the list of MPIA author names:
#  1. take the name (second field) of every entry of the MPIA website list
#     (mpia.get_mpia_mitarbeiter_list() already does some filtering),
#  2. drop the non-scientists,
#  3. add the authors missing because their MPIA name and their author name
#     on papers are inconsistent.
mpia_authors = add_author_to_list(
    [entry[1]
     for entry in mpia.get_mpia_mitarbeiter_list()
     if filter_non_scientists(entry[1])]
)

In [4]:
# Fetch the new arXiv papers
new_papers = get_new_papers()
# add manual references
# (identifiers listed here are fetched individually and appended to the new papers)
add_paper_refs = []
new_papers.extend([get_paper_from_identifier(k) for k in add_paper_refs])

# Keep only the papers with at least one author matching the MPIA list
candidates = []
for paperk in new_papers:
    # Check author list with their initials
    normed_author_list = [mpia.get_initials(k) for k in paperk['authors']]
    hl_authors = highlight_authors_in_list(normed_author_list, mpia_authors, verbose=True)
    # An entry counts as a match when its highlighted form contains 'mark'
    matches = [(hl, orig) for hl, orig in zip(hl_authors, paperk['authors']) if 'mark' in hl]
    # The paper's author list is replaced by the highlighted one, whether or not it matched
    paperk['authors'] = hl_authors
    if matches:
        # only select paper if an author matched our list
        candidates.append(paperk)
print("""Arxiv has {0:,d} new papers today""".format(len(new_papers)))        
print("""          {0:,d} with possible author matches""".format(len(candidates)))

K. Lee  ->  K. Lee  |  ['K. Lee']
J. Li  ->  J. Li  |  ['J. Li']
J. Li  ->  J. Li  |  ['J. Li']
H. Beuther  ->  H. Beuther  |  ['H. Beuther']
D. Semenov  ->  D. Semenov  |  ['D. Semenov']
S. Li  ->  S. Li  |  ['S. Li']
J. Müller  ->  J. Müller-Horn  |  ['J. Müller']


Arxiv has 68 new papers today
          5 with possible author matches


# Parse sources and generate relevant outputs

From the candidates, we do the following steps:
* get their tarball from ArXiv (and extract data)
* find the main .tex file: find one with \documentclass{...} (sometimes it's non trivial)
* Check affiliations with :func:`validation`, which uses :func:`mpia.affiliation_verifications`
* If passing the affiliations: we parse the .tex source
   * inject sub-documents into the main (flatten the main document)
   * parse structure, extract information (title, abstract, authors, figures...)
   * handles `\graphicspath` if provided
* Generate the .md document.

In [5]:
# Parse every candidate paper. Successes are collected in `documents` as
# (paper_id, markdown text); failures in `failed` as (paper, reason).
documents = []
failed = []
for paper in tqdm(candidates):
    # Normalise the identifier: lower-case, drop the 'arxiv:' prefix and the
    # surrounding whitespace.
    # NOTE(review): r'\n' is a raw string, so this removes the two characters
    # backslash + 'n', not a newline character -- confirm which was intended.
    paper['identifier'] = paper['identifier'].lower().replace('arxiv:', '').replace(r'\n', '').strip()
    paper_id = paper['identifier']
    
    folder = f'tmp_{paper_id}'

    try:
        # Only retrieve the source when no local folder of that name exists yet
        if not os.path.isdir(folder):
            folder = retrieve_document_source(f"{paper_id}", f'tmp_{paper_id}')
        
        try:
            # `validation` raises AffiliationError when the affiliation check fails
            doc = latex.LatexDocument(folder, validation=validation)    
        except AffiliationError as affilerror:
            # NOTE(review): `msg` is assigned but never used in this cell
            msg = f"ArXiv:{paper_id:s} is not an MPIA paper... " + str(affilerror)
            failed.append((paper, "affiliation error: " + str(affilerror) ))
            continue
        
        # Hack because sometimes author parsing does not work well
        # (a different author count falls back to the arXiv author list,
        #  which was already highlighted when selecting the candidates)
        if (len(doc.authors) != len(paper['authors'])):
            doc._authors = paper['authors']
        else:
            # highlight authors (FIXME: doc.highlight_authors)
            # done on arxiv paper already
            doc._authors = highlight_authors_in_list(
                [mpia.get_initials(k) for k in doc.authors], 
                mpia_authors, verbose=True)
        # Fall back to the arXiv abstract when none was extracted from the source
        if (doc.abstract) in (None, ''):
            doc._abstract = paper['abstract']
            
        # Comment line: arXiv badge + appearance date, then the arXiv comments if any
        doc.comment = (get_markdown_badge(paper_id) + 
                       "<mark>Appeared on: " + paper['date'] + "</mark> - ")
        if paper['comments']:
            doc.comment += " _" + paper['comments'] + "_"
        
        full_md = doc.generate_markdown_text()
        
        full_md += get_markdown_qrcode(paper_id)
        
        # replace citations
        # (best effort: on failure the error is printed and the markdown is kept as is)
        try:
            bibdata = latex_bib.LatexBib.from_doc(doc)
            full_md = latex_bib.replace_citations(full_md, bibdata)
        except Exception as e:
            print(e)
        
        documents.append((paper_id, full_md))
    except Exception as e:
        # Any other failure: warn, record the paper as failed and go on with the next one
        warnings.warn(latex.LatexWarning(f"{paper_id:s} did not run properly\n" +
                                         str(e)
                                        ))
        failed.append((paper, "latex error " + str(e)))

  0%|          | 0/5 [00:00<?, ?it/s]

Retrieving document from  https://arxiv.org/e-print/2406.08612
extracting tarball to tmp_2406.08612... done.
Retrieving document from  https://arxiv.org/e-print/2406.08906



  exec(code_obj, self.user_global_ns, self.user_ns)
'PosixPath' object is not subscriptable


extracting tarball to tmp_2406.08906...

 done.
Retrieving document from  https://arxiv.org/e-print/2406.08935
extracting tarball to tmp_2406.08935...

 done.
Retrieving document from  https://arxiv.org/e-print/2406.09145


extracting tarball to tmp_2406.09145...

 done.


H. Beuther  ->  H. Beuther  |  ['H. Beuther']
D. Semenov  ->  D. Semenov  |  ['D. Semenov']
S. Li  ->  S. Li  |  ['S. Li']


Found 88 bibliographic references in tmp_2406.09145/aa.bbl.
Retrieving document from  https://arxiv.org/e-print/2406.09186


extracting tarball to tmp_2406.09186... done.


J. Müller  ->  J. Müller-Horn  |  ['J. Müller']


Found 86 bibliographic references in tmp_2406.09186/main.bbl.


### Export the logs

Throughout, we also keep track of the logs per paper; see `_build/html/logs/log-{today's date}.md`.

In [6]:
import datetime
today = str(datetime.date.today())
logfile = f"_build/html/logs/log-{today}.md"

# open(..., 'w') does not create missing parent folders: make sure the log
# directory exists (e.g. on a fresh build tree).
os.makedirs(os.path.dirname(logfile), exist_ok=True)

with open(logfile, 'w') as logs:
    # Success
    logs.write(f'# Arxiv on Deck 2: Logs - {today}\n\n')
    logs.write("""* Arxiv had {0:,d} new papers\n""".format(len(new_papers)))
    logs.write("""    * {0:,d} with possible author matches\n\n""".format(len(candidates)))
    logs.write("## Successful papers\n\n")
    display(Markdown("## Successful papers"))
    # identifiers of the papers that produced a markdown document
    success = [k[0] for k in documents]
    for candid in candidates:
        if candid['identifier'].split(':')[-1] in success:
            display(candid)
            logs.write(candid.generate_markdown_text() + '\n\n')

    ## failed
    logs.write("## Failed papers\n\n")
    display(Markdown("## Failed papers"))
    # sort by reason so that identical reasons share one sub-heading in the log
    failed = sorted(failed, key=lambda x: x[1])
    current_reason = ""
    for paper, reason in failed:
        # affiliation rejections are shown in green, every other failure in red
        if 'affiliation' in reason:
            color = 'green'
        else:
            color = 'red'
        # paper summary table with one extra row holding the failure reason
        data = Markdown(
                paper.generate_markdown_text() + 
                f'\n|<p style="color:{color:s}"> **ERROR** </p>| <p style="color:{color:s}">{reason:s}</p> |'
               )
        if reason != current_reason:
            logs.write(f'### {reason:s} \n\n')
            current_reason = reason
        logs.write(data.data + '\n\n')
        
        # every failure is displayed here as well as logged; enable the
        # filter below to only display the important (red) errors
        # if color in ('red',):
        display(data)

## Successful papers


|||
|---:|:---|
| [![arXiv](https://img.shields.io/badge/arXiv-2406.09145-b31b1b.svg)](https://arxiv.org/abs/2406.09145) | **The Cygnus Allscale Survey of Chemistry and Dynamical Environments: CASCADE III. The large scale distribution of DCO+, DNC and DCN in the DR21 filament**  |
|| I. B. Christensen, et al. -- incl., <mark>H. Beuther</mark>, <mark>D. Semenov</mark>, <mark>S. Li</mark> |
|*Appeared on*| *2024-06-14*|
|*Comments*| *24 pages, 21 figures, accepted to A&A*|
|**Abstract**|            Deuterated molecules and their molecular D/H-ratios (RD(D)) are important diagnostic tools to study the physical conditions of star-forming regions. The degree of deuteration, RD(D), can be significantly enhanced over the elemental D/H-ratio depending on physical parameters. Within the Cygnus Allscale Survey of Chemistry and Dynamical Environments (CASCADE), we aim to explore the large-scale distribution of deuterated molecules in the nearby Cygnus-X region. We focus on the analysis of large-scale structures of deuterated molecules in the filamentary region hosting the prominent Hii region DR21 and DR21(OH). Here we discuss the HCO+, HNC and HCN molecules and their deuterated isotopologues DCO+, DNC and DCN. The spatial distributions of integrated line emissions from DCO+, DNC, and DCN reveal morphological differences. DCO+ displays the most extended emission, characterized by several prominent peaks. Likewise, DNC exhibits multiple peaks, although its emission appears less extended compared to DCO+. In contrast to the extended emission of DCO+ and DNC, DCN appears the least extended, with distinct peaks. Focusing only on the regions where all three molecules are observed, the mean deuteration ratios for each species are 0.01 for both DNC and DCN, and = 0.005 for DCO+. Anti-correlations are found with deuterated molecules and dust temperature or N(H2). The strongest anti-correlation is found with RD(DCO+) and N(H2). The anti-correlation of RD(DCO+) and N(H2) is suggested to be a result of a combination of an increased photodissociation degree and shocks. A strong positive correlation between the ratio of integrated intensities of DCN and DNC with their 13C-isotopologues, are found in high column density regions. The positive relationship between the ratios implies that the D-isotopologue of the isomers could potentially serve as a tracer for the kinetic gas temperature.         |


|||
|---:|:---|
| [![arXiv](https://img.shields.io/badge/arXiv-2406.09186-b31b1b.svg)](https://arxiv.org/abs/2406.09186) | **A formation pathway for terrestrial planets with moderate water content involving atmospheric-volatile recycling**  |
|| <mark>J. Müller</mark>, B. Bitsch, A. D. Schneider |
|*Appeared on*| *2024-06-14*|
|*Comments*| *Accepted by A&A, 19 pages, 8 figures*|
|**Abstract**|            Of the many recently discovered terrestrial exoplanets, some are expected to harbor moderate water mass fractions of a few percent. The formation pathways that can produce planets with these water mass fractions are not fully understood. Here, we use the code chemcomp, which consists of a semi-analytical 1D protoplanetary disk model harboring a migrating and accreting planet, to model the growth and composition of planets with moderate water mass fractions by pebble accretion in a protoplanetary disk around a TRAPPIST-1 analog star. This star is accompanied by seven terrestrial planets, of which the outer four planets likely contain water mass fractions of between 1\% and 10\%. We adopt a published model that considers the evaporation of pebbles in the planetary envelope, from where recycling flows can transport the volatile vapor back into the disk. We find that with this model, the planetary water content depends on the influx rate of pebbles onto the planet. A decreasing pebble influx with time reduces the envelope temperature and consequently allows the formation of planets with moderate water mass fractions as inferred for the outer TRAPPIST-1 planets for a number of different simulation configurations. This is further evidence that the recycling of vapor is an important component of planet formation needed to explain the vast and diverse population of exoplanets.         |

## Failed papers


|||
|---:|:---|
| [![arXiv](https://img.shields.io/badge/arXiv-2406.08906-b31b1b.svg)](https://arxiv.org/abs/2406.08906) | **Kinematics and star formation of hub-filament systems in W49A**  |
|| W. Zhang, et al. -- incl., <mark>J. Li</mark> |
|*Appeared on*| *2024-06-14*|
|*Comments*| *19 pages, 22 figures. Accepted to A&A*|
|**Abstract**|            W49A is a prominent giant molecular cloud (GMC) that exhibits strong star formation activities, yet its structural and kinematic properties remain uncertain. Our study aims to investigate the large-scale structure and kinematics of W49A, and elucidate the role of filaments and hub-filament systems (HFSs) in its star formation activity. We utilized continuum data from Herschel and the James Clerk Maxwell Telescope (JCMT) as well as the molecular lines 12CO (3-2), 13CO (3-2), and C18O (3-2) to identify filaments and HFS structures within W49A. Further analysis focused on the physical properties, kinematics, and mass transport within these structures. Additionally, recombination line emission from the H I/OH/Recombination (THOR) line survey was employed to trace the central H II region and ionized gas. Our findings reveal that W49A comprises one blue-shifted (B-S) HFS and one red-shifted (R-S) HFS, each with multiple filaments and dense hubs. Notably, significant velocity gradients were detected along these filaments, indicative of material transport toward the hubs. High mass accretion rates along the filaments facilitate the formation of massive stars in the HFSs. Furthermore, the presence of V-shaped structures around clumps in position-velocity diagrams suggests ongoing gravitational collapse and local star formation within the filaments. Our results indicate that W49A consists of one R-S HFS and one B-S HFS, and that the material transport from filaments to the hub promotes the formation of massive stars in the hub. These findings underscore the significance of HFSs in shaping the star formation history of W49A.         |
|<p style="color:green"> **ERROR** </p>| <p style="color:green">affiliation error: mpia.affiliation_verifications: 'Heidelberg' keyword not found.</p> |


|||
|---:|:---|
| [![arXiv](https://img.shields.io/badge/arXiv-2406.08935-b31b1b.svg)](https://arxiv.org/abs/2406.08935) | **Dense Outflowing Molecular Gas in Massive Star-forming Regions**  |
|| Y. Xu, et al. -- incl., <mark>J. Li</mark> |
|*Appeared on*| *2024-06-14*|
|*Comments*| *22 pages, 5 figures, 4 tables, accepted in AJ*|
|**Abstract**|            Dense outflowing gas, traced by transitions of molecules with large dipole moment, is important for understanding mass loss and feedback of massive star formation. HCN 3-2 and HCO$^+$ 3-2 are good tracers of dense outflowing molecular gas, which are closely related to active star formation. In this study, we present on-the-fly (OTF) mapping observations of HCN 3-2 and HCO$^+$ 3-2 toward a sample of 33 massive star-forming regions using the 10-m Submillimeter Telescope (SMT). With the spatial distribution of line wings of HCO$^+$ 3-2 and HCN 3-2, outflows are detected in 25 sources, resulting in a detection rate of 76$\%$. The optically thin H$^{13}$CN and H$^{13}$CO$^+$ 3-2 lines are used to identify line wings as outflows and estimate core mass. The mass $M_{out}$, momentum $P_{out}$, kinetic energy $E_{K}$, force $F_{out}$ and mass loss rate $\dot M_{out}$ of outflow and core mass, are obtained for each source. A sublinear tight correlation is found between the mass of dense molecular outflow and core mass, with an index of $\sim$ 0.8 and a correlation coefficient of 0.88.         |
|<p style="color:green"> **ERROR** </p>| <p style="color:green">affiliation error: mpia.affiliation_verifications: 'Heidelberg' keyword not found.</p> |


|||
|---:|:---|
| [![arXiv](https://img.shields.io/badge/arXiv-2406.08612-b31b1b.svg)](https://arxiv.org/abs/2406.08612) | **Observation of Declination Dependence in the Cosmic Ray Energy Spectrum**  |
|| T. A. Collaboration, et al. -- incl., <mark>K. Lee</mark> |
|*Appeared on*| *2024-06-14*|
|*Comments*| *8 pages, 6 figures*|
|**Abstract**|            We report on an observation of the difference between northern and southern skies of the ultrahigh energy cosmic ray energy spectrum with a significance of ${\sim}8\sigma$. We use measurements from the two largest experiments$\unicode{x2014}$the Telescope Array observing the northern hemisphere and the Pierre Auger Observatory viewing the southern hemisphere. Since the comparison of two measurements from different observatories introduces the issue of possible systematic differences between detectors and analyses, we validate the methodology of the comparison by examining the region of the sky where the apertures of the two observatories overlap. Although the spectra differ in this region, we find that there is only a $1.8\sigma$ difference between the spectrum measurements when anisotropic regions are removed and a fiducial cut in the aperture is applied.         |
|<p style="color:red"> **ERROR** </p>| <p style="color:red">latex error 'PosixPath' object is not subscriptable</p> |

## Export documents

We now write the .md files and export relevant images

In [7]:
def export_markdown_summary(md: str, md_fname:str, directory: str):
    """Export MD document and associated relevant images

    Writes `md` to `directory/md_fname` and copies every local figure
    referenced in `md` below `directory`, keeping its relative path.
    Figures whose reference contains 'http' are not copied.

    :param md: markdown text of the document
    :param md_fname: name of the markdown file to write
    :param directory: destination directory (created, with its parents, if missing)
    :raises RuntimeError: if `directory` exists but is not a directory
    """
    import os
    import shutil
    import re

    if (os.path.exists(directory) and not os.path.isdir(directory)):
        raise RuntimeError(f"a non-directory file exists with name {directory:s}")

    if (not os.path.exists(directory)):
        print(f"creating directory {directory:s}")
        # makedirs rather than mkdir: the destination may be nested
        # (e.g. '_build/html/') with parents that do not exist yet.
        os.makedirs(directory, exist_ok=True)

    # figure references: markdown links labelled "[Fig...](path)" and <img src="path" .../> tags
    fig_fnames = (re.compile(r'\[Fig.*\]\((.*)\)').findall(md) + 
                  re.compile(r'\<img src="([^>\s]*)"[^>]*/>').findall(md))
    for fname in fig_fnames:
        if 'http' in fname:
            # No need to copy online figures
            continue
        destdir = os.path.join(directory, os.path.dirname(fname))
        destfname = os.path.join(destdir, os.path.basename(fname))
        os.makedirs(destdir, exist_ok=True)
        shutil.copy(fname, destfname)
    with open(os.path.join(directory, md_fname), 'w') as fout:
        fout.write(md)
    print("exported in ", os.path.join(directory, md_fname))
    for fk in fig_fnames:
        print("    + " + os.path.join(directory, fk))

In [8]:
# Write one markdown file named after the paper identifier (plus its local
# figures) into the HTML build folder for every successfully parsed paper.
for paper_id, md in documents:
    export_markdown_summary(md, f"{paper_id:s}.md", '_build/html/')

exported in  _build/html/2406.09145.md
    + _build/html/tmp_2406.09145/./Illustrations/All_deuterated.png
    + _build/html/tmp_2406.09145/./Illustrations/Ionization_HCO.png
    + _build/html/tmp_2406.09145/./Illustrations/CumDistr_deut.png
exported in  _build/html/2406.09186.md
    + _build/html/tmp_2406.09186/./figs/new/Fig_3_NEW_with_R0.png
    + _build/html/tmp_2406.09186/./figs/new/3x3_ENVTOT_evap_REFEREE.png
    + _build/html/tmp_2406.09186/./figs/new/Fig_4_NEW_with_R0.png


## Display the papers

Not necessary but allows for a quick check.

In [9]:
# Render the markdown text of every exported document inline
for paper_id, md in documents:
    display(Markdown(md))

<div class="macros" style="visibility:hidden;">
$\newcommand{\ensuremath}{}$
$\newcommand{\xspace}{}$
$\newcommand{\object}[1]{\texttt{#1}}$
$\newcommand{\farcs}{{.}''}$
$\newcommand{\farcm}{{.}'}$
$\newcommand{\arcsec}{''}$
$\newcommand{\arcmin}{'}$
$\newcommand{\ion}[2]{#1#2}$
$\newcommand{\textsc}[1]{\textrm{#1}}$
$\newcommand{\hl}[1]{\textrm{#1}}$
$\newcommand{\footnote}[1]{}$
$\newcommand{\hii}{H{\sc ii}}$
$\newcommand{\hi}{H{\sc i}}$
$\newcommand{\kms}{km s^{-1}}$</div>



<div id="title">

# The Cygnus Allscale Survey of Chemistry and Dynamical Environments: CASCADE

</div>
<div id="comments">

[![arXiv](https://img.shields.io/badge/arXiv-2406.09145-b31b1b.svg)](https://arxiv.org/abs/2406.09145)<mark>Appeared on: 2024-06-14</mark> -  _24 pages, 21 figures, accepted to A&A_

</div>
<div id="authors">

I. B. Christensen, et al. -- incl., <mark>H. Beuther</mark>, <mark>D. Semenov</mark>, <mark>S. Li</mark>

</div>
<div id="abstract">

**Abstract:** Deuterated molecules and their molecular D/H-ratios ( $R_D$ (D)) are important diagnostic tools to study the physical conditions of star-forming regions. The degree of deuteration, $R_D$ (D), can be significantly enhanced over the elemental D/H-ratio depending on physical parameters such as temperature, density, and ionization fraction. Within the **C** ygnus **A** llscale **S** urvey of **C** hemistry **a** nd **D** ynamical **E** nvironments (CASCADE), we aim to explore the large-scale distribution of deuterated molecules in the nearby ( $d \sim 1.5$ kpc) Cygnus-X region, a giant molecular cloud complex that hosts multiple sites of high mass star formation. We focus on the analysis of large-scale structures of deuterated molecules in the filamentary region hosting the prominent $\hii$ region DR21 and DR21(OH), a molecular hot core that is at an earlier evolutionary state. The DR21 filament has been imaged using the IRAM 30-m telescope in a variety of deuterated molecules and transitions.    Here we discuss the $\ce{HCO+}$ , HNC and HCN molecules and their deuterated isotopologues $\ce{DCO+}$ , DNC and DCN, and their observed line emissions at 3.6, 2, and 1.3-mm. The spatial distributions of integrated line emissions from $\ce{DCO+}$ , DNC, and DCN reveal morphological differences. Notably, $\ce{DCO+}$ displays the most extended emission, characterized by several prominent peaks. Likewise, DNC exhibits multiple peaks, although its emission appears less extended compared to $\ce{DCO+}$ . In contrast to the extended emission of $\ce{DCO+}$ and DNC, DCN appears the least extended, with distinct peaks.    Focusing only on the regions where all three molecules are observed, the mean deuteration ratios for each species, $R_D$ , are 0.01 for both DNC and DCN, and $=0.005$ for $\ce{DCO+}$ , respectively.    Anti-correlations are found with deuterated molecules and dust temperature or $N$ ( $\ce{H2}$ ). 
The strongest anti-correlation is found with $R_D$ ( $\ce{DCO+}$ ) and $N$ ( $\ce{H2}$ ), with a Pearson correlation coefficient of $\rho = -0.74$ .    We analyze the SiO emission as a tracer for shocks and the $N$ (HCO)/ $N$ ( $\ce{H^13CO+}$ ) as a tracer for increased photodissociation by UV.    The anti-correlation of $R_D$ ( $\ce{DCO+}$ ) and $N$ ( $\ce{H2}$ ) is suggested to be a result of a combination of an increased photodissociation degree and shocks.    A strong positive correlation between the ratio of integrated intensities of DCN and DNC with their $^{13}$ C-isotopologues, are found in high column density regions.    The positive relationship between the ratios implies that the D-isotopologue of the isomers could potentially serve as a tracer for the kinetic gas temperature.

</div>

<div id="div_fig1">

<img src="tmp_2406.09145/./Illustrations/All_deuterated.png" alt="Fig11" width="100%"/>

**Figure 11. -** Top and middle rows: The ground-state emission of \ce{DCO+}, DNC and DCN observed with the CASCADE-program and follow-up mapping with the IRAM-30m telescope. Bottom row: Maps for \ce{^13C} isotopologs. The intensities in  the maps are integrated over the velocity range $-7$ $\kms$ to $+0.5$\kms. HPBWs are displayed by the filled white circles in the lower left corner of each panel.
  For example,  the HPBW beam-size  is $34$\arcsec$$ for the map showing DNC (2--1),  16$\arcsec$ for DCO$^+$(3--2) and $11$\arcsec for  DCN (3--2).
  The white contours are showing levels of $4\sigma$,  $8\sigma$ and $12\sigma$. The stars mark DR21 Main (red) and DR21(OH) (blue). The locations of the F1 and F3 sub-filament  ([Schneider, Csengeri and Bontemps 2010]())  are indicated by red and magenta dashed curves, respectively.  (*fig:deutmaps*)

</div>
<div id="div_fig2">

<img src="tmp_2406.09145/./Illustrations/Ionization_HCO.png" alt="Fig5" width="100%"/>

**Figure 5. -** $N$(HCO)/$N$(\ce{H^13CO+}) vs. $N$($H_2$) (top panel) and dust temperature (middle panel). The bottom panel shows the correlation of $N$(HCO)/$N$(\ce{H^13CO+}) with $R_D$(\ce{DCO+}). The purple points represent pixels in the DR21 filament above $4\sigma$ and the grey point shows the ATLASGAL sources from [Kim, Wyrowski and Urquhart (2020)](). The black vertical line represents the indicator of ongoing FUV chemistry $\geq 1$ ([Gerin, et. al 2009](), [Kim, Wyrowski and Urquhart 2020]()) .
  The blue star mark the surroundings of DR21(OH), where \ce{HCO}, \ce{H^13CO+} and \ce{DCO+} are detected. (*fig:ionizationHCO*)

</div>
<div id="div_fig3">

<img src="tmp_2406.09145/./Illustrations/CumDistr_deut.png" alt="Fig2" width="100%"/>

**Figure 2. -** Cumulative distribution function (CDF) of dust temperature and \ce{H2} column density where \ce{DCO+}(purple), DNC (orange) or DCN (blue) is detected in the DR21 filament.
  The dust temperatures and \ce{H2} column density maps are taken from [Bonne, Bontemps and Schneider (2023)]().
  The horizontal dashed lines marks the mean dust temperature or column density of the sampled distributions, colorized with the respective detected molecular ratio. (*fig:phys_distribution*)

</div><div id="qrcode"><img src=https://api.qrserver.com/v1/create-qr-code/?size=100x100&data="https://arxiv.org/abs/2406.09145"></div>

<div class="macros" style="visibility:hidden;">
$\newcommand{\ensuremath}{}$
$\newcommand{\xspace}{}$
$\newcommand{\object}[1]{\texttt{#1}}$
$\newcommand{\farcs}{{.}''}$
$\newcommand{\farcm}{{.}'}$
$\newcommand{\arcsec}{''}$
$\newcommand{\arcmin}{'}$
$\newcommand{\ion}[2]{#1#2}$
$\newcommand{\textsc}[1]{\textrm{#1}}$
$\newcommand{\hl}[1]{\textrm{#1}}$
$\newcommand{\footnote}[1]{}$</div>



<div id="title">

# A formation pathway for terrestrial planets with moderate water content involving atmospheric-volatile recycling

</div>
<div id="comments">

[![arXiv](https://img.shields.io/badge/arXiv-2406.09186-b31b1b.svg)](https://arxiv.org/abs/2406.09186)<mark>Appeared on: 2024-06-14</mark> -  _Accepted by A&A, 19 pages, 8 figures_

</div>
<div id="authors">

<mark>J. Müller</mark>, B. Bitsch, A. D. Schneider

</div>
<div id="abstract">

**Abstract:** Of the many recently discovered terrestrial exoplanets, some are expected to harbor moderate water mass fractions of a few percent. The formation pathways that can produce planets with these water mass fractions are not fully understood.Here, we use the code \texttt{chemcomp} , which consists of a semi-analytical 1D protoplanetary disk model harboring a migrating and accreting planet, to model the growth and composition of planets with moderate water mass fractions by pebble accretion in a protoplanetary disk around a TRAPPIST-1 analog star. This star is accompanied by seven terrestrial planets, of which the outer four planets likely contain water mass fractions of between 1 \% and 10 \% .We adopt a published model that considers the evaporation of pebbles in the planetary envelope, from where recycling flows can transport the volatile vapor back into the disk.We find that with this model, the planetary water content depends on the influx rate of pebbles onto the planet. A decreasing pebble influx with time reduces the envelope temperature and consequently allows the formation of planets with moderate water mass fractions as inferred for the outer TRAPPIST-1 planets for a number of different simulation configurations.This is further evidence that the recycling of vapor is an important component of planet formation needed to explain the vast and diverse population of exoplanets.

</div>

<div id="div_fig1">

<img src="tmp_2406.09186/./figs/new/Fig_3_NEW_with_R0.png" alt="Fig2" width="100%"/>

**Figure 2. -** Comparison of planets using the simple envelope opacity model of [Johansen, Ronnet and Bizzarro (2021)]() with $\kappa_{\rm env,0}= 0.1$ cm$^2$ g$^{-1}$(see Eq. \ref{eq: envelope opacity johansen}; transparent lines) and the Brouwers opacity model by [Brouwers, et. al (2021)]() with $v_{\rm lim} = 30.0$ m s$^{-1}$ and $F = 0.0$(see Eq. \ref{eq: envelope opacity brouwers}; dotted lines) for different initial disk masses _ (left)_ and different initial disk radii _ (right)_. The pebbles in the disk follow the motion of the gas and behave similarly as in [Johansen, Ronnet and Bizzarro (2021)](). The planets start at a disk temperature of 100 K and migrate toward the inner edge of the disk within 1.5 Myr, causing the simulation to end. The growth of the planet initiates at $t_0 = 0.1$ Myr. The maximum disk lifetime is set to 5 Myr, but for the sake of clarity we only show the first 3 Myr in the bottom two rows.
    The light-blue region corresponds to the estimated total water mass fraction of the Earth, consisting of contributions from the hydrosphere, the exosphere including the crust, and the mantle \citep[e.g., ][]{marty2012, dangelo+2019}. _First row:_ Growth tracks. Due to the viscous heating of the protoplanetary disk, the initial position of the planet changes for different initial disk masses and radii, as we want to keep it in the same relative position exterior to the water-evaporation line. _Second row:_ Planetary water mass fraction as a function of planetary mass. For the sake of clarity, only the first 3 Myr are shown in this and the following row. _Third row:_ Planetary water mass fraction as a function of time. _Last row:_ Effective pebble accretion rates onto the planet as a function of time. (*fig: johopac*)

</div>
<div id="div_fig2">

<img src="tmp_2406.09186/./figs/new/3x3_ENVTOT_evap_REFEREE.png" alt="Fig7" width="100%"/>

**Figure 7. -** Final planetary water mass fraction versus initial local disk temperature at the position of the planet as a proxy for the distance from the star.
    Planets that are not shown in the plot have lower water mass fractions than 10$^{-5}$.
    _The columns_ correspond to different values of the $\alpha$-viscosity, _the rows_ show different solid fragmentation velocity values. Each marker corresponds to one planet starting at a specific disk temperature. The color of the markers indicates the final mass of the planet (see color-bars on the right). The shape of the markers specifies the initial mass of the disk in which the planet grows (see legend).
    The hatched regions in the background correspond to the 1$\sigma$-regions of the water mass fractions of the TRAPPIST-1 planets estimated by [Raymond, Izidoro and Bolmont (2022)]() using their model ii and are color-coded according to the observed masses of the TRAPPIST-1 planets using the same color-bar.
    The vertical lines indicate the location of the water-evaporation front. We use our full model, which consists of both the Brouwers opacity model ($v_{\rm lim} = 30.0$ m s$^{-1}$, $F = 0.0$) and the Birnstiel pebble drift, while applying the planetary envelope model described above. In addition, we account for the planetary migration and the evaporation of pebbles in the disk as they drift inwards. (*fig: ENVTOT evap*)

</div>
<div id="div_fig3">

<img src="tmp_2406.09186/./figs/new/Fig_4_NEW_with_R0.png" alt="Fig3" width="100%"/>

**Figure 3. -** Comparison of planets growing in a disk with gas-sensitive pebbles (similar to [Johansen, Ronnet and Bizzarro (2021)](); transparent lines) and a disk in which the pebbles follow the Birnstiel pebble drift (dashed lines) for different initial disk masses. The transparent lines are the same as the transparent lines shown in Fig. \ref{fig: johopac}. For these two sets of simulations (transparent and dashed lines), we use the simple envelope opacity model with $\kappa_{\rm env,0}= 0.1$ cm$^2$ g$^{-1}$(see Eq. \ref{eq: envelope opacity johansen}).
    In addition, we show simulations using our full model as solid lines.
    In our full model, the planets grow in a disk with with pebbles drifting according to the Birnstiel pebble drift model while considering the Brouwers opacity model with $v_{\rm lim} = 30.0$ m s$^{-1}$ and $F = 0.0$(see Eq. \ref{eq: envelope opacity brouwers}). The layout of the plot and the other simulation parameters are similar to Fig. \ref{fig: johopac}. (*fig: johdrift*)

</div><div id="qrcode"><img src=https://api.qrserver.com/v1/create-qr-code/?size=100x100&data="https://arxiv.org/abs/2406.09186"></div>

# Create HTML index

In [10]:
from datetime import datetime, timedelta, timezone
from glob import glob
import os

# Count the generated markdown files whose status changed within the last `days` days.
files = glob('_build/html/*.md')
days = 7
# Timezone-aware reference instant, taken once. Comparing a naive local
# `datetime.today()` against a UTC-converted timestamp would skew every
# age by the local UTC offset.
now = datetime.now(timezone.utc)
max_age = timedelta(days=days)
recent = []
for fk in files:
    # NOTE(review): st_ctime is the metadata-change time on Unix (creation time
    # on Windows), not the content-modification time -- confirm this is intended.
    changed = datetime.fromtimestamp(os.stat(fk).st_ctime, tz=timezone.utc)
    if now - changed <= max_age:
        recent.append(fk)
# File names in reverse lexicographic order.
res = sorted(recent, reverse=True)
npub = len(res)
print(npub, f" publications files modified in the last {days:d} days.")

96  publications files modified in the last 7 days.


In [11]:
import datetime
from glob import glob

def get_last_n_days(lst, days=1):
    """ Get the documents from the last n days

    :param lst: iterable of (fname, date) pairs. `date` is compared as a string
                against `str(datetime.date)`; presumably it is formatted as
                YYYY-MM-DD -- verify against the files being parsed.
    :param days: how many days back from today to accept (inclusive)
    :returns: generator over the matching `fname` values, most recent date first
    """
    # The cutoff does not depend on the item: compute it once so that every
    # entry is judged against the same day.
    cutoff = str(datetime.date.today() - datetime.timedelta(days=days))
    for fname, date in sorted(lst, key=lambda x: x[1], reverse=True):
        if date < cutoff:
            # Entries are ordered newest-first on this very key, so nothing
            # after the first too-old entry can match.
            break
        yield fname

def extract_appearance_dates(lst_file):
    """ Extract the "Appeared on" date recorded in each file

    Only the first line containing "Appeared on" is used for each file; files
    without such a line do not contribute an entry.

    :param lst_file: iterable of paths of the text files to scan
    :returns: list of (fname, date) tuples in the order of `lst_file`,
              where `date` is the text found after "Appeared on:"
    """

    def get_date(line):
        """ Return the text between 'Appeared on:' and '</mark>', stripped """
        return line\
            .split('Appeared on:')[-1]\
            .split('</mark>')[0].strip()

    dates = []
    # Iterate over the argument itself rather than a module-level list, so the
    # function does not depend on hidden notebook state.
    for fname in lst_file:
        with open(fname, 'r') as f:
            for line in f:
                if "Appeared on" in line:
                    dates.append((fname, get_date(line)))
                    # First match wins: no need to read the rest of the file.
                    break
    return dates

from glob import glob
# Collect the appearance date of every generated markdown file and keep the
# ones from the last `days` days. Same '*.md' pattern as the file-age cell
# above: the bare '*md' would also match any name merely ending in "md".
lst = glob('_build/html/*.md')
days = 7
dates = extract_appearance_dates(lst)
res = list(get_last_n_days(dates, days))
npub = len(res)
print(npub, f" publications in the last {days:d} days.")

9  publications in the last 7 days.


In [12]:
def create_carousel(npub=4):
    """ Build the HTML markup of a carousel holding `npub` slides

    :param npub: number of slides; they get the ids slide1 ... slide<npub>
    :returns: HTML text, one element per line
    """
    slide_tpl = """    <div class="carousel-cell"> <div id="slide{k}" class="md_view">Content {k}</div> </div>"""
    opening = ["""  <div class="carousel" """,
               """       data-flickity='{ "autoPlay": 10000, "adaptiveHeight": true, "resize": true, "wrapAround": true, "pauseAutoPlayOnHover": true, "groupCells": 1 }' id="asyncTypeset">"""]
    cells = [slide_tpl.format(k=idx) for idx in range(1, npub + 1)]
    closing = ["  </div>"]
    return '\n'.join(opening + cells + closing)

def create_grid(npub=4):
    """ Build the HTML markup of a flat grid holding `npub` slides

    :param npub: number of grid items; they get the ids slide1 ... slide<npub>
    :returns: HTML text, one element per line
    """
    cell_tpl = """    <div class="grid-item"> <div id="slide{k}" class="md_view">Content {k}</div> </div>"""
    rows = ["""  <div class="grid"> """]
    rows.extend(cell_tpl.format(k=idx) for idx in range(1, npub + 1))
    rows.append("  </div>")
    return '\n'.join(rows)

In [13]:
# Build the 7-day archive page: one carousel slide per document in `res`.
carousel = create_carousel(npub)
# os.path.basename copes with the platform's path separator, whereas
# splitting on '/' only works for POSIX-style paths.
docs = ', '.join('"{0:s}"'.format(os.path.basename(k)) for k in res)
slides = ', '.join(f'"slide{k}"' for k in range(1, npub + 1))

# Read the template, then substitute once the file is closed.
with open("daily_template.html", "r") as tpl:
    page = tpl.read()

page = (page.replace("{%-- carousel:s --%}", carousel)
            .replace("{%-- suptitle:s --%}", "7-day archives")
            .replace("{%-- docs:s --%}", docs)
            .replace("{%-- slides:s --%}", slides))

with open("_build/html/index_7days.html", 'w') as fout:
    fout.write(page)

In [14]:
# redo for today: same page as the 7-day archive, restricted to the last day
days = 1
res = list(get_last_n_days(dates, days))
npub = len(res)
print(npub, " publications in the last day.")

carousel = create_carousel(npub)
# os.path.basename copes with the platform's path separator, whereas
# splitting on '/' only works for POSIX-style paths.
docs = ', '.join('"{0:s}"'.format(os.path.basename(k)) for k in res)
slides = ', '.join(f'"slide{k}"' for k in range(1, npub + 1))

# Read the template, then substitute once the file is closed.
with open("daily_template.html", "r") as tpl:
    page = tpl.read()

page = (page.replace("{%-- carousel:s --%}", carousel)
            .replace("{%-- suptitle:s --%}", "Daily")
            .replace("{%-- docs:s --%}", docs)
            .replace("{%-- slides:s --%}", slides))

with open("_build/html/index_daily.html", 'w') as fout:
    fout.write(page)

4  publications in the last day.


In [15]:
# Create the flat grid of the last N papers (fixed number regardless of dates)
from itertools import islice 

npub = 6
# Take the `npub` entries with the latest dates. The slice length is `npub`
# itself rather than a repeated literal, so the selection and the number of
# grid slots cannot drift apart when `npub` is edited.
# NOTE(review): if fewer than `npub` dated files exist, the grid still gets
# `npub` slots while `docs` is shorter -- confirm the template tolerates that.
res = [k[0] for k in islice(reversed(sorted(dates, key=lambda x: x[1])), npub)]
print(len(res), f" {npub} publications selected.")

grid = create_grid(npub)
# os.path.basename copes with the platform's path separator, whereas
# splitting on '/' only works for POSIX-style paths.
docs = ', '.join('"{0:s}"'.format(os.path.basename(k)) for k in res)
slides = ', '.join(f'"slide{k}"' for k in range(1, npub + 1))

# Read the template, then substitute once the file is closed.
with open("grid_template.html", "r") as tpl:
    page = tpl.read()

page = (page.replace("{%-- grid-content:s --%}", grid)
            .replace("{%-- suptitle:s --%}", f"Last {npub:,d} papers")
            .replace("{%-- docs:s --%}", docs)
            .replace("{%-- slides:s --%}", slides))

with open("_build/html/index_npub_grid.html", 'w') as fout:
    fout.write(page)

6  6 publications selected.
