# MPIA Arxiv on Deck 2

Contains the steps to produce the paper extractions.

In [1]:
# Imports
import os
from IPython.display import Markdown, display
from tqdm.notebook import tqdm
import warnings
from PIL import Image 

# requires arxiv_on_deck_2

from arxiv_on_deck_2.arxiv2 import (get_new_papers, 
                                    get_paper_from_identifier,
                                    retrieve_document_source, 
                                    get_markdown_badge)
from arxiv_on_deck_2 import (latex,
                             latex_bib,
                             mpia,
                             highlight_authors_in_list)

# Sometimes images are really big
Image.MAX_IMAGE_PIXELS = 1000000000 

In [2]:
# Some useful definitions.

class AffiliationWarning(UserWarning):
    """Warning category for affiliation-related issues (a plain ``UserWarning`` subclass)."""

class AffiliationError(RuntimeError):
    """Error raised by :func:`validation` when the affiliation check does not pass."""

def validation(source: str):
    """Check the affiliations of a source before its TeX code is parsed.

    Delegates to :func:`mpia.affiliation_verifications`. Any result other
    than ``True`` is treated as a failed check and is reported in the error
    message.

    :param source: content handed over to ``mpia.affiliation_verifications``
    :raises AffiliationError: if the verification does not return ``True``
    """
    check = mpia.affiliation_verifications(source, verbose=True)
    if check is not True:
        # str() keeps the message well-formed even when the check returns a
        # non-string value (e.g. False or None) instead of a reason text.
        raise AffiliationError("mpia.affiliation_verifications: " + str(check))

        
# Report every AffiliationWarning, even if an identical one was already emitted.
warnings.simplefilter('always', AffiliationWarning)


def get_markdown_qrcode(paper_id: str):
    """Generate a QR code linking to the arXiv abstract page using qrserver.com.

    :param paper_id: arXiv identifier of the paper (e.g. ``2406.13037``)
    :returns: HTML snippet, an ``<img>`` wrapped in ``<div id="qrcode">``
    """
    url = r"https://api.qrserver.com/v1/create-qr-code/?size=100x100&data="
    # The whole src value is quoted. Previously only the arXiv link was quoted,
    # which left the attribute unquoted and put literal '"' characters into the
    # data encoded by the QR code.
    txt = f"""<img src="{url}https://arxiv.org/abs/{paper_id}">"""
    txt = '<div id="qrcode">' + txt + '</div>'
    return txt

## get list of arxiv paper candidates

We use the MPIA staff ("Mitarbeiter") list from the mpia.de website to get the author names.
We then get all new papers from arXiv and match their authors against that list.

In [3]:
# deal with the author list and edge cases of people that cannot be consistent on their name  

def filter_non_scientists(name: str) -> bool:
    """ Loose filter on expected authorships

    Rejects names of IT, administration and technical staff. The test is a
    plain substring match of each excluded entry against `name`.

    :param name: name to check
    :returns: False if `name` contains one of the excluded entries, True otherwise
    """
    excluded = ('Wolf', 'Licht', 'Binroth', 'Witzel', 'Jordan',
                'Zähringer', 'Scheerer', 'Hoffmann', 'Düe',
                'Hellmich', 'Enkler-Scharpegge', 'Witte-Nguy',
                'Dehen', 'Beckmann', 'Jager', 'Jäger')

    return not any(entry in name for entry in excluded)

def add_author_to_list(author_list: list) -> list:
    """ Append the hard-coded extra authors that are missing from the list

    The list is modified in place and also returned.

    :param author_list: list of authors
    :returns: the same list object, with missing extra authors appended
    """
    extra_authors = ['T. Henning']

    author_list += [name for name in extra_authors if name not in author_list]
    return author_list

# Get the staff list from the MPIA website and keep the second field (k[1]) of
# each entry -- presumably the person's name; verify against
# mpia.get_mpia_mitarbeiter_list().
# Entries rejected by filter_non_scientists are dropped
# (mpia.get_mpia_mitarbeiter_list() does some filtering of its own).
mpia_authors = [k[1] for k in mpia.get_mpia_mitarbeiter_list() if filter_non_scientists(k[1])]
# Add authors that are missing because their MPIA name and their author name
# on papers are inconsistent.
mpia_authors = add_author_to_list(mpia_authors)

In [4]:
# Fetch today's new papers.
new_papers = get_new_papers()
# Manual additions: identifiers listed here are fetched individually and
# appended to the list of new papers.
add_paper_refs = []
new_papers.extend([get_paper_from_identifier(k) for k in add_paper_refs])

# Keep only the papers with at least one author matching the MPIA author list.
candidates = []
for paperk in new_papers:
    # Check author list with their initials
    normed_author_list = [mpia.get_initials(k) for k in paperk['authors']]
    hl_authors = highlight_authors_in_list(normed_author_list, mpia_authors, verbose=True)
    # A highlighted entry is recognised by the substring 'mark' (cf. the
    # <mark> tags in the rendered output); each match keeps the original name too.
    matches = [(hl, orig) for hl, orig in zip(hl_authors, paperk['authors']) if 'mark' in hl]
    # NOTE: the paper's author list is replaced by the highlighted version
    # for every paper, matched or not.
    paperk['authors'] = hl_authors
    if matches:
        # only select paper if an author matched our list
        candidates.append(paperk)
print("""Arxiv has {0:,d} new papers today""".format(len(new_papers)))        
print("""          {0:,d} with possible author matches""".format(len(candidates)))

T. Henning  ->  T. Henning  |  ['T. Henning']
P. Mollière  ->  P. Mollière  |  ['P. Mollière']
L. Boogaard  ->  L. Boogaard  |  ['L. Boogaard']
F. Walter  ->  F. Walter  |  ['F. Walter']


T. Henning  ->  T. Henning  |  ['T. Henning']
M. Samland  ->  M. Samland  |  ['M. Samland']
G. Perotti  ->  G. Perotti  |  ['G. Perotti']
J. Bouwman  ->  J. Bouwman  |  ['J. Bouwman']
S. Scheithauer  ->  S. Scheithauer  |  ['S. Scheithauer']
J. Schreiber  ->  J. Schreiber  |  ['J. Schreiber']
K. Schwarz  ->  K. Schwarz  |  ['K. Schwarz']
Arxiv has 106 new papers today
          4 with possible author matches


# Parse sources and generate relevant outputs

From the candidates, we do the following steps:
* get their tarball from ArXiv (and extract data)
* find the main .tex file: find one with \documentclass{...} (sometimes it's non trivial)
* Check affiliations with :func:`validation`, which uses :func:`mpia.affiliation_verifications`
* If passing the affiliations: we parse the .tex source
   * inject sub-documents into the main (flatten the main document)
   * parse structure, extract information (title, abstract, authors, figures...)
   * handles `\graphicspath` if provided
* Generate the .md document.

In [5]:
# Results of the processing loop:
#   documents: (paper_id, markdown text) for each paper that went through
#   failed:    (paper, reason) for each paper that did not
documents = []
failed = []
for paper in tqdm(candidates):
    # Normalise the identifier: lowercase, drop the 'arxiv:' prefix and
    # surrounding whitespace.
    # NOTE(review): r'\n' is a raw string, so this removes the literal two
    # characters backslash + 'n', not a newline -- confirm this is intended.
    paper['identifier'] = paper['identifier'].lower().replace('arxiv:', '').replace(r'\n', '').strip()
    paper_id = paper['identifier']

    folder = f'tmp_{paper_id}'

    try:
        # Reuse the source folder if it is already on disk, otherwise retrieve it.
        if not os.path.isdir(folder):
            folder = retrieve_document_source(f"{paper_id}", f'tmp_{paper_id}')

        try:
            # `validation` is handed to LatexDocument; an AffiliationError it
            # raises is recorded as an affiliation failure and the paper is skipped.
            doc = latex.LatexDocument(folder, validation=validation)
        except AffiliationError as affilerror:
            # NOTE(review): `msg` is built but never used afterwards.
            msg = f"ArXiv:{paper_id:s} is not an MPIA paper... " + str(affilerror)
            failed.append((paper, "affiliation error: " + str(affilerror) ))
            continue

        # Hack because sometimes author parsing does not work well:
        # if the parsed author count differs from the arXiv one, fall back to
        # the (already highlighted) arXiv author list.
        if (len(doc.authors) != len(paper['authors'])):
            doc._authors = paper['authors']
        else:
            # highlight authors (FIXME: doc.highlight_authors)
            # done on arxiv paper already
            doc._authors = highlight_authors_in_list(
                [mpia.get_initials(k) for k in doc.authors],
                mpia_authors, verbose=True)
        # Fall back to the arXiv abstract when none was extracted from the source.
        if (doc.abstract) in (None, ''):
            doc._abstract = paper['abstract']

        # Comment line: arXiv badge, appearance date and, if any, the arXiv comments.
        # The "Appeared on: ...</mark>" text is what extract_appearance_dates
        # looks for in the exported files.
        doc.comment = (get_markdown_badge(paper_id) +
                       "<mark>Appeared on: " + paper['date'] + "</mark> - ")
        if paper['comments']:
            doc.comment += " _" + paper['comments'] + "_"

        full_md = doc.generate_markdown_text()

        full_md += get_markdown_qrcode(paper_id)

        # replace citations (best effort: on failure the error is printed and
        # the markdown is kept without replaced citations)
        try:
            bibdata = latex_bib.LatexBib.from_doc(doc)
            full_md = latex_bib.replace_citations(full_md, bibdata)
        except Exception as e:
            print(e)

        documents.append((paper_id, full_md))
    except Exception as e:
        # Any other failure is reported as a LatexWarning and recorded as a
        # "latex error", whatever its actual origin (e.g. a download problem).
        warnings.warn(latex.LatexWarning(f"{paper_id:s} did not run properly\n" +
                                         str(e)
                                        ))
        failed.append((paper, "latex error " + str(e)))

  0%|          | 0/4 [00:00<?, ?it/s]

Retrieving document from  https://arxiv.org/e-print/2406.12984


extracting tarball to tmp_2406.12984...

 done.
  1: tmp_2406.12984/aassymbols.tex, 579 lines
  2: tmp_2406.12984/vla1623w.tex, 501 lines
Retrieving document from  https://arxiv.org/e-print/2406.13037
extracting tarball to tmp_2406.13037...


  exec(code_obj, self.user_global_ns, self.user_ns)
  exec(code_obj, self.user_global_ns, self.user_ns)
  exec(code_obj, self.user_global_ns, self.user_ns)


 done.


Found 64 bibliographic references in tmp_2406.13037/aanda.bbl.
Retrieving document from  https://arxiv.org/e-print/2406.13554
extracting tarball to tmp_2406.13554... done.


L. Boogaard  ->  L. Boogaard  |  ['L. Boogaard']
F. Walter  ->  F. Walter  |  ['F. Walter']


Found 115 bibliographic references in tmp_2406.13554/Manuscript.bbl.
Retrieving document from  https://arxiv.org/e-print/2406.14293


not a gzip file


### Export the logs

Throughout, we also keep track of the logs per paper; see `_build/html/logs/log-{today's date}.md`.

In [6]:
import datetime
today = str(datetime.date.today())
logfile = f"_build/html/logs/log-{today}.md"

# open(..., 'w') does not create missing parent directories.
os.makedirs(os.path.dirname(logfile), exist_ok=True)

with open(logfile, 'w') as logs:
    # Header and global counts
    logs.write(f'# Arxiv on Deck 2: Logs - {today}\n\n')
    logs.write("""* Arxiv had {0:,d} new papers\n""".format(len(new_papers)))
    logs.write("""    * {0:,d} with possible author matches\n\n""".format(len(candidates)))

    # Successful papers: candidates whose identifier is among the exported documents
    logs.write("## Successful papers\n\n")
    display(Markdown("## Successful papers"))
    success = [k[0] for k in documents]
    for candid in candidates:
        if candid['identifier'].split(':')[-1] in success:
            display(candid)
            logs.write(candid.generate_markdown_text() + '\n\n')

    # Failed papers, sorted by reason so that each reason gets a single heading
    logs.write("## Failed papers\n\n")
    display(Markdown("## Failed papers"))
    failed = sorted(failed, key=lambda x: x[1])
    current_reason = ""
    for paper, reason in failed:
        # Affiliation rejections are expected (green); anything else is a real error (red).
        if 'affiliation' in reason:
            color = 'green'
        else:
            color = 'red'
        data = Markdown(
                paper.generate_markdown_text() + 
                f'\n|<p style="color:{color:s}"> **ERROR** </p>| <p style="color:{color:s}">{reason:s}</p> |'
               )
        if reason != current_reason:
            logs.write(f'### {reason:s} \n\n')
            current_reason = reason
        logs.write(data.data + '\n\n')

        # Every failure is displayed here as well as written to the log.
        display(data)

## Successful papers


|||
|---:|:---|
| [![arXiv](https://img.shields.io/badge/arXiv-2406.13037-b31b1b.svg)](https://arxiv.org/abs/2406.13037) | **Large Interferometer For Exoplanets (LIFE): XIII. The Value of Combining Thermal Emission and Reflected Light for the Characterization of Earth Twins**  |
|| E. Alei, et al. -- incl., <mark>P. Mollière</mark> |
|*Appeared on*| *2024-06-21*|
|*Comments*| *16 pages (main text, incl. 12 figures) + appendix; accepted for publication in A&A (current version: post 1st revision). Thirteenth paper of LIFE telescope series*|
|**Abstract**|            Following the recommendations to NASA and ESA, the search for life on exoplanets will be a priority in the next decades. Two direct imaging space mission concepts are being developed: the Habitable Worlds Observatory (HWO) and the Large Interferometer for Exoplanets (LIFE). HWO focuses on reflected light spectra in the ultraviolet/visible/near-infrared (UV/VIS/NIR), while LIFE captures the mid-infrared (MIR) emission of temperate exoplanets. We assess the potential of HWO and LIFE in characterizing a cloud-free Earth twin orbiting a Sun-like star at 10 pc, both separately and synergistically, aiming to quantify the increase in information from joint atmospheric retrievals on a habitable planet. We perform Bayesian retrievals on simulated data from an HWO-like and a LIFE-like mission separately, then jointly, considering the baseline spectral resolutions currently assumed for these concepts and using two increasingly complex noise simulations. HWO would constrain H$_2$O, O$_2$, and O$_3$, in the atmosphere, with ~ 100 K uncertainty on the temperature profile. LIFE would constrain CO$_2$, H$_2$O, O$_3$ and provide constraints on the thermal atmospheric structure and surface temperature (~ 10 K uncertainty). Both missions would provide an upper limit on CH$_4$. Joint retrievals on HWO and LIFE data would accurately define the atmospheric thermal profile and planetary parameters, decisively constrain CO$_2$, H$_2$O, O$_2$, and O$_3$, and weakly constrain CO and CH$_4$. The detection significance is greater or equal to single-instrument retrievals. Both missions provide specific information to characterize a terrestrial habitable exoplanet, but the scientific yield is maximized with synergistic UV/VIS/NIR+MIR observations. Using HWO and LIFE together will provide stronger constraints on biosignatures and life indicators, potentially transforming the search for life in the universe.         |


|||
|---:|:---|
| [![arXiv](https://img.shields.io/badge/arXiv-2406.13554-b31b1b.svg)](https://arxiv.org/abs/2406.13554) | **The emergence of the Star Formation Main Sequence with redshift unfolded by JWST**  |
|| P. Rinaldi, et al. -- incl., <mark>L. Boogaard</mark>, <mark>F. Walter</mark> |
|*Appeared on*| *2024-06-21*|
|*Comments*| *15 pages, 4 figures. Submitted to ApJ*|
|**Abstract**|            We investigate the correlation between stellar mass (M*) and star formation rate (SFR) across the stellar mass range log10(M*/Msun)~6-11. We consider almost 50,000 star-forming galaxies at z~3-7, leveraging data from COSMOS/SMUVS, JADES/GOODS-SOUTH, and MIDIS/XDF. This is the first study spanning such a wide stellar mass range without relying on gravitational lensing effects. We locate our galaxies on the SFR-M* plane to assess how the location of galaxies in the star-formation main sequence (MS) and starburst (SB) region evolves with stellar mass and redshift. We find that the two star-forming modes tend to converge at log10(M*/Msun) < 7, with all galaxies found in the SB mode. By dissecting our galaxy sample in stellar mass and redshift, we show that the emergence of the star-formation MS is stellar-mass dependent: while in galaxies with log10(M*/Msun) > 9 the MS is already well in place at z = 5-7, for galaxies with log10(M*/Msun)~7-8 it only becomes significant at z<4. Overall, our results are in line with previous findings that the SB mode dominates amongst low stellar-mass galaxies. The earlier emergence of the MS for massive galaxies is consistent with galaxy downsizing.         |

## Failed papers


|||
|---:|:---|
| [![arXiv](https://img.shields.io/badge/arXiv-2406.12984-b31b1b.svg)](https://arxiv.org/abs/2406.12984) | **Constraining the Stellar Masses and Origin of the Protostellar VLA 1623 System**  |
|| S. I. Sadavoy, et al. -- incl., <mark>T. Henning</mark> |
|*Appeared on*| *2024-06-21*|
|*Comments*| *Accepted to A&A; 16 pages, 12 figures*|
|**Abstract**|            We present ALMA Band 7 molecular line observations of the protostars within the VLA 1623 system. We map C$^{17}$O (3 - 2) in the circumbinary disk around VLA 1623A and the outflow cavity walls of the collimated outflow. We further detect red-shifted and blue-shifted velocity gradients in the circumstellar disks around VLA 1623B and VLA 1623W that are consistent with Keplerian rotation. We use the radiative transfer modeling code, pdspy, and simple flared disk models to measure stellar masses of $0.27 \pm 0.03$ M$_\odot$, $1.9^{+0.3}_{-0.2}$ M$_\odot$, and $0.64 \pm 0.06$ M$_\odot$ for the VLA 1623A binary, VLA 1623B, and VLA 1623W, respectively. These results represent the strongest constraints on stellar mass for both VLA 1623B and VLA 1623W, and the first measurement of mass for all stellar components using the same tracer and methodology. We use these masses to discuss the relationship between the young stellar objects (YSOs) in the VLA 1623 system. We find that VLA 1623W is unlikely to be an ejected YSO, as has been previously proposed. While we cannot rule out that VLA 1623W is a unrelated YSO, we propose that it is a true companion star to the VLA 1623A/B system and that the these stars formed in situ through turbulent fragmentation and have had only some dynamical interactions since their inception.         |
|<p style="color:green"> **ERROR** </p>| <p style="color:green">affiliation error: mpia.affiliation_verifications: 'Heidelberg' keyword not found.</p> |


|||
|---:|:---|
| [![arXiv](https://img.shields.io/badge/arXiv-2406.14293-b31b1b.svg)](https://arxiv.org/abs/2406.14293) | **Abundant hydrocarbons in the disk around a very-low-mass star**  |
|| A. M. Arabhavi, et al. -- incl., <mark>T. Henning</mark>, <mark>M. Samland</mark>, <mark>G. Perotti</mark>, <mark>J. Bouwman</mark>, <mark>S. Scheithauer</mark>, <mark>J. Schreiber</mark>, <mark>K. Schwarz</mark> |
|*Appeared on*| *2024-06-21*|
|*Comments*| *Published, 36 pages, 8 figures*|
|**Abstract**|            Very low-mass stars (those <0.3 solar masses) host orbiting terrestrial planets more frequently than other types of stars, but the compositions of those planets are largely unknown. We use mid-infrared spectroscopy with the James Webb Space Telescope to investigate the chemical composition of the planet-forming disk around ISO-ChaI 147, a 0.11 solar-mass star. The inner disk has a carbon-rich chemistry: we identify emission from 13 carbon-bearing molecules including ethane and benzene. We derive large column densities of hydrocarbons indicating that we probe deep into the disk. The high carbon to oxygen ratio we infer indicates radial transport of material within the disk, which we predict would affect the bulk composition of any planets forming in the disk.         |
|<p style="color:red"> **ERROR** </p>| <p style="color:red">latex error not a gzip file</p> |

## Export documents

We now write the .md files and export relevant images

In [7]:
def export_markdown_summary(md: str, md_fname:str, directory: str):
    """Export MD document and associated relevant images

    Figures referenced in `md` (markdown ``[Fig...](path)`` links and
    ``<img src="path" .../>`` tags) are copied below `directory`, keeping
    their relative path; the markdown itself is written to
    ``directory/md_fname``.

    :param md: markdown text to export
    :param md_fname: file name of the exported markdown document
    :param directory: output directory (created, with its parents, if missing)
    :raises RuntimeError: if `directory` exists but is not a directory
    """
    import os
    import shutil
    import re

    if os.path.exists(directory) and not os.path.isdir(directory):
        raise RuntimeError(f"a non-directory file exists with name {directory:s}")

    if not os.path.exists(directory):
        print(f"creating directory {directory:s}")
        # makedirs (not mkdir) so that nested paths such as '_build/html/'
        # also work when the parent does not exist yet.
        os.makedirs(directory, exist_ok=True)

    fig_fnames = (re.compile(r'\[Fig.*\]\((.*)\)').findall(md) + 
                  re.compile(r'\<img src="([^>\s]*)"[^>]*/>').findall(md))
    print("found figures", fig_fnames)
    for fname in fig_fnames:
        if 'http' in fname:
            # No need to copy online figures
            continue
        if not os.path.exists(fname):
            print("file not found", fname)
            continue
        print("copying ", fname, "to", directory)
        # Mirror the figure's relative path below the output directory.
        destdir = os.path.join(directory, os.path.dirname(fname))
        destfname = os.path.join(destdir, os.path.basename(fname))
        os.makedirs(destdir, exist_ok=True)
        shutil.copy(fname, destfname)
    with open(os.path.join(directory, md_fname), 'w') as fout:
        fout.write(md)
    print("exported in ", os.path.join(directory, md_fname))
    for fk in fig_fnames:
        print("    + " + os.path.join(directory, fk))

In [8]:
# Write one markdown file per processed paper (plus its local figures)
# into the HTML build directory.
for paper_id, md in documents:
    export_markdown_summary(md, f"{paper_id:s}.md", '_build/html/')

found figures ['tmp_2406.13037/./Figures/2_ConstantErrorbars/constanterrorbars_posteriors.png', 'tmp_2406.13037/./Figures/3_ScaledSNR/scaledsnr_rmse.png', 'tmp_2406.13037/./Figures/legend.png', 'tmp_2406.13037/./Figures/2_ConstantErrorbars/constanterrorbars_rmse.png', 'tmp_2406.13037/./Figures/legend.png']
copying  tmp_2406.13037/./Figures/2_ConstantErrorbars/constanterrorbars_posteriors.png to _build/html/
copying  tmp_2406.13037/./Figures/3_ScaledSNR/scaledsnr_rmse.png to _build/html/
copying  tmp_2406.13037/./Figures/legend.png to _build/html/
copying  tmp_2406.13037/./Figures/2_ConstantErrorbars/constanterrorbars_rmse.png to _build/html/
copying  tmp_2406.13037/./Figures/legend.png to _build/html/
exported in  _build/html/2406.13037.md
    + _build/html/tmp_2406.13037/./Figures/2_ConstantErrorbars/constanterrorbars_posteriors.png
    + _build/html/tmp_2406.13037/./Figures/3_ScaledSNR/scaledsnr_rmse.png
    + _build/html/tmp_2406.13037/./Figures/legend.png
    + _build/html/tmp_2406

## Display the papers

Not necessary but allows for a quick check.

In [9]:
# Render every generated summary inline; a plain loop avoids building a
# throw-away list of None values just for the display side effect.
for paper_id, md in documents:
    display(Markdown(md))

<div class="macros" style="visibility:hidden;">
$\newcommand{\ensuremath}{}$
$\newcommand{\xspace}{}$
$\newcommand{\object}[1]{\texttt{#1}}$
$\newcommand{\farcs}{{.}''}$
$\newcommand{\farcm}{{.}'}$
$\newcommand{\arcsec}{''}$
$\newcommand{\arcmin}{'}$
$\newcommand{\ion}[2]{#1#2}$
$\newcommand{\textsc}[1]{\textrm{#1}}$
$\newcommand{\hl}[1]{\textrm{#1}}$
$\newcommand{\footnote}[1]{}$
$\newcommand{\R}[0]{\ensuremath{R}}$
$\newcommand{\Rv}[1]{\ensuremath{R = #1}}$
$\newcommand{\SN}[0]{\ensuremath{S/N}}$
$\newcommand{\SNv}[1]{\ensuremath{S/N = #1}}$
$\newcommand{\mic}[1]{\ensuremath{#1}~\textmu m}$
$\newcommand{\val}[3]{\ensuremath{#1^{+#2}_{-#3}}}$
$\newcommand{\pt}[0]{\textit{P}-\textit{T}}$
$\newcommand{\Rpl}[0]{\ensuremath{R_{\text{pl}}}}$
$\newcommand{\Mpl}[0]{\ensuremath{M_{\text{pl}}}}$
$\newcommand{\Teq}[0]{\ensuremath{T_\mathrm{eq}}}$
$\newcommand{\Ab}[0]{\ensuremath{A_\mathrm{B}}}$
$\newcommand{\Ps}[0]{\ensuremath{P_0}}$
$\newcommand{\Ts}[0]{\ensuremath{T_0}}$
$\newcommand{\life}[0]{LIFE}$
$\newcommand{\hwo}[0]{HWO}$
$\newcommand{\hwolife}[0]{HWO+LIFE}$
$\newcommand{\uvvisnir}[0]{UV/VIS/NIR}$
$\newcommand{\mir}[0]{MIR}$
$\newcommand{\lifesim}[0]{LIFE\textsc{sim}}$
$\newcommand{\prt}[0]{\texttt{petitRADTRANS}}$
$\newcommand{\pI}[0]{LIFE Paper~I}$
$\newcommand{\pII}[0]{LIFE Paper~II}$
$\newcommand{\pIII}[0]{LIFE Paper~III}$
$\newcommand{\pV}[0]{LIFE Paper~V}$
$\newcommand{\pIIIaV}[0]{LIFE Papers~III and~V}$
$\newcommand$</div>



<div id="title">

# Large Interferometer For Exoplanets (LIFE):

</div>
<div id="comments">

[![arXiv](https://img.shields.io/badge/arXiv-2406.13037-b31b1b.svg)](https://arxiv.org/abs/2406.13037)<mark>Appeared on: 2024-06-21</mark> -  _16 pages (main text, incl. 12 figures) + appendix; accepted for publication in A&A (current version: post 1st revision). Thirteenth paper of LIFE telescope series_

</div>
<div id="authors">

E. Alei, et al. -- incl., <mark>P. Mollière</mark>

</div>
<div id="abstract">

**Abstract:** Following the recommendations to NASA (in the Astro2020 Decadal survey) and ESA (through the Voyage2050 process), the search for life on exoplanets will be a priority in the next decades. Two concepts for direct imaging space missions are being developed for this purpose: the Habitable Worlds Observatory (HWO), and the Large Interferometer for Exoplanets (LIFE). These two concepts operate in different spectral regimes: HWO is focused on reflected light spectra in the ultraviolet/visible/near-infrared ( $\uvvisnir$ ), while LIFE will operate in the mid-infrared ( $\mir$ ) to capture the thermal emission of temperate exoplanets. In this study we aim to assess the potential of HWO and LIFE in characterizing a cloud-free Earth twin orbiting a Sun-like star at 10 parsec distance both as separate missions and in synergy with each other. We aim to quantify the increase in information that can be gathered by joint atmospheric retrievals on a habitable planet. We perform Bayesian retrievals on simulated data obtained by a $\hwo$ -like and a $\life$ -like mission separately, then jointly. We consider the baseline spectral resolutions currently assumed for these concepts and use two increasingly complex noise simulations, obtained using state-of-the-art noise simulators. A $\hwo$ -like concept would allow to strongly constrain $\ce{H2O}$ , $\ce{O2}$ , and $\ce{O3}$ , in the atmosphere of a cloud-free Earth twin, while the atmospheric temperature profile is not well constrained (with an average uncertainty $\approx$ 100 K). $\life$ -like observations would strongly constrain $\ce{CO2}$ , $\ce{H2O}$ , $\ce{O3}$ and provide stronger constraints on the thermal atmospheric structure and surface temperature (down to $\approx$ 10 K uncertainty). For all the investigated scenarios, both missions would provide an upper limit on $\ce{CH4}$ . A joint retrieval on $\hwo$ and $\life$ data would accurately define the atmospheric thermal profile and planetary parameters. 
It would decisively constrain $\ce{CO2}$ , $\ce{H2O}$ , $\ce{O2}$ , and $\ce{O3}$ and find weak constraints on $\ce{CO}$ and $\ce{CH4}$ . The significance of the detection is in all cases greater or equal than the single-instrument retrievals. Both missions provide specific information that is relevant for the characterization of a terrestrial habitable exoplanet, but the scientific yield can be maximized by considering synergistic studies of $\uvvisnir$ + $\mir$ observations. The use of $\hwo$ and $\life$ together will provide stronger constraints on biosignatures and life indicators, with the potential of being transformative for the search for life in the universe.

</div>

<div id="div_fig1">

<img src="tmp_2406.13037/./Figures/2_ConstantErrorbars/constanterrorbars_posteriors.png" alt="Fig3" width="100%"/>

**Figure 3. -** Posterior density distributions from the second set of retrievals (simplified noise). The black lines indicate the expected values for every parameter. $\hwo$ posteriors are shown in magenta with diagonal hatching; $\life$ posteriors are shown in cyan with crossed hatching; $\hwo$life posteriors are shown as fully colored gold histograms. (*fig:constanterrorbarsposteriors*)

</div>
<div id="div_fig2">

<img src="tmp_2406.13037/./Figures/3_ScaledSNR/scaledsnr_rmse.png" alt="Fig8.1" width="50%"/><img src="tmp_2406.13037/./Figures/legend.png" alt="Fig8.2" width="50%"/>

**Figure 8. -** Square root of the mean squared error (see Equation \ref{eq:mse}) for relevant parameters in the third set of retrievals (PSG/$\life$sim noise).    (*fig:scaledsnrmse*)

</div>
<div id="div_fig3">

<img src="tmp_2406.13037/./Figures/2_ConstantErrorbars/constanterrorbars_rmse.png" alt="Fig5.1" width="50%"/><img src="tmp_2406.13037/./Figures/legend.png" alt="Fig5.2" width="50%"/>

**Figure 5. -** Square root of the mean squared error (see Equation \ref{eq:mse}) for relevant parameters in the second set of retrievals (simplified noise). (*fig:constanterrorbarsmse*)

</div><div id="qrcode"><img src=https://api.qrserver.com/v1/create-qr-code/?size=100x100&data="https://arxiv.org/abs/2406.13037"></div>

<div class="macros" style="visibility:hidden;">
$\newcommand{\ensuremath}{}$
$\newcommand{\xspace}{}$
$\newcommand{\object}[1]{\texttt{#1}}$
$\newcommand{\farcs}{{.}''}$
$\newcommand{\farcm}{{.}'}$
$\newcommand{\arcsec}{''}$
$\newcommand{\arcmin}{'}$
$\newcommand{\ion}[2]{#1#2}$
$\newcommand{\textsc}[1]{\textrm{#1}}$
$\newcommand{\hl}[1]{\textrm{#1}}$
$\newcommand{\footnote}[1]{}$
$\newcommand{\gsim}{{\;\raise0.3ex\hbox{>\kern-0.75em\raise-1.1ex\hbox{\sim}}\;}}$</div>



<div id="title">

# $\bf{The emergence of the Star Formation Main Sequence with redshift unfolded by JWST}$

</div>
<div id="comments">

[![arXiv](https://img.shields.io/badge/arXiv-2406.13554-b31b1b.svg)](https://arxiv.org/abs/2406.13554)<mark>Appeared on: 2024-06-21</mark> -  _15 pages, 4 figures. Submitted to ApJ_

</div>
<div id="authors">

P. Rinaldi, et al. -- incl., <mark>L. Boogaard</mark>, <mark>F. Walter</mark>

</div>
<div id="abstract">

**Abstract:** We investigate the correlation between stellar mass (M $_{\star}$ ) and star formation rate (SFR) across the stellar mass range $\log_{10}(\rm M_{\star}/M_{\odot}) \approx 6-11$ . We consider almost 50,000 star-forming galaxies at $z\approx3-7$ , leveraging data from COSMOS/SMUVS, JADES/GOODS-SOUTH, and MIDIS/XDF. This is the first study spanning such a wide  stellar mass range without relying on gravitational lensing effects. We locate our galaxies on the $\mathrm{SFR - M_{\star}}$ plane to assess how the location of galaxies in the star-formation  main sequence (MS) and starburst (SB) region evolves with stellar mass and redshift. We find that the two star-forming modes tend to converge at $\log_{10}(\rm M_{\star}/M_{\odot}) < 7$ , with all galaxies found in the SB mode. By dissecting our galaxy sample in stellar mass and redshift, we show that the emergence of the star-formation MS is stellar-mass dependent: while in galaxies with $\log_{10}(\rm M_{\star}/M_{\odot}) > 9$ the MS is already well in place at $z=5-7$ , for galaxies with $\log_{10}(\rm M_{\star}/M_{\odot}) \approx 7-8$ it only becomes significant at $z<4$ .  Overall, our results are in line with previous findings that the SB mode dominates amongst low stellar-mass galaxies. The earlier emergence of the MS for massive galaxies is consistent with galaxy downsizing.

</div>

<div id="div_fig1">

<img src="tmp_2406.13554/./sSFR_MASS_evo.png" alt="Fig2" width="100%"/>

**Figure 2. -** The sSFR distribution of the entire sample (JADES/GOODS-SOUTH + COSMOS/SMUVS) divided in four distinct stellar mass bins. The entire plane is colour coded following the regions derived by [Caputi, Deshmukh and Ashby (2017)](): the star-formation MS for sSFR $>$ 10$^{-8.05}$ yr$^{-1}$, the Starburst cloud for sSFR $>$ 10$^{-7.60}$ yr$^{-1}$, and the Star Formation Valley for 10$^{-8.05}$ yr$^{-1}$$\leq$ sSFR $\leq$ 10$^{-7.60}$ yr$^{-1}$. The sSFR distribution are color coded by age, as derived by LePHARE. To consider the different areas covered by JADES/GOODS-SOUTH (67.7 arcmin$^{2}$) and COSMOS/SMUVS (0.66 deg$^{2}$), we normalized the JADES/GOODS-SOUTH counts to match the COSMOS/SMUVS survey area, which is approximately 35 times larger than that of JADES/GOODS-SOUTH. (*figure:ssfr_m_evo*)

</div>
<div id="div_fig2">

<img src="tmp_2406.13554/./SFR_M.png" alt="Fig1" width="100%"/>

**Figure 1. -** The $\mathrm{SFR}-\mathrm{M_{\star}}$ plane, showcasing all sources (JADES/GOODS-SOUTH + COSMOS/SMUVS) analyzed in this study, divided in redshift bins as indicated. The pale blue region marks the lower envelope for SB galaxies, based on the criteria from [Caputi, Deshmukh and Ashby (2017)](), [Caputi, Caminha and Fujimoto (2021)](). Fits for the MS and SB are derived from [Rinaldi and Caputi (2022)](). The gray shaded area represents the SFR threshold derived from the 2$\sigma$ detection of the JADES images used in this work. The vertical dashed line in each panel refers to the stellar mass completeness (75\%) of JADES sample at each redshift. The error bar showed in gray (upper right panel) indicate the median uncertainties on M$_{\star}$ and SFR. White contours are also presented to show the bimodality between MS and SB. (*figure:sfr_m_plane*)

</div>
<div id="div_fig3">

<img src="tmp_2406.13554/./sSFR_REDSHIFT_evo.png" alt="Fig3" width="100%"/>

**Figure 3. -** The sSFR distribution of the entire sample (JADES/GOODS-SOUTH + COSMOS/SMUVS) divided, this time, in four distinct stellar mass bins and four redshift bins. Each column refers to a specific redshift bin, while each row refers to a specific stellar mass bin. All 16 panels are color coded following [Caputi, Deshmukh and Ashby (2017)](): the star-formation MS for sSFR $>$ 10$^{-8.05}$ yr$^{-1}$, the Starburst cloud for sSFR $>$ 10$^{-7.60}$ yr$^{-1}$, and the Star Formation Valley for 10$^{-8.05}$ yr$^{-1}$$\leq$ sSFR $\leq$ 10$^{-7.60}$ yr$^{-1}$. Also in this case, as we did in Figure \ref{figure:ssfr_m_evo}, to consider the different areas covered by JADES/GOODS-SOUTH (67.7 arcmin$^{2}$) and COSMOS/SMUVS (0.66 deg$^{2}$), we normalized the JADES/GOODS-SOUTH counts to match the COSMOS/SMUVS survey area, which is approximately 35 times larger than that of JADES/GOODS-SOUTH. (*figure:ssfr_z_evo*)

</div><div id="qrcode"><img src=https://api.qrserver.com/v1/create-qr-code/?size=100x100&data="https://arxiv.org/abs/2406.13554"></div>

# Create HTML index

In [10]:
from datetime import datetime, timedelta, timezone
from glob import glob
import os

# Select the exported markdown files whose st_ctime lies within the last `days` days.
files = glob('_build/html/*.md')
days = 7
# Timezone-aware "now" in UTC: both sides of the subtraction below are in the
# same timezone (mixing naive local time with naive UTC skews the age by the
# local UTC offset).
now = datetime.now(timezone.utc)
res = []
for fk in files:
    stat_result = os.stat(fk).st_ctime
    modified = datetime.fromtimestamp(stat_result, tz=timezone.utc)
    delta = now - modified
    if delta <= timedelta(days=days):
        # total_seconds() keeps the day component, which .seconds drops
        res.append((delta.total_seconds(), fk))
# Final list: file names only, in reverse alphabetical order of the path.
res = [k[1] for k in reversed(sorted(res, key=lambda x:x[1]))]
npub = len(res)
print(len(res), f" publications files modified in the last {days:d} days.")
# [ print('\t', k) for k in res ];

100  publications files modified in the last 7 days.


In [11]:
import datetime
from glob import glob

def get_last_n_days(lst, days=1):
    """ Get the documents from the last n days

    :param lst: sequence of `(filename, date)` pairs, `date` being a string
    :param days: number of days to look back from today
    :returns: generator over the filenames whose date is on or after the
              cutoff, most recent first

    The comparison is done on strings against `str(date)` (i.e. `YYYY-MM-DD`),
    so it is only meaningful if the dates in `lst` use that same format
    -- NOTE(review): confirm against what `extract_appearance_dates` yields.
    """
    # Imported locally under an alias: another cell of this notebook binds the
    # global name `datetime` to the class (`from datetime import datetime`),
    # which would break `datetime.date.today()` depending on execution order.
    import datetime as dt

    # Loop-invariant threshold, computed once.
    cutoff = str(dt.date.today() - dt.timedelta(days=days))
    for fname, date in sorted(lst, key=lambda x: x[1], reverse=True):
        if date < cutoff:
            # Entries are in descending order: nothing further can match.
            break
        yield fname

def extract_appearance_dates(lst_file):
    """ Extract the "Appeared on" date of each given file

    :param lst_file: iterable of paths to the files to scan
    :returns: list of `(filename, date)` pairs, where `date` is the stripped
              text found between `Appeared on:` and `</mark>` on the first
              line containing "Appeared on". Files without such a line are
              left out of the result.
    """
    def get_date(line):
        # Keep what follows the label, cut at the closing tag when present.
        return line\
            .split('Appeared on:')[-1]\
            .split('</mark>')[0].strip()

    dates = []
    # Iterate over the argument (not a notebook-level global list).
    for fname in lst_file:
        with open(fname, 'r') as f:
            for line in f:
                if "Appeared on" in line:
                    dates.append((fname, get_date(line)))
                    # Only the first occurrence per file is recorded.
                    break
    return dates

from glob import glob

# Date-based selection: read the "Appeared on" date of every generated
# markdown file and keep those from the last `days` days.
# The pattern is '*.md' (with the dot) so that only markdown files match.
lst = glob('_build/html/*.md')
days = 7
dates = extract_appearance_dates(lst)
res = list(get_last_n_days(dates, days))
npub = len(res)
print(len(res), f" publications in the last {days:d} days.")

6  publications in the last 7 days.


In [12]:
def create_carousel(npub=4):
    """ Build the HTML markup of a carousel holding `npub` slides

    :param npub: number of slides; their element ids are `slide1` .. `slide<npub>`
    :returns: the markup as a single newline-joined string
    """
    slide_tpl = """    <div class="carousel-cell"> <div id="slide{k}" class="md_view">Content {k}</div> </div>"""
    # Opening tag, split over two lines; it carries the `data-flickity` options.
    opening = [
        """  <div class="carousel" """,
        """       data-flickity='{ "autoPlay": 10000, "adaptiveHeight": true, "resize": true, "wrapAround": true, "pauseAutoPlayOnHover": true, "groupCells": 1 }' id="asyncTypeset">""",
    ]
    cells = [slide_tpl.format(k=index) for index in range(1, npub + 1)]
    closing = ["  </div>"]
    return '\n'.join(opening + cells + closing)

def create_grid(npub=4):
    """ Build the HTML markup of a flat grid holding `npub` slides

    :param npub: number of grid items; their element ids are `slide1` .. `slide<npub>`
    :returns: the markup as a single newline-joined string
    """
    cell_tpl = """    <div class="grid-item"> <div id="slide{k}" class="md_view">Content {k}</div> </div>"""
    rows = ["""  <div class="grid"> """]
    rows.extend(cell_tpl.format(k=index) for index in range(1, npub + 1))
    rows.append("  </div>")
    return '\n'.join(rows)

In [13]:
# Build the 7-day archive page: one carousel slide per selected document.
carousel = create_carousel(npub)
# Quoted, comma-separated lists: document file names and slide element ids.
docs = ', '.join('"{0:s}"'.format(path.split('/')[-1]) for path in res)
slides = ', '.join(f'"slide{index}"' for index in range(1, npub + 1))

with open("daily_template.html", "r") as template_file:
    template_text = template_file.read()

# Substitute the template placeholders.
page = (
    template_text
    .replace("{%-- carousel:s --%}", carousel)
    .replace("{%-- suptitle:s --%}", "7-day archives")
    .replace("{%-- docs:s --%}", docs)
    .replace("{%-- slides:s --%}", slides)
)

with open("_build/html/index_7days.html", 'w') as output_file:
    output_file.write(page)

In [14]:
# Same page generation as for the 7-day archive, restricted to the last day.
days = 1
res = list(get_last_n_days(dates, days))
npub = len(res)
print(npub, " publications in the last day.")

carousel = create_carousel(npub)
# Quoted, comma-separated lists: document file names and slide element ids.
docs = ', '.join('"{0:s}"'.format(path.split('/')[-1]) for path in res)
slides = ', '.join(f'"slide{index}"' for index in range(1, npub + 1))

with open("daily_template.html", "r") as template_file:
    template_text = template_file.read()

# Substitute the template placeholders.
page = (
    template_text
    .replace("{%-- carousel:s --%}", carousel)
    .replace("{%-- suptitle:s --%}", "Daily")
    .replace("{%-- docs:s --%}", docs)
    .replace("{%-- slides:s --%}", slides)
)

with open("_build/html/index_daily.html", 'w') as output_file:
    output_file.write(page)

3  publications in the last day.


In [15]:
# Create the flat grid of the last N papers (fixed number regardless of dates)
from itertools import islice 

n_last = 6  # maximum number of papers shown in the grid
# Most recent dates first; keep at most `n_last` file names.
res = [entry[0] for entry in islice(reversed(sorted(dates, key=lambda x: x[1])), n_last)]
# Derive the slide count from what was actually selected: with fewer than
# `n_last` dated files, a fixed count would create slides with no document.
npub = len(res)
print(f"{npub} publications selected (at most {n_last}).")

grid = create_grid(npub)
# Quoted, comma-separated lists: document file names and slide element ids.
docs = ', '.join(['"{0:s}"'.format(k.split('/')[-1]) for k in res])
slides = ', '.join([f'"slide{k}"' for k in range(1, npub + 1)])

with open("grid_template.html", "r") as tpl:
    page = tpl.read()
    page = page.replace("{%-- grid-content:s --%}", grid)\
               .replace("{%-- suptitle:s --%}",  f"Last {npub:,d} papers" )\
               .replace("{%-- docs:s --%}", docs)\
               .replace("{%-- slides:s --%}", slides)
    
with open("_build/html/index_npub_grid.html", 'w') as fout:
    fout.write(page)

6  6 publications selected.
