# MPIA Arxiv on Deck 2

Contains the steps to produce the paper extractions.

In [1]:
# Imports
import os
from IPython.display import Markdown, display
from tqdm.notebook import tqdm
import warnings
from PIL import Image 

# requires arxiv_on_deck_2

from arxiv_on_deck_2.arxiv2 import (get_new_papers, 
                                    get_paper_from_identifier,
                                    retrieve_document_source, 
                                    get_markdown_badge)
from arxiv_on_deck_2 import (latex,
                             latex_bib,
                             mpia,
                             highlight_authors_in_list)

# Sometimes images are really big
Image.MAX_IMAGE_PIXELS = 1000000000 

In [2]:
# Some useful definitions.

class AffiliationWarning(UserWarning):
    pass

class AffiliationError(RuntimeError):
    pass

def validation(source: str):
    """Raises error paper during parsing of source file
    
    Allows checks before parsing TeX code.
    
    Raises AffiliationWarning
    """
    check = mpia.affiliation_verifications(source, verbose=True)
    if check is not True:
        raise AffiliationError("mpia.affiliation_verifications: " + check)

        
warnings.simplefilter('always', AffiliationWarning)


def get_markdown_qrcode(paper_id: str):
    """ Generate a qrcode to the arxiv page using qrserver.com
    
    :param paper: Arxiv paper
    :returns: markdown text
    """
    url = r"https://api.qrserver.com/v1/create-qr-code/?size=100x100&data="
    txt = f"""<img src={url}"https://arxiv.org/abs/{paper_id}">"""
    txt = '<div id="qrcode">' + txt + '</div>'
    return txt

## get list of arxiv paper candidates

We use the MPIA mitarbeiter list webpage from mpia.de to get author names
We then get all new papers from Arxiv and match authors

In [3]:
# get list from MPIA website
# it automatically filters identified non-scientists :func:`mpia.filter_non_scientists`
mpia_authors = mpia.get_mpia_mitarbeiter_list()
normed_mpia_authors = [k[1] for k in mpia_authors]   # initials + fullname
new_papers = get_new_papers()
# add manual references
add_paper_refs = []
new_papers.extend([get_paper_from_identifier(k) for k in add_paper_refs])

candidates = []
for paperk in new_papers:
    # Check author list with their initials
    normed_author_list = [mpia.get_initials(k) for k in paperk['authors']]
    hl_authors = highlight_authors_in_list(normed_author_list, normed_mpia_authors, verbose=True)
    matches = [(hl, orig) for hl, orig in zip(hl_authors, paperk['authors']) if 'mark' in hl]
    paperk['authors'] = hl_authors
    if matches:
        # only select paper if an author matched our list
        candidates.append(paperk)
print("""Arxiv has {0:,d} new papers today""".format(len(new_papers)))        
print("""          {0:,d} with possible author matches""".format(len(candidates)))

J. Li  ->  J. Li  |  ['J. Li']
X. Zhang  ->  X. Zhang  |  ['X. Zhang']
M. Samland  ->  M. Samland  |  ['M. Samland']
J. Bouwman  ->  J. Bouwman  |  ['J. Bouwman']
G. Perotti  ->  G. Perotti  |  ['G. Perotti']
J. Schreiber  ->  J. Schreiber  |  ['J. Schreiber']
K. Schwarz  ->  K. Schwarz  |  ['K. Schwarz']
J. Liu  ->  J. Liu  |  ['J. Liu']
Arxiv has 52 new papers today
          4 with possible author matches


# Parse sources and generate relevant outputs

From the candidates, we do the following steps:
* get their tarball from ArXiv (and extract data)
* find the main .tex file: find one with \documentclass{...} (sometimes it's non trivial)
* Check affiliations with :func:`validation`, which uses :func:`mpia.affiliation_verifications`
* If passing the affiliations: we parse the .tex source
   * inject sub-documents into the main (flatten the main document)
   * parse structure, extract information (title, abstract, authors, figures...)
   * handles `\graphicspath` if provided
* Generate the .md document.

In [4]:
documents = []
failed = []
for paper in tqdm(candidates):
    paper_id = paper['identifier'].lower().replace('arxiv:', '')
    
    folder = f'tmp_{paper_id}'

    try:
        if not os.path.isdir(folder):
            folder = retrieve_document_source(f"{paper_id}", f'tmp_{paper_id}')
        
        try:
            doc = latex.LatexDocument(folder, validation=validation)    
        except AffiliationError as affilerror:
            msg = f"ArXiv:{paper_id:s} is not an MPIA paper... " + str(affilerror)
            failed.append((paper, "affiliation error: " + str(affilerror) ))
            continue
        
        # Hack because sometimes author parsing does not work well
        if (len(doc.authors) != len(paper['authors'])):
            doc._authors = paper['authors']
        else:
            # highlight authors (FIXME: doc.highlight_authors)
            # done on arxiv paper already
            doc._authors = highlight_authors_in_list(
                [mpia.get_initials(k) for k in doc.authors], 
                normed_mpia_authors, verbose=True)
        if (doc.abstract) in (None, ''):
            doc._abstract = paper['abstract']
            
        doc.comment = (get_markdown_badge(paper_id) + 
                       "<mark>Appeared on: " + paper['date'] + "</mark> - ")
        if paper['comments']:
            doc.comment += " _" + paper['comments'] + "_"
        
        full_md = doc.generate_markdown_text()
        
        full_md += get_markdown_qrcode(paper_id)
        
        # replace citations
        try:
            bibdata = latex_bib.LatexBib.from_doc(doc)
            full_md = latex_bib.replace_citations(full_md, bibdata)
        except Exception as e:
            print(e)
        
        documents.append((paper_id, full_md))
    except Exception as e:
        warnings.warn(latex.LatexWarning(f"{paper_id:s} did not run properly\n" +
                                         str(e)
                                        ))
        failed.append((paper, "latex error " + str(e)))

  0%|          | 0/4 [00:00<?, ?it/s]

Retrieving document from  https://arxiv.org/e-print/2403.08850


extracting tarball to tmp_2403.08850... done.



  exec(code_obj, self.user_global_ns, self.user_ns)

  exec(code_obj, self.user_global_ns, self.user_ns)


J. Li  ->  J. Li  |  ['J. Li']


Found 79 bibliographic references in tmp_2403.08850/main.bbl.
syntax error in line 298: '=' expected
Retrieving document from  https://arxiv.org/e-print/2403.09088


extracting tarball to tmp_2403.09088...

 done.
Retrieving document from  https://arxiv.org/e-print/2403.09210


'utf-8' codec can't decode byte 0xa1 in position 1390: invalid start byte


extracting tarball to tmp_2403.09210...

 done.


Found 148 bibliographic references in tmp_2403.09210/PASP_HenningKamp.bbl.
syntax error in line 81: '=' expected
Retrieving document from  https://arxiv.org/e-print/2403.09464


extracting tarball to tmp_2403.09464...

 done.


### Export the logs

Throughout, we also keep track of the logs per paper. see `logs-{today date}.md` 

In [5]:
import datetime
today = str(datetime.date.today())
logfile = f"_build/html/logs/log-{today}.md"


with open(logfile, 'w') as logs:
    # Success
    logs.write(f'# Arxiv on Deck 2: Logs - {today}\n\n')
    logs.write("""* Arxiv had {0:,d} new papers\n""".format(len(new_papers)))
    logs.write("""    * {0:,d} with possible author matches\n\n""".format(len(candidates)))
    logs.write("## Sucessful papers\n\n")
    display(Markdown("## Successful papers"))
    success = [k[0] for k in documents]
    for candid in candidates:
        if candid['identifier'].split(':')[-1] in success:
            display(candid)
            logs.write(candid.generate_markdown_text() + '\n\n')

    ## failed
    logs.write("## Failed papers\n\n")
    display(Markdown("## Failed papers"))
    failed = sorted(failed, key=lambda x: x[1])
    current_reason = ""
    for paper, reason in failed:
        if 'affiliation' in reason:
            color = 'green'
        else:
            color = 'red'
        data = Markdown(
                paper.generate_markdown_text() + 
                f'\n|<p style="color:{color:s}"> **ERROR** </p>| <p style="color:{color:s}">{reason:s}</p> |'
               )
        if reason != current_reason:
            logs.write(f'### {reason:s} \n\n')
            current_reason = reason
        logs.write(data.data + '\n\n')
        
        # only display here the important errors (all in logs)
        # if color in ('red',):
        display(data)

## Successful papers


|||
|---:|:---|
| [![arXiv](https://img.shields.io/badge/arXiv-arXiv:2403.08850-b31b1b.svg)](https://arxiv.org/abs/arXiv:2403.08850) | **The Present-Day Mass Function of Star Clusters in the Solar Neighborhood**  |
|| X. Pang, et al. -- incl., <mark>J. Li</mark> |
|*Appeared on*| *2024-03-15*|
|*Comments*| *19 pages, 10 figures, accepted to ApJ*|
|**Abstract**| This work analyses the present-day mass function (PDMF) of 93~star clusters utilizing Gaia DR3 data, with membership determined by the StarGo machine learning algorithm. The impact of unresolved binary systems on mass estimation is rigorously assessed, adopting three mass ratio profiles for correction. The PDMF is characterized by the power-law index, $\alpha$, derived through a robust maximum likelihood method that avoids biases associated with data binning. The value of $\alpha$ for stars between the completeness limited mass of Gaia with a mean 0.3 $M_\odot$ for our cluster samples and 2 $M_\odot$, exhibits stability for clusters younger than 200 Myr, decreasing for older clusters, particularly when considering stars within the half-mass radius. The PDMF of these star clusters is consistent with a dynamically evolved Kroupa IMF via the loss of low-mass stars. Cluster morphology shows a correlation with $\alpha$, as $\alpha$ values exhibit a decreasing trend from filamentary to tidal-tail clusters, mirroring the sequence of increasing cluster age. The dependence of $\alpha$ on total cluster mass is weak, with a subtle increase for higher-mass clusters, especially outside the half-mass radius. We do not observe a correlation between $\alpha$ and the mean metallicity of the clusters. Younger clusters have lower metallicity compared to their older counterparts, which indicates that the older clusters might have migrated to the solar neighbourhood from the inner disk. A comparison with numerical models incorporating a black hole population suggests the need for observations of distant, older, massive open clusters to determine whether or not they contain black holes. |


|||
|---:|:---|
| [![arXiv](https://img.shields.io/badge/arXiv-arXiv:2403.09210-b31b1b.svg)](https://arxiv.org/abs/arXiv:2403.09210) | **MINDS: The JWST MIRI Mid-INfrared Disk Survey**  |
|| T. Henning, et al. -- incl., <mark>M. Samland</mark>, <mark>J. Bouwman</mark>, <mark>G. Perotti</mark>, <mark>J. Schreiber</mark>, <mark>K. Schwarz</mark> |
|*Appeared on*| *2024-03-15*|
|*Comments*| *accepted for publication in PASP*|
|**Abstract**| The study of protoplanetary disks has become increasingly important with the Kepler satellite finding that exoplanets are ubiquitous around stars in our galaxy and the discovery of enormous diversity in planetary system architectures and planet properties. High-resolution near-IR and ALMA images show strong evidence for ongoing planet formation in young disks. The JWST MIRI mid-INfrared Disk Survey (MINDS) aims to (1) investigate the chemical inventory in the terrestrial planet-forming zone across stellar spectral type, (2) follow the gas evolution into the disk dispersal stage, and (3) study the structure of protoplanetary and debris disks in the thermal mid-IR. The MINDS survey will thus build a bridge between the chemical inventory of disks and the properties of exoplanets. The survey comprises 52 targets (Herbig Ae stars, T Tauri stars, very low-mass stars and young debris disks). We primarily obtain MIRI/MRS spectra with high S/N (~100-500) covering the complete wavelength range from 4.9 to 27.9 {\mu}m. For a handful of selected targets we also obtain NIRSpec IFU high resolution spectroscopy (2.87-5.27 {\mu}m). We will search for signposts of planet formation in thermal emission of micron-sized dust - information complementary to near-IR scattered light emission from small dust grains and emission from large dust in the submillimeter wavelength domain. We will also study the spatial structure of disks in three key systems that have shown signposts for planet formation, TW Hya and HD 169142 using the MIRI coronagraph at 15.5 {\mu}m and 10.65 {\mu}m respectively and PDS70 using NIRCam imaging in the 1.87 {\mu}m narrow and the 4.8 {\mu}m medium band filter. ... |

## Failed papers


|||
|---:|:---|
| [![arXiv](https://img.shields.io/badge/arXiv-arXiv:2403.09464-b31b1b.svg)](https://arxiv.org/abs/arXiv:2403.09464) | **New constraints on Triton's atmosphere from the 6 October 2022 stellar  occultation**  |
|| Y. Yuan, et al. -- incl., <mark>J. Liu</mark> |
|*Appeared on*| *2024-03-15*|
|*Comments*| *Astronomy & Astrophysics, in press. 9 pages, 2 figures, 3 tables*|
|**Abstract**| The atmosphere of Triton was probed directly by observing a ground-based stellar occultation on 6 October 2022. This rare event yielded 23 positive light curves collected from 13 separate observation stations contributing to our campaign. The significance of this event lies in its potential to directly validate the modest pressure fluctuation on Triton, a phenomenon not definitively verified by previous observations, including only five stellar occultations, and the Voyager 2 radio occultation in 1989. Using an approach consistent with a comparable study, we precisely determined a surface pressure of $14.07_{-0.13}^{+0.21}~\mathrm{\mu bar}$ in 2022. This new pressure rules out any significant monotonic variation in pressure between 2017 and 2022 through direct observations, as it is in alignment with the 2017 value. Additionally, both the pressures in 2017 and 2022 align with the 1989 value. This provides further support for the conclusion drawn from the previous volatile transport model simulation, which is consistent with the observed alignment between the pressures in 1989 and 2017; that is to say, the pressure fluctuation is modest. Moreover, this conclusion suggests the existence of a northern polar cap extended down to at least $45^\circ$N$-60^\circ$N and the presence of nitrogen between $30^\circ$S and $0^\circ$. |
|<p style="color:green"> **ERROR** </p>| <p style="color:green">affiliation error: mpia.affiliation_verifications: 'Heidelberg' keyword not found.</p> |


|||
|---:|:---|
| [![arXiv](https://img.shields.io/badge/arXiv-arXiv:2403.09088-b31b1b.svg)](https://arxiv.org/abs/arXiv:2403.09088) | **The fundamental plane of blazars based on the black hole spin-mass  energy**  |
|| <mark>X. Zhang</mark>, et al. |
|*Appeared on*| *2024-03-15*|
|*Comments*| *Accepted for publication in MNRAS*|
|**Abstract**| We examine the fundamental plane of 91 Blazars which include FSRQs and BL Lacs with known X-ray luminosity ($L_{R}$), radio luminosity ($L_X$), and black hole mass measurements ($M$) to reflect the relationship between jet and accretion for blazars. The fundamental plane of Blazars are log$L_{R}$=${0.273}_{+0.059}^{-0.059}$log$L_X$+${0.695}_{+0.191}^{-0.191}$log$M$+${25.457}_{+2.728}^{-2.728}$ and log$L_{R}$=${0.190}_{+0.049}^{-0.049}$log$L_X$+${0.475}_{+0.157}^{-0.157}$log$M$+${28.568}_{+2.245}^{-2.245}$ after considering the effect of beam factor. Our results suggest that the jet of blazars has connection with accretion. We set the black hole spin energy as a new variable to correct the black hole mass and explore the effect of black hole spin on the fundamental relationship. We find that the fundamental plane of Blazars is effected by the black hole spin, which is similar to the previous work for AGNs. We additionally examine a new fundamental plane which is based on the black hole spin-mass energy ($M_{spin}$). The new fundamental plane (log$L_{R}$=${0.332}_{+0.081}^{-0.081}$log$L_X$+${0.502}_{+0.091}^{-0.091}$log$M_{spin}$+${22.606}_{+3.346}^{-3.346}$ with R-Square=0.575) shows that $M_{spin}$ has a better correlation coefficient comparing to the $M$ for fundamental plane of Blazars. These results support that the black hole spin should be considered as a important factor for the study of fundamental plane for Blazars. And these may further our understanding of the Blandford-Znajek process in blazars. |
|<p style="color:red"> **ERROR** </p>| <p style="color:red">latex error 'utf-8' codec can't decode byte 0xa1 in position 1390: invalid start byte</p> |

## Export documents

We now write the .md files and export relevant images

In [6]:
def export_markdown_summary(md: str, md_fname:str, directory: str):
    """Export MD document and associated relevant images"""
    import os
    import shutil
    import re

    if (os.path.exists(directory) and not os.path.isdir(directory)):
        raise RuntimeError(f"a non-directory file exists with name {directory:s}")

    if (not os.path.exists(directory)):
        print(f"creating directory {directory:s}")
        os.mkdir(directory)

    fig_fnames = (re.compile(r'\[Fig.*\]\((.*)\)').findall(md) + 
                  re.compile(r'\<img src="([^>\s]*)"[^>]*/>').findall(md))
    for fname in fig_fnames:
        if 'http' in fname:
            # No need to copy online figures
            continue
        destdir = os.path.join(directory, os.path.dirname(fname))
        destfname = os.path.join(destdir, os.path.basename(fname))
        try:
            os.makedirs(destdir)
        except FileExistsError:
            pass
        shutil.copy(fname, destfname)
    with open(os.path.join(directory, md_fname), 'w') as fout:
        fout.write(md)
    print("exported in ", os.path.join(directory, md_fname))
    [print("    + " + os.path.join(directory,fk)) for fk in fig_fnames]

In [7]:
for paper_id, md in documents:
    export_markdown_summary(md, f"{paper_id:s}.md", '_build/html/')

exported in  _build/html/2403.08850.md
    + _build/html/tmp_2403.08850/./NGC_6991_info.png
    + _build/html/tmp_2403.08850/./Fig6_alpha-Age_Mass_Mar07.png
    + _build/html/tmp_2403.08850/./Fig3_Hist.png
exported in  _build/html/2403.09210.md
    + _build/html/tmp_2403.09210/./TWHya-zoom-molecular-spectra-v3.png
    + _build/html/tmp_2403.09210/./TWHya-abundances_blue2_v2.png
    + _build/html/tmp_2403.09210/./TWHya_5-25mic_spectrum_with_models_v2.png


## Display the papers

Not necessary but allows for a quick check.

In [8]:
[display(Markdown(k[1])) for k in documents];

<div class="macros" style="visibility:hidden;">
$\newcommand{\ensuremath}{}$
$\newcommand{\xspace}{}$
$\newcommand{\object}[1]{\texttt{#1}}$
$\newcommand{\farcs}{{.}''}$
$\newcommand{\farcm}{{.}'}$
$\newcommand{\arcsec}{''}$
$\newcommand{\arcmin}{'}$
$\newcommand{\ion}[2]{#1#2}$
$\newcommand{\textsc}[1]{\textrm{#1}}$
$\newcommand{\hl}[1]{\textrm{#1}}$
$\newcommand{\footnote}[1]{}$
$\newcommand{\pmra}{\mu_\alpha \cos\delta}$
$\newcommand{\pmdec}{\mu_\delta}$
$\newcommand{\rh}{r_{\rm h}}$
$\newcommand{\Teff}{T_\mathrm{eff}}$
$\newcommand{\zt}[1]{\textcolor{blue}{  #1}}$
$\newcommand{\xy}[1]{\textcolor{blue}{  #1}}$
$\newcommand{\yf}[1]{\textcolor{orange}{  #1}}$
$\newcommand{\jd}[1]{\textcolor{teal}{  #1}}$
$\newcommand{\mjnote}[1]{\textit{\textcolor{magenta}{[mingjie:#1]}}}$
$\newcommand{\mj}[1]{\textcolor{magenta}{#1}}$
$\newcommand{\tk}[1]{\textcolor{red}{  #1}}$</div>



<div id="title">

# The Present-Day Mass Function of Star Clusters in the Solar Neighborhood

</div>
<div id="comments">

[![arXiv](https://img.shields.io/badge/arXiv-2403.08850-b31b1b.svg)](https://arxiv.org/abs/2403.08850)<mark>Appeared on: 2024-03-15</mark> -  _19 pages, 10 figures, accepted to ApJ_

</div>
<div id="authors">

X. Pang, et al. -- incl., <mark>J. Li</mark>

</div>
<div id="abstract">

**Abstract:** This work analyses the present-day mass function (PDMF) of 93 star clusters utilizing Gaia DR3 data, with membership determined by the \texttt{StarGo} machine learning algorithm.The impact of unresolved binary systems on mass estimation is rigorously assessed, adopting three mass ratio profiles for correction. The PDMF is characterized by the power-law index, $\alpha$ , derived through a robust maximum likelihood method that avoids biases associated with data binning.The value of $\alpha$ for stars between the completeness limited mass of Gaia (with a mean 0.3 $M_\odot$ for our cluster samples) and 2 $M_\odot$ , exhibits stability for clusters younger than 200 Myr, decreasing for older clusters, particularly when considering stars within the half-mass radius.The PDMF of these star clusters is consistent with a dynamically evolved Kroupa IMF via the loss of low-mass stars. Cluster morphology shows a correlation with $\alpha$ , as $\alpha$ values exhibit a decreasing trend from filamentary to tidal-tail clusters, mirroring the sequence of increasing cluster age. The dependence of $\alpha$ on total cluster mass is weak, with a subtle increase for ${ higher-mass}$ clusters, especially outside the half-mass radius.  We do not observe a correlation between $\alpha$ and the mean metallicity of the clusters. Younger clusters have lower metallicity compared to their older counterparts, which indicates that the older ${  clusters}$ might ${  have migrated}$ to the solar neighbourhood from the inner disk. A comparison with numerical models incorporating a black hole population suggests the need for observations of distant, older, massive open clusters to ${  determine whether or not they contain black holes}$ .

</div>

<div id="div_fig1">

<img src="tmp_2403.08850/./NGC_6991_info.png" alt="Fig6" width="100%"/>

**Figure 6. -** (a): A 100$\times$100 2D neural network generated by StarGO for NGC 6991. Each grid is one neuron. Neurons corresponding to member stars (5\% contamination rate) formed a blue patch in this figure. (b): {  CMD} obtained from the Gaia DR 3 absolute magnitude ${\rm M}_G$ for member stars in NGC 6991. The PARSEC isochrones of the fitted age are indicated with the black solid curve, the fitted extinction and metallicity are also indicated in the upper right corner. (c): Spatial distribution of member stars (blue dots) selected by StarGO. (d) The proper motion vector plot for member stars.  (*fig:ngc6991*)

</div>
<div id="div_fig2">

<img src="tmp_2403.08850/./Fig6_alpha-Age_Mass_Mar07.png" alt="Fig9" width="100%"/>

**Figure 9. -**  Dependence of the power-law index ($\alpha$) of the most probable PDMF on the cluster age (left panels) and total cluster mass above completeness limit, $M_{\rm cl}$(right panels). The uniform mass ratio distribution is used for binary correction. The grey dashed-dotted line in each panel corresponds to $\alpha=2.04$ computed from the \citet{kroupa2001} IMF in the mass range from 0.3 $M_\odot$ to 2 $M_\odot$. The values of $\alpha$ in panels (a) and (b) are computed for all members in the cluster.
    The orange triangles are average values of cluster age and $\alpha$ for all 15--16 clusters in each bin, with the standard deviation indicated by the error bar. In panels (c) and (d), we separate each cluster into two parts and derive the PDMF individually: within the half-mass radius ($r<r_h$: blue dots) and outside half-mass radius ($r>r_h$: orange dots). The orange and blue triangles are computed in the same manner as in panels (a) and (b). We exclude the disrupted cluster Group X from panels (c) and (d) since it is hard to define the cluster center for its two-piece-fragmented shape.
    The quantity $s$ is Spearman’s rank correlation coefficient, and $p$ is the probability of the null hypothesis (i.e., that no correlation exists between two variables) of the correlation test. A $p$ value of less than 0.1 indicates that the null hypothesis is rejected.
     (*fig:alpha_age_mass*)

</div>
<div id="div_fig3">

<img src="tmp_2403.08850/./Fig3_Hist.png" alt="Fig7" width="100%"/>

**Figure 7. -** The most probable PDMFs of example clusters Pleiades (a, b, c) and Praesepe (d, e, f) after binary correction, considering three different mass ratio distributions. The red vertical dashed line indicates the completeness limited mass of Gaia DR 3 data $m_{\rm lower}$, which is 0.28 $M_\odot$ and 0.31 $M_\odot$ for Pleiades and Praesepe respectively. The red vertical dotted line corresponds to the upper mass limit  $m_{\rm upper}$ of 2 $M_\odot$. The blue curves are the mass distributions of Pleiades and Praesepe. The PDMFs (orange dashed lines) are determined only for the stellar mass between  $m_{\rm lower}$ and  $m_{\rm upper}$. The power-law index $\alpha$ value obtained from the maximum likelihood method in Section \ref{sec:MLE} is indicated in each panel.
     (*fig:mass_function*)

</div><div id="qrcode"><img src=https://api.qrserver.com/v1/create-qr-code/?size=100x100&data="https://arxiv.org/abs/2403.08850"></div>

<div class="macros" style="visibility:hidden;">
$\newcommand{\ensuremath}{}$
$\newcommand{\xspace}{}$
$\newcommand{\object}[1]{\texttt{#1}}$
$\newcommand{\farcs}{{.}''}$
$\newcommand{\farcm}{{.}'}$
$\newcommand{\arcsec}{''}$
$\newcommand{\arcmin}{'}$
$\newcommand{\ion}[2]{#1#2}$
$\newcommand{\textsc}[1]{\textrm{#1}}$
$\newcommand{\hl}[1]{\textrm{#1}}$
$\newcommand{\footnote}[1]{}$
$\newcommand{\vdag}{(v)^\dagger}$
$\newcommand$
$\newcommand$
$\newcommand{\ikc}[1]{\textcolor{blue}{\textsf{IK: #1 }}}$
$\newcommand{\ik}[1]{\textcolor{green}{\textsf{#1}}}$
$\newcommand{\gp}[1]{\textcolor{cyan}{\textsf{GP: #1 }}}$</div>



<div id="title">

# MINDS: The JWST MIRI Mid-INfrared Disk Survey

</div>
<div id="comments">

[![arXiv](https://img.shields.io/badge/arXiv-2403.09210-b31b1b.svg)](https://arxiv.org/abs/2403.09210)<mark>Appeared on: 2024-03-15</mark> -  _accepted for publication in PASP_

</div>
<div id="authors">

T. Henning, et al. -- incl., <mark>M. Samland</mark>, <mark>J. Bouwman</mark>, <mark>G. Perotti</mark>, <mark>J. Schreiber</mark>, <mark>K. Schwarz</mark>

</div>
<div id="abstract">

**Abstract:** The study of protoplanetary disks has become increasingly important with the Kepler satellite finding that exoplanets are ubiquitous around stars in our galaxy and the discovery of enormous diversity in planetary system architectures and planet properties. High-resolution near-IR and ALMA images show strong evidence for ongoing planet formation in young disks. The JWST MIRI mid-INfrared Disk Survey (MINDS) aims to (1) investigate the chemical inventory in the terrestrial planet-forming zone across stellar spectral type, (2) follow the gas evolution into the disk dispersal stage, and (3) study the structure of protoplanetary and debris disks in the thermal mid-IR. The MINDS survey will thus build a bridge between the chemical inventory of disks and the properties of exoplanets. The survey comprises 52 targets (Herbig Ae stars, T Tauri stars, very low-mass stars and young debris disks). We primarily obtain MIRI/MRS spectra with high S/N ( $\sim\!100-500$ ) covering the complete wavelength range from 4.9 to 27.9 $\mu$ m. For a handful of selected targets we also obtain NIRSpec IFU high resolution spectroscopy (2.87-5.27 $\mu$ m). We will search for signposts of planet formation in thermal emission of micron-sized dust – information complementary to near-IR scattered light emission from small dust grains and emission from large dust in the submillimeter wavelength domain. We will also study the spatial structure of disks in three key systems that have shown signposts for planet formation, TW Hya and HD 169142 using the MIRI coronagraph at 15.5 $\mu$ m and 10.65 $\mu$ m respectively and PDS 70 using NIRCam imaging in the $1.87 \mu$ m narrow and the $4.8 \mu$ m medium band filter. We provide here an overview of the MINDS survey and showcase the power of the new JWST mid-IR molecular spectroscopy with the TW Hya disk spectrum where we report the detection of the molecular ion $\ce{CH3+}$ and the robust confirmation of $\ce{HCO+}$ earlier detected with Spitzer.

</div>

<div id="div_fig1">

<img src="tmp_2403.09210/./TWHya-zoom-molecular-spectra-v3.png" alt="Fig6" width="100%"/>

**Figure 6. -** Continuum subtracted TW Hya MIRI spectrum (black) with (a) \ce{HCO+} slab model (green) overplotted ($T=250$ K, also visible the \ce{H2} S(2) line at 12.28 $\mu$m and the H {\sc i} 11-8 and 7-6 lines at 12.39 and 12.37 $\mu$m), (b) \ce{CO2} slab model (blue) overplotted ($T=200$ K), (c) \ce{CH3+} slab model (red) overplotted ($T=500$ K, also visible the \ce{H2} S(5) line at 6.91 $\mu$m and the \ce{Ar+} fine-structure line at 6.99 $\mu$m), (d) CO slab model (magenta) overplotted ($T=500$ K), (e) \ce{H2O}, OH and \ce{CO2} slab models (colors indicated in legend, $T=400, 1000, 200$ K, respectively) in the $14.0-15.4 \mu$m wavelength range. All slab models are calculated in LTE and used only for identification of molecular emission features. (*fig:TWHya-zoom-molecular-spectra*)

</div>
<div id="div_fig2">

<img src="tmp_2403.09210/./TWHya-abundances_blue2_v2.png" alt="Fig9" width="100%"/>

**Figure 9. -** Selected abundance distribution of specific molecules and molecular ions in the adjusted TW Hya DIANA thermo-chemical disk model (elemental abundances of C, N, O depleted by a factor 50 in the inner 2.4 au). Overplotted are the PDR parameter $\log \chi/n_{\langle \rm H \rangle}$(see footnote in Sect. \ref{Sec:Thermo-chemical-Model}) and the dust temperature of 20 K for CO, and for all other molecules the gas temperature contours of 100, 300 and 1000 K (white) and the $A_{\rm V}\!=\!1$ mag line (black). (*fig:TWHyamodel-abundances*)

</div>
<div id="div_fig3">

<img src="tmp_2403.09210/./TWHya_5-25mic_spectrum_with_models_v2.png" alt="Fig10" width="100%"/>

**Figure 10. -** JWST/MIRI MRS spectrum (black) compared to the adjusted DIANA TW Hya model using a gas-to-dust mass ratio in the inner disk of 15 (blue, shifted by 0.2 Jy), and 0.15 (green, shifted by 1 Jy). The orange spectrum (shifted by 0.6 Jy) shows the model with a gas-to-dust mass ratio of 15 and the C, N and O abundances in the inner 2.4 au lowered by a factor 50. (*fig:TWHya-obs-model*)

</div><div id="qrcode"><img src=https://api.qrserver.com/v1/create-qr-code/?size=100x100&data="https://arxiv.org/abs/2403.09210"></div>

# Create HTML index

In [9]:
from datetime import datetime, timedelta, timezone
from glob import glob
import os

files = glob('_build/html/*.md')
days = 7
now = datetime.today()
res = []
for fk in files:
    stat_result = os.stat(fk).st_ctime
    modified = datetime.fromtimestamp(stat_result, tz=timezone.utc).replace(tzinfo=None)
    delta = now.today() - modified
    if delta <= timedelta(days=days):
        res.append((delta.seconds, fk))
res = [k[1] for k in reversed(sorted(res, key=lambda x:x[1]))]
npub = len(res)
print(len(res), f" publications files modified in the last {days:d} days.")
# [ print('\t', k) for k in res ];

390  publications files modified in the last 7 days.


In [10]:
import datetime
from glob import glob

def get_last_n_days(lst, days=1):
    """ Get the documents from the last n days """
    sorted_lst = sorted(lst, key=lambda x: x[1], reverse=True)
    for fname, date in sorted_lst:
        if date >= str(datetime.date.today() - datetime.timedelta(days=days)):
            yield fname

def extract_appearance_dates(lst_file):
    dates = []

    def get_date(line):
        return line\
            .split('Appeared on:')[-1]\
            .split('</mark>')[0].strip()

    for fname in lst:
        with open(fname, 'r') as f:
            found_date = False
            for line in f:
                if not found_date:
                    if "Appeared on" in line:
                        found_date = True
                        dates.append((fname, get_date(line)))
                else:
                    break
    return dates

from glob import glob
lst = glob('_build/html/*md')
days = 7
dates = extract_appearance_dates(lst)
res = list(get_last_n_days(dates, days))
npub = len(res)
print(len(res), f" publications in the last {days:d} days.")

11  publications in the last 7 days.


In [11]:
def create_carousel(npub=4):
    """ Generate the HTML code for a carousel with `npub` slides """
    carousel = ["""  <div class="carousel" """,
                """       data-flickity='{ "autoPlay": 10000, "adaptiveHeight": true, "resize": true, "wrapAround": true, "pauseAutoPlayOnHover": true, "groupCells": 1 }' id="asyncTypeset">"""
                ]
    
    item_str = """    <div class="carousel-cell"> <div id="slide{k}" class="md_view">Content {k}</div> </div>"""
    for k in range(1, npub + 1):
        carousel.append(item_str.format(k=k))
    carousel.append("  </div>")
    return '\n'.join(carousel)

def create_grid(npub=4):
    """ Generate the HTML code for a flat grid with `npub` slides """
    grid = ["""  <div class="grid"> """,
                ]
    
    item_str = """    <div class="grid-item"> <div id="slide{k}" class="md_view">Content {k}</div> </div>"""
    for k in range(1, npub + 1):
        grid.append(item_str.format(k=k))
    grid.append("  </div>")
    return '\n'.join(grid)

In [12]:
carousel = create_carousel(npub)
docs = ', '.join(['"{0:s}"'.format(k.split('/')[-1]) for k in res])
slides = ', '.join([f'"slide{k}"' for k in range(1, npub + 1)])

with open("daily_template.html", "r") as tpl:
    page = tpl.read()
    page = page.replace("{%-- carousel:s --%}", carousel)\
               .replace("{%-- suptitle:s --%}",  "7-day archives" )\
               .replace("{%-- docs:s --%}", docs)\
               .replace("{%-- slides:s --%}", slides)
    
with open("_build/html/index_7days.html", 'w') as fout:
    fout.write(page)

In [13]:
# redo for today
days = 1
res = list(get_last_n_days(dates, days))
npub = len(res)
print(len(res), f" publications in the last day.")

carousel = create_carousel(npub)
docs = ', '.join(['"{0:s}"'.format(k.split('/')[-1]) for k in res])
slides = ', '.join([f'"slide{k}"' for k in range(1, npub + 1)])

with open("daily_template.html", "r") as tpl:
    page = tpl.read()
    page = page.replace("{%-- carousel:s --%}", carousel)\
               .replace("{%-- suptitle:s --%}",  "Daily" )\
               .replace("{%-- docs:s --%}", docs)\
               .replace("{%-- slides:s --%}", slides)
    
# print(carousel, docs, slides)
# print(page)
with open("_build/html/index_daily.html", 'w') as fout:
    fout.write(page)

4  publications in the last day.


In [14]:
# Create the flat grid of the last N papers (fixed number regardless of dates)
from itertools import islice 

npub = 6
res = [k[0] for k in (islice(reversed(sorted(dates, key=lambda x: x[1])), 6))]
print(len(res), f" {npub} publications selected.")

grid = create_grid(npub)
docs = ', '.join(['"{0:s}"'.format(k.split('/')[-1]) for k in res])
slides = ', '.join([f'"slide{k}"' for k in range(1, npub + 1)])

with open("grid_template.html", "r") as tpl:
    page = tpl.read()
    page = page.replace("{%-- grid-content:s --%}", grid)\
               .replace("{%-- suptitle:s --%}",  f"Last {npub:,d} papers" )\
               .replace("{%-- docs:s --%}", docs)\
               .replace("{%-- slides:s --%}", slides)
    
# print(grid, docs, slides)
# print(page)
with open("_build/html/index_npub_grid.html", 'w') as fout:
    fout.write(page)

6  6 publications selected.
