# MPIA Arxiv on Deck 2

Contains the steps to produce the paper extractions.

In [1]:
# Imports
import os
from IPython.display import Markdown, display
from tqdm.notebook import tqdm
import warnings
from PIL import Image 

# requires arxiv_on_deck_2

from arxiv_on_deck_2.arxiv2 import (get_new_papers, 
                                    get_paper_from_identifier,
                                    retrieve_document_source, 
                                    get_markdown_badge)
from arxiv_on_deck_2 import (latex,
                             latex_bib,
                             mpia,
                             highlight_authors_in_list)

# Sometimes images are really big
Image.MAX_IMAGE_PIXELS = 1000000000 

In [2]:
# Some useful definitions.

class AffiliationWarning(UserWarning):
    pass

class AffiliationError(RuntimeError):
    pass

def validation(source: str):
    """Raises error paper during parsing of source file
    
    Allows checks before parsing TeX code.
    
    Raises AffiliationWarning
    """
    check = mpia.affiliation_verifications(source, verbose=True)
    if check is not True:
        raise AffiliationError("mpia.affiliation_verifications: " + check)

        
warnings.simplefilter('always', AffiliationWarning)


def get_markdown_qrcode(paper_id: str):
    """ Generate a qrcode to the arxiv page using qrserver.com
    
    :param paper: Arxiv paper
    :returns: markdown text
    """
    url = r"https://api.qrserver.com/v1/create-qr-code/?size=100x100&data="
    txt = f"""<img src={url}"https://arxiv.org/abs/{paper_id}">"""
    txt = '<div id="qrcode">' + txt + '</div>'
    return txt

## get list of arxiv paper candidates

We use the MPIA mitarbeiter list webpage from mpia.de to get author names
We then get all new papers from Arxiv and match authors

In [3]:
# get list from MPIA website
# it automatically filters identified non-scientists :func:`mpia.filter_non_scientists`
mpia_authors = mpia.get_mpia_mitarbeiter_list()
normed_mpia_authors = [k[1] for k in mpia_authors]   # initials + fullname
new_papers = get_new_papers()
# add manual references
add_paper_refs = []
new_papers.extend([get_paper_from_identifier(k) for k in add_paper_refs])

candidates = []
for paperk in new_papers:
    # Check author list with their initials
    normed_author_list = [mpia.get_initials(k) for k in paperk['authors']]
    hl_authors = highlight_authors_in_list(normed_author_list, normed_mpia_authors, verbose=True)
    matches = [(hl, orig) for hl, orig in zip(hl_authors, paperk['authors']) if 'mark' in hl]
    paperk['authors'] = hl_authors
    if matches:
        # only select paper if an author matched our list
        candidates.append(paperk)
print("""Arxiv has {0:,d} new papers today""".format(len(new_papers)))        
print("""          {0:,d} with possible author matches""".format(len(candidates)))

M. Samland  ->  M. Samland  |  ['M. Samland']
G. Perotti  ->  G. Perotti  |  ['G. Perotti']
E. Matthews  ->  E. Matthews  |  ['E. Matthews']
S. Scheithauer  ->  S. Scheithauer  |  ['S. Scheithauer']
J. Schreiber  ->  J. Schreiber  |  ['J. Schreiber']
K. Schwarz  ->  K. Schwarz  |  ['K. Schwarz']
S. Hannon  ->  S. Hannon  |  ['S. Hannon']
E. Schinnerer  ->  E. Schinnerer  |  ['E. Schinnerer']
K. Kreckel  ->  K. Kreckel  |  ['K. Kreckel']
X. Zhang  ->  X. Zhang  |  ['X. Zhang']
A. d. Graaff  ->  A. D. Graaff  |  ['A. D. Graaff']
H.-W. Rix  ->  H.-W. Rix  |  ['H.-W. Rix']
U. Dudzeviciute  ->  U. Dudzeviciute  |  ['U. Dudzeviciute']
Arxiv has 51 new papers today
          4 with possible author matches


# Parse sources and generate relevant outputs

From the candidates, we do the following steps:
* get their tarball from ArXiv (and extract data)
* find the main .tex file: find one with \documentclass{...} (sometimes it's non trivial)
* Check affiliations with :func:`validation`, which uses :func:`mpia.affiliation_verifications`
* If passing the affiliations: we parse the .tex source
   * inject sub-documents into the main (flatten the main document)
   * parse structure, extract information (title, abstract, authors, figures...)
   * handles `\graphicspath` if provided
* Generate the .md document.

In [4]:
documents = []
failed = []
for paper in tqdm(candidates):
    paper_id = paper['identifier'].lower().replace('arxiv:', '')
    
    folder = f'tmp_{paper_id}'

    try:
        if not os.path.isdir(folder):
            folder = retrieve_document_source(f"{paper_id}", f'tmp_{paper_id}')
        
        try:
            doc = latex.LatexDocument(folder, validation=validation)    
        except AffiliationError as affilerror:
            msg = f"ArXiv:{paper_id:s} is not an MPIA paper... " + str(affilerror)
            failed.append((paper, "affiliation error: " + str(affilerror) ))
            continue
        
        # Hack because sometimes author parsing does not work well
        if (len(doc.authors) != len(paper['authors'])):
            doc._authors = paper['authors']
        else:
            # highlight authors (FIXME: doc.highlight_authors)
            # done on arxiv paper already
            doc._authors = highlight_authors_in_list(
                [mpia.get_initials(k) for k in doc.authors], 
                normed_mpia_authors, verbose=True)
        if (doc.abstract) in (None, ''):
            doc._abstract = paper['abstract']
            
        doc.comment = (get_markdown_badge(paper_id) + 
                       "<mark>Appeared on: " + paper['date'] + "</mark> - ")
        if paper['comments']:
            doc.comment += " _" + paper['comments'] + "_"
        
        full_md = doc.generate_markdown_text()
        
        full_md += get_markdown_qrcode(paper_id)
        
        # replace citations
        try:
            bibdata = latex_bib.LatexBib.from_doc(doc)
            full_md = latex_bib.replace_citations(full_md, bibdata)
        except Exception as e:
            print(e)
        
        documents.append((paper_id, full_md))
    except Exception as e:
        warnings.warn(latex.LatexWarning(f"{paper_id:s} did not run properly\n" +
                                         str(e)
                                        ))
        failed.append((paper, "latex error " + str(e)))

  0%|          | 0/4 [00:00<?, ?it/s]

Retrieving document from  https://arxiv.org/e-print/2403.04855


extracting tarball to tmp_2403.04855... done.


M. Samland  ->  M. Samland  |  ['M. Samland']
G. Perotti  ->  G. Perotti  |  ['G. Perotti']
E. Matthews  ->  E. Matthews  |  ['E. Matthews']
S. Scheithauer  ->  S. Scheithauer  |  ['S. Scheithauer']
J. Schreiber  ->  J. Schreiber  |  ['J. Schreiber']
K. Schwarz  ->  K. Schwarz  |  ['K. Schwarz']


Found 74 bibliographic references in tmp_2403.04855/aanda.bbl.
syntax error in line 508: '=' expected
Retrieving document from  https://arxiv.org/e-print/2403.04901


extracting tarball to tmp_2403.04901...

 done.
Retrieving document from  https://arxiv.org/e-print/2403.05328



  exec(code_obj, self.user_global_ns, self.user_ns)

  exec(code_obj, self.user_global_ns, self.user_ns)


extracting tarball to tmp_2403.05328...

 done.
Retrieving document from  https://arxiv.org/e-print/2403.05506


extracting tarball to tmp_2403.05506...

 done.



  exec(code_obj, self.user_global_ns, self.user_ns)

  exec(code_obj, self.user_global_ns, self.user_ns)


Found 140 bibliographic references in tmp_2403.05506/wide.bbl.
syntax error in line 207: '=' expected


### Export the logs

Throughout, we also keep track of the logs per paper. see `logs-{today date}.md` 

In [5]:
import datetime
today = str(datetime.date.today())
logfile = f"_build/html/logs/log-{today}.md"


with open(logfile, 'w') as logs:
    # Success
    logs.write(f'# Arxiv on Deck 2: Logs - {today}\n\n')
    logs.write("""* Arxiv had {0:,d} new papers\n""".format(len(new_papers)))
    logs.write("""    * {0:,d} with possible author matches\n\n""".format(len(candidates)))
    logs.write("## Sucessful papers\n\n")
    display(Markdown("## Successful papers"))
    success = [k[0] for k in documents]
    for candid in candidates:
        if candid['identifier'].split(':')[-1] in success:
            display(candid)
            logs.write(candid.generate_markdown_text() + '\n\n')

    ## failed
    logs.write("## Failed papers\n\n")
    display(Markdown("## Failed papers"))
    failed = sorted(failed, key=lambda x: x[1])
    current_reason = ""
    for paper, reason in failed:
        if 'affiliation' in reason:
            color = 'green'
        else:
            color = 'red'
        data = Markdown(
                paper.generate_markdown_text() + 
                f'\n|<p style="color:{color:s}"> **ERROR** </p>| <p style="color:{color:s}">{reason:s}</p> |'
               )
        if reason != current_reason:
            logs.write(f'### {reason:s} \n\n')
            current_reason = reason
        logs.write(data.data + '\n\n')
        
        # only display here the important errors (all in logs)
        # if color in ('red',):
        display(data)

## Successful papers


|||
|---:|:---|
| [![arXiv](https://img.shields.io/badge/arXiv-arXiv:2403.04855-b31b1b.svg)](https://arxiv.org/abs/arXiv:2403.04855) | **MINDS: JWST/NIRCam imaging of the protoplanetary disk PDS 70**  |
|| V. Christiaens, et al. -- incl., <mark>M. Samland</mark>, <mark>G. Perotti</mark>, <mark>E. Matthews</mark>, <mark>S. Scheithauer</mark>, <mark>J. Schreiber</mark>, <mark>K. Schwarz</mark> |
|*Appeared on*| *2024-03-11*|
|*Comments*| *6+11 pages, 3+10 figures (text+appendix). Accepted for publication in A&A*|
|**Abstract**| Context. Two protoplanets have recently been discovered within the PDS 70 protoplanetary disk. JWST/NIRCam offers a unique opportunity to characterize them and their birth environment at wavelengths difficult to access from the ground. Aims. We aim to image the circumstellar environment of PDS 70 at 1.87 $\mu$m and 4.83 $\mu$m, assess the presence of Pa-$\alpha$ emission due to accretion onto the protoplanets, and probe any IR excess indicative of heated circumplanetary material. Methods. We obtain non-coronagraphic JWST/NIRCam images of PDS 70 within the MINDS (MIRI mid-INfrared Disk Survey) program. We leverage the Vortex Image Processing (VIP) package for data reduction, and develop dedicated routines for optimal stellar PSF subtraction, unbiased imaging of the disk, and protoplanet flux measurement in this type of dataset. A radiative transfer model of the disk is used to disentangle the contributions from the disk and the protoplanets. Results. We re-detect both protoplanets and identify extended emission after subtracting a disk model, including a large-scale spiral-like feature. We interpret its signal in the direct vicinity of planet c as tracing the accretion stream feeding its circumplanetary disk, while the outer part of the feature may rather reflect asymmetric illumination of the outer disk. We also report a bright signal consistent with a previously proposed protoplanet candidate enshrouded in dust, near the 1:2:4 mean-motion resonance with planets b and c. The 1.87 $\mu$m flux of planet b is consistent with atmospheric model predictions, but not that of planet c. We discuss potential origins for this discrepancy, including significant Pa-$\alpha$ line emission. The 4.83 $\mu$m fluxes of planets b and c suggest enshrouding dust or heated CO emission from their circumplanetary environment. |


|||
|---:|:---|
| [![arXiv](https://img.shields.io/badge/arXiv-arXiv:2403.05506-b31b1b.svg)](https://arxiv.org/abs/arXiv:2403.05506) | **The NIRSpec Wide GTO Survey**  |
|| M. V. Maseda, et al. -- incl., <mark>A. d. Graaff</mark>, <mark>H.-W. Rix</mark>, <mark>U. Dudzeviciute</mark> |
|*Appeared on*| *2024-03-11*|
|*Comments*| *14 pages, 11 figures; Submitted to A&A*|
|**Abstract**| The Near-infrared Spectrograph (NIRSpec) on the James Webb Space Telescope is uniquely suited to studying galaxies in the distant Universe with its combination of multi-object capabilities and sensitivity over a large range in wavelength (0.6-5.3 microns). Here we present the NIRSpec Wide survey, part of the NIRSpec Instrument Science Team's Guaranteed Time Observations, using NIRSpec's microshutter array to obtain spectra of more than 3200 galaxies at $z>1$ at both low- and high-resolution ($R\approx100$ and 2700) for a total of 105 hours. With 31 pointings covering $\approx$320 arcmin$^2$ across the five CANDELS fields with exquisite ancillary photometry from the Hubble Space Telescope, the NIRSpec Wide survey represents a fast and efficient way of using JWST to probe galaxies in the early Universe. Pointing centers are determined to maximize the observability of the rarest, high-value sources. Subsequently, the microshutter configurations are optimized to observe the maximum number of "census" galaxies with a selection function based primarily on HST/F160W magnitude, photometric/slitless grism redshift, and predicted \ha\ flux tracing the bulk of the galaxy population at cosmic noon ($z_{\rm med}=2.0$). We present details on the survey strategy, the target selection, an outline of the motivating science cases, and discuss upcoming public data releases to the community. |

## Failed papers


|||
|---:|:---|
| [![arXiv](https://img.shields.io/badge/arXiv-arXiv:2403.04901-b31b1b.svg)](https://arxiv.org/abs/arXiv:2403.04901) | **PHANGS-HST catalogs for $\sim$100,000 star clusters and compact  associations in 38 galaxies: I. Observed properties**  |
|| D. Maschmann, et al. -- incl., <mark>S. Hannon</mark>, <mark>E. Schinnerer</mark>, <mark>K. Kreckel</mark> |
|*Appeared on*| *2024-03-11*|
|*Comments*| *48 pages, 26 figures, article in review at APJ*|
|**Abstract**| We present the largest catalog to-date of star clusters and compact associations in nearby galaxies. We have performed a V-band-selected census of clusters across the 38 spiral galaxies of the PHANGS-HST Treasury Survey, and measured integrated, aperture-corrected NUV-U-B-V-I photometry. This work has resulted in uniform catalogs that contain $\sim$20,000 clusters and compact associations which have passed human inspection and morphological classification, and a larger sample of $\sim$100,000 classified by neural network models. Here, we report on the observed properties of these samples, and demonstrate that tremendous insight can be gained from just the observed properties of clusters, even in the absence of their transformation into physical quantities. In particular, we show the utility of the UBVI color-color diagram, and the three principal features revealed by the PHANGS-HST cluster sample: the young cluster locus, the middle-age plume, and the old globular cluster clump. We present an atlas of maps of the 2D spatial distribution of clusters and compact associations in the context of the molecular clouds from PHANGS-ALMA. We explore new ways of understanding this large dataset in a multi-scale context by bringing together once-separate techniques for the characterization of clusters (color-color diagrams and spatial distributions) and their parent galaxies (galaxy morphology and location relative to the galaxy main sequence). A companion paper presents the physical properties: ages, masses, and dust reddenings derived using improved spectral energy distribution (SED) fitting techniques. |
|<p style="color:green"> **ERROR** </p>| <p style="color:green">affiliation error: mpia.affiliation_verifications: 'Heidelberg' keyword not found.</p> |


|||
|---:|:---|
| [![arXiv](https://img.shields.io/badge/arXiv-arXiv:2403.05328-b31b1b.svg)](https://arxiv.org/abs/arXiv:2403.05328) | **Swift monitoring of GK Persei during the 2018 dwarf nova outburst**  |
|| S. Pei, M. Orio, <mark>X. Zhang</mark> |
|*Appeared on*| *2024-03-11*|
|*Comments*| *9 pages, 4 figures*|
|**Abstract**| The old nova and intermediate polar (IP) GK Persei underwent one of its recurrent dwarf nova (DN) outbursts in 2018. We proposed monitoring it in UV and X-rays with the Neil Gehrels Swift Observatory, starting less than six days after the eruption, until 16 days after the eruption ended. For the first time we could follow the decay to minimum light UV and X-rays. We present the timing and spectral analysis, comparing the results with the previous outbursts and with the quiescent status. We confirm the spin modulation in X-rays with a period 351.325(9) s, only in the 2-10 keV range. The period was not detected in the 0.3-2 keV range and in the UV band, suggesting that the soft portion of the X-ray spectrum in GK Per does not originate near the poles, but in a wind or circumstellar material. The amplitude of the modulation was less prominent than in 2015, a fact that seems correlated with a lower average mass accretion rate. The spectral fits are consistent with a mass accretion rate increasing by a factor of 2 from rise to maximum and decreasing during the return to minimum, following the trend of the modulation amplitude. The maximum plasma temperature is higher than the Swift XRT energy range of 0.3-10 keV, thus it is not well constrained, but our spectral fits indicate that it may have varied irregularly during the outburst. |
|<p style="color:green"> **ERROR** </p>| <p style="color:green">affiliation error: mpia.affiliation_verifications: 'Heidelberg' keyword not found.</p> |

## Export documents

We now write the .md files and export relevant images

In [6]:
def export_markdown_summary(md: str, md_fname:str, directory: str):
    """Export MD document and associated relevant images"""
    import os
    import shutil
    import re

    if (os.path.exists(directory) and not os.path.isdir(directory)):
        raise RuntimeError(f"a non-directory file exists with name {directory:s}")

    if (not os.path.exists(directory)):
        print(f"creating directory {directory:s}")
        os.mkdir(directory)

    fig_fnames = (re.compile(r'\[Fig.*\]\((.*)\)').findall(md) + 
                  re.compile(r'\<img src="([^>\s]*)"[^>]*/>').findall(md))
    for fname in fig_fnames:
        if 'http' in fname:
            # No need to copy online figures
            continue
        destdir = os.path.join(directory, os.path.dirname(fname))
        destfname = os.path.join(destdir, os.path.basename(fname))
        try:
            os.makedirs(destdir)
        except FileExistsError:
            pass
        shutil.copy(fname, destfname)
    with open(os.path.join(directory, md_fname), 'w') as fout:
        fout.write(md)
    print("exported in ", os.path.join(directory, md_fname))
    [print("    + " + os.path.join(directory,fk)) for fk in fig_fnames]

In [7]:
for paper_id, md in documents:
    export_markdown_summary(md, f"{paper_id:s}.md", '_build/html/')

exported in  _build/html/2403.04855.md
    + _build/html/tmp_2403.04855/./Fig1_v10.png
    + _build/html/tmp_2403.04855/./FigA1_v4.png
    + _build/html/tmp_2403.04855/./Fig2_v9.png
exported in  _build/html/2403.05506.md
    + _build/html/tmp_2403.05506/./survey_stats.png
    + _build/html/tmp_2403.05506/./sensitivity.png
    + _build/html/tmp_2403.05506/./zphot_zspec.png


## Display the papers

Not necessary but allows for a quick check.

In [8]:
[display(Markdown(k[1])) for k in documents];

<div class="macros" style="visibility:hidden;">
$\newcommand{\ensuremath}{}$
$\newcommand{\xspace}{}$
$\newcommand{\object}[1]{\texttt{#1}}$
$\newcommand{\farcs}{{.}''}$
$\newcommand{\farcm}{{.}'}$
$\newcommand{\arcsec}{''}$
$\newcommand{\arcmin}{'}$
$\newcommand{\ion}[2]{#1#2}$
$\newcommand{\textsc}[1]{\textrm{#1}}$
$\newcommand{\hl}[1]{\textrm{#1}}$
$\newcommand{\footnote}[1]{}$
$\newcommand{\ikc}[1]{\textcolor{blue}{\textsf{IK: #1 }}}$
$\newcommand{\vc}[1]{\textcolor{red}{\textbf{VC:} #1}}$
$\newcommand{\gp}[1]{\textcolor{teal}{\textbf{GP:} #1}}$
$\newcommand{\new}[1]{#1}$</div>



<div id="title">

# MINDS: JWST/NIRCam imaging of the protoplanetary disk PDS 70

</div>
<div id="comments">

[![arXiv](https://img.shields.io/badge/arXiv-2403.04855-b31b1b.svg)](https://arxiv.org/abs/2403.04855)<mark>Appeared on: 2024-03-11</mark> -  _6+11 pages, 3+10 figures (text+appendix). Accepted for publication in A&A_

</div>
<div id="authors">

V. Christiaens, et al. -- incl., <mark>M. Samland</mark>, <mark>G. Perotti</mark>, <mark>E. Matthews</mark>, <mark>S. Scheithauer</mark>, <mark>J. Schreiber</mark>, <mark>K. Schwarz</mark>

</div>
<div id="abstract">

**Abstract:** Two protoplanets have recently been discovered within the PDS 70 protoplanetary $\new{disk}$ .    JWST/NIRCam offers a unique opportunity to characterize them and their birth environment at wavelengths difficult to access from the ground. We aim to image the circumstellar environment of PDS 70    at 1.87 $\upmu$ m and 4.83 $\upmu$ m, assess the presence of Pa- $\alpha$ emission due to accretion onto the protoplanets, and probe any IR excess indicative of heated circumplanetary material. We obtain non-coronagraphic JWST/NIRCam images of PDS 70 within the MINDS (MIRI mid-INfrared Disk Survey) program. We leverage the Vortex Image Processing (VIP) package for data reduction, and develop dedicated routines for optimal stellar PSF subtraction, unbiased imaging of the disk, and protoplanet flux measurement in this type of dataset.    A radiative transfer model of the disk is used to disentangle the    contributions from the disk and the protoplanets. We re-detect both protoplanets    and identify extended emission after subtracting a    disk model, including a large-scale spiral-like feature. We interpret its signal in the direct vicinity of planet $c$ as tracing the accretion stream feeding its circumplanetary disk, while the outer part of the feature may rather reflect asymmetric illumination    of the outer disk.    We also report a bright signal consistent with a previously proposed protoplanet candidate enshrouded in dust, near the 1:2:4 mean-motion resonance with planets $b$ and $c$ .    The 1.87 $\upmu$ m flux of planet $b$ is consistent with atmospheric model predictions,    but not that of planet $c$ . We discuss potential origins for this discrepancy, including    significant Pa- $\alpha$ line emission.    The 4.83 $\upmu$ m fluxes of planets $b$ and $c$ suggest    enshrouding dust or heated CO emission from their circumplanetary environment. The use of image-processing methods optimized for extended disk signals on high-sensitivity and high-stability JWST images can uniquely identify signatures of planet--disk interactions and enable accurate photometry of protoplanets at wavelengths difficultly probed from the ground. Our results point towards the prospect of identifying and characterizing more protoplanets in other JWST datasets.

</div>

<div id="div_fig1">

<img src="tmp_2403.04855/./Fig1_v10.png" alt="Fig3" width="100%"/>

**Figure 3. -** 
    NIRCam images of PDS 70 obtained in the F187N (top row) and F480M (bottom row) filters
    using our iterative PCA algorithm.
    The second and third columns show the images obtained after subtraction of our outer disk model,
    and further subtraction of protoplanets $b$ and c, respectively. Major and minor axes of the disk are indicated with solid and dashed lines, respectively, in the first column. Dashed circles indicate the predicted location for protoplanets $b$ and $c$ based on the orbital fits in \citet{Wang2021}, and for candidate $d$ based on the orbit suggested in \citetalias{Mesa2019a}. The astrometric measurements for $d$(blue dots) are compared to our new estimated astrometry (solid circle) in panel c.
    Units are MJy sr$^{-1}$.
     (*fig:FinalImages*)

</div>
<div id="div_fig2">

<img src="tmp_2403.04855/./FigA1_v4.png" alt="Fig6" width="100%"/>

**Figure 6. -** Images obtained at 1.87$\upmu$m (F187N; top row) and 4.80$\upmu$m (F480M; bottom row) with mean roll subtraction,
    TRAP, IROLL and IPCA. See text for details on each algorithm. The images correspond to the largest common field of view probed by the dithering pattern employed during the observation. The plate scale is 31 and 63 mas/pixel for the F187N and F480M images, respectively. A numerical mask with radius set to the FWHM of the PSF covers the inner part of the F187N images.
     (*fig:AltAlgos*)

</div>
<div id="div_fig3">

<img src="tmp_2403.04855/./Fig2_v9.png" alt="Fig4" width="100%"/>

**Figure 4. -** Composite SED of PDS 70 $b$ showing literature spectro- and photometric measurements (grey and black error bars, respectively), the new NIRCam F187N and F480M photometry
(blue and red error bars, respectively), and
the best-fit atmospheric models found in \cite{Wang2021}. The model with the most support is shown with an orange solid line (extinct BT-SETTL model with extra blackbody emission). It is consistent with both of our measurements, and suggests the need for a circumplanetary contribution.
The light blue error bar is obtained
considering literature photometry for the star,
and illustrates the uncertainty associated to variability affecting $\new${some} other data points of the SED (details in Appendix \ref{sec:F187N_excess}).
 (*fig:spec_b*)

</div><div id="qrcode"><img src=https://api.qrserver.com/v1/create-qr-code/?size=100x100&data="https://arxiv.org/abs/2403.04855"></div>

<div class="macros" style="visibility:hidden;">
$\newcommand{\ensuremath}{}$
$\newcommand{\xspace}{}$
$\newcommand{\object}[1]{\texttt{#1}}$
$\newcommand{\farcs}{{.}''}$
$\newcommand{\farcm}{{.}'}$
$\newcommand{\arcsec}{''}$
$\newcommand{\arcmin}{'}$
$\newcommand{\ion}[2]{#1#2}$
$\newcommand{\textsc}[1]{\textrm{#1}}$
$\newcommand{\hl}[1]{\textrm{#1}}$
$\newcommand{\footnote}[1]{}$
$\newcommand{\kms}{\rm km s^{-1}}$
$\newcommand{\ha}{H\alpha}$
$\newcommand{\hb}{H\beta}$
$\newcommand{\oiii}{[O III]}$
$\newcommand{\nii}{[N II]}$
$\newcommand{\oii}{[O II]}$
$\newcommand{\micron}{\rm \mu m}$</div>



<div id="title">

# The NIRSpec Wide GTO Survey

</div>
<div id="comments">

[![arXiv](https://img.shields.io/badge/arXiv-2403.05506-b31b1b.svg)](https://arxiv.org/abs/2403.05506)<mark>Appeared on: 2024-03-11</mark> -  _14 pages, 11 figures; Submitted to A&A_

</div>
<div id="authors">

M. V. Maseda, et al. -- incl., <mark>A. d. Graaff</mark>, <mark>H.-W. Rix</mark>, <mark>U. Dudzeviciute</mark>

</div>
<div id="abstract">

**Abstract:** The Near-infrared Spectrograph (NIRSpec) on the James Webb Space Telescope is uniquely suited to studying galaxies in the distant Universe with its combination of multi-object capabilities and sensitivity over a large range in wavelength ( $0.6-5.3 \micron$ ). Here we present the NIRSpec Wide survey, part of the NIRSpec Instrument Science Team's Guaranteed Time Observations, using NIRSpec's microshutter array to obtain spectra of more than 3200 galaxies at $z>1$ at both low- and high-resolution ( $R\approx100$ and 2700) for a total of 105 hours.  With 31 pointings covering $\approx$ 320 arcmin $^2$ across the five CANDELS fields with exquisite ancillary photometry from the Hubble Space Telescope, the NIRSpec Wide survey represents a fast and efficient way of probing galaxies in the early Universe.  Pointing centers are determined to maximize the observability of the rarest, high-value sources. Subsequently, the microshutter configurations are optimized to observe the maximum number of "census" galaxies with a selection function based primarily on HST/F160W magnitude, photometric/slitless grism redshift, and predicted $\ha$ flux tracing the bulk of the galaxy population at cosmic noon ( $z_{\rm med}=2.0$ ).  We present details on the survey strategy, the target selection, an outline of the motivating science cases, and discuss upcoming public data releases to the community.

</div>

<div id="div_fig1">

<img src="tmp_2403.05506/./survey_stats.png" alt="Fig1" width="100%"/>

**Figure 1. -** Area covered by the NIRSpec MSA versus total number of targets observed in the survey for various Cycle 1 programs (limited to PRISM observations).  While amongst the shallowest in terms of (PRISM) exposure time, the NIRSpec "Wide" GTO survey covers the largest area and targets the most individual galaxies.  In addition, the Wide survey is spread over five non-contiguous fields in order to maximize the chances of observing cosmologically-rare targets. (*fig:survey*)

</div>
<div id="div_fig2">

<img src="tmp_2403.05506/./sensitivity.png" alt="Fig2" width="100%"/>

**Figure 2. -** Empirically-derived emission line (top) and continuum (bottom) sensitivities for the Wide survey as a function of disperser and observed wavelength.  The solid lines are the 5-$\sigma$ sensitivities for a centered point source, while the dashed lines are the 5-$\sigma$ sensitivities for an "extended" source, namely a centered object with a Sersic index $n=1$ and a half-light radius of 0$\farcs$3 \cite[a typical star-forming galaxy at $z\approx$1.5;][]{Wel:2014}. (*fig:sens*)

</div>
<div id="div_fig3">

<img src="tmp_2403.05506/./zphot_zspec.png" alt="Fig7" width="100%"/>

**Figure 7. -** (Left) Photometric redshifts (from \texttt{EAzY}) versus Wide spectroscopic redshifts for targets in the AEGIS field.  While many of the objects had accurate photo-$z$ estimates (70\% have $\Delta$z/(1+$z$) values below 0.25), there is a significant benefit to obtaining precise redshifts from spectroscopy for many of our primary science cases.  (Center and Right) Example PRISM spectra of objects with a discrepancy between $z_{\mathrm{phot}}$ and $z_{\mathrm{spec}}$.
 The black line shows the best-fitting \texttt{EAzY} template at each of the redshifts, where the Wide-based spectroscopic redshifts are unequivocally correct. (*fig:zphotzspec*)

</div><div id="qrcode"><img src=https://api.qrserver.com/v1/create-qr-code/?size=100x100&data="https://arxiv.org/abs/2403.05506"></div>

# Create HTML index

In [9]:
from datetime import datetime, timedelta, timezone
from glob import glob
import os

files = glob('_build/html/*.md')
days = 7
now = datetime.today()
res = []
for fk in files:
    stat_result = os.stat(fk).st_ctime
    modified = datetime.fromtimestamp(stat_result, tz=timezone.utc).replace(tzinfo=None)
    delta = now.today() - modified
    if delta <= timedelta(days=days):
        res.append((delta.seconds, fk))
res = [k[1] for k in reversed(sorted(res, key=lambda x:x[1]))]
npub = len(res)
print(len(res), f" publications files modified in the last {days:d} days.")
# [ print('\t', k) for k in res ];

384  publications files modified in the last 7 days.


In [10]:
import datetime
from glob import glob

def get_last_n_days(lst, days=1):
    """ Get the documents from the last n days """
    sorted_lst = sorted(lst, key=lambda x: x[1], reverse=True)
    for fname, date in sorted_lst:
        if date >= str(datetime.date.today() - datetime.timedelta(days=days)):
            yield fname

def extract_appearance_dates(lst_file):
    dates = []

    def get_date(line):
        return line\
            .split('Appeared on:')[-1]\
            .split('</mark>')[0].strip()

    for fname in lst:
        with open(fname, 'r') as f:
            found_date = False
            for line in f:
                if not found_date:
                    if "Appeared on" in line:
                        found_date = True
                        dates.append((fname, get_date(line)))
                else:
                    break
    return dates

from glob import glob
lst = glob('_build/html/*md')
days = 7
dates = extract_appearance_dates(lst)
res = list(get_last_n_days(dates, days))
npub = len(res)
print(len(res), f" publications in the last {days:d} days.")

7  publications in the last 7 days.


In [11]:
def create_carousel(npub=4):
    """ Generate the HTML code for a carousel with `npub` slides """
    carousel = ["""  <div class="carousel" """,
                """       data-flickity='{ "autoPlay": 10000, "adaptiveHeight": true, "resize": true, "wrapAround": true, "pauseAutoPlayOnHover": true, "groupCells": 1 }' id="asyncTypeset">"""
                ]
    
    item_str = """    <div class="carousel-cell"> <div id="slide{k}" class="md_view">Content {k}</div> </div>"""
    for k in range(1, npub + 1):
        carousel.append(item_str.format(k=k))
    carousel.append("  </div>")
    return '\n'.join(carousel)

def create_grid(npub=4):
    """ Generate the HTML code for a flat grid with `npub` slides """
    grid = ["""  <div class="grid"> """,
                ]
    
    item_str = """    <div class="grid-item"> <div id="slide{k}" class="md_view">Content {k}</div> </div>"""
    for k in range(1, npub + 1):
        grid.append(item_str.format(k=k))
    grid.append("  </div>")
    return '\n'.join(grid)

In [12]:
carousel = create_carousel(npub)
docs = ', '.join(['"{0:s}"'.format(k.split('/')[-1]) for k in res])
slides = ', '.join([f'"slide{k}"' for k in range(1, npub + 1)])

with open("daily_template.html", "r") as tpl:
    page = tpl.read()
    page = page.replace("{%-- carousel:s --%}", carousel)\
               .replace("{%-- suptitle:s --%}",  "7-day archives" )\
               .replace("{%-- docs:s --%}", docs)\
               .replace("{%-- slides:s --%}", slides)
    
with open("_build/html/index_7days.html", 'w') as fout:
    fout.write(page)

In [13]:
# redo for today
days = 1
res = list(get_last_n_days(dates, days))
npub = len(res)
print(len(res), f" publications in the last day.")

carousel = create_carousel(npub)
docs = ', '.join(['"{0:s}"'.format(k.split('/')[-1]) for k in res])
slides = ', '.join([f'"slide{k}"' for k in range(1, npub + 1)])

with open("daily_template.html", "r") as tpl:
    page = tpl.read()
    page = page.replace("{%-- carousel:s --%}", carousel)\
               .replace("{%-- suptitle:s --%}",  "Daily" )\
               .replace("{%-- docs:s --%}", docs)\
               .replace("{%-- slides:s --%}", slides)
    
# print(carousel, docs, slides)
# print(page)
with open("_build/html/index_daily.html", 'w') as fout:
    fout.write(page)

2  publications in the last day.


In [14]:
# Create the flat grid of the last N papers (fixed number regardless of dates)
from itertools import islice 

npub = 6
res = [k[0] for k in (islice(reversed(sorted(dates, key=lambda x: x[1])), 6))]
print(len(res), f" {npub} publications selected.")

grid = create_grid(npub)
docs = ', '.join(['"{0:s}"'.format(k.split('/')[-1]) for k in res])
slides = ', '.join([f'"slide{k}"' for k in range(1, npub + 1)])

with open("grid_template.html", "r") as tpl:
    page = tpl.read()
    page = page.replace("{%-- grid-content:s --%}", grid)\
               .replace("{%-- suptitle:s --%}",  f"Last {npub:,d} papers" )\
               .replace("{%-- docs:s --%}", docs)\
               .replace("{%-- slides:s --%}", slides)
    
# print(grid, docs, slides)
# print(page)
with open("_build/html/index_npub_grid.html", 'w') as fout:
    fout.write(page)

6  6 publications selected.
