# MPIA Arxiv on Deck 2

Contains the steps to produce the paper extractions.

In [1]:
# Imports
import os
from IPython.display import Markdown, display
from tqdm.notebook import tqdm
import warnings
from PIL import Image 

# requires arxiv_on_deck_2

from arxiv_on_deck_2.arxiv2 import (get_new_papers, 
                                    get_paper_from_identifier,
                                    retrieve_document_source, 
                                    get_markdown_badge)
from arxiv_on_deck_2 import (latex,
                             latex_bib,
                             mpia,
                             highlight_authors_in_list)

# Sometimes images are really big
Image.MAX_IMAGE_PIXELS = 1000000000 

In [2]:
# Some useful definitions.

class AffiliationWarning(UserWarning):
    pass

class AffiliationError(RuntimeError):
    pass

def validation(source: str):
    """Raises error paper during parsing of source file
    
    Allows checks before parsing TeX code.
    
    Raises AffiliationWarning
    """
    check = mpia.affiliation_verifications(source, verbose=True)
    if check is not True:
        raise AffiliationError("mpia.affiliation_verifications: " + check)

        
warnings.simplefilter('always', AffiliationWarning)


def get_markdown_qrcode(paper_id: str):
    """ Generate a qrcode to the arxiv page using qrserver.com
    
    :param paper: Arxiv paper
    :returns: markdown text
    """
    url = r"https://api.qrserver.com/v1/create-qr-code/?size=100x100&data="
    txt = f"""<img src={url}"https://arxiv.org/abs/{paper_id}">"""
    txt = '<div id="qrcode">' + txt + '</div>'
    return txt

## get list of arxiv paper candidates

We use the MPIA mitarbeiter list webpage from mpia.de to get author names
We then get all new papers from Arxiv and match authors

In [3]:
# get list from MPIA website
# it automatically filters identified non-scientists :func:`mpia.filter_non_scientists`
mpia_authors = mpia.get_mpia_mitarbeiter_list()
normed_mpia_authors = [k[1] for k in mpia_authors]   # initials + fullname
new_papers = get_new_papers()
# add manual references
add_paper_refs = []
new_papers.extend([get_paper_from_identifier(k) for k in add_paper_refs])

candidates = []
for paperk in new_papers:
    # Check author list with their initials
    normed_author_list = [mpia.get_initials(k) for k in paperk['authors']]
    hl_authors = highlight_authors_in_list(normed_author_list, normed_mpia_authors, verbose=True)
    matches = [(hl, orig) for hl, orig in zip(hl_authors, paperk['authors']) if 'mark' in hl]
    paperk['authors'] = hl_authors
    if matches:
        # only select paper if an author matched our list
        candidates.append(paperk)
print("""Arxiv has {0:,d} new papers today""".format(len(new_papers)))        
print("""          {0:,d} with possible author matches""".format(len(candidates)))

A. Kospal  ->  A. Kospal  |  ['A. Kospal']
S. Li  ->  S. Li  |  ['S. Li']
Arxiv has 54 new papers today
          2 with possible author matches


# Parse sources and generate relevant outputs

From the candidates, we do the following steps:
* get their tarball from ArXiv (and extract data)
* find the main .tex file: find one with \documentclass{...} (sometimes it's non trivial)
* Check affiliations with :func:`validation`, which uses :func:`mpia.affiliation_verifications`
* If passing the affiliations: we parse the .tex source
   * inject sub-documents into the main (flatten the main document)
   * parse structure, extract information (title, abstract, authors, figures...)
   * handles `\graphicspath` if provided
* Generate the .md document.

In [4]:
documents = []
failed = []
for paper in tqdm(candidates):
    paper_id = paper['identifier'].lower().replace('arxiv:', '')
    
    folder = f'tmp_{paper_id}'

    try:
        if not os.path.isdir(folder):
            folder = retrieve_document_source(f"{paper_id}", f'tmp_{paper_id}')
        
        try:
            doc = latex.LatexDocument(folder, validation=validation)    
        except AffiliationError as affilerror:
            msg = f"ArXiv:{paper_id:s} is not an MPIA paper... " + str(affilerror)
            failed.append((paper, "affiliation error: " + str(affilerror) ))
            continue
        
        # Hack because sometimes author parsing does not work well
        if (len(doc.authors) != len(paper['authors'])):
            doc._authors = paper['authors']
        else:
            # highlight authors (FIXME: doc.highlight_authors)
            # done on arxiv paper already
            doc._authors = highlight_authors_in_list(
                [mpia.get_initials(k) for k in doc.authors], 
                normed_mpia_authors, verbose=True)
        if (doc.abstract) in (None, ''):
            doc._abstract = paper['abstract']
            
        doc.comment = (get_markdown_badge(paper_id) + 
                       "<mark>Appeared on: " + paper['date'] + "</mark> - ")
        if paper['comments']:
            doc.comment += " _" + paper['comments'] + "_"
        
        full_md = doc.generate_markdown_text()
        
        full_md += get_markdown_qrcode(paper_id)
        
        # replace citations
        try:
            bibdata = latex_bib.LatexBib.from_doc(doc)
            full_md = latex_bib.replace_citations(full_md, bibdata)
        except Exception as e:
            print(e)
        
        documents.append((paper_id, full_md))
    except Exception as e:
        warnings.warn(latex.LatexWarning(f"{paper_id:s} did not run properly\n" +
                                         str(e)
                                        ))
        failed.append((paper, "latex error " + str(e)))

  0%|          | 0/2 [00:00<?, ?it/s]

Retrieving document from  https://arxiv.org/e-print/2404.02222


extracting tarball to tmp_2404.02222...

 done.


Found 147 bibliographic references in tmp_2404.02222/main.bbl.
Retrieving document from  https://arxiv.org/e-print/2404.02275


extracting tarball to tmp_2404.02275... done.


S. Li  ->  S. Li  |  ['S. Li']


Found 107 bibliographic references in tmp_2404.02275/ms.bbl.


### Export the logs

Throughout, we also keep track of the logs per paper. see `logs-{today date}.md` 

In [5]:
import datetime
today = str(datetime.date.today())
logfile = f"_build/html/logs/log-{today}.md"


with open(logfile, 'w') as logs:
    # Success
    logs.write(f'# Arxiv on Deck 2: Logs - {today}\n\n')
    logs.write("""* Arxiv had {0:,d} new papers\n""".format(len(new_papers)))
    logs.write("""    * {0:,d} with possible author matches\n\n""".format(len(candidates)))
    logs.write("## Sucessful papers\n\n")
    display(Markdown("## Successful papers"))
    success = [k[0] for k in documents]
    for candid in candidates:
        if candid['identifier'].split(':')[-1] in success:
            display(candid)
            logs.write(candid.generate_markdown_text() + '\n\n')

    ## failed
    logs.write("## Failed papers\n\n")
    display(Markdown("## Failed papers"))
    failed = sorted(failed, key=lambda x: x[1])
    current_reason = ""
    for paper, reason in failed:
        if 'affiliation' in reason:
            color = 'green'
        else:
            color = 'red'
        data = Markdown(
                paper.generate_markdown_text() + 
                f'\n|<p style="color:{color:s}"> **ERROR** </p>| <p style="color:{color:s}">{reason:s}</p> |'
               )
        if reason != current_reason:
            logs.write(f'### {reason:s} \n\n')
            current_reason = reason
        logs.write(data.data + '\n\n')
        
        # only display here the important errors (all in logs)
        # if color in ('red',):
        display(data)

## Successful papers


|||
|---:|:---|
| [![arXiv](https://img.shields.io/badge/arXiv-arXiv:2404.02222-b31b1b.svg)](https://arxiv.org/abs/arXiv:2404.02222) | **Polarimetric differential imaging with VLT/NACO. A comprehensive PDI  pipeline for NACO data (PIPPIN)**  |
|| S. d. Regt, et al. -- incl., <mark>A. Kospal</mark> |
|*Appeared on*| *2024-04-04*|
|*Comments*| *Accepted for publication in A&A. For more information on PIPPIN, see: this https URL . The reduced images are available on Zenodo: this https URL*|
|**Abstract**| The observed diversity of exoplanets can possibly be traced back to the planet formation processes. Planet-disk interactions induce sub-structures in the circumstellar disk that can be revealed via scattered light observations. However, a high-contrast imaging technique such as polarimetric differential imaging (PDI) must first be applied to suppress the stellar diffraction halo. In this work we present the PDI PiPelIne for NACO data (PIPPIN), which reduces the archival polarimetric observations made with the NACO instrument at the Very Large Telescope. Prior to this work, such a comprehensive pipeline to reduce polarimetric NACO data did not exist. We identify a total of 243 datasets of 57 potentially young stellar objects observed before NACO's decommissioning. The PIPPIN pipeline applies various levels of instrumental polarisation correction and is capable of reducing multiple observing setups, including half-wave plate or de-rotator usage and wire-grid observations. A novel template-matching method is applied to assess the detection significance of polarised signals in the reduced data. In 22 of the 57 observed targets, we detect polarised light resulting from a scattering of circumstellar dust. The detections exhibit a collection of known sub-structures, including rings, gaps, spirals, shadows, and in- or outflows of material. Since NACO was equipped with a near-infrared wavefront sensor, it made unique polarimetric observations of a number of embedded protostars. This is the first time detections of the Class I objects Elia 2-21 and YLW 16A have been published. Alongside the outlined PIPPIN pipeline, we publish an archive of the reduced data products, thereby improving the accessibility of these data for future studies. |


|||
|---:|:---|
| [![arXiv](https://img.shields.io/badge/arXiv-arXiv:2404.02275-b31b1b.svg)](https://arxiv.org/abs/arXiv:2404.02275) | **The ALMA-QUARKS Survey: II. the ACA 1.3 mm continuum source catalog and  the assembly of dense gas in massive star-forming clumps**  |
|| F. Xu, et al. -- incl., <mark>S. Li</mark> |
|*Appeared on*| *2024-04-04*|
|*Comments*| *24 pages, 7 figures. Accepted for publication in Research in Astronomy and Astrophysics. QUARKS atlas link: this https URL*|
|**Abstract**| Leveraging the high resolution, high sensitivity, and wide frequency coverage of the Atacama Large Millimeter/submillimeter Array (ALMA), the QUARKS survey, standing for "Querying Underlying mechanisms of massive star formation with ALMA-Resolved gas Kinematics and Structures", is observing 139 massive star-forming clumps at ALMA Band 6 ($\lambda\sim$ 1.3 mm). This paper introduces the Atacama Compact Array (ACA) 7-m data. Combining multi-wavelength data, we provide the first edition of QUARKS atlas, offering insights into the multiscale and multiphase interstellar medium in high-mass star formation. The ACA 1.3 mm catalog includes 207 continuum sources that are called ACA sources. Their gas kinetic temperatures are estimated using three formaldehyde (H$_2$CO) transitions with a non-LTE radiation transfer model, and the mass and density are derived from a dust emission model. The ACA sources are massive (16-84 percentile values of 6-160 $M_{\odot}$), gravity-dominated ($M\propto R^{1.1}$) fragments within massive clumps, with supersonic turbulence ($\mathcal{M}>1$) and embedded star-forming protoclusters. We find a linear correlation between the masses of the fragments and the massive clumps, with a ratio of 6% between the two. When considering the fragments as representative of dense gas, the ratio indicates a dense gas fraction (DGF) of 6%, although with a wide scatter ranging from 1% to 10%. If we consider the QUARKS massive clumps to be what is observed at various scales, then the size-independent DGF indicates a self-similar fragmentation or collapsing mode in protocluster formation. With the ACA data over four orders of magnitude of luminosity-to-mass ratio ($L/M$), we find that the DGF increases significantly with $L/M$, which indicates clump evolutionary stage. We observed a limited fragmentation at the subclump scale, which can be explained by dynamic global collapse process. |

## Failed papers

## Export documents

We now write the .md files and export relevant images

In [6]:
def export_markdown_summary(md: str, md_fname:str, directory: str):
    """Export MD document and associated relevant images"""
    import os
    import shutil
    import re

    if (os.path.exists(directory) and not os.path.isdir(directory)):
        raise RuntimeError(f"a non-directory file exists with name {directory:s}")

    if (not os.path.exists(directory)):
        print(f"creating directory {directory:s}")
        os.mkdir(directory)

    fig_fnames = (re.compile(r'\[Fig.*\]\((.*)\)').findall(md) + 
                  re.compile(r'\<img src="([^>\s]*)"[^>]*/>').findall(md))
    for fname in fig_fnames:
        if 'http' in fname:
            # No need to copy online figures
            continue
        destdir = os.path.join(directory, os.path.dirname(fname))
        destfname = os.path.join(destdir, os.path.basename(fname))
        try:
            os.makedirs(destdir)
        except FileExistsError:
            pass
        shutil.copy(fname, destfname)
    with open(os.path.join(directory, md_fname), 'w') as fout:
        fout.write(md)
    print("exported in ", os.path.join(directory, md_fname))
    [print("    + " + os.path.join(directory,fk)) for fk in fig_fnames]

In [7]:
for paper_id, md in documents:
    export_markdown_summary(md, f"{paper_id:s}.md", '_build/html/')

exported in  _build/html/2404.02222.md
    + _build/html/tmp_2404.02222/./plots/figure_gallery.png
    + _build/html/tmp_2404.02222/./plots/figure_Q_U.png
    + _build/html/tmp_2404.02222/./plots/figure_PI_vs_m_J.png
exported in  _build/html/2404.02275.md
    + _build/html/tmp_2404.02275/./stats.png
    + _build/html/tmp_2404.02275/./DGF_Rclump.png
    + _build/html/tmp_2404.02275/./I13291-6229_atlas.png


## Display the papers

Not necessary but allows for a quick check.

In [8]:
[display(Markdown(k[1])) for k in documents];

<div class="macros" style="visibility:hidden;">
$\newcommand{\ensuremath}{}$
$\newcommand{\xspace}{}$
$\newcommand{\object}[1]{\texttt{#1}}$
$\newcommand{\farcs}{{.}''}$
$\newcommand{\farcm}{{.}'}$
$\newcommand{\arcsec}{''}$
$\newcommand{\arcmin}{'}$
$\newcommand{\ion}[2]{#1#2}$
$\newcommand{\textsc}[1]{\textrm{#1}}$
$\newcommand{\hl}[1]{\textrm{#1}}$
$\newcommand{\footnote}[1]{}$
$\newcommand{\arraystretch}{1.05}$</div>



<div id="title">

# Polarimetric differential imaging with VLT/NACO

</div>
<div id="comments">

[![arXiv](https://img.shields.io/badge/arXiv-2404.02222-b31b1b.svg)](https://arxiv.org/abs/2404.02222)<mark>Appeared on: 2024-04-04</mark> -  _Accepted for publication in A&A. For more information on PIPPIN, see: this https URL . The reduced images are available on Zenodo: this https URL_

</div>
<div id="authors">

S. d. Regt, et al.

</div>
<div id="abstract">

**Abstract:** The observed diversity of exoplanets can possibly be traced back to the planet formation processes. Planet--disk interactions induce sub-structures in the circumstellar disk that can be revealed via scattered light observations. However, a high-contrast imaging technique such as polarimetric differential imaging (PDI) must first be applied to suppress the stellar diffraction halo. In this work we present the PDI PiPelIne for NACO data (PIPPIN), which reduces the archival polarimetric observations made with the NACO instrument at the Very Large Telescope. Prior to this work, such a comprehensive pipeline to reduce polarimetric NACO data did not exist. We identify a total of 243 datasets of 57 potentially young stellar objects observed before NACO's decommissioning. The PIPPIN pipeline applies various levels of instrumental polarisation correction and is capable of reducing multiple observing setups, including half-wave plate or de-rotator usage and wire-grid observations. A novel template-matching method is applied to assess the detection significance of polarised signals in the reduced data. In 22 of the 57 observed targets, we detect polarised light resulting from a scattering of circumstellar dust. The detections exhibit a collection of known sub-structures, including rings, gaps, spirals, shadows, and in- or outflows of material. Since NACO was equipped with a near-infrared wavefront sensor, it made unique polarimetric observations of a number of embedded protostars. This is the first time detections of the Class I objects Elia 2-21 and YLW 16A  have been published. Alongside the outlined PIPPIN pipeline, we publish an archive of the reduced data products, thereby improving the accessibility of these data for future studies.

</div>

<div id="div_fig1">

<img src="tmp_2404.02222/./plots/figure_gallery.png" alt="Fig6" width="100%"/>

**Figure 6. -** Gallery of young systems detected with NACO and reduced with PIPPIN. Each panel shows the polarised light on a logarithmic scale ranging between different values to highlight sub-structures. The highest degree of $IP$ correction is used where possible. Scale bars in the lower-left corners of each panel indicate $100 \mathrm{AU}$ at each object's distance. HD 169142, R CrA, and Parsamian 21 are shown in the H band, MP Mus is shown in the IB\_2.06 filter, and the other panels use Ks-band observations. Mon R2 IRS 3 shows the median $I_Q$ image because the Stokes $U$ component was not observed. The images of YLW 16A and Elia 2-21 present the first polarised light detections in the NACO observations. (*fig:gallery*)

</div>
<div id="div_fig2">

<img src="tmp_2404.02222/./plots/figure_Q_U.png" alt="Fig2" width="100%"/>

**Figure 2. -** Median Stokes $Q$ and $U$ images with different levels of $IP$ corrections for HD 135344B Ks-band observations. _From top to bottom_: $Q^+$ and $U^+$ components after equalising the ordinary and extra-ordinary fluxes, $Q$ and $U$ resulting from the double-difference method, $Q_\mathrm{IPS}$ and $U_\mathrm{IPS}$ after subtracting the median $IP$ within an annulus, and the crosstalk-corrected $Q_\mathrm{CTC}$ and $U_\mathrm{CTC}$ components where the reduced Stokes $U$ efficiency is accounted for. The characteristic butterfly pattern is visible in each panel, and the compasses show the orientation of the detector and the sky. (*fig:QU*)

</div>
<div id="div_fig3">

<img src="tmp_2404.02222/./plots/figure_PI_vs_m_J.png" alt="Fig7" width="100%"/>

**Figure 7. -** Polarised-to-stellar light contrast, $\delta_\mathrm{pol}$, plotted against the apparent J-band magnitude. The _right panel_ shows a zoomed-in view of the bright $m_\mathrm{J}$. The object names are listed along the top axes. The marker colours and symbols specify the observing filter and object type, respectively. Upper limits are shown when the stellar PSF was determined to be saturated. The error bars show the $3\sigma$ uncertainties. The grey shaded region shows the approximate magnitudes ($m_\mathrm{J}\gtrsim10$) inaccessible by the SPHERE AO system. (*fig:disk_contrast_vs_m_J*)

</div><div id="qrcode"><img src=https://api.qrserver.com/v1/create-qr-code/?size=100x100&data="https://arxiv.org/abs/2404.02222"></div>

<div class="macros" style="visibility:hidden;">
$\newcommand{\ensuremath}{}$
$\newcommand{\xspace}{}$
$\newcommand{\object}[1]{\texttt{#1}}$
$\newcommand{\farcs}{{.}''}$
$\newcommand{\farcm}{{.}'}$
$\newcommand{\arcsec}{''}$
$\newcommand{\arcmin}{'}$
$\newcommand{\ion}[2]{#1#2}$
$\newcommand{\textsc}[1]{\textrm{#1}}$
$\newcommand{\hl}[1]{\textrm{#1}}$
$\newcommand{\footnote}[1]{}$
$\newcommand{\massrate}{M_{\odot} yr^{-1}}$
$\newcommand{\hi}{H\textsc{i}}$
$\newcommand{\hii}{H\textsc{ii}}$
$\newcommand{\msun}{ M_\odot}$
$\newcommand{\lsun}{ L_\odot}$
$\newcommand{\kms}{km s^{-1}}$
$\newcommand{\jybeam}{Jy beam^{-1}}$
$\newcommand{\mjybeam}{mJy beam^{-1}}$
$\newcommand{\hmole}{H_2}$
$\newcommand{◦ee}{^{\circ}}$
$\newcommand{\parcsec}{\mbox{.\!\!\arcsec}}$
$\newcommand{\ssstyle}{\scriptscriptstyle}$
$\newcommand{\htco}{H_2CO}$
$\newcommand{\arraystretch}{1.8}$
$\newcommand{\arraystretch}{1.5}$
$\newcommand{\arraystretch}{1.5}$
$\newcommand{\arraystretch}{1.5}$</div>



<div id="title">

# The ALMA-QUARKS Survey: II. the ACA 1.3 mm continuum source catalog and the assembly of dense gas in massive star-forming clumps

</div>
<div id="comments">

[![arXiv](https://img.shields.io/badge/arXiv-2404.02275-b31b1b.svg)](https://arxiv.org/abs/2404.02275)<mark>Appeared on: 2024-04-04</mark> -  _24 pages, 7 figures. Accepted for publication in Research in Astronomy and Astrophysics. QUARKS atlas link: this https URL_

</div>
<div id="authors">

F. X. (许峰玮), et al. -- incl., <mark>S. Li</mark>

</div>
<div id="abstract">

**Abstract:** Leveraging the high resolution, sensitivity, and wide frequency coverage of the Atacama Large Millimeter/submillimeter Array (ALMA), the QUARKS survey, standing for `Querying Underlying mechanisms of massive star formation with ALMA-Resolved gas Kinematics and Structures', is observing 139 massive star-forming clumps at ALMA Band 6 ( $\lambda\sim$ 1.3 mm). This paper introduces the Atacama Compact Array (ACA) 7-m data of the QUARKS survey, describing the ACA observations and data reduction. Combining multi-wavelength data, we provide the first edition of QUARKS atlas, offering insights into the multiscale and multiphase interstellar medium (ISM) in high-mass star formation. The ACA 1.3 mm catalog includes 207 continuum sources that are called ACA sources. Their gas kinetic temperatures are estimated using three formaldehyde transitions with a non-LTE radiation transfer model, and the mass and density are derived from a dust emission model. The ACA sources are massive (16--84 percentile values of 6--160 $\msun$ ), gravity-dominated ( $M\propto R^{1.1}$ ) fragments within massive clumps, with supersonic turbulence ( $\mathcal{M}>1$ ) and embedded star-forming protoclusters. We find a linear correlation between the masses of the fragments and the massive clumps, with a ratio of 6 \% between the two. When considering the fragments as representative of dense gas, the ratio indicates a dense gas fraction (DGF) of 6 \% , although with a wide scatter ranging from 1 \% to 10 \% . If we consider the QUARKS massive clumps to be what is observed at various scales, then the size-independent DGF indicates a self-similar fragmentation or collapsing mode in protocluster formation. With the ACA data over four orders of magnitude of luminosity-to-mass ratio ( $L/M$ ), we find that the DGF increases significantly with $L/M$ , which indicates clump evolutionary stage. We observed a limited fragmentation at the subclump scale, which can be explained by dynamic global collapse process.

</div>

<div id="div_fig1">

<img src="tmp_2404.02275/./stats.png" alt="Fig3" width="100%"/>

**Figure 3. -** Histograms of (a) kinetic temperature $T_{\rm kin}$, (b) source mass $M_{\rm source}$, (c) surface density, (d) Mach number $\mathcal{M}$, (e) source size $R_{\rm dec}$, and (f) volume density $n_{\rm H_2}$.
 (*fig:stats*)

</div>
<div id="div_fig2">

<img src="tmp_2404.02275/./DGF_Rclump.png" alt="Fig6" width="100%"/>

**Figure 6. -** Dense gas fraction (DGF) versus clump radius ($R_{\rm clump}$) of (a) the QUARKS sample and (b) the ASHES sample. The hexagons indicate the probability distributions of data points. The colored stars show the median values with errorbars in the $R_{\rm clump}$ bins.
 (*fig:DGF_Rclump*)

</div>
<div id="div_fig3">

<img src="tmp_2404.02275/./I13291-6229_atlas.png" alt="Fig1" width="100%"/>

**Figure 1. -** QUARKS multi-band atlas of representative source I13291-6229. _Left panel_: the background is the Spitzer 3.6/4.5/8 $\mu$m pseudo color map, overlaid with Herschel 500 $\mu$m (white contours) and MeerKAT Galactic Plane Survey (MGPS) 1.28 GHz data (yellow contours). The red circle indicates the field of view ($\sim80$\arcsec) of the combined ATOMS 12m + ACA 3 mm continuum data. _Middle panel_: the background is the ATOMS combined 3 mm continuum data, linearly scaled from $-9\sigma$ to $9\sigma$ and logarithmically scaled from $9\sigma$ to peak intensity. The source IDs are in order from North to South, and the nomenclature follows "\#Field\_ATOMS\#ID". The green dashed circle(s) indicate the QUARKS pointing(s), with size of 7-m primary beam response of 0.2. The ATOMS beam size is shown on the bottom left. _Right panel_: the background is the QUARKS ACA 1.3 mm continuum data, linearly scaled from $-3\sigma$ to peak intensity. The continuum sources are shown as red ellipses (SNR$>9$) and yellow ellipses (SNR$<9$). The source IDs are in order from North to South and the nomenclature follows "\#Field\_ACA\#ID". The QUARKS beam size is shown in the bottom left. The scale bars in three panels are shown on the bottom right. (*fig:atlas_example*)

</div><div id="qrcode"><img src=https://api.qrserver.com/v1/create-qr-code/?size=100x100&data="https://arxiv.org/abs/2404.02275"></div>

# Create HTML index

In [9]:
from datetime import datetime, timedelta, timezone
from glob import glob
import os

files = glob('_build/html/*.md')
days = 7
now = datetime.today()
res = []
for fk in files:
    stat_result = os.stat(fk).st_ctime
    modified = datetime.fromtimestamp(stat_result, tz=timezone.utc).replace(tzinfo=None)
    delta = now.today() - modified
    if delta <= timedelta(days=days):
        res.append((delta.seconds, fk))
res = [k[1] for k in reversed(sorted(res, key=lambda x:x[1]))]
npub = len(res)
print(len(res), f" publications files modified in the last {days:d} days.")
# [ print('\t', k) for k in res ];

396  publications files modified in the last 7 days.


In [10]:
import datetime
from glob import glob

def get_last_n_days(lst, days=1):
    """ Get the documents from the last n days """
    sorted_lst = sorted(lst, key=lambda x: x[1], reverse=True)
    for fname, date in sorted_lst:
        if date >= str(datetime.date.today() - datetime.timedelta(days=days)):
            yield fname

def extract_appearance_dates(lst_file):
    dates = []

    def get_date(line):
        return line\
            .split('Appeared on:')[-1]\
            .split('</mark>')[0].strip()

    for fname in lst:
        with open(fname, 'r') as f:
            found_date = False
            for line in f:
                if not found_date:
                    if "Appeared on" in line:
                        found_date = True
                        dates.append((fname, get_date(line)))
                else:
                    break
    return dates

from glob import glob
lst = glob('_build/html/*md')
days = 7
dates = extract_appearance_dates(lst)
res = list(get_last_n_days(dates, days))
npub = len(res)
print(len(res), f" publications in the last {days:d} days.")

3  publications in the last 7 days.


In [11]:
def create_carousel(npub=4):
    """ Generate the HTML code for a carousel with `npub` slides """
    carousel = ["""  <div class="carousel" """,
                """       data-flickity='{ "autoPlay": 10000, "adaptiveHeight": true, "resize": true, "wrapAround": true, "pauseAutoPlayOnHover": true, "groupCells": 1 }' id="asyncTypeset">"""
                ]
    
    item_str = """    <div class="carousel-cell"> <div id="slide{k}" class="md_view">Content {k}</div> </div>"""
    for k in range(1, npub + 1):
        carousel.append(item_str.format(k=k))
    carousel.append("  </div>")
    return '\n'.join(carousel)

def create_grid(npub=4):
    """ Generate the HTML code for a flat grid with `npub` slides """
    grid = ["""  <div class="grid"> """,
                ]
    
    item_str = """    <div class="grid-item"> <div id="slide{k}" class="md_view">Content {k}</div> </div>"""
    for k in range(1, npub + 1):
        grid.append(item_str.format(k=k))
    grid.append("  </div>")
    return '\n'.join(grid)

In [12]:
carousel = create_carousel(npub)
docs = ', '.join(['"{0:s}"'.format(k.split('/')[-1]) for k in res])
slides = ', '.join([f'"slide{k}"' for k in range(1, npub + 1)])

with open("daily_template.html", "r") as tpl:
    page = tpl.read()
    page = page.replace("{%-- carousel:s --%}", carousel)\
               .replace("{%-- suptitle:s --%}",  "7-day archives" )\
               .replace("{%-- docs:s --%}", docs)\
               .replace("{%-- slides:s --%}", slides)
    
with open("_build/html/index_7days.html", 'w') as fout:
    fout.write(page)

In [13]:
# redo for today
days = 1
res = list(get_last_n_days(dates, days))
npub = len(res)
print(len(res), f" publications in the last day.")

carousel = create_carousel(npub)
docs = ', '.join(['"{0:s}"'.format(k.split('/')[-1]) for k in res])
slides = ', '.join([f'"slide{k}"' for k in range(1, npub + 1)])

with open("daily_template.html", "r") as tpl:
    page = tpl.read()
    page = page.replace("{%-- carousel:s --%}", carousel)\
               .replace("{%-- suptitle:s --%}",  "Daily" )\
               .replace("{%-- docs:s --%}", docs)\
               .replace("{%-- slides:s --%}", slides)
    
# print(carousel, docs, slides)
# print(page)
with open("_build/html/index_daily.html", 'w') as fout:
    fout.write(page)

2  publications in the last day.


In [14]:
# Create the flat grid of the last N papers (fixed number regardless of dates)
from itertools import islice 

npub = 6
res = [k[0] for k in (islice(reversed(sorted(dates, key=lambda x: x[1])), 6))]
print(len(res), f" {npub} publications selected.")

grid = create_grid(npub)
docs = ', '.join(['"{0:s}"'.format(k.split('/')[-1]) for k in res])
slides = ', '.join([f'"slide{k}"' for k in range(1, npub + 1)])

with open("grid_template.html", "r") as tpl:
    page = tpl.read()
    page = page.replace("{%-- grid-content:s --%}", grid)\
               .replace("{%-- suptitle:s --%}",  f"Last {npub:,d} papers" )\
               .replace("{%-- docs:s --%}", docs)\
               .replace("{%-- slides:s --%}", slides)
    
# print(grid, docs, slides)
# print(page)
with open("_build/html/index_npub_grid.html", 'w') as fout:
    fout.write(page)

6  6 publications selected.
